Python 一些程式碼總結
阿新 • • 發佈:2018-12-23
1. 繪製混淆矩陣
import itertools

import matplotlib.pyplot as plt
import numpy as np  # required by plot_confusion_matrix (np.arange)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
# sklearn.cross_validation was removed from scikit-learn; train_test_split
# now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split


def plot_confusion_matrix(cm, classes,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array exposing ``.shape`` and ``.max()`` (for example the
            result of ``sklearn.metrics.confusion_matrix``). Rows are drawn
            along the y axis ("True label"), columns along the x axis
            ("Predicted label").
        classes: Sequence of tick labels, one per class; used for both axes.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the heat map.

    The function only draws; call ``plt.show()`` (or save the figure)
    afterwards to display the result.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=0)
    plt.yticks(tick_marks, classes)

    # Cells darker than the midpoint get white text so the count stays legible.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # plt.text takes (x, y): column index first, then row index.
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


# NOTE: gender_train (features) and genderlabel (targets) are not defined in
# this snippet; they must be prepared by earlier code.
X_train, X_test, y_train, y_test = train_test_split(
    gender_train, genderlabel, test_size=0.2, random_state=0)

LR_model = LogisticRegression()
LR_model.fit(X_train, y_train)
y_pred = LR_model.predict(X_test)
print(LR_model.score(X_test, y_test))

cnf_matrix = confusion_matrix(y_test, y_pred)

# Index the matrix as [true label, predicted label].
true_neg = cnf_matrix[0, 0]
false_pos = cnf_matrix[0, 1]
false_neg = cnf_matrix[1, 0]
true_pos = cnf_matrix[1, 1]

print("Recall metric in the testing dataset: ",
      true_pos / (false_neg + true_pos))
print("accuracy metric in the testing dataset: ",
      (true_pos + true_neg) / (true_neg + true_pos + false_neg + false_pos))

# Plot non-normalized confusion matrix
class_names = [0, 1]
plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      title='Gender-Confusion matrix')
plt.show()
2.網格搜尋
import numpy as np  # required below for np.sqrt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.decomposition import PCA
from sklearn.linear_model import Lasso
from mlxtend.regressor import StackingCVRegressor
from xgboost import XGBRegressor

# Tune the gradient boosting regressor (GBRT) with an exhaustive grid search.
param_grid = [
    {
        'n_estimators': [50, 100, 150],
        'max_features': [2, 4, 6, 8],
        'max_depth': [3, 5, 7],
    },
]

grbt_reg = GradientBoostingRegressor()
grid_search = GridSearchCV(grbt_reg, param_grid, cv=5,
                           scoring='neg_mean_squared_error',
                           return_train_score=True)

# NOTE: x_train and y_train are not defined in this snippet; they must be
# prepared by earlier code. y_train is flattened with .ravel() before fitting.
grid_search.fit(x_train, y_train.ravel())

cvres = grid_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    # The scorer is 'neg_mean_squared_error', so negate the score before
    # taking the square root to report an RMSE per parameter combination.
    print(np.sqrt(-mean_score), params)

# A bare expression has no effect when run as a script, so print it.
print(grid_search.best_params_)
feature_importances = grid_search.best_estimator_.feature_importances_