機器學習之grid_search--引數優化
阿新 • • 發佈:2018-12-23
- 機器學習之grid_search–引數優化
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 10 14:37:14 2018

@author: muli
"""
# Import the submodule explicitly: a bare ``import scipy`` does not guarantee
# that ``scipy.stats`` has been loaded.
import scipy.stats
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split


def _load_digits_split():
    """Load the digits dataset and return a stratified 75/25 train/test split.

    :return: ``(X_train, X_test, y_train, y_test)``
    """
    digits = load_digits()
    return train_test_split(
        digits.data,
        digits.target,
        test_size=0.25,
        random_state=0,
        stratify=digits.target,
    )


def _report_search(clf, X_test, y_test, scores_title):
    """Print the outcome of a fitted parameter search.

    :param clf: a fitted ``GridSearchCV`` / ``RandomizedSearchCV`` instance
    :param X_test: held-out samples used for the final score and report
    :param y_test: held-out labels
    :param scores_title: heading printed above the per-candidate scores
    :return: None
    """
    print("Best parameters set found:", clf.best_params_)
    print(scores_title)
    # ``grid_scores_`` no longer exists in scikit-learn; the per-candidate
    # mean and standard deviation of the CV scores live in ``cv_results_``.
    results = clf.cv_results_
    candidates = zip(
        results["mean_test_score"],
        results["std_test_score"],
        results["params"],
    )
    for mean_score, std_score, params in candidates:
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, std_score * 2, params))

    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))


def test_GridSearchCV():
    """
    Demonstrate GridSearchCV with LogisticRegression as the classifier,
    tuning C, penalty and multi_class.

    :return: None
    """
    # Load the data.
    X_train, X_test, y_train, y_test = _load_digits_split()

    # Parameter grid: liblinear is searched with both l1 and l2 penalties,
    # lbfgs with l2 only.
    # NOTE(review): recent scikit-learn releases appear to deprecate the
    # ``multi_class`` parameter -- confirm against the installed version.
    c_values = [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]
    tuned_parameters = [
        {
            'penalty': ['l1', 'l2'],
            'C': c_values,
            'solver': ['liblinear'],
            'multi_class': ['ovr'],
        },
        {
            'penalty': ['l2'],
            'C': c_values,
            'solver': ['lbfgs'],
            'multi_class': ['ovr', 'multinomial'],
        },
    ]
    clf = GridSearchCV(LogisticRegression(tol=1e-6), tuned_parameters, cv=10)
    clf.fit(X_train, y_train)
    _report_search(clf, X_test, y_test, "Grid scores:")


def test_RandomizedSearchCV():
    """
    Demonstrate RandomizedSearchCV with LogisticRegression as the classifier,
    tuning C and multi_class. C is sampled from an exponential distribution.

    :return: None
    """
    # Load the data.
    X_train, X_test, y_train, y_test = _load_digits_split()

    # Parameter distributions to sample from.
    tuned_parameters = {
        'C': scipy.stats.expon(scale=100),  # exponential distribution
        'multi_class': ['ovr', 'multinomial'],
    }
    clf = RandomizedSearchCV(
        LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6),
        tuned_parameters,
        cv=10,
        scoring="accuracy",
        n_iter=100,
    )
    clf.fit(X_train, y_train)
    _report_search(clf, X_test, y_test, "Randomized Grid scores:")


if __name__ == '__main__':
    # Call test_GridSearchCV() here instead to run the exhaustive grid search.
    test_RandomizedSearchCV()