1. 程式人生 > 機器學習之grid_search--引數優化

機器學習之grid_search--引數優化

  • 機器學習之grid_search--引數優化
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 10 14:37:14 2018

@author: muli
"""

from sklearn.datasets import load_digits
from sklearn.linear_model import  LogisticRegression
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import scipy

def test_GridSearchCV():
    """
    Demonstrate GridSearchCV with a LogisticRegression classifier.

    Loads the digits dataset, searches over C, penalty, solver and
    multi_class with 10-fold cross-validation on the training split, then
    prints the best parameters, the per-candidate CV scores, the test-set
    score and a classification report.

    :return: None
    """
    # Load the data and hold out a stratified 25% test split.
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.25,
        random_state=0, stratify=digits.target)

    # Parameter search. Two sub-grids are used so that each solver is only
    # combined with the penalty / multi_class values listed next to it.
    # NOTE(review): recent scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version.
    tuned_parameters = [
        {'penalty': ['l1', 'l2'],
         'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100],
         'solver': ['liblinear'],
         'multi_class': ['ovr']},

        {'penalty': ['l2'],
         'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100],
         'solver': ['lbfgs'],
         'multi_class': ['ovr', 'multinomial']},
    ]
    clf = GridSearchCV(LogisticRegression(tol=1e-6), tuned_parameters, cv=10)
    clf.fit(X_train, y_train)
    print("Best parameters set found:", clf.best_params_)
    print("Grid scores:")
    # `grid_scores_` was removed in scikit-learn 0.20; `cv_results_` exposes
    # the same information as parallel arrays, one entry per candidate.
    results = clf.cv_results_
    for mean_score, std_score, params in zip(results['mean_test_score'],
                                             results['std_test_score'],
                                             results['params']):
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, std_score * 2, params))

    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))


def test_RandomizedSearchCV():
    """
    Demonstrate RandomizedSearchCV with a LogisticRegression classifier.

    Loads the digits dataset and samples 100 candidates, drawing C from an
    exponential distribution and multi_class from a fixed list. Each
    candidate is scored by accuracy with 10-fold cross-validation on the
    training split. Prints the best parameters, the per-candidate CV scores,
    the test-set score and a classification report.

    :return: None
    """
    # Import the submodule explicitly: a bare `import scipy` is not guaranteed
    # to make `scipy.stats` available as an attribute.
    from scipy import stats

    # Load the data and hold out a stratified 25% test split.
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.25,
        random_state=0, stratify=digits.target)

    # Parameter search space.
    # NOTE(review): recent scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version.
    tuned_parameters = {
        'C': stats.expon(scale=100),  # exponential distribution
        'multi_class': ['ovr', 'multinomial'],
    }
    clf = RandomizedSearchCV(
        LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6),
        tuned_parameters, cv=10, scoring="accuracy", n_iter=100)
    clf.fit(X_train, y_train)
    print("Best parameters set found:", clf.best_params_)
    print("Randomized Grid scores:")
    # `grid_scores_` was removed in scikit-learn 0.20; `cv_results_` exposes
    # the same information as parallel arrays, one entry per candidate.
    results = clf.cv_results_
    for mean_score, std_score, params in zip(results['mean_test_score'],
                                             results['std_test_score'],
                                             results['params']):
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, std_score * 2, params))

    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
 

if __name__ == "__main__":
    # Script entry point: only the randomized-search demo is executed.
    # The grid-search demo, test_GridSearchCV(), is left disabled here.
    test_RandomizedSearchCV()