Summary of Common sklearn Models and Grid Search (1) --- Code

The following summarizes how to call some commonly used models. The specific hyperparameters are not listed exhaustively here; consult the API documentation for the details.

Most of these methods come in both a Classifier (classification) and a Regressor (regression) variant.

Common utilities:

from sklearn.model_selection import train_test_split
from sklearn import metrics
import warnings
warnings.filterwarnings("ignore")
model.predict_proba([[0.9]])  # output class probabilities for a sample
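
For context, here is a minimal sketch of how these utilities fit together end to end (X and y are a hypothetical feature matrix and label vector, not part of the original snippet; any model from the sections below works):

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)       # any fitted sklearn estimator
pre = model.predict(X_test)
print(metrics.accuracy_score(y_test, pre))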

Model evaluation

acc = metrics.accuracy_score(y_test, pre)
print(acc)
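
Accuracy alone can be misleading on imbalanced data; a quick sketch of two other evaluators from the same metrics module:

print(metrics.classification_report(y_test, pre))  # per-class precision/recall/F1
print(metrics.confusion_matrix(y_test, pre))       # rows = true labels, columns = predictions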

KNN

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(train_x, train_y)
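
As noted above, most of these estimators have a regression counterpart with the same fit/predict interface; KNN, for example (a sketch):

from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors=5)  # 5 is also the default
model.fit(train_x, train_y)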

Naive Bayes

from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB(alpha=0.01)
model.fit(train_x, train_y)
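
Note that MultinomialNB expects non-negative features such as word counts or TF-IDF values; for continuous features, GaussianNB is the usual choice (a sketch):

from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(train_x, train_y)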

Logistic Regression

from sklearn.linear_model import LogisticRegression
model = LogisticRegression(penalty='l2')
model.fit(train_x, train_y)
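
Despite the name, LogisticRegression is a classifier. If plain linear regression is what you need, the interface is the same (a sketch; train_y must be continuous here):

from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(train_x, train_y)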

GBDT

from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier(n_estimators=200)
model.fit(train_x, train_y)

Random Forest

from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=8)
model.fit(train_x, train_y)
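
After fitting, tree ensembles expose feature importances, which make a handy sanity check (a sketch; feature_names is a hypothetical list of column names):

for name, imp in zip(feature_names, model.feature_importances_):
    print(name, imp)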

Support Vector Machine (SVM)

from sklearn.svm import SVC
model = SVC(kernel='rbf', probability=True)
model.fit(train_x, train_y)
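
probability=True enables predict_proba via an internal cross-validation (Platt scaling), which slows training noticeably. If only a ranking score is needed, decision_function avoids that cost (a sketch; test_x is hypothetical):

model = SVC(kernel='rbf')                  # no probability=True needed
model.fit(train_x, train_y)
scores = model.decision_function(test_x)   # signed distance from the separating hyperplane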

XGBoost

import xgboost as xgb
from xgboost.sklearn import XGBClassifier

  1. XGBoost model training

xgb1 = XGBClassifier(
    learning_rate=0.05,
    n_estimators=2800,
    max_depth=5,
    min_child_weight=1,
    gamma=0.21,
    subsample=0.8,
    colsample_bytree=0.75,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    seed=27)
print("fiting")
xgb1.fit(X_train,y_train)
pre=xgb1.predict(X_test)
print(pre)
print(y_test)
acc=metrics.accuracy_score(y_test,pre)
print(acc)
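
With n_estimators=2800 it is easy to overfit; a common safeguard is early stopping on a held-out set. A hedged sketch (this fit-time early_stopping_rounds argument exists in older xgboost versions; newer versions take it in the constructor instead):

xgb1.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         early_stopping_rounds=50,  # stop if no improvement for 50 rounds
         verbose=False)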

  2. XGBoost hyperparameter search

# Stage 1: coarse search over tree structure
param_test1 = {
    'max_depth': range(3, 7, 2),
    'min_child_weight': range(1, 6, 2)
}
# Stage 2: refine around the best values from stage 1
param_test2 = {
    'max_depth': [4, 5, 6],
    'min_child_weight': [1, 2, 3]
}
param_test2b = {
    'min_child_weight': [6, 8, 10, 12]
}
# Stage 3: gamma, coarse ([0.0, 0.1, 0.2, 0.3, 0.4]) then fine
param_test3 = {
    'gamma': [i / 10.0 for i in range(0, 5)]
}
param_test3b = {
    'gamma': [0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25]
}
# Stage 4: sampling ratios, coarse then fine
param_test4 = {
    'subsample': [i / 10.0 for i in range(6, 10)],
    'colsample_bytree': [i / 10.0 for i in range(6, 10)]
}
param_test5 = {
    'subsample': [i / 100.0 for i in range(75, 90, 5)],
    'colsample_bytree': [i / 100.0 for i in range(75, 90, 5)]
}
# Stage 5: L1 regularization, coarse then fine
param_test6 = {
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
}
param_test7 = {
    'reg_alpha': [0, 0.001, 0.005, 0.01, 0.05]
}
# Stage 6: random seed and learning rate
param_test8 = {
    'seed': [24, 25, 26, 27, 28]
}
param_test9 = {
    'learning_rate': [0.04, 0.05, 0.06]
}
from sklearn.model_selection import GridSearchCV

gsearch1 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.06,
        n_estimators=2500,
        max_depth=6,
        min_child_weight=1,
        gamma=0.2,
        subsample=0.8,
        colsample_bytree=0.75,
        objective='binary:logistic',
        nthread=2,
        scale_pos_weight=1,
        seed=25),
    param_grid=param_test9, cv=5, verbose=5)
gsearch1.fit(X_train, y_train)
print(gsearch1.best_params_, gsearch1.best_score_)
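
Each param_test above is meant to be searched in turn, folding the winners back into the base estimator before the next stage. A sketch of rebuilding the final model from a search result (base_params is a hypothetical dict holding the fixed arguments shown above):

best = dict(base_params, **gsearch1.best_params_)  # search results override the defaults
xgb_final = XGBClassifier(**best)
xgb_final.fit(X_train, y_train)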

Grid search for hyperparameters

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
model = SVC(kernel='rbf', probability=True)
param_grid = {'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000], 'gamma': [0.001, 0.0001]}
grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
grid_search.fit(train_x, train_y)
best_parameters = grid_search.best_estimator_.get_params()
for para, val in list(best_parameters.items()):
    print(para, val)
model = SVC(kernel='rbf', C=best_parameters['C'], gamma=best_parameters['gamma'], probability=True)
model.fit(train_x, train_y)
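
Note that GridSearchCV refits the best configuration on the full training set by default (refit=True), so the manual re-training above is optional; the fitted winner is available directly:

best_model = grid_search.best_estimator_  # already refit on train_x, train_y
pre = best_model.predict(test_x)          # test_x is hypothetical here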