機器學習常用模板—(XGB)
阿新 • • 發佈:2019-01-29
XGB分為原生介面和Sklearn介面,下面是基於Sklearn的介面
基於Sklearn介面
分類
from sklearn.datasets import load_iris import xgboost as xgb from xgboost import plot_importance from matplotlib import pyplot as plt from sklearn.model_selection import train_test_split # read in the iris data iris = load_iris() X = iris.data y = iris.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) # 訓練模型 model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=160, silent=True, objective='multi:softmax') model.fit(X_train, y_train) # 對測試集進行預測 ans = model.predict(X_test) # 計算準確率 cnt1 = 0 cnt2 = 0 for i in range(len(y_test)): if ans[i] == y_test[i]: cnt1 += 1 else: cnt2 += 1 print("Accuracy: %.2f %% " % (100 * cnt1 / (cnt1 + cnt2))) # 顯示重要特徵 plot_importance(model) plt.show()
迴歸
注:轉自與知乎 作者:章華燕import xgboost as xgb from xgboost import plot_importance from matplotlib import pyplot as plt from sklearn.model_selection import train_test_split # 讀取檔案原始資料 data = [] labels = [] labels2 = [] with open("lppz5.csv", encoding='UTF-8') as fileObject: for line in fileObject: line_split = line.split(',') data.append(line_split[10:]) labels.append(line_split[8]) X = [] for row in data: row = [float(x) for x in row] X.append(row) y = [float(x) for x in labels] # XGBoost訓練過程 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) model = xgb.XGBRegressor(max_depth=5, learning_rate=0.1, n_estimators=160, silent=True, objective='reg:gamma') model.fit(X_train, y_train) # 對測試集進行預測 ans = model.predict(X_test) # 顯示重要特徵 plot_importance(model) plt.show()