1. 程式人生 > >機器學習常用模板—(XGB)

機器學習常用模板—(XGB)

XGB分為原生介面和Sklearn介面,下面是基於Sklearn的介面

基於Sklearn介面

分類


from sklearn.datasets import load_iris
import xgboost as xgb
from xgboost import plot_importance
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

# read in the iris data
iris = load_iris()

X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 訓練模型
model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=160, silent=True, objective='multi:softmax')
model.fit(X_train, y_train)

# 對測試集進行預測
ans = model.predict(X_test)

# 計算準確率
cnt1 = 0
cnt2 = 0
for i in range(len(y_test)):
    if ans[i] == y_test[i]:
        cnt1 += 1
    else:
        cnt2 += 1

print("Accuracy: %.2f %% " % (100 * cnt1 / (cnt1 + cnt2)))

# 顯示重要特徵
plot_importance(model)
plt.show()

迴歸


import xgboost as xgb
from xgboost import plot_importance
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

# 讀取檔案原始資料
data = []
labels = []
labels2 = []
with open("lppz5.csv", encoding='UTF-8') as fileObject:
    for line in fileObject:
        line_split = line.split(',')
        data.append(line_split[10:])
        labels.append(line_split[8])

X = []
for row in data:
    row = [float(x) for x in row]
    X.append(row)

y = [float(x) for x in labels]

# XGBoost訓練過程
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = xgb.XGBRegressor(max_depth=5, learning_rate=0.1, n_estimators=160, silent=True, objective='reg:gamma')
model.fit(X_train, y_train)

# 對測試集進行預測
ans = model.predict(X_test)

# 顯示重要特徵
plot_importance(model)
plt.show()
注:轉自與知乎 作者:章華燕