Titanic模型整合
阿新 • 發佈:2019-01-08
1.使用隨機森林做預測並繪製ROC曲線
# 1. Predict with a random forest and plot the ROC curve.
import matplotlib.pyplot as plt
import numpy as np  # was missing in the original: `np.mean` below raised NameError
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import cross_val_score, train_test_split

# Feature matrix and target vector.
# NOTE(review): `train` is the preprocessed Titanic DataFrame built in earlier
# steps — presumably all listed columns are already numeric-encoded; verify.
X = train[['Pclass', 'Sex', 'SibSp', 'Parch', 'Cabin', 'Embarked', 'title',
           'isalone', 'Family', 'mother', 'person', 'ticket-same', 'age', 'fare']]
Y = train['Survived']

# Report 5-fold cross-validated accuracy. (The original called clf.fit(X, Y)
# first, but cross_val_score clones and refits the estimator itself, so that
# full-data fit was redundant and has been dropped.)
clf = RandomForestClassifier(n_estimators=10, max_depth=None,
                             min_samples_split=2, random_state=0)
scores = cross_val_score(clf, X, Y, cv=5)
print('正確率:', np.mean(scores), scores)

# Hold out 30% of the data, refit on the rest, and compute the ROC curve
# from the held-out predicted probabilities of the positive class.
train_x, test_x, train_label, test_label = train_test_split(
    X, Y, test_size=0.3, random_state=0)
clf.fit(train_x, train_label)
probas_ = clf.predict_proba(test_x)
fpr, tpr, thresholds = roc_curve(test_label, probas_[:, 1])
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.2f)' % (roc_auc))
# Diagonal reference line = performance of random guessing.
plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Roc-rf')
plt.legend(loc="lower right")
plt.show()
2.使用xgb做預測並繪製ROC曲線
# 2. Predict with XGBoost and plot the ROC curve.
import matplotlib.pyplot as plt
import numpy as np  # was missing in the original: `np.mean` below raised NameError
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import cross_val_score, train_test_split
from xgboost import XGBClassifier

# X and Y are the feature matrix / target built in the random-forest section.
# Report 5-fold cross-validated accuracy with default XGBoost parameters.
# (The original called clf.fit(X, Y) first, but cross_val_score clones and
# refits the estimator itself, so that full-data fit was redundant.)
clf = XGBClassifier()
scores = cross_val_score(clf, X, Y, cv=5)
print('正確率:', np.mean(scores), scores)

# Same 30% hold-out split (random_state=0) as the random-forest section,
# so the two ROC curves are computed on identical test data.
train_x, test_x, train_label, test_label = train_test_split(
    X, Y, test_size=0.3, random_state=0)
clf.fit(train_x, train_label)
probas_ = clf.predict_proba(test_x)
fpr, tpr, thresholds = roc_curve(test_label, probas_[:, 1])
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.2f)' % (roc_auc))
# Diagonal reference line = performance of random guessing.
plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Roc-xgb')
plt.legend(loc="lower right")
plt.show()
從ROC曲線來看xgb的效果要好一些。
參考資料:https://blog.csdn.net/u010454729/article/details/45098305