
Decision tree baseline using features with correlation coefficient above 0.3

The model's test-set accuracy rose to 0.74, a modest improvement, which suggests that selecting features by their correlation coefficient with the label is a sound choice.

```python
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, cross_val_score

# Load the training data and build FFT-magnitude features
df = pd.read_csv('train.csv')
df = df.drop(['ID'], axis=1)
df = df.to_numpy()
feature = np.abs(np.fft.fft(df[:, :-1]))                               # spectrum magnitude of each signal
feature = np.concatenate((feature, df[:, -1].reshape(-1, 1)), axis=1)  # append the label as the last column

# Keep only the columns whose correlation with the label (column 240) exceeds 0.3
train = pd.DataFrame(feature)
heat = train.corr()
fe = heat.index[abs(heat[240]) > 0.3]   # includes column 240 itself, so the label stays last
train = train.to_numpy()
train = train[:, fe]

# 5-fold cross-validation over max_depth = 1..30
kf = KFold(n_splits=5, shuffle=False)
for k in range(30):
    train_acc = 0.0
    test_acc = 0.0
    n_folds = 0
    for train_index, test_index in kf.split(train):
        n_folds += 1
        tfeature = train[train_index, :-1]
        label = train[train_index, -1]
        clf = tree.DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=k + 1)
        clf.fit(tfeature, label)
        train_acc += accuracy_score(label, clf.predict(tfeature))
        test_acc += accuracy_score(train[test_index, -1], clf.predict(train[test_index, :-1]))
    clf1 = tree.DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=k + 1)
    scores = cross_val_score(clf1, train[:, :-1], train[:, -1], cv=5)
    # depth, mean train accuracy, mean validation accuracy, cross_val_score mean
    print(k + 1, train_acc / n_folds, test_acc / n_folds, scores.mean())

# Refit the chosen depth (max_depth = 5) on the full training set
clf1 = tree.DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=5)
clf1.fit(train[:, :-1], train[:, -1])

# Apply the same transform to the test set and write the submission
df1 = pd.read_csv('test.csv')
df1 = df1.drop(['ID'], axis=1)
df1 = df1.to_numpy()
feature = np.abs(np.fft.fft(df1))
feature = feature[:, fe[:-1]]           # drop the label index (240) from the selected columns

out = pd.DataFrame(clf1.predict(feature), columns=['CLASS'])
out['ID'] = np.arange(210, 210 + out.shape[0])   # test IDs start at 210
out[['ID', 'CLASS']].to_csv('out3.csv', index=False)
```
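If the depth search needs to be rerun, the manual K-fold loop above can be folded into GridSearchCV. The sketch below is a minimal alternative, not the original pipeline: it assumes the same train.csv layout (signal columns followed by one label column) and re-implements the 0.3 correlation cutoff with np.corrcoef instead of a pandas correlation matrix.

```python
# A minimal sketch of an alternative depth search, assuming the same train.csv
# layout as above. GridSearchCV is a swapped-in replacement for the manual
# K-fold loop, not the author's original method.
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

df = pd.read_csv('train.csv').drop(['ID'], axis=1).to_numpy()
X = np.abs(np.fft.fft(df[:, :-1]))   # FFT-magnitude features, as above
y = df[:, -1]

# Correlation-threshold selection, same 0.3 cutoff as the baseline;
# only feature columns are scored, so the label never selects itself.
corr = np.array([abs(np.corrcoef(X[:, j], y)[0, 1]) for j in range(X.shape[1])])
X_sel = X[:, corr > 0.3]

search = GridSearchCV(
    DecisionTreeClassifier(criterion='entropy', random_state=0),
    param_grid={'max_depth': range(1, 31)},
    cv=5,
)
search.fit(X_sel, y)
print(search.best_params_, search.best_score_)
```

One caveat worth noting: computing the correlations on the full training set before cross-validating lets information from the validation folds leak into the feature selection, so the reported CV scores are mildly optimistic. Moving the selection inside each fold (for example as a custom transformer step in a Pipeline) avoids this.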