塞了一堆特徵，模型的準確率並沒提高多少
阿新 • • 發佈:2022-12-05
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.fft as fft
from sklearn import tree
# Load the training table.  The ID column is dropped; of what remains, every
# column but the last is used as a signal value (the last column stays in
# `nmp` for later use).
df = pd.read_csv('train.csv')
df = df.drop(['ID'], axis=1)
nmp = df.to_numpy()
feature = nmp[:, :-1]

# Re-wrap the signal matrix so the pandas row-wise reductions can be used.
df = pd.DataFrame(feature)


def _as_column(per_row):
    """Turn a per-row pandas reduction into an (n_rows, 1) NumPy column."""
    return np.reshape(np.array(per_row), (-1, 1))


# Shape statistics of each raw row.
sk = _as_column(df.skew(axis=1))
Q25 = _as_column(df.quantile(q=0.25, axis=1))
Q75 = _as_column(df.quantile(q=0.75, axis=1))

# First difference taken *within* each row.  DataFrame.diff defaults to
# axis=0, which would subtract the previous row (a different sample) and
# leave row 0 entirely NaN; axis=1 differences consecutive values of the same
# row instead.  The leading NaN column this produces is skipped by the pandas
# reductions below (skipna is their default).
_row_diff = df.diff(1, axis=1)

dmax = _as_column(_row_diff.max(axis=1))
dmin = _as_column(_row_diff.min(axis=1))
range_diff1 = dmax - dmin
dvar = _as_column(_row_diff.var(axis=1))
dstd = _as_column(_row_diff.std(axis=1))
dmean = _as_column(_row_diff.mean(axis=1))
dmedia = _as_column(_row_diff.median(axis=1))
dsk = _as_column(_row_diff.skew(axis=1))
dQ25 = _as_column(_row_diff.quantile(q=0.25, axis=1))
dQ75 = _as_column(_row_diff.quantile(q=0.75, axis=1))
dk = _as_column(_row_diff.kurtosis(axis=1))
# Split the table into the signal matrix and the last column, used as labels.
feature = nmp[:, :-1]
label = nmp[:, -1]  # original note: (210,240) - presumably the shape of `feature`; TODO confirm

# Per-row summary statistics, each stored as an (n_rows, 1) column.
# NOTE: `min` and `max` shadow the Python builtins from here on; the names
# are kept because the feature concatenation further down refers to them.
min = np.reshape(feature.min(-1), (-1, 1))
max = np.reshape(feature.max(-1), (-1, 1))
# Range as max - min (the operands were swapped, giving a non-positive
# value that disagreed with how range_diff1 is built from dmax - dmin).
ra = max - min
var = np.reshape(np.var(feature, axis=1), (-1, 1))
std = np.reshape(np.std(feature, axis=1), (-1, 1))
mean = np.reshape(np.mean(feature, axis=1), (-1, 1))
media = np.reshape(np.median(feature, axis=1), (-1, 1))
# Amplitude spectrum of every row: FFT along the last axis, magnitude, then
# scaled by 2/N.  N is read from the data instead of being hard-coded to 240
# so the scaling stays correct for rows of any length.
# NOTE: the full (two-sided) FFT output is kept, one value per input column.
_n_points = feature.shape[1]
feature1 = torch.fft.fft(torch.Tensor(feature))
feature1 = torch.abs(feature1) / _n_points * 2
feature1 = feature1.detach().numpy()

# Re-wrap as a DataFrame so the pandas row-wise reductions can be applied.
df = pd.DataFrame(feature1)
# Row-wise shape statistics of the frame currently held in `df`, each
# stored as an (n_rows, 1) column.
sk3 = np.reshape(np.array(df.skew(axis=1)), (-1, 1))
Q253 = np.reshape(np.array(df.quantile(q=0.25, axis=1)), (-1, 1))
Q753 = np.reshape(np.array(df.quantile(q=0.75, axis=1)), (-1, 1))

# First difference between neighbouring columns of the same row.
# DataFrame.diff defaults to axis=0, which would subtract the previous row
# (another sample) and leave row 0 all NaN, so axis=1 is passed explicitly.
# The leading NaN column is skipped by the reductions (skipna default).
_spec_diff = df.diff(1, axis=1)

dmax3 = np.reshape(np.array(_spec_diff.max(axis=1)), (-1, 1))
dmin3 = np.reshape(np.array(_spec_diff.min(axis=1)), (-1, 1))
# Use this block's own extrema; dmax/dmin belong to the time-domain signal.
range_diff3 = dmax3 - dmin3
dvar3 = np.reshape(np.array(_spec_diff.var(axis=1)), (-1, 1))
dstd3 = np.reshape(np.array(_spec_diff.std(axis=1)), (-1, 1))
dmean3 = np.reshape(np.array(_spec_diff.mean(axis=1)), (-1, 1))
dmedia3 = np.reshape(np.array(_spec_diff.median(axis=1)), (-1, 1))
dsk3 = np.reshape(np.array(_spec_diff.skew(axis=1)), (-1, 1))
dQ253 = np.reshape(np.array(_spec_diff.quantile(q=0.25, axis=1)), (-1, 1))
dQ753 = np.reshape(np.array(_spec_diff.quantile(q=0.75, axis=1)), (-1, 1))
dk3 = np.reshape(np.array(_spec_diff.kurtosis(axis=1)), (-1, 1))
# Per-row summary statistics of the spectrum matrix `feature1`.  keepdims
# keeps the reduced axis, so every result is already an (n_rows, 1) column.
min1 = feature1.min(axis=-1, keepdims=True)
max1 = feature1.max(axis=-1, keepdims=True)
var1 = feature1.var(axis=1, keepdims=True)
std1 = feature1.std(axis=1, keepdims=True)
mean1 = feature1.mean(axis=1, keepdims=True)
media1 = np.median(feature1, axis=1, keepdims=True)
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` has been loaded.
import scipy.stats

# Kurtosis of every raw row, as an (n_rows, 1) column.
kur = np.reshape(scipy.stats.kurtosis(feature, axis=1), (-1, 1))

# Assemble the engineered feature matrix by stacking all blocks column-wise.
# The column order is kept exactly as before.
ne = np.concatenate(
    (
        # statistics of the spectrum and of its first difference
        sk3, Q253, Q753, dmax3, dmin3, dk3, range_diff3, dvar3, dstd3,
        dmean3, dsk3, dQ253, dQ753, dmedia3,
        # statistics of the first difference of the raw rows
        dk, dsk, dQ25, dQ75, dmean, dmedia, dvar, dstd, range_diff1,
        dmax, dmin,
        # statistics of the raw rows, then the raw rows themselves
        Q25, Q75, sk, kur, feature, ra, min, max, var, std, mean, media,
        # the spectrum itself and its summary statistics
        feature1, min1, max1, var1, std1, mean1, media1,
    ),
    axis=1,
)
from sklearn.model_selection import cross_val_score
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Score the model on the engineered matrix `ne`.  Passing the raw `feature`
# matrix here meant none of the extra features ever reached the classifier.
# GradientBoostingClassifier rejects NaN/inf inputs, so non-finite statistics
# (e.g. from degenerate rows) are replaced by 0 instead of aborting the run.
train_x = np.nan_to_num(
    np.asarray(ne, dtype=float), nan=0.0, posinf=0.0, neginf=0.0
)

# Exhaustive sweep over ensemble size (5*i + 1 trees) and tree depth (j + 1),
# each candidate scored with 10-fold cross-validation.
for i in range(300):
    for j in range(20):
        # Alternatives that were tried earlier:
        # clf = tree.DecisionTreeClassifier(criterion='gini',random_state=0,max_depth=i)
        # clf=RandomForestClassifier(criterion='gini',n_estimators=10*i+1,max_depth=j+1)
        # criterion='mse' was deprecated in scikit-learn 1.0 and removed in
        # 1.2; 'friedman_mse' (the library default) is accepted by old and
        # new releases alike.
        clf = GradientBoostingClassifier(
            criterion='friedman_mse',
            n_estimators=5 * i + 1,
            max_depth=j + 1,
        )
        scores = cross_val_score(clf, train_x, label, cv=10)
        mean_score = scores.mean()
        print(i, j, mean_score)
        # Marker line printed whenever a configuration reaches 90 % accuracy.
        if mean_score >= 0.9:
            print('123')
# NOTE: a verbatim second copy of the feature-construction and model-search
# statements used to follow here.  It only recomputed the same arrays and
# re-ran the entire cross-validation sweep a second time, so it was removed.