1. 程式人生 > 用 Python 切分 CSV 訓練集測試集

用python切分csv訓練集測試集

sklearn庫中訓練集測試集的切分

import csv

from numpy import genfromtxt
from sklearn import neighbors

# k-nearest-neighbours classifier built with the library's default arguments
# (pass n_neighbors=... here to choose the number of neighbours explicitly).
knn = neighbors.KNeighborsClassifier()

# Read just the header row of the CSV file.
# The file is opened read-only ('r' rather than 'r+', since nothing is written)
# and inside a ``with`` block so the handle is closed once the header has been
# read. ``newline=''`` is what the csv module asks for when it is handed a
# file object.
with open('list.csv', 'r', newline='') as a:
    reader = csv.reader(a)  # yields the file's contents one row at a time
    headers = next(reader)  # first row of the file: the column titles

在這裡插入圖片描述
原資料
在這裡插入圖片描述

# Path of the CSV file that both arrays below are loaded from.
dataPath = r"list.csv"

# Feature matrix: columns 1 through 7 of every row after the first (header) line.
featureList = genfromtxt(
    dataPath,
    delimiter=',',
    skip_header=1,
    usecols=tuple(range(1, 8)),
)

# Label vector: column 0 of the same rows.
labelList = genfromtxt(dataPath, delimiter=',', skip_header=1, usecols=(0))

# x / y are full-range slices of the arrays loaded above.
x = featureList[:]
y = labelList[:]

print(len(x))
print(x)
print(y)

在這裡插入圖片描述
from sklearn.model_selection import train_test_split  # splits the dataset

# Split x / y into train and test parts, with test_size=0.25 for the test part.
_parts = train_test_split(x, y, test_size=0.25)
X_train, X_test, y_train, y_test = _parts

print(X_train)
在這裡插入圖片描述