sklearn的快速使用之三(邏輯迴歸)
阿新 • 發佈:2018-11-19
import sys

from sklearn.feature_extraction.text import TfidfVectorizer
# Import from the public package path: the private module
# ``sklearn.linear_model.logistic`` was deprecated in scikit-learn 0.22 and
# removed in 0.24, so the old import fails on current releases.
from sklearn.linear_model import LogisticRegression

# Demo: fit a logistic-regression text classifier on TF-IDF features and
# predict the labels of two unseen sentences.

# Corpus. The first three sentences are the labelled training samples and the
# remaining two are the test samples. (Author's note: for Chinese text,
# segment it first with jieba.)
X = [
    "fuck you",
    "fuck you all",
    "hello everyone",
    "fuck me",
    "hello boy",
]

# Labels for the training samples, one per training sentence, in order.
y = [1, 1, 0]

# Every sentence that has a label is a training sample; deriving the split
# point from the labels keeps samples and labels in sync (here it is 3, which
# matches the original X[:-2] / X[-2:] slices).
n_train = len(y)

vectorizer = TfidfVectorizer()

# Learn the vocabulary and IDF weights from the training sentences only and
# convert them to a TF-IDF matrix.
X_train = vectorizer.fit_transform(X[:n_train])
print(X_train)

# Convert the test sentences with the vocabulary/IDF fitted above.
X_test = vectorizer.transform(X[n_train:])
print(X_test)

# Train the logistic-regression model.
classifier = LogisticRegression()
classifier.fit(X_train, y)

# Predict the labels of the test samples.
predictions = classifier.predict(X_test)
print(predictions)