1. 程式人生 > 其它 > 貝葉斯公式實戰

貝葉斯公式實戰

技術標籤:ML

from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB


def nb_news():
    """Classify news posts with a multinomial naive Bayes model.

    Fetches the 20 newsgroups dataset, splits it into train and test
    subsets, converts the raw text to TF-IDF features, fits a
    ``MultinomialNB`` estimator on the training subset, and prints two
    evaluations on the test subset: an element-wise comparison of the
    predictions with the true labels, and the estimator's score.

    :return: None
    """
    # 1) Fetch the data.
    news = fetch_20newsgroups(subset="all")

    # 2) Split the dataset (train_test_split defaults are used).
    x_train, x_test, y_train, y_test = train_test_split(news.data, news.target)

    # 3) Feature engineering: TF-IDF text feature extraction.
    #    The vectorizer is fitted on the training text only; the test text
    #    is transformed with that same fitted vectorizer.
    tf = TfidfVectorizer()
    x_train = tf.fit_transform(x_train)
    x_test = tf.transform(x_test)

    # 4) Naive Bayes estimator workflow.
    estimator = MultinomialNB()
    estimator.fit(x_train, y_train)

    # 5) Model evaluation.
    # Method 1: compare the predictions with the true labels directly.
    y_predict = estimator.predict(x_test)
    print("預測值和真實值對比:\n", y_test == y_predict)

    # Method 2: compute the accuracy.
    score = estimator.score(x_test, y_test)
    print("準確率為:\n", score)


# Disabled alternative implementation, kept for reference.
#
# def nbcls():
#     """
#     Predict on the news dataset with naive Bayes.
#     :return:
#     """
#     # Fetch the news data, 20 categories
#     news = fetch_20newsgroups(subset='all')
#
#     # Split the dataset
#     x_train, x_test, y_train, y_test = train_test_split(news.data, news.target, test_size=0.3)
#
#     # Extract features from the text data
#     tf = TfidfVectorizer()
#
#     x_train = tf.fit_transform(x_train)
#     # The list printed here consists of all the distinct words in the training set
#     print(tf.get_feature_names())
#
#     # print(x_train.toarray())
#
#     # fit_transform must not be called here
#     x_test = tf.transform(x_test)
#
#     # Estimator workflow
#     mlb = MultinomialNB(alpha=1.0)
#
#     mlb.fit(x_train, y_train)
#
#     # Make predictions
#     y_predict = mlb.predict(x_test)
#
#     print("預測每篇文章的類別:", y_predict[:100])
#     print("真實類別為:", y_test[:100])
#
#     print("預測準確率為:", mlb.score(x_test, y_test))
#
#     return None


if __name__ == "__main__":
    nb_news()