1. 程式人生 > 字典特徵抽取

字典特徵抽取

#特徵抽取 feature_extraction
#導包
# from sklearn.feature_extraction.text import CountVectorizer
#
# vector = CountVectorizer()
#
# res = vector.fit_transform(["life is short,i like python ", "life is too long ,i dislike python"])
#
# print(vector.get_feature_names())
#
# print(res.toarray())

#
# Import the dictionary feature extractor.
from sklearn.feature_extraction import DictVectorizer


# Dictionary feature extraction: categorical values found in the dicts are
# each turned into their own feature column.
def dictvec() -> None:
    """Vectorize three sample city/temperature records and print the result.

    Prints the dense feature matrix produced by ``DictVectorizer`` and then
    the list of feature names matching the matrix columns.

    Returns:
        None. The function only prints.
    """
    # Instantiate the vectorizer. It is deliberately not named ``dict`` so
    # the builtin is not shadowed.
    # sparse=False: return a dense array instead of a sparse matrix.
    # (The sparse format saves memory and is convenient to read and process.)
    vectorizer = DictVectorizer(sparse=False)

    data = vectorizer.fit_transform(
        [
            {'city': '北京', 'temp': 100},
            {'city': '上海', 'temp': 60},
            {'city': '江西', 'temp': 30},
        ]
    )
    print(data)
    # Expected output:
    # [[0. 1. 0. 100.]
    #  [1. 0. 0. 60.]
    #  [0. 0. 1. 30.]]

    # Read the feature names. ``get_feature_names`` was deprecated in
    # scikit-learn 1.0 and removed in 1.2; prefer ``get_feature_names_out``
    # and fall back only on older releases. ``tolist()`` keeps the printed
    # form a plain Python list, as before.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out().tolist()
    else:
        feature_names = vectorizer.get_feature_names()
    print(feature_names)
    # Expected output:
    # ['city=上海', 'city=北京', 'city=江西', 'temp']


if __name__ == "__main__":
    dictvec()

執行結果: