字典特徵抽取
阿新 • • 發佈:2018-12-22
#特徵抽取 feature_extraction #導包 # from sklearn.feature_extraction.text import CountVectorizer # # vector = CountVectorizer() # # res = vector.fit_transform(["life is short,i like python ", "life is too long ,i dislike python"]) # # print(vector.get_feature_names()) # # print(res.toarray()) #導包 字典特徵抽取 from sklearn.feature_extraction import DictVectorizer #字典資料抽取:把字典中一些類別資料,分別進行轉換成特徵 def dictvec(): #例項化 dict = DictVectorizer(sparse=False) #sparse=False 取消稀疏矩陣 data = dict.fit_transform([{'city': '北京', 'temp': 100}, {'city': '上海', 'temp': 60}, {'city': '江西', 'temp': 30}])print(data)#sparse格式 節約記憶體 便於讀取處理 # [[0. 1. 0. 100.] # [1. 0. 0. 60.] # [0. 0. 1. 30.]] print(dict.get_feature_names()) #讀取特徵值 # ['city=上海', 'city=北京', 'city=江西', 'temp'] return None if __name__ == "__main__": dictvec()
執行結果: