特徵工程(五)length
阿新 • • 發佈:2018-12-11
"""Extract the word-count feature from the raw data and save it locally.

For every row of the train and test sets, the number of tokens in the
``word_seg`` column is computed and written, together with the row ``id``,
to ``train_word_len.csv`` / ``test_word_len.csv``.  The ``article`` column
can be processed in the same way.
"""
import pandas as pd


def get_word_len(df_series):
    """Return the number of whitespace-separated tokens in each entry.

    Args:
        df_series: Iterable of segmented texts (e.g. a pandas Series of
            strings whose tokens are separated by spaces).

    Returns:
        A list of ints, one per entry and in the same order.  Missing
        entries (``None`` / ``NaN``) and blank strings count as 0.
    """
    # str.split() without an argument ignores leading/trailing/repeated
    # whitespace, so an empty string yields 0 instead of the 1 that
    # split(' ') would report.  str() guards against cells that pandas
    # parsed as numbers rather than text.
    return [
        0 if pd.isna(row) else len(str(row).split())
        for row in df_series
    ]


def _build_word_len_frame(df):
    """Build an ``id`` / ``word_len`` frame from a frame with ``id`` and ``word_seg``."""
    return pd.DataFrame({
        'id': df['id'].tolist(),
        'word_len': get_word_len(df['word_seg']),
    })


# Guarded so that importing this module (e.g. to reuse get_word_len) does
# not trigger file I/O; running it as a script behaves as before.
if __name__ == '__main__':
    df_train = pd.read_csv('train_set.csv')
    df_test = pd.read_csv('test_set.csv')

    df_train_word = _build_word_len_frame(df_train)
    df_test_word = _build_word_len_frame(df_test)

    df_train_word.to_csv('./train_word_len.csv', index=False)
    df_test_word.to_csv('./test_word_len.csv', index=False)