gensim word2vec把訓練好的模型儲存成txt
阿新 • • 發佈:2019-02-02
import codecs
import logging
import pickle
import re

import gensim
import numpy as np
from gensim.corpora.dictionary import Dictionary
from gensim.models import word2vec

# Configure logging so that gensim's INFO-level messages are printed.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Train a 100-dimensional Word2Vec model on the whitespace-separated corpus.
sentences = word2vec.Text8Corpus('D:/csvtxt/corpus.txt')
# min_count=1: do not filter out words that appear only once.
# NOTE(review): `size` is the gensim 3.x keyword; gensim 4.x calls it
# `vector_size` -- adjust if the installed gensim is upgraded.
model = word2vec.Word2Vec(sentences, size=100, min_count=1)
model.save('word2vec.model')

# Query through `model.wv`: the same-named shortcuts on the model object are
# deprecated in gensim 3.x and removed in 4.x.
word_vectors = model.wv
print(word_vectors.similarity('怎麼', '如何'))

# Save the model as a plain-text file: one "<word> <v1> <v2> ... <vN>" row per
# distinct word, in order of first appearance in the corpus.
with codecs.open('D:/csvtxt/corpus.txt', 'r', encoding='utf-8') as corpus_file:
    corpus_text = corpus_file.read()

seen_words = set()
rows = []
# str.split() with no argument splits on any whitespace run, so CRLF/LF line
# ends, tabs and repeated spaces never yield empty or newline-tainted tokens
# (an empty token would raise KeyError on lookup).
for word in corpus_text.split():
    if word in seen_words:
        # The vector of a word is the same at every occurrence; emit it once.
        continue
    seen_words.add(word)
    if word not in word_vectors:
        logging.warning('skipping token missing from the vocabulary: %r', word)
        continue
    components = [str(value) for value in word_vectors[word]]
    rows.append(' '.join([word] + components))

# The context manager guarantees the output is flushed and closed.
with codecs.open('D:/csvtxt/xyz-add-wordvec.txt', 'w', encoding='utf-8') as output_file:
    output_file.write('\n'.join(rows))