# Author: 阿新 • Published: 2018-11-09
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 10:06:50 2018

@author: www

Train a tiny n-gram neural language model on a Shakespeare poem.

The previous ``CONTEXT_SIZE`` words are embedded, concatenated and fed to a
small classifier that scores every word of the vocabulary as the next word.
"""

import torch
from torch import nn
import torch.nn.functional as F  # noqa: F401  (imported by the original script; kept)
from torch.autograd import Variable  # noqa: F401  (no longer used here; kept for compatibility)

# Number of preceding words used to predict the next word.
CONTEXT_SIZE = 2
# Dimensionality of each word embedding.
EMBEDDING_DIM = 10

# Training text: a poem by Shakespeare, split on whitespace (punctuation stays
# attached to the words).
test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()

# Each sample is ((context words...), next word). The window width follows
# CONTEXT_SIZE so the data always matches the model's input size.
# With CONTEXT_SIZE == 2: len(trigram) == 113 and
# trigram[0] == (('When', 'forty'), 'winters').
trigram = [
    (tuple(test_sentence[i:i + CONTEXT_SIZE]), test_sentence[i + CONTEXT_SIZE])
    for i in range(len(test_sentence) - CONTEXT_SIZE)
]

# Build the word <-> index encoding that the embedding layer is based on.
vocb = set(test_sentence)  # the set removes duplicate words
# Enumerate in sorted order: iterating the raw set would give a different
# word -> index assignment on every run (string hashing is randomised).
word_to_idx = {word: i for i, word in enumerate(sorted(vocb))}
idx_to_word = {i: word for word, i in word_to_idx.items()}


class n_gram(nn.Module):
    """Predict the next word from the embeddings of the preceding words.

    Args:
        vocab_size: number of distinct words (embedding rows and output
            classes).
        context_size: number of context words fed to ``forward``.
        n_dim: dimensionality of each word embedding.
    """

    def __init__(self, vocab_size, context_size=CONTEXT_SIZE,
                 n_dim=EMBEDDING_DIM):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, n_dim)
        self.classify = nn.Sequential(
            nn.Linear(context_size * n_dim, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, vocab_size),
        )

    def forward(self, x):
        """Score every vocabulary word for a single context.

        Args:
            x: 1-D LongTensor holding the ``context_size`` word indices of
                ONE sample (no batch dimension).

        Returns:
            Tensor of shape ``(1, vocab_size)`` with unnormalised scores.
        """
        voc_embed = self.embed(x)  # look up the word embeddings
        # Concatenate the context embeddings into a single row vector.
        voc_embed = voc_embed.view(1, -1)
        return self.classify(voc_embed)


def _encode_context(context):
    """Turn an iterable of context words into a 1-D LongTensor of indices."""
    return torch.tensor([word_to_idx[w] for w in context], dtype=torch.long)


def train_model(model, data, criterion, optimizer, epochs=100, log_every=20):
    """Train ``model`` with one optimiser step per sample.

    Args:
        model: module mapping an encoded context to ``(1, vocab_size)`` scores.
        data: non-empty sequence of ``(context_words, next_word)`` pairs.
        criterion: loss taking ``(scores, target_index_tensor)``.
        optimizer: optimiser over ``model``'s parameters.
        epochs: number of passes over ``data``.
        log_every: print the mean loss every this many epochs; a falsy value
            disables printing.

    Returns:
        List with the mean training loss of each epoch.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if not data:
        raise ValueError("data must contain at least one sample")

    history = []
    for e in range(epochs):
        train_loss = 0.0
        # Every sample in `data` is used for training.
        for context, target in data:
            word = _encode_context(context)  # context words are the input
            label = torch.tensor([word_to_idx[target]], dtype=torch.long)
            # Forward pass.
            out = model(word)
            loss = criterion(out, label)
            train_loss += loss.item()
            # Backward pass.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        mean_loss = train_loss / len(data)
        history.append(mean_loss)
        if log_every and (e + 1) % log_every == 0:
            print('epoch: {}, Loss: {:.6f}'.format(e + 1, mean_loss))
    return history


def predict_next_word(model, context):
    """Return the vocabulary word ``model`` scores highest after ``context``."""
    # No gradients are needed for inference.
    with torch.no_grad():
        out = model(_encode_context(context))
    pred_label_idx = out.max(1)[1].item()
    return idx_to_word[pred_label_idx]


net = n_gram(len(word_to_idx))

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2, weight_decay=1e-5)


if __name__ == "__main__":
    train_model(net, trigram, criterion, optimizer, epochs=100, log_every=20)

    net = net.eval()

    # Check the result on one training sample.
    word, label = trigram[24]
    print('input: {}'.format(word))
    print('label: {}'.format(label))
    print()
    predict_word = predict_next_word(net, word)
    print('real word is {}, predicted word is {}'.format(label, predict_word))
    # Author's note: the network predicts the training set fairly accurately,
    # but with this few samples it overfits very easily.