RNN learning: Keras RNN practice, text generation
阿新 • Published: 2019-01-28
In practice this still differs from what was described above; working through it myself gave me a much deeper understanding.
The idea came from seeing text generation with an RNN in the cs231n lecture, so I wanted to try it with Keras, and at the same time brush up on my Python, which had gotten a bit rusty. There are quite a few small tricks involved.
The course example (requires a VPN to access) is impressive: about a hundred lines of pure Python, no framework at all, and the results are very good.
import keras
# import word2vec  # imported in the original but never used
import numpy as np
from keras.utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras import backend as K
import h5py

filename = "input.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))
n_chars = len(raw_text)
n_vocab = len(chars)
print('vocab: ', n_vocab)
# data = open('input.txt','r').read()
# data = data.lower()

# The preprocessing above basically follows the source code the teacher gave in class
seq_length = 32
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# X = X / n_vocab  # normalization made the results worse

# The one-hot labels below could also be built directly with a helper,
# e.g. valY = np_utils.to_categorical(valY, num_classes=NUM_CLASS)
Y = []
for i in range(n_patterns):
    y = np.zeros((n_vocab, 1))
    y[dataY[i]] = 1
    Y.append(y)
Y = np.reshape(Y, (n_patterns, n_vocab))
print(Y.shape)

# set a checkpoint to save the weights
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

model = Sequential()
# Note: the last LSTM before the Dense layer must not use return_sequences=True,
# otherwise the output shape (samples, seq_length, n_vocab) will not match Y (samples, n_vocab).
model.add(LSTM(64, input_shape=(X.shape[1], X.shape[2])))
# model.add(LSTM(32, return_sequences=True))
# model.add(LSTM(8))
# model.add(LSTM(
#     batch_input_shape=(None, TIME_STEPS, INPUT_SIZE),  # Or: input_dim=INPUT_SIZE, input_length=TIME_STEPS,
#     output_dim=CELL_SIZE,
#     return_sequences=True,  # True: output at all steps. False: output at the last step only.
#     stateful=True,          # True: the final state of batch 1 is fed into the initial state of batch 2
# ))
# model.add(Dropout(0.2))
model.add(Dense(n_vocab, activation='softmax'))

adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
adagrad = keras.optimizers.Adagrad(lr=0.001, epsilon=1e-06)
# note: passing the string 'adam' uses the default Adam settings; the optimizer objects above are defined but not actually used
model.compile(loss='categorical_crossentropy', optimizer='adam')

print(model.layers[1].input)  # use the index of a layer to inspect its input and output shapes
print(model.layers[1].output)
plot_model(model, to_file='model.png')

# I tried multi-layer RNNs and single layers of different widths; none of them worked very well and convergence was slow.
# This implementation is also still quite different, algorithmically, from the teacher's code.
# In the end, once the loss drops below 0.1 it produces some real words, but the sentences are basically unreadable.
# model.fit(X, Y, epochs=20, batch_size=128, callbacks=callbacks_list)
# model.fit(X, Y, epochs=500, batch_size=128)
# model.save('word_pre.h5')
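The script stops before the actual generation step (the fit and save calls are commented out). As a rough sketch of what generation could look like once the model has been trained, the usual approach, same as in the course example, is to take a seed sequence, predict a distribution over the next character, sample from it, append the sampled character, and slide the window forward. The generate_text function and its temperature parameter below are my own illustrative additions, not part of the original code; the sketch assumes the model, char_to_int, int_to_char, seq_length and n_vocab defined above.

# Minimal sampling sketch (illustrative; assumes the variables defined in the script above).
def generate_text(model, seed, length=200, temperature=1.0):
    # the seed should be at least seq_length characters long, since the LSTM expects a fixed window
    pattern = [char_to_int[c] for c in seed[-seq_length:]]
    result = list(seed)
    for _ in range(length):
        x = np.reshape(pattern, (1, seq_length, 1))
        probs = np.asarray(model.predict(x, verbose=0)[0], dtype=np.float64)
        # temperature sampling: lower values are more conservative, higher values more random
        probs = np.log(probs + 1e-12) / temperature
        probs = np.exp(probs) / np.sum(np.exp(probs))
        idx = np.random.choice(n_vocab, p=probs)
        result.append(int_to_char[idx])
        pattern = pattern[1:] + [idx]  # slide the input window forward by one character
    return ''.join(result)

# example usage after training:
# print(generate_text(model, seed=raw_text[:seq_length], length=300, temperature=0.8))

Sampling from the softmax instead of always taking the argmax keeps the output from looping on the most frequent characters; this mirrors the sampling step in the course's pure-Python example.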