Solving the XOR Problem with a Neural Network
阿新 • Published: 2018-12-16
Introduction
Getting a neural network to solve the XOR problem is considered one of the notable breakthroughs for neural networks.
First we need to know what the XOR problem is; I will not go into much detail here.
The XOR problem has 4 input samples and 1 output. The input is two-dimensional, with each dimension being either 0 or 1; the output is one-dimensional, either 0 or 1.
When the input is (0, 0) or (1, 1), the output is 0; when the input is (1, 0) or (0, 1), the output is 1.
If you plot these four points in a Cartesian coordinate system, you can see that no linear classifier can separate the two classes.
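To make this easy to see, here is a minimal plotting sketch (not part of the original program; the variable names are my own) that draws the four points with their labels:

import numpy as np
import matplotlib.pyplot as plt

# the four XOR samples: class 0 at (0,0) and (1,1), class 1 at (0,1) and (1,0)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])

plt.scatter(X[y == 0, 0], X[y == 0, 1], marker='o', label='class 0')
plt.scatter(X[y == 1, 0], X[y == 1, 1], marker='x', label='class 1')
plt.legend()
plt.show()

No single straight line in this plane puts the circles on one side and the crosses on the other, which is exactly why a hidden layer is needed.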
Goal
This article builds a small neural network framework and uses it to train the parameters.
The framework is required to be configurable: the number of hidden layers, the learning rate, and so on can all be adjusted.
At the end we obtain the trained parameters and use them for prediction.
When predicting, the results are drawn as a 3D plot to visualize the classification.
"3D" here means that, although each input dimension of the training data is either 0 or 1, at prediction time each dimension can be any fraction between 0 and 1.
For example, if the test point is (0.2, 0.2), it is closest to the point (0, 0), so we treat it approximately as (0, 0), and it gets the corresponding output, i.e. the same as for (0, 0).
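The dense test grid described above can be built in a couple of lines. This is only an illustrative sketch (the names are mine; the training script later in this post builds the same grid with explicit loops):

import numpy as np

step = 0.01
x_1 = np.arange(0, 1, step)
x_2 = np.arange(0, 1, step)

# every combination (x_1[i], x_2[j]) becomes one test point, e.g. (0.2, 0.2)
grid = np.array([[a, b] for a in x_1 for b in x_2])
print(grid.shape)  # (10000, 2)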
Notes
I treat this as a classification problem, so the last layer uses a Softmax classifier.
The activation function is the plain sigmoid function.
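For reference, sigmoid(x) = 1 / (1 + e^(-x)), whose derivative can be written as sigmoid(x) * (1 - sigmoid(x)), and softmax turns the final-layer scores into class probabilities. Below is a small standalone numpy sketch of both (the framework further down implements the same operations, but with caches for backpropagation):

import numpy as np

def sigmoid(x):
    # sigmoid activation: squashes any real number into (0, 1)
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(x):
    # derivative of the sigmoid, expressed through its own output
    s = sigmoid(x)
    return s * (1.0 - s)

def softmax(scores):
    # subtract the row-wise max for numerical stability, then normalize
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)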
Code
The code has two parts: one builds the framework, and the other feeds in the data and runs the training.
The code can also be found by searching in the CSDN download section.
Framework code:
(essentially adapted from the cs231n course assignments)
# coding=utf-8
import numpy as np


def basic_forard(x, w, b):
    # affine (fully connected) forward pass: out = x * w + b
    x = x.reshape(x.shape[0], -1)
    out = np.dot(x, w) + b
    cache = (x, w, b)
    return out, cache


def basic_backward(dout, cache):
    # affine backward pass: gradients w.r.t. input, weights and bias
    x, w, b = cache
    dx = np.dot(dout, w.T)
    dw = np.dot(x.T, dout)
    db = np.reshape(np.sum(dout, axis=0), b.shape)
    return dx, dw, db


def sigmoid_forward(x):
    x = x.reshape(x.shape[0], -1)
    out = 1 / (1 + np.exp(-1 * x))
    cache = out
    return out, cache


def sigmoid_backward(dout, cache):
    # d sigmoid / dx = out * (1 - out)
    out = cache
    dx = out * (1 - out)
    dx *= dout
    return dx


def basic_sigmoid_forward(x, w, b):
    # one full layer: affine transform followed by sigmoid activation
    basic_out, basic_cache = basic_forard(x, w, b)
    sigmoid_out, sigmoid_cache = sigmoid_forward(basic_out)
    cache = (basic_cache, sigmoid_cache)
    return sigmoid_out, cache


def basic_sigmoid_backward(dout, cache):
    basic_cache, sigmoid_cache = cache
    dx_sigmoid = sigmoid_backward(dout, sigmoid_cache)
    dx, dw, db = basic_backward(dx_sigmoid, basic_cache)
    return dx, dw, db


def softmax_loss(x, y):
    # numerically stable softmax cross-entropy loss and its gradient
    shifted_logits = x - np.max(x, axis=1, keepdims=True)
    Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
    log_probs = shifted_logits - np.log(Z)
    probs = np.exp(log_probs)
    N = x.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx


class muliti_layer_net(object):
    def __init__(self, hidden_dim, input_dim=2, num_classes=2,
                 dtype=np.float32, seed=None, reg=0.0):
        self.num_layers = 1 + len(hidden_dim)
        self.dtype = dtype
        self.reg = reg
        self.params = {}

        # init all parameters
        layers_dims = [input_dim] + hidden_dim + [num_classes]
        for i in range(self.num_layers):
            self.params['W' + str(i + 1)] = np.random.randn(layers_dims[i], layers_dims[i + 1])
            self.params['b' + str(i + 1)] = np.zeros((1, layers_dims[i + 1]))

    def loss(self, X, y=None):
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # forward pass, keeping the cache of every layer for backprop
        basic_sigmoid_cache = {}
        layer_input = X
        for lay in range(self.num_layers):
            layer_input, basic_sigmoid_cache[lay] = basic_sigmoid_forward(
                layer_input,
                self.params['W' + str(lay + 1)],
                self.params['b' + str(lay + 1)])
        score = layer_input

        if mode == 'test':
            return score

        # loss and gradients, propagated from the last layer back to the first
        loss, dscore = softmax_loss(score, y)
        dx = dscore
        grads = {}
        for index in range(self.num_layers):
            lay = self.num_layers - index - 1
            # L2 regularization term on the weights of this layer
            loss += 0.5 * self.reg * np.sum(self.params['W' + str(lay + 1)] ** 2)
            dx, dw, db = basic_sigmoid_backward(dx, basic_sigmoid_cache[lay])
            grads['W' + str(lay + 1)] = dw + self.reg * self.params['W' + str(lay + 1)]
            grads['b' + str(lay + 1)] = db

        return loss, grads


def sgd_momentum(w, dw, config=None):
    # stochastic gradient descent with momentum
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))

    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v
    config['velocity'] = v
    return next_w, config


class Solver(object):
    def __init__(self, model, data, **kwargs):
        self.model = model
        self.X_train = data['X_train']
        self.y_train = data['y_train']
        self.X_val = data['X_val']
        self.y_val = data['y_val']

        self.update_rule = kwargs.pop('update_rule', 'sgd_momentum')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)

        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)

        if len(kwargs) > 0:
            extra = ', '.join('"%s"' % k for k in kwargs.keys())
            raise ValueError('Unrecognized arguments %s' % extra)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this manually.
        """
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # every parameter gets its own copy of the optimizer configuration
        self.optim_configs = {}
        for p in self.model.params:
            d = {k: v for k, v in self.optim_config.items()}
            self.optim_configs[p] = d

    def _step(self):
        # one update step on a randomly sampled mini-batch
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)

        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = sgd_momentum(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]

        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(int(num_batches)):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        acc = np.mean(y_pred == y)
        return acc

    def train(self):
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(int(num_iterations)):
            self._step()

            if self.verbose and t % self.print_every == 0:
                print('Iteration {:d} / {:d}, loss: {:f}'.format(t + 1, num_iterations, self.loss_history[-1]))

            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]['learning_rate'] *= self.lr_decay

            first_it = (t == 0)
            last_it = (t == num_iterations - 1)
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train, num_samples=10)
                val_acc = self.check_accuracy(self.X_val, self.y_val)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)

                if self.verbose:
                    print('Epoch {:d} / {:d}, train_acc: {:f}, val_acc: {:f}'.format(
                        self.epoch, self.num_epochs, train_acc, val_acc))

                # keep a copy of the best parameters seen so far (by validation accuracy)
                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k, v in self.model.params.items():
                        self.best_params[k] = v.copy()

        self.model.params = self.best_params
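If you modify the framework, it is worth sanity-checking the analytic gradients against numerically computed ones. The following is a minimal sketch of such a check (not part of the original code; it assumes the framework above is saved as layers.py):

import numpy as np
import layers  # the framework file above, assumed saved as layers.py

X = np.array([[0, 0], [0, 1], [1, 1], [1, 0]], dtype=np.float64)
y = np.array([0, 1, 0, 1])

model = layers.muliti_layer_net(hidden_dim=[2, 2], input_dim=2, num_classes=2, dtype=np.float64)
loss, grads = model.loss(X, y)

h = 1e-5
for name in sorted(model.params):
    w = model.params[name]
    num_grad = np.zeros_like(w)
    it = np.nditer(w, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = w[idx]
        # central difference: (loss(w + h) - loss(w - h)) / (2h)
        w[idx] = old + h
        loss_plus, _ = model.loss(X, y)
        w[idx] = old - h
        loss_minus, _ = model.loss(X, y)
        w[idx] = old
        num_grad[idx] = (loss_plus - loss_minus) / (2 * h)
        it.iternext()
    rel_err = np.max(np.abs(num_grad - grads[name]) /
                     (np.abs(num_grad) + np.abs(grads[name]) + 1e-8))
    print(name, 'max relative error:', rel_err)

Relative errors on the order of 1e-6 or smaller are a good sign that the backward pass is implemented correctly.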
Training and testing code:
# coding=utf-8
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import layers  # the framework file above, saved as layers.py

# the four XOR samples; the same points serve as training and validation data
small_data = {
    'X_train': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
    'y_train': np.array([0, 1, 0, 1]),
    'X_val': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
    'y_val': np.array([0, 1, 0, 1]),
}

learning_rate = 0.2
reg = 0.0
model = layers.muliti_layer_net(hidden_dim=[2, 2], input_dim=2, num_classes=2,
                                reg=reg, dtype=np.float64)
solver = layers.Solver(model, small_data,
                       print_every=1,
                       num_epochs=5000,
                       batch_size=4,
                       update_rule='sgd_momentum',
                       optim_config={'learning_rate': learning_rate})
solver.train()
print(model.params)
best_model = model

# optionally, plot the training loss history
# plt.plot(solver.loss_history, 'o')
# plt.title('Training loss history')
# plt.xlabel('Iteration')
# plt.ylabel('Training loss')
# plt.show()

# build a dense grid of test points covering [0, 1) x [0, 1)
x_1 = np.arange(0, 1, 0.01)
x_2 = np.arange(0, 1, 0.01)
x_test = np.zeros((len(x_1) * len(x_2), 2))
print(x_test.shape)
index = 0
for i in range(len(x_1)):
    for j in range(len(x_2)):
        x_test[index, 0] = x_1[i]
        x_test[index, 1] = x_2[j]
        index += 1
print(x_test[0])
print(x_test[903])
print(x_test[5203])

# predict a class for every grid point
test_pred = np.argmax(best_model.loss(x_test), axis=1)
print(test_pred)

# draw the predicted class as a 3D surface over the input plane
x_1, x_2 = np.meshgrid(x_1, x_2)
figure = plt.figure()
ax = Axes3D(figure)
test_pred = test_pred.reshape(len(x_1), len(x_2))
ax.plot_surface(x_1, x_2, test_pred, rstride=1, cstride=1, cmap='rainbow')
plt.show()
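After training finishes, a quick sanity check (my addition, assuming the script above has just been run so that best_model exists) is to feed the four corner points back through the model and compare against the XOR labels:

# predictions on the four training points; a successful run prints [0 1 0 1]
X_check = np.array([[0, 0], [0, 1], [1, 1], [1, 0]])
corner_pred = np.argmax(best_model.loss(X_check), axis=1)
print('corner predictions:', corner_pred)
print('expected labels   :', np.array([0, 1, 0, 1]))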
Additional notes
1. The code does not always reach 100% accuracy (see the sketch after this list).
2. If the number of iterations is too small, the accuracy will also be low; in general it takes 3000 or more epochs to reach 100% accuracy.
3. Some of the hyperparameters also affect the accuracy.
4. This experiment is a useful exercise for understanding basic neural networks.
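Regarding points 1 and 2: the weights are initialized randomly and no seed is fixed, so results vary from run to run. Below is a small sketch (reusing the framework and the same hyperparameters as above; the run count of 10 is an arbitrary choice of mine) that repeats training and reports how often all four points end up classified correctly:

import numpy as np
import layers  # the framework file above, assumed saved as layers.py

data = {
    'X_train': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
    'y_train': np.array([0, 1, 0, 1]),
    'X_val': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
    'y_val': np.array([0, 1, 0, 1]),
}

successes = 0
runs = 10
for _ in range(runs):
    model = layers.muliti_layer_net(hidden_dim=[2, 2], input_dim=2, num_classes=2, dtype=np.float64)
    solver = layers.Solver(model, data, verbose=False, num_epochs=5000, batch_size=4,
                           optim_config={'learning_rate': 0.2})
    solver.train()
    pred = np.argmax(model.loss(data['X_train']), axis=1)
    successes += int(np.all(pred == data['y_train']))

print('perfect runs: {} / {}'.format(successes, runs))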