
Solving the XOR Problem with a Neural Network

Introduction

Getting a neural network to solve the XOR problem was one of the major early breakthroughs for neural networks.
First we need to know what the XOR problem is; I will only describe it briefly here.
The XOR problem has 4 input samples and 1 output per sample. Each input is two-dimensional, with every dimension being either 0 or 1, and the output is one-dimensional, either 0 or 1.
When the input is (0, 0) or (1, 1) the output is 0; when the input is (1, 0) or (0, 1) the output is 1.
If you plot these four points in the Cartesian plane, you will see that they cannot be separated by a linear classifier.
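
The four XOR samples can be written down directly as NumPy arrays; these are the same arrays used by the training script later in this post (the variable names here are only for illustration):

import numpy as np

# The four XOR samples: two-dimensional inputs, labels are the XOR of the two bits.
X = np.array([[0, 0], [0, 1], [1, 1], [1, 0]])
y = np.array([0, 1, 0, 1])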

Goal

In this post I want to build a small neural network framework and use it to train the parameters.
The framework has to be configurable: the number of hidden layers, the learning rate and so on can all be adjusted.
At the end we obtain the trained parameters and use them for prediction.
At prediction time, the results are drawn as a 3D plot to visualize the classification.
"3D" here means that, although each input dimension in the training data is either 0 or 1, at prediction time each dimension can be any fraction between 0 and 1.
For example, for the test point (0.2, 0.2), which is closest to (0, 0), we can roughly treat it as the point (0, 0) and expect the corresponding output, i.e. the same output as for (0, 0).
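
For instance, once a model has been trained with the framework described below, a fractional test point can be classified like this (a minimal sketch; best_model refers to the trained model object produced by the training script later in the post):

import numpy as np

x_query = np.array([[0.2, 0.2]])
# In test mode, model.loss returns the raw class scores; argmax picks the class.
pred = np.argmax(best_model.loss(x_query), axis=1)
print(pred)  # expected to match the label of (0, 0), i.e. class 0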

Some Notes

Here I treat the task as a classification problem, so the last layer uses a Softmax classifier (softmax plus cross-entropy loss).
For the activation function I simply use the classic sigmoid.
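
For reference, both functions can be written in a few lines of NumPy (a standalone sketch; the framework code below implements the same formulas, with caching for backpropagation):

import numpy as np

def sigmoid(x):
    # sigmoid(x) = 1 / (1 + exp(-x)), squashes scores into (0, 1)
    return 1.0 / (1.0 + np.exp(-x))

def softmax(scores):
    # Subtract the row-wise max for numerical stability, then normalize to probabilities.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)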

Code

The code consists of two parts: one part builds the framework, the other feeds in the data and runs the training.
The full code can be found by searching the CSDN download area.

Framework code:
(mostly adapted from the cs231n course assignments)

# coding=utf-8

import numpy as np


def basic_forward(x, w, b):
    # Affine (fully connected) forward pass: out = x.dot(w) + b
    x = x.reshape(x.shape[0], -1)
    out = np.dot(x, w) + b
    cache = (x, w, b)

    return out, cache


def basic_backward(dout, cache):
    x, w, b = cache
    dx = np.dot(dout, w.T)
    # dx = np.reshape(dx, x.shape)
    # x = x.reshape(x.shape[0], -1)
    dw = np.dot(x.T, dout)
    db = np.reshape(np.sum(dout, axis=0), b.shape)

    return dx, dw, db


def sigmoid_forward(x):
    x = x.reshape(x.shape[0], -1)
    out = 1 / (1 + np.exp(-1 * x))
    cache = out

    return out, cache


def sigmoid_backward(dout, cache):
    out = cache
    dx = out * (1 - out)
    dx *= dout

    return dx


def basic_sigmoid_forward(x, w, b):
    basic_out, basic_cache = basic_forward(x, w, b)
    sigmoid_out, sigmoid_cache = sigmoid_forward(basic_out)
    cache = (basic_cache, sigmoid_cache)

    return sigmoid_out, cache


def basic_sigmoid_backward(dout, cache):
    basic_cache, sigmoid_cache = cache
    dx_sigmoid = sigmoid_backward(dout, sigmoid_cache)
    dx, dw, db = basic_backward(dx_sigmoid, basic_cache)

    return dx, dw, db


def softmax_loss(x, y):
    shifted_logits = x - np.max(x, axis=1, keepdims=True)
    Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
    log_probs = shifted_logits - np.log(Z)
    probs = np.exp(log_probs)
    N = x.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    # print(x.shape)
    # print(y.shape)
    # print(dx.shape)
    return loss, dx


class multi_layer_net(object):
    def __init__(self, hidden_dim, input_dim=2, num_classes=2, dtype=np.float32, seed=None, reg=0.0):
        self.num_layers = 1 + len(hidden_dim)
        self.dtype = dtype
        self.reg = reg
        self.params = {}

        # init all parameters
        layers_dims = [input_dim] + hidden_dim + [num_classes]

        for i in range(self.num_layers):
            self.params['W' + str(i + 1)] = np.random.randn(layers_dims[i], layers_dims[i + 1])
            self.params['b' + str(i + 1)] = np.zeros((1, layers_dims[i + 1]))


    def loss(self, X, y=None):
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # compute the forward data and cache
        basic_sigmoid_cache = {}

        layer_input = X

        for lay in range(self.num_layers):
            layer_input, basic_sigmoid_cache[lay] = basic_sigmoid_forward(layer_input,
                                                                          self.params['W' + str(lay + 1)],
                                                                          self.params['b' + str(lay + 1)])

        score = layer_input
        # print(score.shape)

        if mode == 'test':
            return score

        # compute the gradient
        loss, dscore = softmax_loss(score, y)
        dx = dscore
        grads = {}

        for index in range(self.num_layers):
            lay = self.num_layers - index - 1
            # L2 regularization term: 0.5 * reg * sum(W ** 2)
            loss += 0.5 * self.reg * np.sum(self.params['W' + str(lay + 1)] ** 2)
            dx, dw, db = basic_sigmoid_backward(dx, basic_sigmoid_cache[lay])

            grads['W' + str(lay + 1)] = dw + self.reg * self.params['W' + str(lay + 1)]
            grads['b' + str(lay + 1)] = db

        return loss, grads


def sgd_momentum(w, dw, config=None):
    if config is None: config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)

    v = config.get('velocity', np.zeros_like(w))
    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v

    config['velocity'] = v

    return next_w, config


class Solver(object):

    def __init__(self, model, data, **kwargs):
        self.model = model
        self.X_train = data['X_train']
        self.y_train = data['y_train']
        self.X_val = data['X_val']
        self.y_val = data['y_val']

        self.update_rule = kwargs.pop('update_rule', 'sgd_momentum')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)

        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)

        if len(kwargs) > 0:
            extra = ', '.join('"%s"' % k for k in kwargs.keys())
            raise ValueError('Unrecognized arguments %s' % extra)

        # if not hasattr(optim, self.update_rule):
        #     raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        # self.update_rule = getattr(optim, self.update_rule)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this
        manually.
        """
        # Set up some variables for book-keeping
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        self.optim_configs = {}
        for p in self.model.params:
            d = {k: v for k, v in self.optim_config.items()}
            self.optim_configs[p] = d

    def _step(self):
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)

        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = sgd_momentum(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]

        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1

        y_pred = []

        for i in range(int(num_batches)):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        acc = np.mean(y_pred == y)

        return acc


    def train(self):
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(int(num_iterations)):
            self._step()

            if self.verbose and t % self.print_every == 0:
                print('Iteration {:d} / {:d}, loss: {:f}'.format(t+1, num_iterations, self.loss_history[-1]))

            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]['learning_rate'] *= self.lr_decay

            first_it = (t == 0)
            last_it = (t == num_iterations - 1)
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train, num_samples=10)
                val_acc = self.check_accuracy(self.X_val, self.y_val)

                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)

                if self.verbose:
                    print('Epoch {:d} / {:d}, train_acc: {:f}, val_acc: {:f}'.format(self.epoch, self.num_epochs, train_acc, val_acc))

                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k,v in self.model.params.items():
                        self.best_params[k] = v.copy()

        self.model.params = self.best_params
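
Before using the framework for training, it can be worth sanity-checking the backward pass. Below is a minimal numerical gradient check (not part of the original code; it assumes the framework above is saved as layers.py): perturb one entry of W1 and compare the analytic gradient returned by model.loss against a centered finite difference.

import numpy as np
import layers

X = np.array([[0, 0], [0, 1], [1, 1], [1, 0]], dtype=np.float64)
y = np.array([0, 1, 0, 1])

model = layers.multi_layer_net(hidden_dim=[2], input_dim=2, num_classes=2, dtype=np.float64)
loss, grads = model.loss(X, y)

# Centered finite difference on a single entry of W1.
h = 1e-5
W1 = model.params['W1']
old_value = W1[0, 0]
W1[0, 0] = old_value + h
loss_plus, _ = model.loss(X, y)
W1[0, 0] = old_value - h
loss_minus, _ = model.loss(X, y)
W1[0, 0] = old_value

numeric = (loss_plus - loss_minus) / (2 * h)
print('analytic: {:f}, numeric: {:f}'.format(grads['W1'][0, 0], numeric))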

Training and testing code:

import layers
import numpy as np
import matplotlib.pyplot as plt

small_data = {
  'X_train': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
  'y_train': np.array([0, 1, 0, 1]),
  'X_val': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
  'y_val': np.array([0, 1, 0, 1]),
}

learning_rate = 0.2
reg = 0.0
model = layers.multi_layer_net(hidden_dim=[2,2], input_dim=2, num_classes=2, reg=reg, dtype=np.float64)
solver = layers.Solver(model, small_data,
                       print_every=1, num_epochs=5000, batch_size=4,
                       update_rule='sgd_momentum',
                       optim_config={'learning_rate': learning_rate})
solver.train()
print(model.params)
best_model = model

# plt.plot(solver.loss_history, 'o')
# plt.title('Training loss history')
# plt.xlabel('Iteration')
# plt.ylabel('Training loss')
# plt.show()


# x_ = [x_1, x_2]
# x_ = np.array(x_)
# x_ = x_.T
# print(x_.shape)
# # print(x_[20])
# test_pred = np.argmax(best_model.loss(x_), axis=1)
# print(test_pred)
x_1 = np.arange(0, 1, 0.01)
x_2 = np.arange(0, 1, 0.01)
x_test = np.zeros((len(x_1)*len(x_2), 2))
print(x_test.shape)
index = 0
for i in range(len(x_1)):
    for j in range(len(x_2)):
        x_test[int(index), 0] = x_1[int(i)]
        x_test[int(index), 1] = x_2[int(j)]
        index += 1
print(x_test[0])
print(x_test[903])
print(x_test[5203])

test_pred = np.argmax(best_model.loss(x_test), axis=1)
print(test_pred)

from mpl_toolkits.mplot3d import Axes3D

x_1, x_2 = np.meshgrid(x_1, x_2)
figure = plt.figure()
ax = Axes3D(figure)
test_pred = test_pred.reshape(len(x_1), len(x_2))
ax.plot_surface(x_1, x_2, test_pred, rstride=1, cstride=1, cmap='rainbow')
plt.show()

Prediction result plot

Some additional notes
1. The code does not always reach 100% accuracy.
2. If the number of iterations is too small, the accuracy will also be low; it usually takes 3000 or more epochs to reach 100% accuracy (a quick check is sketched below).
3. Some of the hyperparameters also have a noticeable effect on the accuracy.
4. This experiment is a useful exercise for understanding basic neural networks.
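
As a quick way to see whether a particular run has converged, the four training points can be re-checked after training (a small sketch, assuming the training script above has already produced best_model):

X_xor = np.array([[0, 0], [0, 1], [1, 1], [1, 0]])
y_xor = np.array([0, 1, 0, 1])
pred = np.argmax(best_model.loss(X_xor), axis=1)
print('accuracy on the four XOR points:', np.mean(pred == y_xor))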