Assignment 2 | 斯坦福CS231n-深度學習與計算機視覺課程
該筆記是以斯坦福cs231n課程的python程式設計任務為主線,展開對該課程主要內容的理解和部分數學推導。這篇文章是第二篇。
CS231n簡介
CS231n的全稱是CS231n: Convolutional Neural Networks for Visual Recognition,即面向視覺識別的卷積神經網路。該課程是斯坦福大學計算機視覺實驗室推出的課程。需要注意的是,目前大家說CS231n,大都指的是2016年冬季學期(一月到三月)的最新版本。
課程描述 Information 計算機視覺在社會中已經逐漸普及,並廣泛運用於搜尋檢索、影象理解、手機應用、地圖導航、醫療製藥、無人機和無人駕駛汽車等領域。而這些應用的核心技術就是影象分類、影象定位和影象探測等視覺識別任務。近期神經網路(也就是“深度學習”)方法上的進展極大地提升了這些代表當前發展水平的視覺識別系統的效能。 本課程將深入講解深度學習框架的細節問題,聚焦面向視覺識別任務(尤其是影象分類任務)的端到端學習模型。在10周的課程中,學生們將會學習如何實現、訓練和除錯他們自己的神經網路,並建立起對計算機視覺領域的前沿研究方向的細節理解。最終的作業將包括訓練一個有幾百萬引數的卷積神經網路,並將其應用到最大的影象分類資料庫(ImageNet)上。我們將會聚焦於教授如何確定影象識別問題,學習演算法(比如反向傳播演算法),對網路的訓練和精細調整(fine-tuning)中的工程實踐技巧,指導學生動手完成課程作業和最終的課程專案。
視訊入口
Assignment 2
02
Python程式設計任務(線性分類器)
· 我用的IDE是PyCharm。
· Assignment 1的線性分類器部分,我們需要完成linear_svm.py、softmax.py和linear_classifier.py。完成後,你可以用svm.ipynb和softmax.ipynb裡的程式碼來除錯(debug)你的模型,獲得最優模型,然後在測試集上測試分類水平。
· Assignment 1用的影象庫是CIFAR-10,你也可以從這裡下載。
linear_svm.py程式碼如下:
__coauthor__ = 'Deeplayer'
# 5.19.2016
import numpy as np


def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # Every violated margin pulls the correct-class column
                # towards X[i] and pushes the offending class away from it.
                dW[:, y[i]] += -X[i, :]  # correct_class gradient
                dW[:, j] += X[i, :]      # wrong_class gradient

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train

    # Add regularization to the loss (0.5 * reg * ||W||^2, so d/dW = reg * W).
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W
    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)                                        # N by C
    scores_correct = scores[rows, y].reshape(num_train, 1)   # N by 1
    margins = scores - scores_correct + 1.0                  # N by C
    margins[rows, y] = 0.0       # the correct class contributes no margin
    margins[margins <= 0] = 0.0  # hinge: max(0, margin)

    loss = np.sum(margins) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # compute the gradient: turn margins into an indicator matrix, then put
    # minus the number of violated margins in each row's correct-class slot.
    margins[margins > 0] = 1.0
    row_sum = np.sum(margins, axis=1)                        # (N,)
    margins[rows, y] = -row_sum
    dW = np.dot(X.T, margins) / num_train + reg * W          # D by C
    return loss, dW
softmax.py程式碼如下:
__coauthor__ = 'Deeplayer'
# 5.19.2016
import numpy as np


def _stable_log_softmax(f):
    """Return the row-wise log-softmax of the score matrix f (N by C)."""
    # Subtracting the row maximum keeps exp() from overflowing; the sum of
    # exponentials is then >= 1, so its log is always finite.
    shifted = f - np.max(f, axis=1, keepdims=True)
    return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))


def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)  # D by C
    num_train = X.shape[0]
    num_class = W.shape[1]

    log_prob = _stable_log_softmax(X.dot(W))  # N by C
    prob = np.exp(log_prob)                   # N by C

    for i in range(num_train):
        # Only the true class enters the cross-entropy. Multiplying a one-hot
        # mask by log(prob) would give 0 * -inf = nan whenever a wrong-class
        # probability underflows to zero.
        loss -= log_prob[i, y[i]]
        for j in range(num_class):
            indicator = 1.0 if j == y[i] else 0.0
            dW[:, j] += (prob[i, j] - indicator) * X[i, :]

    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W
    return loss, dW


def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    dW = np.zeros_like(W)  # D by C
    num_train = X.shape[0]
    rows = np.arange(num_train)

    log_prob = _stable_log_softmax(X.dot(W))  # N by C

    # Index the true-class log-probabilities directly instead of masking the
    # whole matrix, which avoids 0 * -inf = nan for underflowed entries.
    loss = -np.sum(log_prob[rows, y]) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # d(loss)/d(scores) = prob - one_hot(y)
    dscores = np.exp(log_prob)
    dscores[rows, y] -= 1.0
    dW += np.dot(X.T, dscores) / num_train + reg * W
    return loss, dW
linear_classifier.py程式碼如下:
__coauthor__ = 'Deeplayer'
# 5.19.2016
from linear_svm import *
from softmax import *
class LinearClassifier(object):
    """Linear classifier trained with mini-batch SGD.

    Subclasses provide the objective by overriding loss().
    """

    def __init__(self):
        # Weight matrix of shape (D, C); created lazily by train().
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=True):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        num_train, dim = X.shape
        # assume y takes values 0...K-1 where K is number of classes
        num_classes = np.max(y) + 1
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)  # D by C

        # Run stochastic gradient descent (Mini-Batch) to optimize W
        loss_history = []
        for it in range(num_iters):
            # Sampling with replacement is faster than sampling without
            # replacement, and it also works when batch_size > num_train
            # (replace=False raises ValueError in that case).
            sample_index = np.random.choice(num_train, batch_size, replace=True)
            X_batch = X[sample_index, :]  # batch_size by D
            y_batch = y[sample_index]     # (batch_size,)

            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            self.W += -learning_rate * grad

            if verbose and it % 100 == 0:
                print('Iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Note that, unlike train(), this method takes the data column-wise.

        Inputs:
        - X: D x N array of training data. Each column is a D-dimensional point.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the
          predicted class.
        """
        # (C, D) x (D, N) -> (C, N) scores; pick the best class per column.
        return np.argmax(np.dot(self.W.T, X), axis=0)

    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W

        Raises:
        - NotImplementedError: always, in this base class.
        """
        # Fail loudly instead of returning None, which train() cannot unpack.
        raise NotImplementedError('Subclasses must override loss()')
class LinearSVM(LinearClassifier):
    """
    A subclass that uses the Multiclass SVM loss function
    """

    def loss(self, X_batch, y_batch, reg):
        # Delegate to the vectorized SVM loss using the current weights.
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


class Softmax(LinearClassifier):
    """
    A subclass that uses the Softmax + Cross-entropy loss function
    """

    def loss(self, X_batch, y_batch, reg):
        # Delegate to the vectorized softmax loss using the current weights.
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
下面我貼一下微調超引數獲得最優模型的程式碼,並給出一些執行結果和圖:
1、 LinearClassifier_svm_start.py
__coauthor__ = 'Deeplayer'
# 5.20.2016
import numpy as np
import matplotlib.pyplot as plt
import math
from linear_classifier import *
from data_utils import load_CIFAR10

# Load the raw CIFAR-10 data.
cifar10_dir = 'E:/PycharmProjects/ML/CS231n/cifar-10-batches-py'  # u should change this
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# As a sanity check, we print out the size of the training and test data.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Training data shape:  %s' % (X_train.shape,))    # (50000,32,32,3)
print('Training labels shape:  %s' % (y_train.shape,))  # (50000,)
print('Test data shape:  %s' % (X_test.shape,))         # (10000,32,32,3)
print('Test labels shape:  %s' % (y_test.shape,))       # (10000,)

# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Subplots are numbered row-major: one column per class.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()

# Split the data into train, val, and test sets.
num_training = 49000
num_validation = 1000
num_test = 1000

mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]    # (1000,32,32,3)
y_val = y_train[mask]    # (1000,)
mask = range(num_training)
X_train = X_train[mask]  # (49000,32,32,3)
y_train = y_train[mask]  # (49000,)
mask = range(num_test)
X_test = X_test[mask]    # (1000,32,32,3)
y_test = y_test[mask]    # (1000,)

# Preprocessing1: reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))  # (49000,3072)
X_val = np.reshape(X_val, (X_val.shape[0], -1))        # (1000,3072)
X_test = np.reshape(X_test, (X_test.shape[0], -1))     # (1000,3072)

# Preprocessing2: subtract the mean image (computed on the training set only)
mean_image = np.mean(X_train, axis=0)  # (3072,)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image

# Visualize the mean image
plt.figure(figsize=(4, 4))
plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))
plt.show()

# Bias trick, extending the data
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])  # (49000,3073)
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])        # (1000,3073)
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])     # (1000,3073)

# Use the validation set to tune hyperparameters (regularization strength
# and learning rate).
# NOTE(review): with lr=5e-5 and reg>=5e4 the L2 term alone scales W by
# (1 - lr * reg) <= -1.5 per step, so those runs likely diverge - confirm
# whether 5e-7 was intended.
learning_rates = [1e-7, 5e-5]
regularization_strengths = [5e4, 1e5]

results = {}
best_val = -1    # The highest validation accuracy that we have seen so far.
best_svm = None  # The LinearSVM object that achieved the highest validation rate.
iters = 1500
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        # predict() expects column-wise data (D x N), hence the transposes.
        Tr_pred = svm.predict(X_train.T)
        acc_train = np.mean(y_train == Tr_pred)
        Val_pred = svm.predict(X_val.T)
        acc_val = np.mean(y_val == Val_pred)
        results[(lr, rs)] = (acc_train, acc_val)
        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# print results
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' %
          (lr, reg, train_accuracy, val_accuracy))
print('Best validation accuracy achieved during validation: %f' %
      best_val)  # around 38.2%

# Visualize the learned weights for each class
w = best_svm.W[:-1, :]  # strip out the bias
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
for i in range(10):
    plt.subplot(2, 5, i + 1)
    # Rescale the weights to be between 0 and 255
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
plt.show()

# Evaluate the best svm on test set
Ts_pred = best_svm.predict(X_test.T)
test_accuracy = np.mean(y_test == Ts_pred)  # around 37.1%
print('LinearSVM on raw pixels of CIFAR-10 final test set accuracy: %f' % test_accuracy)
下面視覺化一下部分原始圖片、均值影象和學習到的權重:
figure_1.png
figure_2.png
figure_3.png
2、 LinearClassifier_softmax_start.py
__coauthor__ = 'Deeplayer'
# 5.20.2016
import numpy as np
from data_utils import load_CIFAR10
from linear_classifier import *
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     cifar10_dir='E:/PycharmProjects/ML/CS231n/cifar-10-batches-py'):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the SVM,
    but condensed to a single function.

    Inputs:
    - num_training: number of leading training samples kept for training.
    - num_validation: number of samples right after the training split that
      are used for validation.
    - num_test: number of leading test samples kept.
    - cifar10_dir: directory passed to load_CIFAR10; change the default to
      match your machine.

    Returns a tuple (X_train, y_train, X_val, y_val, X_test, y_test). Each X
    has one flattened, mean-subtracted image per row with a trailing bias
    column of ones.
    """
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # subsample the data (validation first, before X_train is truncated)
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # subtract the mean image (computed on the training split only)
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # add bias dimension; the data stays row-wise (one sample per row)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test
# Invoke the above function to get our data.
# get_CIFAR10_data() returns six arrays; unpacking eight names (X_dev, y_dev)
# raised ValueError, and the dev split is not used by this script.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

# Use the validation set to tune hyperparameters (regularization strength
# and learning rate).
results = {}
best_val = -1        # highest validation accuracy seen so far
best_softmax = None  # the Softmax object that achieved it
learning_rates = [1e-7, 5e-7]
regularization_strengths = [5e4, 1e4]
iters = 1500
for lr in learning_rates:
    for rs in regularization_strengths:
        softmax = Softmax()
        softmax.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        # predict() expects column-wise data (D x N), hence the transposes.
        Tr_pred = softmax.predict(X_train.T)
        acc_train = np.mean(y_train == Tr_pred)
        Val_pred = softmax.predict(X_val.T)
        acc_val = np.mean(y_val == Val_pred)
        results[(lr, rs)] = (acc_train, acc_val)
        if best_val < acc_val:
            best_val = acc_val
            best_softmax = softmax

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' %
          (lr, reg, train_accuracy, val_accuracy))
# around 38.9%
print('best validation accuracy achieved during cross-validation: %f' % best_val)

# Evaluate the best softmax on test set.
Ts_pred = best_softmax.predict(X_test.T)
test_accuracy = np.mean(y_test == Ts_pred)  # around 37.4%
print('Softmax on raw pixels of CIFAR-10 final test set accuracy: %f' % test_accuracy)
最後以SVM為例,比較一下向量化和非向量化程式設計在運算速度上的差異:
--> naive_vs_vectorized.py
__coauthor__ = 'Deeplayer'
# 5.20.2016
import time
from linear_svm import *
from data_utils import load_CIFAR10
def get_CIFAR10_data(num_training=49000, num_dev=500,
                     cifar10_dir='E:/PycharmProjects/ML/CS231n/cifar-10-batches-py'):
    """
    Load CIFAR-10 and return a small preprocessed development set.

    Inputs:
    - num_training: number of leading training samples to draw from.
    - num_dev: number of samples picked at random (without replacement) from
      those first num_training samples.
    - cifar10_dir: directory passed to load_CIFAR10; change the default to
      match your machine.

    Returns a tuple (X_dev, y_dev). X_dev has one flattened image per row,
    with the training mean image subtracted and a trailing bias column of ones.
    """
    # Load the raw CIFAR-10 data; the test split is not needed here.
    X_train, y_train, _, _ = load_CIFAR10(cifar10_dir)

    mask = range(num_training)
    X_train = X_train[mask]

    # Indices are < num_training, so they are valid for the untruncated
    # y_train as well.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Flatten each image into a row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Center the dev set with the mean of the training subset.
    mean_image = np.mean(X_train, axis=0)
    X_dev -= mean_image

    # Bias trick: append a column of ones.
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
    return X_dev, y_dev
X_dev, y_dev = get_CIFAR10_data()

# generate a random SVM weight matrix of small numbers; the row count follows
# the data (3073 for CIFAR-10 with the bias column), 10 is the class count.
W = np.random.randn(X_dev.shape[1], 10) * 0.0001

# Time the loop-based implementation.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Naive loss and gradient: computed in %fs' % (toc - tic))  # around 0.198s

# Time the vectorized implementation on the same inputs.
tic = time.time()
loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))  # around 0.005s