import numpy as np
from random import shuffle
#from past.builtins import xrange
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W

    W is only read, never modified.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    num_class = W.shape[1]

    for i in range(num_train):
        # Shift the scores so the largest is 0; exp() can then never overflow.
        scores = X[i].dot(W)
        scores = scores - np.max(scores)
        log_norm = np.log(np.sum(np.exp(scores)))

        # Cross-entropy of example i is -log p[y[i]] = log_norm - scores[y[i]].
        # Working in log space avoids 0 * log(0) = nan when a non-target
        # probability underflows to zero.
        loss += log_norm - scores[y[i]]

        for j in range(num_class):
            # d(loss_i)/d(score_j) = p_j - 1[j == y[i]]
            dscore = np.exp(scores[j] - log_norm)
            if j == y[i]:
                dscore -= 1.0
            dW[:, j] += dscore * X[i]

    # Average the data term over the minibatch, then add the L2 penalty
    # 0.5 * reg * ||W||^2 and its gradient reg * W.
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W

    return loss, dW
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels, 0 <= y[i] < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    rows = np.arange(num_train)

    # Shift each row so its maximum is 0; exp() can then never overflow.
    scores = X.dot(W)
    scores = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    sum_exp = np.sum(exp_scores, axis=1, keepdims=True)

    # Log-probabilities are computed directly and only the target entries are
    # read, so an underflowed non-target probability cannot produce
    # 0 * log(0) = nan in the loss.
    log_prob = scores - np.log(sum_exp)
    loss += -np.sum(log_prob[rows, y]) / num_train + 0.5 * reg * np.sum(W * W)

    # d(loss)/d(scores) = softmax probabilities minus the one-hot labels.
    dscores = exp_scores / sum_exp
    dscores[rows, y] -= 1.0
    dW += X.T.dot(dscores) / num_train + reg * W

    return loss, dW
# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
from cs231n.classifiers import Softmax
# Grid search over learning rate and regularization strength.
# NOTE(review): Softmax, X_train, y_train, X_val and y_val are not defined in
# this part of the file; they are expected to exist before this code runs.
#
# results maps (learning_rate, reg) -> (train_accuracy, val_accuracy).
results = {}
best_val = -1  # highest validation accuracy seen so far
best_softmax = None  # the classifier that achieved best_val
learning_rates = [1e-7, 5e-7]
regularization_strengths = [2.5e4, 5e4]

# Use the validation set to set the learning rate and regularization strength.
# The best trained softmax classifier is kept in best_softmax.
#
# Each [low, high] range above is sampled at `low + k * (high - low) / n` for
# k = 0 .. n-1, so the upper bound itself is never tried.
num_splt_lr = 3
num_splt_rs = 8
for i in range(num_splt_lr):
    learning_rate_ij = learning_rates[0] + i * (learning_rates[1] - learning_rates[0]) / num_splt_lr
    for j in range(num_splt_rs):
        reg_ij = regularization_strengths[0] + j * (regularization_strengths[1] - regularization_strengths[0]) / num_splt_rs

        # Train a fresh classifier for every (learning rate, reg) pair.
        softmax = Softmax()
        loss_hist = softmax.train(X_train, y_train, learning_rate=learning_rate_ij, reg=reg_ij,
                                  num_iters=1500, verbose=False)

        y_train_pred = softmax.predict(X_train)
        accuracy_train = np.mean(y_train == y_train_pred)
        y_val_pred = softmax.predict(X_val)
        accuracy_val = np.mean(y_val == y_val_pred)

        results[(learning_rate_ij, reg_ij)] = (accuracy_train, accuracy_val)

        # Model selection uses validation accuracy only.
        if accuracy_val > best_val:
            best_val = accuracy_val
            best_softmax = softmax

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)
