計算訓練集,測試集的距離
阿新 • • 發佈:2019-01-01
計算測試集中每個資料與訓練集中每個資料的距離(之後為測試集中每個資料找出訓練集中離它距離最小的 k 個)。
建議使用第二種方法(compute_distances_no_loops):以向量化方式計算距離,效率較高。
def compute_distances_two_loops(self, X):
    """
    Compute the Euclidean distance between each test point in X and each
    training point in self.X_train using an explicit double loop.

    This is the straightforward reference implementation; it visits every
    (test, train) pair individually.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data
      (e.g. (500, 3072)). self.X_train holds the training data with shape
      (num_train, D) (e.g. (5000, 3072)).

    Returns:
    - dists: A float64 numpy array of shape (num_test, num_train)
      (e.g. (500, 5000)) where dists[i, j] is the Euclidean distance between
      the ith test point and the jth training point.
    """
    # Work in float64 so that integer inputs (e.g. uint8 image pixels) cannot
    # wrap around when subtracted or squared.
    test = np.asarray(X, dtype=np.float64)
    train = np.asarray(self.X_train, dtype=np.float64)

    num_test = test.shape[0]
    num_train = train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        for j in range(num_train):
            # L2 distance: square root of the summed squared differences.
            dists[i, j] = np.sqrt(np.sum(np.square(test[i] - train[j])))
    return dists
def compute_distances_no_loops(self, X):
    """
    Compute the Euclidean distance between each test point in X and each
    training point in self.X_train using no explicit loops.

    Uses the expansion ||x - y||^2 = ||x||^2 - 2 * x.y + ||y||^2 so that the
    whole distance matrix comes from one matrix product plus two broadcast
    additions.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data.
      self.X_train holds the training data with shape (num_train, D).

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth
      training point.
    """
    test = np.asarray(X)
    train = np.asarray(self.X_train)
    # Integer inputs (e.g. uint8 image pixels) would wrap around inside the
    # products below, so promote anything that is not already floating point.
    if not np.issubdtype(test.dtype, np.inexact):
        test = test.astype(np.float64)
    if not np.issubdtype(train.dtype, np.inexact):
        train = train.astype(np.float64)

    # Cross term x.y for every (test, train) pair: (num_test, num_train).
    cross = np.dot(test, train.T)
    # Squared norms per row, computed directly rather than via the diagonal
    # of a full Gram matrix, and shaped so they broadcast against `cross`.
    test_sq = np.sum(np.square(test), axis=1)[:, np.newaxis]     # (num_test, 1)
    train_sq = np.sum(np.square(train), axis=1)[np.newaxis, :]   # (1, num_train)

    distance_square = test_sq - 2 * cross + train_sq
    # Floating-point rounding can leave tiny negative values for (near-)
    # identical points; clamp them so the square root never yields NaN.
    distance_square = np.maximum(distance_square, 0)
    return np.sqrt(distance_square)