C# SMTP傳送郵件
一、程式碼練習
- 完善HybridSN高光譜分類網路
HybridSN 高光譜分類
S. K. Roy, G. Krishna, S. R. Dubey, B. B. Chaudhuri HybridSN: Exploring 3-D–2-D CNN Feature Hierarchy for Hyperspectral Image Classification, IEEE GRSL 2020
這篇論文構建了一個 混合網路 解決高光譜影象分類問題,首先用 3D卷積,然後使用 2D卷積,程式碼相對簡單,下面是程式碼的解析。
首先取得資料,並引入基本函式庫。
! wget http://www.ehu.eus/ccwintco/uploads/6/67/Indian_pines_corrected.mat
! wget http://www.ehu.eus/ccwintco/uploads/c/c4/Indian_pines_gt.mat
! pip install spectral
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, cohen_kappa_score
import spectral
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
1. 定義 HybridSN 類
模型的網路結構為如下圖所示:
下面是 HybridSN 類的程式碼:
class_num = 16


class HybridSN(nn.Module):
    """HybridSN classifier: three 3-D convolutions, one 2-D convolution, three dense layers.

    The layer widths are sized for inputs of shape (batch, 1, 30, 25, 25):
    the 3-D stack yields (32, 18, 19, 19), which is folded into 576 2-D
    channels, and the 2-D convolution yields 64 * 17 * 17 = 18496 features
    for ``fc1``.

    Args:
        num_classes: Width of the final linear layer (number of output
            logits). Defaults to the module-level ``class_num``.
    """

    def __init__(self, num_classes=class_num):
        super(HybridSN, self).__init__()
        self.conv3d_1 = nn.Sequential(
            nn.Conv3d(1, 8, kernel_size=(7, 3, 3), stride=1, padding=0),
            nn.BatchNorm3d(8),
            nn.ReLU(inplace=True),
        )
        self.conv3d_2 = nn.Sequential(
            nn.Conv3d(8, 16, kernel_size=(5, 3, 3), stride=1, padding=0),
            nn.BatchNorm3d(16),
            nn.ReLU(inplace=True),
        )
        self.conv3d_3 = nn.Sequential(
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=0),
            nn.BatchNorm3d(32),
            nn.ReLU(inplace=True),
        )
        self.conv2d_4 = nn.Sequential(
            nn.Conv2d(576, 64, kernel_size=(3, 3), stride=1, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.fc1 = nn.Linear(18496, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a 5-D input batch."""
        out = self.conv3d_1(x)
        out = self.conv3d_2(out)
        out = self.conv3d_3(out)
        # Fold (channels, depth) into one 2-D channel axis. The spatial size is
        # read from the tensor instead of being hard-coded, so an unexpected
        # patch size fails loudly at fc1 rather than being silently re-folded.
        batch, _, _, height, width = out.shape
        out = self.conv2d_4(out.reshape(batch, -1, height, width))
        out = out.reshape(batch, -1)
        out = F.relu(self.dropout(self.fc1(out)))
        out = F.relu(self.dropout(self.fc2(out)))
        out = self.fc3(out)
        return out
def applyPCA(X, numComponents):
    """Reduce the last axis of a 3-D cube to ``numComponents`` whitened PCA components.

    The cube is flattened to a 2-D matrix whose columns are the entries of
    the last axis, a whitening PCA is fitted on it, and the projection is
    folded back to the original first two dimensions.

    Args:
        X: Array indexed as ``X[row, col, band]``.
        numComponents: Number of principal components to keep.

    Returns:
        Array of shape ``(X.shape[0], X.shape[1], numComponents)``.
    """
    rows, cols, bands = X.shape[0], X.shape[1], X.shape[2]
    # One sample per pixel, one feature per band.
    pixels = np.reshape(X, (-1, bands))
    reducer = PCA(n_components=numComponents, whiten=True)
    projected = reducer.fit_transform(pixels)
    return np.reshape(projected, (rows, cols, numComponents))
def padWithZeros(X, margin=2):
    """Surround the first two axes of a 3-D array with a zero border.

    Patches cannot be centred on border pixels unless the image is padded
    first, so ``margin`` zero rows/columns are added on every side.

    Args:
        X: Array indexed as ``X[row, col, band]``.
        margin: Border width added on each side of the first two axes.

    Returns:
        A new float array of shape
        ``(X.shape[0] + 2 * margin, X.shape[1] + 2 * margin, X.shape[2])``
        holding ``X`` in its centre and zeros elsewhere.
    """
    rows, cols, bands = X.shape[0], X.shape[1], X.shape[2]
    padded = np.zeros((rows + 2 * margin, cols + 2 * margin, bands))
    # Region of the padded array that receives the original data.
    interior = (
        slice(margin, margin + rows),
        slice(margin, margin + cols),
        slice(None),
    )
    padded[interior] = X
    return padded
def createImageCubes(X, y, windowSize=5, removeZeroLabels = True):
    """Extract a square patch around pixels of ``X`` together with their labels.

    Patches are produced in row-major pixel order. When ``removeZeroLabels``
    is true, only pixels whose label is greater than zero are extracted and
    the remaining labels are shifted down by one so that they start at 0.
    Patches are allocated only for the pixels that are kept, so unlabelled
    pixels cost no memory.

    Args:
        X: Array indexed as ``X[row, col, band]``.
        y: 2-D label map with the same first two dimensions as ``X``.
        windowSize: Side length of each patch; must be a positive odd
            integer so that the patch has a centre pixel.
        removeZeroLabels: Drop pixels labelled 0 and shift labels by -1.

    Returns:
        ``(patchesData, patchesLabels)`` where ``patchesData`` has shape
        ``(n, windowSize, windowSize, X.shape[2])`` and ``patchesLabels`` is a
        float array of length ``n``.

    Raises:
        ValueError: If ``windowSize`` is not a positive odd integer or the
            label map does not match the spatial shape of ``X``.
    """
    if windowSize < 1 or windowSize % 2 == 0:
        raise ValueError(
            "windowSize must be a positive odd integer, got %r" % (windowSize,)
        )
    labelMap = np.asarray(y)
    if labelMap.shape != tuple(X.shape[:2]):
        raise ValueError(
            "label map shape %r does not match image shape %r"
            % (labelMap.shape, tuple(X.shape[:2]))
        )

    # Pad X so that border pixels also get a full window.
    margin = (windowSize - 1) // 2
    zeroPaddedX = padWithZeros(X, margin=margin)

    # Decide which pixels to keep *before* allocating any patch storage.
    if removeZeroLabels:
        keep = labelMap > 0
    else:
        keep = np.ones(labelMap.shape, dtype=bool)
    # argwhere and boolean-mask indexing both walk the map in row-major
    # order, so patches and labels stay aligned.
    centers = np.argwhere(keep)

    patchesData = np.zeros((centers.shape[0], windowSize, windowSize, X.shape[2]))
    for patchIndex, (r, c) in enumerate(centers):
        # Pixel (r, c) sits at (r + margin, c + margin) in the padded image,
        # so its window starts at (r, c) there.
        patchesData[patchIndex] = zeroPaddedX[r:r + windowSize, c:c + windowSize]

    patchesLabels = labelMap[keep].astype(np.float64)
    if removeZeroLabels:
        patchesLabels -= 1
    return patchesData, patchesLabels
def splitTrainTestSet(X, y, testRatio, randomState=345):
    """Split samples and labels into stratified train and test subsets.

    Args:
        X: Samples.
        y: Labels; also used as the stratification key.
        testRatio: Passed to ``train_test_split`` as ``test_size``.
        randomState: Seed passed to ``train_test_split`` as ``random_state``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(
        X,
        y,
        test_size=testRatio,
        random_state=randomState,
        stratify=y,
    )
    return tuple(parts)
2.下面讀取並建立資料集
# Number of land-cover classes
class_num = 16
X = sio.loadmat('Indian_pines_corrected.mat')['indian_pines_corrected']
y = sio.loadmat('Indian_pines_gt.mat')['indian_pines_gt']
# Fraction of the samples held out for testing
test_ratio = 0.90
# Side length of the patch extracted around each pixel
patch_size = 25
# Number of principal components kept by the PCA reduction
pca_components = 30
print('Hyperspectral data shape: ', X.shape)
print('Label shape: ', y.shape)
print('\n... ... PCA tranformation ... ...')
X_pca = applyPCA(X, numComponents=pca_components)
print('Data shape after PCA: ', X_pca.shape)
print('\n... ... create data cubes ... ...')
# From here on X_pca holds the per-pixel patches and y the matching labels
X_pca, y = createImageCubes(X_pca, y, windowSize=patch_size)
print('Data cube X shape: ', X_pca.shape)
print('Data cube y shape: ', y.shape)
print('\n... ... create train & test data ... ...')
Xtrain, Xtest, ytrain, ytest = splitTrainTestSet(X_pca, y, test_ratio)
print('Xtrain shape: ', Xtrain.shape)
print('Xtest shape: ', Xtest.shape)
# Reshape Xtrain / Xtest by appending a trailing singleton axis
# (the original note says this matches the Keras layout)
Xtrain = Xtrain.reshape(-1, patch_size, patch_size, pca_components, 1)
Xtest = Xtest.reshape(-1, patch_size, patch_size, pca_components, 1)
print('before transpose: Xtrain shape: ', Xtrain.shape)
print('before transpose: Xtest shape: ', Xtest.shape)
# Reorder the axes to (sample, 1, components, patch, patch) to suit PyTorch
Xtrain = Xtrain.transpose(0, 4, 3, 1, 2)
Xtest = Xtest.transpose(0, 4, 3, 1, 2)
print('after transpose: Xtrain shape: ', Xtrain.shape)
print('after transpose: Xtest shape: ', Xtest.shape)
""" Training dataset"""
class TrainDS(torch.utils.data.Dataset):
    """Dataset wrapping the module-level ``Xtrain`` / ``ytrain`` arrays as tensors."""

    def __init__(self):
        # Sample count is taken from the first axis of the training array.
        self.len = Xtrain.shape[0]
        features = torch.FloatTensor(Xtrain)
        targets = torch.LongTensor(ytrain)
        self.x_data = features
        self.y_data = targets

    def __len__(self):
        """Return the number of training samples."""
        return self.len

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        sample = self.x_data[index]
        target = self.y_data[index]
        return sample, target
""" Testing dataset"""
class TestDS(torch.utils.data.Dataset):
    """Dataset wrapping the module-level ``Xtest`` / ``ytest`` arrays as tensors."""

    def __init__(self):
        # Sample count is taken from the first axis of the test array.
        self.len = Xtest.shape[0]
        features = torch.FloatTensor(Xtest)
        targets = torch.LongTensor(ytest)
        self.x_data = features
        self.y_data = targets

    def __len__(self):
        """Return the number of test samples."""
        return self.len

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        sample = self.x_data[index]
        target = self.y_data[index]
        return sample, target
# Build the training and test data loaders: batches of 128 samples,
# shuffled for training and in fixed order for testing
trainset = TrainDS()
testset = TestDS()
train_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=128, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset=testset, batch_size=128, shuffle=False, num_workers=2)
3.開始訓練
# Use the first CUDA device when one is available, otherwise the CPU
# (in the notebook this is switched from the runtime menu:
# "Runtime tools" -> "Change runtime type").
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the network on the selected device, with its loss and optimizer.
net = HybridSN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Running sum of every batch loss seen so far, across all epochs.
total_loss = 0
for epoch in range(100):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Once per epoch: the cumulative loss divided by the epochs completed,
    # and the loss of the last batch.
    report = (epoch + 1, total_loss / (epoch + 1), loss.item())
    print('[Epoch: %d] [loss avg: %.4f] [current loss: %.4f]' % report)

print('Finished Training')
4.模型測試
# Put the network into evaluation mode before predicting.
net.eval()

# Predict the test set batch by batch. Gradients are not needed for
# inference, so autograd tracking is disabled for the whole loop.
batch_predictions = []
with torch.no_grad():
    for inputs, _ in test_loader:
        inputs = inputs.to(device)
        outputs = net(inputs)
        # Predicted class = index of the largest logit in each row.
        outputs = np.argmax(outputs.cpu().numpy(), axis=1)
        batch_predictions.append(outputs)

# Join the per-batch predictions once instead of growing an array per batch.
y_pred_test = np.concatenate(batch_predictions)

# Build and print the classification report.
classification = classification_report(ytest, y_pred_test, digits=4)
print(classification)
調整卷積順序的 HybridSN 變體:先做二維卷積,再做三維卷積(此版本尚未加入 SENet)
class HybridSN(nn.Module):
    """HybridSN variant that applies one 2-D convolution before three 3-D convolutions.

    The layer widths are sized for inputs of shape (batch, 1, 30, 25, 25);
    ``fn1`` expects the 32 * 52 * 17 * 17 = 480896 features that this input
    shape produces.
    """

    def __init__(self):
        super(HybridSN, self).__init__()
        # 2-D convolution first: (30, 25, 25) ==> (64, 23, 23)
        self.conv1_2d = nn.Conv2d(30, 64, (3, 3))
        self.relu1 = nn.ReLU()
        # Three 3-D convolutions; the 64 feature maps become the depth axis.
        # conv2: (1, 64, 23, 23), 8 kernels of 7x3x3 ==> (8, 58, 21, 21)
        self.conv2_3d = nn.Conv3d(1, 8, kernel_size=(7, 3, 3), stride=1, padding=0)
        self.relu2 = nn.ReLU()
        # conv3: (8, 58, 21, 21), 16 kernels of 5x3x3 ==> (16, 54, 19, 19)
        self.conv3_3d = nn.Conv3d(8, 16, kernel_size=(5, 3, 3), stride=1, padding=0)
        self.relu3 = nn.ReLU()
        # conv4: (16, 54, 19, 19), 32 kernels of 3x3x3 ==> (32, 52, 17, 17)
        self.conv4_3d = nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=0)
        self.relu4 = nn.ReLU()
        # Dense layers with 256 and 128 units, each followed by dropout (p=0.4),
        # then the 16-way output layer.
        self.fn1 = nn.Linear(480896, 256)
        self.fn2 = nn.Linear(256, 128)
        self.fn3 = nn.Linear(128, 16)
        self.drop = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return class logits of shape (batch, 16) for a 5-D input batch."""
        # Drop the singleton channel axis so the 2-D convolution can run.
        out = x.view(x.shape[0], x.shape[2], x.shape[3], x.shape[4])
        # relu1 was constructed but never applied; without it the 2-D and the
        # first 3-D convolution compose into a purely linear map.
        out = self.relu1(self.conv1_2d(out))
        # Re-insert a singleton channel axis: (64, 23, 23) --> (1, 64, 23, 23)
        out = out.view(out.shape[0], 1, out.shape[1], out.shape[2], out.shape[3])
        out = self.relu2(self.conv2_3d(out))
        out = self.relu3(self.conv3_3d(out))
        out = self.relu4(self.conv4_3d(out))
        # Flatten everything but the batch axis for the dense layers.
        out = out.view(out.shape[0], -1)
        # ReLU after each hidden dense layer, as in the baseline HybridSN;
        # otherwise the three linear layers collapse into a single one.
        out = F.relu(self.drop(self.fn1(out)))
        out = F.relu(self.drop(self.fn2(out)))
        out = self.fn3(out)
        return out


class_num = 16
class HybridSN(nn.Module):
    """HybridSN variant that applies one 2-D convolution before three 3-D convolutions.

    The layer widths are sized for inputs of shape (batch, 1, 30, 25, 25);
    ``fn1`` expects the 32 * 52 * 17 * 17 = 480896 features that this input
    shape produces.
    """

    def __init__(self):
        super(HybridSN, self).__init__()
        # 2-D convolution first: (30, 25, 25) ==> (64, 23, 23)
        self.conv1_2d = nn.Conv2d(30, 64, (3, 3))
        self.relu1 = nn.ReLU()
        # Three 3-D convolutions; the 64 feature maps become the depth axis.
        # conv2: (1, 64, 23, 23), 8 kernels of 7x3x3 ==> (8, 58, 21, 21)
        self.conv2_3d = nn.Conv3d(1, 8, kernel_size=(7, 3, 3), stride=1, padding=0)
        self.relu2 = nn.ReLU()
        # conv3: (8, 58, 21, 21), 16 kernels of 5x3x3 ==> (16, 54, 19, 19)
        self.conv3_3d = nn.Conv3d(8, 16, kernel_size=(5, 3, 3), stride=1, padding=0)
        self.relu3 = nn.ReLU()
        # conv4: (16, 54, 19, 19), 32 kernels of 3x3x3 ==> (32, 52, 17, 17)
        self.conv4_3d = nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=0)
        self.relu4 = nn.ReLU()
        # Dense layers with 256 and 128 units, each followed by dropout (p=0.4),
        # then the 16-way output layer.
        self.fn1 = nn.Linear(480896, 256)
        self.fn2 = nn.Linear(256, 128)
        self.fn3 = nn.Linear(128, 16)
        self.drop = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return class logits of shape (batch, 16) for a 5-D input batch."""
        # Drop the singleton channel axis so the 2-D convolution can run.
        out = x.view(x.shape[0], x.shape[2], x.shape[3], x.shape[4])
        # relu1 was constructed but never applied; without it the 2-D and the
        # first 3-D convolution compose into a purely linear map.
        out = self.relu1(self.conv1_2d(out))
        # Re-insert a singleton channel axis: (64, 23, 23) --> (1, 64, 23, 23)
        out = out.view(out.shape[0], 1, out.shape[1], out.shape[2], out.shape[3])
        out = self.relu2(self.conv2_3d(out))
        out = self.relu3(self.conv3_3d(out))
        out = self.relu4(self.conv4_3d(out))
        # Flatten everything but the batch axis for the dense layers.
        out = out.view(out.shape[0], -1)
        # ReLU after each hidden dense layer, as in the baseline HybridSN;
        # otherwise the three linear layers collapse into a single one.
        out = F.relu(self.drop(self.fn1(out)))
        out = F.relu(self.drop(self.fn2(out)))
        out = self.fn3(out)
        return out
- Hybrid中新增SENet
class SELayer(nn.Module):
    """Squeeze-and-Excitation channel attention for 4-D feature maps.

    Each channel is averaged to a single value, passed through a two-layer
    bottleneck ending in a sigmoid, and the resulting per-channel weight
    rescales the input.

    Args:
        channel: Number of channels of the input feature map.
        r: Reduction ratio; the bottleneck has ``round(channel / r)`` units,
            but never fewer than one.
    """

    def __init__(self, channel, r=16):
        super(SELayer, self).__init__()
        # Squeeze: adaptive average pooling down to 1x1 per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Clamp the bottleneck width to 1. With few channels
        # round(channel / r) is 0, which would give an empty hidden layer
        # and a gate that no longer depends on the input.
        hidden = max(1, round(channel / r))
        # Excitation: two fully connected layers producing weights in (0, 1).
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` rescaled channel-wise; the output has the shape of ``x``."""
        b, c, _, _ = x.size()
        weights = self.avg_pool(x).view(b, c)
        weights = self.fc(weights).view(b, c, 1, 1)
        return x * weights.expand_as(x)
class HybridSN(nn.Module):
    """HybridSN classifier with an SE layer after the 2-D convolution.

    The layer widths are sized for inputs of shape (batch, 1, 30, 25, 25):
    the 3-D stack yields (32, 18, 19, 19), which is folded into 576 2-D
    channels, and the 2-D convolution yields 64 * 17 * 17 = 18496 features
    for ``fc1``.

    Args:
        num_classes: Width of the final linear layer (number of output
            logits). Defaults to 16.
    """

    def __init__(self, num_classes=16):
        super(HybridSN, self).__init__()
        self.conv3d_1 = nn.Sequential(
            nn.Conv3d(1, 8, kernel_size=(7, 3, 3), stride=1, padding=0),
            nn.BatchNorm3d(8),
            nn.ReLU(inplace=True),
        )
        self.conv3d_2 = nn.Sequential(
            nn.Conv3d(8, 16, kernel_size=(5, 3, 3), stride=1, padding=0),
            nn.BatchNorm3d(16),
            nn.ReLU(inplace=True),
        )
        self.conv3d_3 = nn.Sequential(
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=0),
            nn.BatchNorm3d(32),
            nn.ReLU(inplace=True),
        )
        self.conv2d_4 = nn.Sequential(
            nn.Conv2d(576, 64, kernel_size=(3, 3), stride=1, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        # Channel attention over the 64 maps produced by conv2d_4.
        self.SElayer = SELayer(64, 16)
        self.fc1 = nn.Linear(18496, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a 5-D input batch."""
        out = self.conv3d_1(x)
        out = self.conv3d_2(out)
        out = self.conv3d_3(out)
        # Fold (channels, depth) into one 2-D channel axis. The spatial size is
        # read from the tensor instead of being hard-coded, so an unexpected
        # patch size fails loudly at fc1 rather than being silently re-folded.
        batch, _, _, height, width = out.shape
        out = self.conv2d_4(out.reshape(batch, -1, height, width))
        out = self.SElayer(out)
        out = out.reshape(batch, -1)
        out = F.relu(self.dropout(self.fc1(out)))
        out = F.relu(self.dropout(self.fc2(out)))
        out = self.fc3(out)
        return out
新增SENet模組後,模型的準確率有所提升,但是提升效果不明顯。
二、視訊學習
-
《語義分割中的自注意力機制和低秩重建》
語義分割是將標籤分配給影象中的畫素的過程。這與分類形成了鮮明的對比,在分類中,一個標籤被分配給整個圖片。語義分割將同一類的多個物件視為一個實體。
在《Fully convolutional networks for semantic segmentation》中提出了一種 end-to-end 的語義分割(semantic segmentation)方法,並提出了全卷積網路的概念:將 AlexNet 這類網路最後的全連線層轉換為卷積層,好處就是可以輸入任意 scale 的影象。只不過在輸出的 scale 不同的時候,feature map 的大小也不同;因為這裡的目的是做 pixel 級的語義分割,所以這一點其實不重要。在 AlexNet 基礎上,最後的 channel=4096 的 feature map 經過一個 1x1 的卷積層,變為 channel=21 的 feature map,然後經過上取樣和 crop,變為與輸入影象同樣大小的 channel=21 的 feature map,也就是圖中的 pixel-wise prediction。在 Jonathan Long 等人的試驗中一共有 20 個語義類別,加上背景類別,每個畫素應該有 21 個 softmax 預測類,因此 pixel-wise prediction 中 channel=21。
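Non-local 操作的定義為:
$$y_i = \frac{1}{\mathcal{C}(x)} \sum_{\forall j} f(x_i, x_j)\, g(x_j)$$
其中 $X \in \mathbb{R}^{N \times C}$,$N$ 是畫素個數,$C$ 是畫素特徵維度(通道數),$f(\cdot,\cdot)$ 計算 $x_i$ 和 $x_j$ 之間的相關度(或稱“能量”),$g(\cdot)$ 對 $x_j$ 進行變換。可以看作對 $g(x_j)$ 的加權平均得到 $y_i$,作為對 $x_i$ 的重構,這裡權重為 $\frac{1}{\mathcal{C}(x)} f(x_i, x_j)$。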
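關於 $f$ 和 $g$ 的選擇,作者列出了多個選項,並最終選擇了
$$Y = \mathrm{softmax}\left(\theta(X)\,\phi(X)^{\top}\right) g(X)$$
的形式,其中 $\theta$、$\phi$、$g$ 分別對應 NLP Transformer 裡的 query,key 和 value。此外,$Y$ 經過 $1 \times 1$ 卷積後和 $X$ 相加,作為 Non-local 模組的輸出。最後結構圖如下: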
Non-local Block
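其實,這裡 $f$ 和 $g$ 的具體選擇,對效果影響不大。若讓 query 和 key 共享同一個變換(即 $\phi = \theta$),這樣計算出的 $\theta(X)\,\theta(X)^{\top}$ 是個對稱矩陣。甚至可以考慮將 $\theta$ 轉換省略,直接用 $X$ 本身計算,而把 $1 \times 1$ 卷積放在模組之前之後,這樣的效果也不遜色。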
-
《影象語義分割前沿進展》
富尺度空間的深度神經網路通用架構Res2Net和其應用場景以及自適應的池化方式Strip Pooling。
基於條帶池化,我們深入瞭解了空間池化的架構設計:(1)引入了新的條帶池化模型,可以使主幹網路可以有效地捕捉長距離的依賴關係;(2)提出了一個新穎的、可以將不同的空間池化作為核心的構件塊;(3)有組織地在效能上比較了所提出的條帶池化和傳統的空間池化技術的差別。
條帶池化 Strip Pooling
平均值池化操作:
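Standard Spatial Average Pooling:記輸入的二維張量為 $x$,尺寸為 $H \times W$。在平均值池化層中,需要池化的空間範圍為 $h \times w$。因此,輸出的二維張量 $y$,尺寸為 $H_o \times W_o$,其中 $H_o = H / h$、$W_o = W / w$。平均值池化的過程可以表示為:
$$y_{i_o, j_o} = \frac{1}{h \times w} \sum_{0 \le i < h} \sum_{0 \le j < w} x_{i_o \times h + i,\; j_o \times w + j}$$
其中,$0 \le i_o < H_o$、$0 \le j_o < W_o$,每一個 $y$ 的位置都對應於一個 $h \times w$ 的視窗。上述池化操作已成功應用於收集遠端上下文的先前工作。但是,在處理形狀不規則的物體時,可能會不可避免地合併許多不相關的區域。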
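Strip Pooling:為了緩解上述問題,我們在這裡提出“條帶池化”的概念,它使用帶狀池化視窗沿著水平或垂直維度執行池化。在數學上,記輸入的二維張量為 $x$,尺寸為 $H \times W$,在條帶池化中,池化視窗為 $(H, 1)$ 或 $(1, W)$。與二維平均值池化不同的是,條帶池化對行或列中的所有特徵值進行平均。其表示式為:
$$y^{h}_{i} = \frac{1}{W} \sum_{0 \le j < W} x_{i,j}, \qquad y^{v}_{j} = \frac{1}{H} \sum_{0 \le i < H} x_{i,j}$$
給定水平和垂直條帶池化層,由於長而窄的核形狀,很容易在離散分佈的區域之間建立遠端依賴關係,並對帶狀形狀的區域進行編碼。同時,由於其沿其他維度的窄核形狀,它還專注於捕獲區域性細節。這些特性使提出的條帶池化與依賴於方形核心的常規空間池化不同。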