Label Smoothing Demo (with PyTorch's NLLLoss() and gather())
LabelSmoothing.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


# Wangleiofficial
# https://github.com/pytorch/pytorch/issues/7455#issuecomment-720100742
class LabelSmoothingLoss(torch.nn.Module):
    def __init__(self, smoothing: float = 0.1, reduction="mean", weight=None):
        super(LabelSmoothingLoss, self).__init__()
        self.smoothing = smoothing
        self.reduction = reduction
        self.weight = weight

    def reduce_loss(self, loss):
        return loss.mean() if self.reduction == 'mean' else loss.sum() \
            if self.reduction == 'sum' else loss

    def linear_combination(self, x, y):
        return self.smoothing * x + (1 - self.smoothing) * y

    def forward(self, preds, target):
        assert 0 <= self.smoothing < 1

        if self.weight is not None:
            self.weight = self.weight.to(preds.device)

        n = preds.size(-1)
        log_preds = F.log_softmax(preds, dim=-1)
        loss = self.reduce_loss(-log_preds.sum(dim=-1))
        nll = F.nll_loss(
            log_preds, target, reduction=self.reduction, weight=self.weight
        )
        return self.linear_combination(loss / n, nll)


# NVIDIA
# https://github.com/NVIDIA/DeepLearningExamples/blob/8d8b21a933fff3defb692e0527fca15532da5dc6/PyTorch/Classification/ConvNets/image_classification/smoothing.py#L18
class LabelSmoothing(nn.Module):
    """NLL loss with label smoothing."""

    def __init__(self, smoothing=0.0):  # smoothing factor
        """Constructor for the LabelSmoothing module.
        :param smoothing: label smoothing factor
        """
        super(LabelSmoothing, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        # x: (batch_size, num_classes) logits; logprobs is log(p(k))
        logprobs = torch.nn.functional.log_softmax(x, dim=-1)
        # target: (batch_size,) integer class labels; gather picks, for each sample,
        # the log-probability at the true-label position (negated)
        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
        # squeeze (batch_size, 1) down to (batch_size,); this is -log(p(k))·δ(k,y),
        # where δ(k,y) = 1 when k = y and 0 otherwise
        nll_loss = nll_loss.squeeze(1)
        # mean over the class dimension: average of -log(p(k)) over all classes per sample,
        # i.e. smooth_loss = Σ_k u(k)·(-log(p(k))) with u(k) = 1/K
        smooth_loss = -logprobs.mean(dim=-1)
        # loss = (1-ϵ)·nll_loss + ϵ·smooth_loss, shape (batch_size,)
        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
        # batch mean of -Σ_{k=1..K} [(1-ϵ)·log(p(k))·δ(k,y) + ϵ·log(p(k))·u(k)]
        return loss.mean()


if __name__ == "__main__":
    # Wangleiofficial
    crit = LabelSmoothingLoss(smoothing=0.3, reduction="mean")
    predict = torch.FloatTensor([[0, 0.2, 0.7, 0.1, 0],
                                 [0, 0.9, 0.2, 0.2, 1],
                                 [1, 0.2, 0.7, 0.9, 1]])
    v = crit(Variable(predict), Variable(torch.LongTensor([2, 1, 0])))
    print(v)
    # NVIDIA
    crit = LabelSmoothing(smoothing=0.3)
    predict = torch.FloatTensor([[0, 0.2, 0.7, 0.1, 0],
                                 [0, 0.9, 0.2, 0.2, 1],
                                 [1, 0.2, 0.7, 0.9, 1]])
    v = crit(Variable(predict), Variable(torch.LongTensor([2, 1, 0])))
    print(v)
    # tensor(1.3883)
    # tensor(1.3883)
The code above can be run as-is to produce the label-smoothing (LS) result.
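As a sanity check (not from either original post): on PyTorch 1.10 and later, nn.CrossEntropyLoss has a built-in label_smoothing argument that mixes the one-hot target with a uniform distribution in the same way, so for this input it should print the same value. A minimal sketch, assuming torch >= 1.10:

import torch
import torch.nn as nn

# Built-in label smoothing (available since torch 1.10); takes raw logits
# and applies log_softmax internally.
crit = nn.CrossEntropyLoss(label_smoothing=0.3)
predict = torch.FloatTensor([[0, 0.2, 0.7, 0.1, 0],
                             [0, 0.9, 0.2, 0.2, 1],
                             [1, 0.2, 0.7, 0.9, 1]])
target = torch.LongTensor([2, 1, 0])
print(crit(predict, target))  # expected to match the two demos above: tensor(1.3883)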
The two LS implementations come from:
# Wangleiofficial
# https://github.com/pytorch/pytorch/issues/7455#issuecomment-720100742
# NVIDIA
# https://github.com/NVIDIA/DeepLearningExamples/blob/8d8b21a933fff3defb692e0527fca15532da5dc6/PyTorch/Classification/ConvNets/image_classification/smoothing.py#L18
The walkthrough below focuses on the NVIDIA implementation.
class LabelSmoothing(nn.Module):
    """NLL loss with label smoothing."""

    def __init__(self, smoothing=0.0):  # smoothing factor
        """Constructor for the LabelSmoothing module.
        :param smoothing: label smoothing factor
        """
        super(LabelSmoothing, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        # x: (batch_size, num_classes) logits; logprobs is log(p(k))
        logprobs = torch.nn.functional.log_softmax(x, dim=-1)
        # target: (batch_size,) integer class labels; gather picks, for each sample,
        # the log-probability at the true-label position (negated)
        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
        # squeeze (batch_size, 1) down to (batch_size,); this is -log(p(k))·δ(k,y),
        # where δ(k,y) = 1 when k = y and 0 otherwise
        nll_loss = nll_loss.squeeze(1)
        # mean over the class dimension: average of -log(p(k)) over all classes per sample,
        # i.e. smooth_loss = Σ_k u(k)·(-log(p(k))) with u(k) = 1/K
        smooth_loss = -logprobs.mean(dim=-1)
        # loss = (1-ϵ)·nll_loss + ϵ·smooth_loss, shape (batch_size,)
        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
        # batch mean of -Σ_{k=1..K} [(1-ϵ)·log(p(k))·δ(k,y) + ϵ·log(p(k))·u(k)]
        return loss.mean()
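To make the shapes concrete, here is a short sketch (not part of the NVIDIA code) that repeats the forward pass step by step on the demo input; the variable names mirror the class above:

import torch
import torch.nn.functional as F

x = torch.FloatTensor([[0, 0.2, 0.7, 0.1, 0],
                       [0, 0.9, 0.2, 0.2, 1],
                       [1, 0.2, 0.7, 0.9, 1]])   # (batch_size=3, num_classes=5) logits
target = torch.LongTensor([2, 1, 0])             # (batch_size=3,) integer labels
smoothing, confidence = 0.3, 1.0 - 0.3

logprobs = F.log_softmax(x, dim=-1)                                         # (3, 5) log(p(k))
nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)   # (3,)  -log(p(y))
smooth_loss = -logprobs.mean(dim=-1)                                        # (3,)  mean of -log(p(k)) over classes
loss = confidence * nll_loss + smoothing * smooth_loss                      # (3,)  per-sample loss
print(nll_loss.shape, smooth_loss.shape, loss.mean())
# torch.Size([3]) torch.Size([3]) tensor(1.3883)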
An intuitive, easy-to-follow derivation of LS:
https://blog.csdn.net/weixin_41811314/article/details/115863126 (beginner-friendly, step-by-step code)
Dirac delta: δ(k,y) = 1 when k = y, 0 otherwise.
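In symbols, with smoothing factor ϵ and K classes, the smoothed target mixes the one-hot (Dirac delta) distribution with the uniform distribution u(k) = 1/K, and the per-sample loss is (restating the comments in the code above in LaTeX):

q'(k) = (1 - \epsilon)\,\delta_{k,y} + \epsilon\, u(k),
\qquad u(k) = \tfrac{1}{K},
\qquad \delta_{k,y} = \begin{cases} 1 & k = y \\ 0 & \text{otherwise} \end{cases}

\mathcal{L} = -\sum_{k=1}^{K} q'(k)\,\log p(k)
            = (1 - \epsilon)\bigl(-\log p(y)\bigr)
            + \epsilon \cdot \frac{1}{K}\sum_{k=1}^{K}\bigl(-\log p(k)\bigr)

The first term is nll_loss in the code and the second is smooth_loss.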
On the difference between NLLLoss() and CrossEntropyLoss():
https://blog.csdn.net/qq_22210253/article/details/85229988/
When using NLLLoss() (Negative Log Likelihood Loss), you must add an activation at the network's final output layer yourself (LogSoftmax, so the network emits log-probabilities); this is exactly what "likelihood" implies, i.e. the output is a probability. With CrossEntropyLoss(), the final output should not go through an activation, because CrossEntropyLoss() performs that step for us internally.
NLLLoss (Negative Log Likelihood Loss) translates to "negative log-likelihood loss", but it does not compute the logarithm itself; it only consumes values that have already been log-transformed (hence it must be paired with LogSoftmax), and then combines them with the true labels to produce the "negative log-likelihood". "Likelihood" here means how similar the predicted distribution is to the true distribution; in a classification task it is simply how close the prediction is to the true (one-hot) label, which amounts to the same thing.
PyTorch's CrossEntropyLoss therefore combines LogSoftmax and NLLLoss.
In other words: apply softmax, take the natural log, pick out the entry corresponding to the label, negate it, and average over the batch (divide by the number of samples), as sketched below.
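A minimal sketch of that equivalence (the tensor values are arbitrary, just for illustration); all three results below should match up to floating-point rounding:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(3, 5)                 # raw network outputs, no activation
target = torch.LongTensor([2, 1, 0])

# 1) CrossEntropyLoss on raw logits (does LogSoftmax + NLLLoss internally)
loss_ce = nn.CrossEntropyLoss()(logits, target)

# 2) LogSoftmax first, then NLLLoss on the log-probabilities
loss_nll = nn.NLLLoss()(F.log_softmax(logits, dim=-1), target)

# 3) Manual recipe: softmax -> log -> pick the value at the label -> negate -> batch mean
logp = torch.log(F.softmax(logits, dim=-1))
loss_manual = (-logp.gather(1, target.unsqueeze(1)).squeeze(1)).mean()

print(loss_ce, loss_nll, loss_manual)      # all three values should agree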
A detailed explanation of PyTorch's torch.gather, from the abstract to the concrete:
https://blog.csdn.net/weixin_41811314/article/details/115869024
torch.gather(input, dim, index, *, sparse_grad=False, out=None)
When dim=1: out[i][j][k] = input[i][ index[i][j][k] ][k]
That is, to get out[i][j][k], first look up index[i][j][k] and call it a; since dim is 1, the result is input[i][a][k].
Think of input as a warehouse of values.
A more big-picture way to understand it:
For every position in out, we go into the input warehouse and find the corresponding point.
From that point, we shine a ray that lights up only the line running along dim; on that line, index tells us which point we actually need. A small example follows below.
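A small sketch of that rule in the 2-D case used by the LS code above (dim=1 is the class dimension; the values are made up for illustration):

import torch

logprobs = torch.tensor([[-1.8, -1.6, -1.1, -1.7, -1.8],
                         [-2.2, -1.3, -2.0, -2.0, -1.2]])   # input "warehouse": (2, 5)
target = torch.tensor([2, 1])                               # true class index per row

# dim=1, so out[i][j] = input[i][ index[i][j] ]: for each row i, walk along
# the class dimension and pick the entry whose column is index[i][j].
picked = logprobs.gather(dim=1, index=target.unsqueeze(1))  # shape (2, 1)
print(picked.squeeze(1))                                    # tensor([-1.1000, -1.3000])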