pytorch-自編碼器與變分自編碼器-有損影象壓縮
阿新 • • 發佈:2020-09-08
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import visdom
1. 自編碼器(Auto-Encoder)
class AE(nn.Module):
    """Fully connected auto-encoder for 28x28 single-channel images.

    The encoder compresses each flattened image (784 values) to a
    20-dimensional code; the decoder maps that code back to 784 values
    squashed into (0, 1) by a sigmoid.
    """

    def __init__(self):
        super().__init__()

        # Encoder: [b, 784] => [b, 20]
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
            nn.ReLU(),
        )

        # Decoder: [b, 20] => [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Reconstruct a batch of images.

        Args:
            x: tensor that can be viewed as [b, 784]; the training script
                passes [b, 1, 28, 28].

        Returns:
            A tuple ``(x_hat, None)``. ``x_hat`` has shape [b, 1, 28, 28].
            The second slot is always ``None``; it mirrors the KL term
            returned by the VAE so both models share one call signature.
        """
        n = x.shape[0]
        flat = x.view(n, 784)                   # flatten each image
        code = self.encoder(flat)               # [b, 20]
        recon = self.decoder(code)              # [b, 784]
        return recon.view(n, 1, 28, 28), None   # back to image layout
2. 變分自動編碼器(Variational Auto-Encoder)
程式碼中的h和圖中的ci,計算方法略有不同,程式碼中沒有用指數。
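KL散度計算公式(程式碼中與sigma相乘的torch.randn_like(sigma)服從標準正態分佈 N(0, 1)):
$$\mathrm{KL}\big(\mathcal{N}(\mu,\sigma^{2})\,\|\,\mathcal{N}(0,1)\big)=\frac{1}{2}\sum_{i}\left(\mu_i^{2}+\sigma_i^{2}-\log\sigma_i^{2}-1\right)$$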
class VAE(nn.Module):
    """Fully connected variational auto-encoder for 28x28 single-channel images.

    The encoder emits 20 values per image, split into a 10-dimensional mean
    and a 10-dimensional standard deviation. A latent sample drawn with the
    reparameterization trick is decoded back to an image.
    """

    def __init__(self):
        super().__init__()

        # Encoder: [b, 784] => [b, 20] (mean and raw sigma, 10 each).
        # There is deliberately no activation after the last Linear: a
        # trailing ReLU would clamp the mean to be non-negative and let
        # sigma sit at exactly 0 with zero gradient.
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
        )

        # Decoder: [b, 10] => [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(10, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),
        )

        # Not used by forward(); kept so code reading `model.criteon` still works.
        self.criteon = nn.MSELoss()

    def forward(self, x):
        """Encode, sample a latent vector, decode, and compute the KL term.

        Args:
            x: tensor that can be viewed as [b, 784]; the training script
                passes [b, 1, 28, 28].

        Returns:
            A tuple ``(x_hat, kld)``. ``x_hat`` has shape [b, 1, 28, 28].
            ``kld`` is a scalar tensor: the KL divergence between
            N(mu, sigma^2) and N(0, 1), summed over the batch and latent
            dimensions and divided by ``b * 28 * 28``.
        """
        batchsz = x.size(0)
        x = x.view(batchsz, 784)                # flatten each image

        h_ = self.encoder(x)                    # [b, 20]
        mu, raw_sigma = h_.chunk(2, dim=1)      # [b, 20] => two [b, 10] halves
        # Only the sigma half is constrained; softplus keeps it strictly
        # positive and smooth, while the mean stays unconstrained.
        sigma = nn.functional.softplus(raw_sigma)

        # Reparameterization trick: h = mu + sigma * eps, eps ~ N(0, 1).
        h = mu + sigma * torch.randn_like(sigma)

        x_hat = self.decoder(h)                 # [b, 784]
        x_hat = x_hat.view(batchsz, 1, 28, 28)  # back to image layout

        # The 1e-8 guards log() if sigma underflows to 0 for very negative inputs.
        kld = 0.5 * torch.sum(
            mu ** 2 + sigma ** 2 - torch.log(1e-8 + sigma ** 2) - 1
        ) / (batchsz * 28 * 28)

        return x_hat, kld
3. MNIST資料集上分別呼叫上面的編碼器
def main():
    """Train an auto-encoder on MNIST and push reconstructions to visdom.

    Downloads MNIST into ``./mnist`` if needed, trains for 20 epochs with
    Adam (lr=1e-3) on a mean-squared reconstruction loss, and after every
    epoch sends one test batch and its reconstruction to a visdom server.
    Start the server first with ``python -m visdom.server``.
    """
    to_tensor = transforms.Compose([transforms.ToTensor()])

    mnist_train = datasets.MNIST('mnist', train=True, transform=to_tensor, download=True)
    mnist_train = DataLoader(mnist_train, batch_size=32, shuffle=True)

    mnist_test = datasets.MNIST('mnist', train=False, transform=to_tensor, download=True)
    mnist_test = DataLoader(mnist_test, batch_size=32, shuffle=True)

    # Each batch is (x, label) with x: [32, 1, 28, 28] and label: [32].
    model = AE()
    # model = VAE()  # swap in to train the variational model instead
    criteon = nn.MSELoss()  # mean-squared reconstruction loss
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    viz = visdom.Visdom()

    for epoch in range(20):
        for x, _ in mnist_train:
            x_hat, kld = model(x)
            loss = criteon(x_hat, x)  # x_hat and x are both [b, 1, 28, 28]

            if kld is not None:
                # Maximizing the evidence lower bound (ELBO) is the same as
                # minimizing reconstruction loss plus the KL term.
                elbo = -loss - 1.0 * kld
                loss = -elbo

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Reports the loss of the last training batch of the epoch.
        print(epoch, 'loss:', loss.item())

        # next(iter(...)) replaces the Python 2 style `.next()` method,
        # which DataLoader iterators no longer provide.
        x, _ = next(iter(mnist_test))
        with torch.no_grad():
            x_hat, kld = model(x)
        viz.images(x, nrow=8, win='x', opts=dict(title='x'))
        viz.images(x_hat, nrow=8, win='x_hat', opts=dict(title='x_hat'))


if __name__ == '__main__':
    main()
開啟監聽程序: python -m visdom.server
當呼叫AE時:
當呼叫VAE時: