
PyTorch - Auto-Encoder and Variational Auto-Encoder - Lossy Image Compression

Excerpted study notes.

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

import visdom

1. Auto-Encoder (AE)

class AE(nn.Module):

    def __init__(self):
        super(AE, self).__init__()

        # [b, 784] => [b, 20]
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
            nn.ReLU()
        )
        # [b, 20] => [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid()
        )

    def forward(self, x):                 #x.shape=[b, 1, 28, 28]

        batchsz = x.size(0)
        x = x.view(batchsz, 784)          #flatten
        x = self.encoder(x)               #encoder [b, 20]
        x = self.decoder(x)               #decoder [b, 784]
        x = x.view(batchsz, 1, 28, 28)    #reshape [b, 1, 28, 28]

        return x, None                    # None is a placeholder for the KL term, so AE and VAE share the same interface
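
A quick shape check (a minimal sketch, not part of the original notes; it only assumes the imports and the AE class above):

# Feed a dummy batch of four MNIST-sized images through the auto-encoder.
ae = AE()
x = torch.rand(4, 1, 28, 28)
x_hat, _ = ae(x)        # the second return value is the placeholder (no KL term for a plain AE)
print(x_hat.shape)      # torch.Size([4, 1, 28, 28]) -- same shape as the input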

2. Variational Auto-Encoder (VAE)

The latent variable h in the code and the c_i in the accompanying figure are computed slightly differently: the code applies no exponential, i.e. the encoder outputs sigma directly rather than log(sigma^2) (a sketch of the more common log-variance variant follows the VAE class below).

KL divergence formula (the torch.randn_like(sigma) factor that gets multiplied by sigma in the code is drawn from a standard normal distribution):
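
For a diagonal Gaussian posterior N(mu, sigma^2) against a standard normal prior, the closed form below is what the kld line in the code computes, before averaging over the batch and the 28x28 pixels (the 1e-8 in the code is only for numerical stability):

$$ D_{KL}\big(\mathcal{N}(\mu,\sigma^{2})\,\big\|\,\mathcal{N}(0,I)\big) = \frac{1}{2}\sum_{i}\left(\mu_i^{2}+\sigma_i^{2}-\log\sigma_i^{2}-1\right) $$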

class VAE(nn.Module):

    def __init__(self):
        super(VAE, self).__init__()

        # [b, 784] => [b, 20]  (10 dims for mu, 10 for sigma)
        self.encoder = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 20),
            nn.ReLU()           # note: this also constrains mu and sigma to be non-negative
        )
        # [b, 10] => [b, 784]
        self.decoder = nn.Sequential(
            nn.Linear(10, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid()
        )

        self.criteon = nn.MSELoss()

    def forward(self, x):              #x.shape=[b, 1, 28, 28]

        batchsz = x.size(0)
        x = x.view(batchsz, 784)                 #flatten

        h_ = self.encoder(x)                     #encoder  [b, 20], including mean and sigma
        mu, sigma = h_.chunk(2, dim=1)           #[b, 20] => mu[b, 10] and sigma[b, 10]
        h = mu + sigma * torch.randn_like(sigma) #reparameterization trick, epsilon ~ N(0, 1)
        x_hat = self.decoder(h)                  #decoder  [b, 784]
        x_hat = x_hat.view(batchsz, 1, 28, 28)   #reshape  [b, 1, 28, 28]

        kld = 0.5 * torch.sum(mu**2 + sigma**2 - torch.log(1e-8 + sigma**2) - 1) / (batchsz*28*28)   # KL divergence (see the formula above)

        return x_hat, kld
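
As noted above, the more common VAE parameterization lets the encoder output log(sigma^2) and recovers sigma with an exponential. A minimal sketch of that variant of the reparameterization step (a hypothetical helper, not part of the original code):

def reparameterize_logvar(mu, logvar):
    # The encoder would emit logvar = log(sigma^2); the exponential recovers sigma.
    sigma = torch.exp(0.5 * logvar)
    # Same reparameterization trick as above: h = mu + sigma * epsilon, epsilon ~ N(0, 1).
    return mu + sigma * torch.randn_like(sigma)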

3. Training the two auto-encoders above on the MNIST dataset

def main():
    mnist_train = datasets.MNIST('mnist', train=True, transform=transforms.Compose([transforms.ToTensor()]), download=True)
    mnist_train = DataLoader(mnist_train, batch_size=32, shuffle=True)

    mnist_test = datasets.MNIST('mnist', train=False, transform=transforms.Compose([transforms.ToTensor()]), download=True)
    mnist_test = DataLoader(mnist_test, batch_size=32, shuffle=True)

    x, _ = next(iter(mnist_train))     # x: torch.Size([32, 1, 28, 28]), _: torch.Size([32])

    model = AE()
    # model = VAE()

    criteon = nn.MSELoss()             # mean squared error loss
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    viz = visdom.Visdom()

    for epoch in range(20):

        for batchidx, (x, _) in enumerate(mnist_train):

            x_hat, kld = model(x)
            loss = criteon(x_hat, x)        # x_hat and x both have shape [b, 1, 28, 28]

            if kld is not None:
                elbo = - loss - 1.0 * kld   # ELBO (evidence lower bound); 1.0 is the KL weight
                loss = - elbo               # minimize the negative ELBO

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(epoch, 'loss:', loss.item())
        # print(epoch, 'loss:', loss.item(), 'kld:', kld.item())

        x, _ = next(iter(mnist_test))

        with torch.no_grad():
            x_hat, kld = model(x)
        viz.images(x, nrow=8, win='x', opts=dict(title='x'))
        viz.images(x_hat, nrow=8, win='x_hat', opts=dict(title='x_hat'))


if __name__ == '__main__':
    main()
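
For the VAE branch (kld is not None), the loop minimizes the negative ELBO: the MSE reconstruction term plus the KL term weighted by a factor beta, fixed to 1.0 in the code above (strictly, the ELBO uses a log-likelihood term; the MSE here is the usual Gaussian-likelihood surrogate):

$$ \mathcal{L} = -\mathrm{ELBO} = \mathrm{MSE}(\hat{x}, x) + \beta \, D_{KL}\big(\mathcal{N}(\mu,\sigma^{2})\,\big\|\,\mathcal{N}(0,I)\big), \qquad \beta = 1.0 $$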

Start the Visdom server: python -m visdom.server

Then visit: http://localhost:8097

When the AE is used, the x and x_hat Visdom windows show the original images and their reconstructions:

When the VAE is used: