torch09: Variational Autoencoder (VAE) -- MNIST and your own dataset
This section uses torch to build, train, and test a VAE model:
(1) Define the model hyperparameters: input size, hidden units, number of epochs, batch size, learning rate.
(2) Define the training data.
(3) Define the model (the VAE structure you need).
(4) Define the loss function, choosing one appropriate for the task (see the formula after this list).
(5) Define the optimization algorithm (SGD, Adam, etc.).
(6) Save the model.
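For step (4): the loss minimized below is the negative ELBO, a reconstruction term plus the KL divergence between the approximate posterior q(z|x) = N(mu, diag(sigma^2)) and the standard normal prior. For a diagonal Gaussian the KL term has the closed form that appears in the code as kl_div:

\mathrm{loss} = \mathrm{BCE}(\hat{x}, x) + \mathrm{KL}\bigl(q(z \mid x)\,\|\,\mathcal{N}(0, I)\bigr)

\mathrm{KL} = -\frac{1}{2}\sum_{j=1}^{z\_dim}\bigl(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\bigr)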
--------------------------------- I'm a cute divider ---------------------------------
Code:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.utils import save_image

# Use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create a directory for the images produced by the VAE
sample_dir = 'samples'
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)

# Hyperparameters: input size, hidden units, epochs, batch size, learning rate
image_size = 784
h_dim = 400
z_dim = 20
num_epochs = 15
batch_size = 128
learning_rate = 1e-3

# MNIST dataset
dataset = torchvision.datasets.MNIST(root='./data',
                                     train=True,
                                     transform=transforms.ToTensor(),
                                     download=True)

# Build the data pipeline; for using your own dataset see:
# https://blog.csdn.net/u014365862/article/details/80506147
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

# VAE model
class VAE(nn.Module):
    def __init__(self, image_size=784, h_dim=400, z_dim=20):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(image_size, h_dim)
        self.fc2 = nn.Linear(h_dim, z_dim)   # mu head
        self.fc3 = nn.Linear(h_dim, z_dim)   # log_var head
        self.fc4 = nn.Linear(z_dim, h_dim)
        self.fc5 = nn.Linear(h_dim, image_size)

    def encode(self, x):
        h = F.relu(self.fc1(x))
        return self.fc2(h), self.fc3(h)  # two z_dim-sized outputs: mu and log_var

    def reparameterize(self, mu, log_var):
        # Reparameterization trick: z = mu + eps * std with eps ~ N(0, I)
        std = torch.exp(log_var / 2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h = F.relu(self.fc4(z))
        return torch.sigmoid(self.fc5(h))  # F.sigmoid is deprecated

    def forward(self, x):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        x_reconst = self.decode(z)
        return x_reconst, mu, log_var

# Instantiate the model
model = VAE().to(device)
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
for epoch in range(num_epochs):
    for i, (x, _) in enumerate(data_loader):
        # Forward pass
        x = x.to(device).view(-1, image_size)
        x_reconst, mu, log_var = model(x)

        # Reconstruction loss and KL divergence
        # For the KL divergence, see Appendix B of the VAE paper or http://yunjey47.tistory.com/43
        reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')  # size_average=False is deprecated
        kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

        # Backward pass + parameter update
        loss = reconst_loss + kl_div
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print statistics every 10 batches
        if (i + 1) % 10 == 0:
            print("Epoch[{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, KL Div: {:.4f}"
                  .format(epoch + 1, num_epochs, i + 1, len(data_loader),
                          reconst_loss.item(), kl_div.item()))

    # Sampling/reconstruction phase: no gradients needed here
    with torch.no_grad():
        # Save images sampled from the prior
        z = torch.randn(batch_size, z_dim).to(device)
        out = model.decode(z).view(-1, 1, 28, 28)
        save_image(out, os.path.join(sample_dir, 'sampled-{}.png'.format(epoch + 1)))

        # Save reconstructions side by side with the inputs
        out, _, _ = model(x)
        x_concat = torch.cat([x.view(-1, 1, 28, 28), out.view(-1, 1, 28, 28)], dim=3)
        save_image(x_concat, os.path.join(sample_dir, 'reconst-{}.png'.format(epoch + 1)))
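Step (6) of the outline calls for saving the model, which the listing above never does. A minimal sketch using the standard torch idiom (the file name vae.pth is an arbitrary choice):

# Save only the learned parameters
torch.save(model.state_dict(), 'vae.pth')

# To reuse the model later: rebuild the architecture, then load the weights
model = VAE().to(device)
model.load_state_dict(torch.load('vae.pth', map_location=device))
model.eval()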
Extra 1: using the model on your own dataset.
The data format in train.txt is:
gender/0male/0(2).jpg 1
gender/0male/0(3).jpeg 1
gender/0male/0(1).jpg 0
The data format in test.txt is as follows:
gender/0male/0(3).jpeg 1
gender/0male/0(1).jpg 0
gender/1female/1(6).jpg 1
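Each line holds a relative image path, a single space, and an integer label. MyDataset below parses this with line.split(' '), which assumes the path itself contains no spaces; if your paths may contain spaces, a slightly more robust parse (a small sketch, not in the original) splits once from the right instead:

# Robust variant of the parsing inside MyDataset.__init__:
# split once from the right so spaces inside the path survive.
path, label = line.rstrip('\n').rsplit(' ', 1)
label = int(label)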
Code:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from PIL import Image

# Use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create a directory for the images produced by the VAE
sample_dir = 'samples'
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)

# Hyperparameters: input size, hidden units, epochs, batch size, learning rate
image_size = 784
h_dim = 400
z_dim = 20
num_epochs = 15
batch_size = 2
learning_rate = 1e-3

def default_loader(path):
    # Load as single-channel grayscale so each image flattens to exactly 784
    # values; the original convert('RGB') yields 3*28*28 values per image and
    # silently breaks the view(-1, image_size) reshape in the training loop.
    return Image.open(path).convert('L')

class MyDataset(Dataset):
    def __init__(self, txt, transform=None, target_transform=None, loader=default_loader):
        fh = open(txt, 'r')
        imgs = []
        for line in fh:
            line = line.strip('\n')
            words = line.split(' ')
            imgs.append((words[0], int(words[1])))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        img = self.loader(fn)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)

def get_loader(dataset='train.txt', crop_size=128, image_size=28, batch_size=2, mode='train', num_workers=1):
    """Build and return a data loader."""
    transform = []
    if mode == 'train':
        transform.append(transforms.RandomHorizontalFlip())
    transform.append(transforms.CenterCrop(crop_size))
    transform.append(transforms.Resize(image_size))
    transform.append(transforms.ToTensor())
    # Deliberately no Normalize here: binary_cross_entropy below needs
    # targets in [0, 1], and ToTensor() already produces that range.
    transform = transforms.Compose(transform)
    train_data = MyDataset(txt=dataset, transform=transform)
    # Note: every tensor in a batch must have the same size.
    data_loader = DataLoader(dataset=train_data,
                             batch_size=batch_size,
                             shuffle=(mode == 'train'),
                             num_workers=num_workers)
    return data_loader

data_loader = get_loader('train.txt', batch_size=batch_size)
print(len(data_loader))
test_loader = get_loader('test.txt', batch_size=batch_size)
print(len(test_loader))

# VAE model (same structure as above)
class VAE(nn.Module):
    def __init__(self, image_size=784, h_dim=400, z_dim=20):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(image_size, h_dim)
        self.fc2 = nn.Linear(h_dim, z_dim)   # mu head
        self.fc3 = nn.Linear(h_dim, z_dim)   # log_var head
        self.fc4 = nn.Linear(z_dim, h_dim)
        self.fc5 = nn.Linear(h_dim, image_size)

    def encode(self, x):
        h = F.relu(self.fc1(x))
        return self.fc2(h), self.fc3(h)  # two z_dim-sized outputs: mu and log_var

    def reparameterize(self, mu, log_var):
        # Reparameterization trick: z = mu + eps * std with eps ~ N(0, I)
        std = torch.exp(log_var / 2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h = F.relu(self.fc4(z))
        return torch.sigmoid(self.fc5(h))  # F.sigmoid is deprecated

    def forward(self, x):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        x_reconst = self.decode(z)
        return x_reconst, mu, log_var

# Instantiate the model
model = VAE().to(device)
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
for epoch in range(num_epochs):
    for i, (x, _) in enumerate(data_loader):
        # Forward pass
        x = x.to(device).view(-1, image_size)
        x_reconst, mu, log_var = model(x)

        # Reconstruction loss and KL divergence
        # For the KL divergence, see Appendix B of the VAE paper or http://yunjey47.tistory.com/43
        reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')  # size_average=False is deprecated
        kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

        # Backward pass + parameter update
        loss = reconst_loss + kl_div
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print statistics every 10 batches
        if (i + 1) % 10 == 0:
            print("Epoch[{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, KL Div: {:.4f}"
                  .format(epoch + 1, num_epochs, i + 1, len(data_loader),
                          reconst_loss.item(), kl_div.item()))

    # Sampling/reconstruction phase: no gradients needed here
    with torch.no_grad():
        # Save images sampled from the prior
        z = torch.randn(batch_size, z_dim).to(device)
        out = model.decode(z).view(-1, 1, 28, 28)
        save_image(out, os.path.join(sample_dir, 'sampled-{}.png'.format(epoch + 1)))

        # Save reconstructions side by side with the inputs
        out, _, _ = model(x)
        x_concat = torch.cat([x.view(-1, 1, 28, 28), out.view(-1, 1, 28, 28)], dim=3)
        save_image(x_concat, os.path.join(sample_dir, 'reconst-{}.png'.format(epoch + 1)))
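The listing builds test_loader but never uses it. A minimal evaluation sketch (assuming the training loop above has already run) that reports the average per-image loss on test.txt:

# Evaluate the trained VAE on the held-out test set
model.eval()
total_loss, n_images = 0.0, 0
with torch.no_grad():
    for x, _ in test_loader:
        x = x.to(device).view(-1, image_size)
        x_reconst, mu, log_var = model(x)
        reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')
        kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        total_loss += (reconst_loss + kl_div).item()
        n_images += x.size(0)
print('Average test loss per image: {:.4f}'.format(total_loss / n_images))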
--------------------------------- I'm a cute divider ---------------------------------
Summary:
This section implements a VAE in torch; you can substitute whatever network structure you need and train it the same way.