Dataset
CelebA is a face dataset from the Chinese University of Hong Kong (http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html), containing 202,599 face images in total.
Creating an HDF5 compressed file
Hierarchical Data Format (HDF5) is a mature, open-source compressed data format designed for storing very large amounts of data. The book 《PyTorch 生成对抗网络编程》 (Tariq Rashid) converts the data into HDF5 format before running its code. I originally wanted to refactor this step, but it turns out that reading the images directly with ImageFolder would mean rewriting essentially all of the code. So let's just build the HDF5 file!
import os
import zipfile

import h5py
import imageio

hdf5_file = './celeba_aligned_small.h5py'
total_images = 202599

with h5py.File(hdf5_file, 'w') as hf:
    count = 0
    with zipfile.ZipFile('img_align_celeba.zip', 'r') as zf:
        for i in zf.namelist():
            if i[-4:] == '.jpg':
                # extract one jpg, read it into a numpy array, then delete the temporary file
                ofile = zf.extract(i)
                img = imageio.imread(ofile)
                os.remove(ofile)
                # store each image as its own gzip-compressed dataset inside one group
                hf.create_dataset('img_align_celeba/' + str(count) + '.jpg',
                                  data=img, compression='gzip', compression_opts=9)
                count = count + 1
                if count % 1000 == 0:
                    print('images done ...', count)
                if count == total_images:
                    break
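A quick sanity check I added (not from the book) is to reopen the finished file, count the stored images, and look at one of them:

import h5py
import numpy as np
import matplotlib.pyplot as plt

# reopen the file built above and inspect its contents
with h5py.File('./celeba_aligned_small.h5py', 'r') as hf:
    group = hf['img_align_celeba']
    print('stored images:', len(group))   # should be 202599
    img = np.array(group['0.jpg'])        # aligned CelebA images are 218x178 RGB
    print('image shape:', img.shape)      # (218, 178, 3)
    plt.imshow(img)
    plt.show()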
Generating faces
The previous post used an MLP (fully connected neural network); this time, for face generation, we switch to a CNN (convolutional neural network) and use GPU acceleration.
import h5py
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset

def crop_centre(img, new_width, new_height):
    # cut a new_width x new_height rectangle out of the centre of the image
    height, width, _ = img.shape
    startx = width // 2 - new_width // 2
    starty = height // 2 - new_height // 2
    return img[starty:starty + new_height, startx:startx + new_width, :]

def generate_random_image(size):
    # uniform random pixels, used as "fake" input when training the discriminator
    random_data = torch.rand(size)
    return torch.cuda.FloatTensor(random_data)

def generate_random_seed(size):
    # normally distributed seed for the generator
    random_data = torch.randn(size)
    return random_data

class CelebADataset(Dataset):
    def __init__(self, file):
        self.file_object = h5py.File(file, 'r')
        self.dataset = self.file_object['img_align_celeba']

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        if index >= len(self.dataset):
            raise IndexError
        img = np.array(self.dataset[str(index) + '.jpg'])
        # crop the aligned 218x178 image down to a 128x128 square
        img = crop_centre(img, 128, 128)
        # HWC -> CHW, add a batch dimension, scale pixel values to [0, 1]
        return torch.cuda.FloatTensor(img).permute(2, 0, 1).view(1, 3, 128, 128) / 255.0

    def plot_image(self, index):
        img = np.array(self.dataset[str(index) + '.jpg'])
        img = crop_centre(img, 128, 128)
        plt.imshow(img, interpolation='nearest')
        plt.show()
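Before building the networks, it's worth confirming that the dataset class reads the file correctly; the index here is arbitrary:

# quick check of the dataset wrapper (index 43 is arbitrary)
celeba_dataset = CelebADataset('celeba_aligned_small.h5py')
print(len(celeba_dataset))     # 202599
celeba_dataset.plot_image(43)  # shows the cropped 128x128 face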
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        # feature maps shrink 128x128 -> 61x61 -> 27x27 -> 10x10
        self.feature = nn.Sequential(
            nn.Conv2d(3, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Conv2d(256, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Conv2d(256, 3, kernel_size=8, stride=2),
            nn.GELU(),
        )
        self.classifier = nn.Sequential(
            nn.Linear(3 * 10 * 10, 1),
            nn.Sigmoid()
        )
        self.loss_function = nn.BCELoss()
        self.optimiser = torch.optim.Adam(self.parameters(), lr=0.01)
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        x = self.feature(inputs)
        x = x.view(-1)  # flatten to a 300-element vector (batch size 1)
        x = self.classifier(x)
        return x

    def train(self, inputs, targets):
        # note: this shadows nn.Module.train(), following the book's convention
        outputs = self.forward(inputs)
        loss = self.loss_function(outputs, targets)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        if self.counter % 10000 == 0:
            print("counter = ", self.counter)
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()
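Where does the 3 * 10 * 10 come from? With kernel size 8 and stride 2, each conv layer maps a feature map of size n to ⌊(n − 8) / 2⌋ + 1, so 128 → 61 → 27 → 10. A dummy forward pass (my own check, not part of the book's code) confirms it:

# sanity-check the feature extractor's output shape with a dummy image
D_test = Discriminator()
dummy = torch.rand(1, 3, 128, 128)
print(D_test.feature(dummy).shape)  # torch.Size([1, 3, 10, 10]) -> 300 values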
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        # expand the 100-dim seed to a 3x11x11 starting volume
        self.linear = nn.Sequential(
            nn.Linear(100, 3 * 11 * 11),
            nn.GELU(),
        )
        # feature maps grow 11x11 -> 28x28 -> 62x62 -> 128x128
        self.feature = nn.Sequential(
            nn.ConvTranspose2d(3, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.ConvTranspose2d(256, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.ConvTranspose2d(256, 3, kernel_size=8, stride=2, padding=1),
            nn.BatchNorm2d(3),
            nn.Sigmoid()
        )
        self.optimiser = torch.optim.Adam(self.parameters(), lr=0.01)
        self.counter = 0
        self.progress = []

    def forward(self, x):
        x = self.linear(x)
        x = x.view(1, 3, 11, 11)
        x = self.feature(x)
        return x

    def train(self, D, inputs, targets):
        # the generator is trained through the discriminator's loss
        g_output = self.forward(inputs)
        d_output = D.forward(g_output)
        loss = D.loss_function(d_output, targets)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()
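The transposed convolutions do the reverse: each layer maps size n to (n − 1) × 2 + 8 − 2 × padding, so the seed volume grows 11 → 28 → 62 → 128 (the final layer's padding=1 trims what would otherwise be 130 down to 128). Again, a dummy pass (my addition) confirms the shape:

# sanity-check that a 100-dim seed becomes a 1x3x128x128 image
G_test = Generator()
print(G_test.forward(torch.randn(100)).shape)  # torch.Size([1, 3, 128, 128])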
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
    print('using cuda:', torch.cuda.get_device_name(0))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

celeba_dataset = CelebADataset('celeba_aligned_small.h5py')

D = Discriminator()
G = Generator()
D.to(device)
G.to(device)

epochs = 1
for epoch in range(epochs):
    print('starting epoch', epoch + 1, '*************' * 4)
    for image_data in celeba_dataset:
        # discriminator sees a real image, target 1.0
        D.train(image_data, torch.cuda.FloatTensor([1.0]))
        # discriminator sees random noise, target 0.0
        D.train(generate_random_image((1, 3, 128, 128)), torch.cuda.FloatTensor([0.0]))
        # generator is trained to make the discriminator output 1.0
        G.train(D, generate_random_seed(100), torch.cuda.FloatTensor([1.0]))
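Both networks append their loss to self.progress every 10 training steps, so after training you can plot the curves; for BCE loss, the equilibrium value to look for is around −ln 0.5 ≈ 0.693. This plotting snippet is my own addition:

# plot the recorded loss curves for both networks
plt.figure(figsize=(16, 8))
plt.plot(D.progress, label='D loss')
plt.plot(G.progress, label='G loss')
plt.axhline(y=0.693, color='grey', linestyle='--', label='ln 2 equilibrium')
plt.legend()
plt.show()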
Running a few more epochs produces fairly good results:
for i in range(6):
    output = G.forward(generate_random_seed(100))
    # drop the batch dimension and reorder to HWC for matplotlib
    img = output.detach().permute(0, 2, 3, 1).view(128, 128, 3).cpu().numpy()
    plt.subplot(2, 3, i + 1)
    plt.imshow(img)
plt.show()
A single epoch took about four hours on my GTX 1060; better-equipped alchemists can try training for a few more epochs.