Part 5 (Advanced): Training Your First DDPM Model (on the CIFAR-10 Dataset)
Generate color images with a DDPM and see the magic of diffusion at work on CIFAR-10!
Goals for This Part
- Swap the MNIST dataset for CIFAR-10;
- Adapt the model architecture to three-channel RGB input;
- Keep the original diffusion and sampling pipeline unchanged;
- Add a visual comparison of the generated images!
Data Preparation (CIFAR-10)
Import the required libraries
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
```
Load the CIFAR-10 dataset
```python
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x * 2 - 1)  # [0,1] → [-1,1]
])

cifar10 = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(cifar10, batch_size=128, shuffle=True)
```
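As a quick optional sanity check (not part of the original post), you can pull one batch and confirm the tensors have the expected shape and value range after normalization:

```python
# Optional sanity check: one CIFAR-10 batch should be (128, 3, 32, 32) with values in [-1, 1]
x, _ = next(iter(dataloader))
print(x.shape)                          # torch.Size([128, 3, 32, 32])
print(x.min().item(), x.max().item())   # both within [-1, 1]
```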
Update the diffusion hyperparameters
```python
T = 300  # number of diffusion steps
beta_start = 1e-4
beta_end = 0.02

betas = torch.linspace(beta_start, beta_end, T)
alphas = 1. - betas
alphas_cumprod = torch.cumprod(alphas, dim=0)
sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod)
sqrt_one_minus_alphas_cumprod = torch.sqrt(1. - alphas_cumprod)
```
Forward diffusion function q_sample
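For reference, q_sample below implements the standard DDPM closed-form forward process: with ᾱ_t the cumulative product of the α's, a noisy sample at any step t can be drawn directly from x_0,

$$
\bar{\alpha}_t = \prod_{s=1}^{t} \alpha_s, \qquad
x_t = \sqrt{\bar{\alpha}_t}\, x_0 + \sqrt{1 - \bar{\alpha}_t}\, \epsilon, \qquad \epsilon \sim \mathcal{N}(0, I)
$$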
```python
def q_sample(x_start, t, noise=None):
    if noise is None:
        noise = torch.randn_like(x_start)
    sqrt_alphas_cumprod_t = sqrt_alphas_cumprod[t].view(-1, 1, 1, 1).to(x_start.device)
    sqrt_one_minus_alphas_cumprod_t = sqrt_one_minus_alphas_cumprod[t].view(-1, 1, 1, 1).to(x_start.device)
    return sqrt_alphas_cumprod_t * x_start + sqrt_one_minus_alphas_cumprod_t * noise
```
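As an optional illustration (this snippet is not part of the original code, and the timestep values are arbitrary), you can noise a single image at increasing t and watch it dissolve into Gaussian noise:

```python
# Optional: visualize the forward process on one image at a few timesteps
x0, _ = next(iter(dataloader))
x0 = x0[:1]                                   # one image, shape (1, 3, 32, 32)
steps = [0, 50, 150, 299]
fig, axes = plt.subplots(1, len(steps), figsize=(8, 2))
for ax, step in zip(axes, steps):
    x_t = q_sample(x0, torch.tensor([step]))
    img = ((x_t[0] + 1) / 2).clamp(0, 1)      # map back to [0, 1] for display
    ax.imshow(img.permute(1, 2, 0).numpy())
    ax.set_title(f"t={step}")
    ax.axis("off")
plt.show()
```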
Define the model (the model in this experiment is deliberately simple; it only serves to illustrate the full pipeline)
```python
class CIFARDenoiseModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 3, 3, padding=1)
        )

    def forward(self, x, t):
        return self.net(x)
```
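Note that this toy model receives t but ignores it, which is one reason sample quality will be limited. A real DDPM denoiser conditions on the timestep, typically through a sinusoidal embedding fed into the network. A minimal sketch of such an embedding (a hypothetical helper, not used anywhere in this post) could look like this:

```python
def sinusoidal_time_embedding(t, dim=64):
    """Map integer timesteps of shape (B,) to sinusoidal embeddings of shape (B, dim)."""
    half = dim // 2
    freqs = torch.exp(-np.log(10000.0) * torch.arange(half, dtype=torch.float32) / half)
    args = t.float().unsqueeze(1) * freqs.unsqueeze(0)            # (B, half)
    return torch.cat([torch.sin(args), torch.cos(args)], dim=1)   # (B, dim)
```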
Define the loss function
```python
def get_loss(model, x_0, t):
    noise = torch.randn_like(x_0)
    x_t = q_sample(x_0, t, noise)
    predicted_noise = model(x_t, t)
    return F.mse_loss(predicted_noise, noise)
```
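This is the simplified DDPM training objective: the network ε_θ only has to predict the noise ε that was mixed into x_0,

$$
L_{\text{simple}} = \mathbb{E}_{x_0,\, t,\, \epsilon}\Big[\, \big\| \epsilon - \epsilon_\theta\big(\sqrt{\bar{\alpha}_t}\, x_0 + \sqrt{1 - \bar{\alpha}_t}\, \epsilon,\ t\big) \big\|^2 \,\Big]
$$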
Train the model
```python
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CIFARDenoiseModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(10):
    for step, (x, _) in enumerate(dataloader):
        x = x.to(device)
        t = torch.randint(0, T, (x.size(0),), device=device).long()
        loss = get_loss(model, x, t)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % 100 == 0:
            print(f"Epoch {epoch} | Step {step} | Loss: {loss.item():.4f}")
```
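Ten epochs over CIFAR-10 can take a while; if you want to keep the weights around, a standard (optional) way to save and restore them is shown below. The file name here is arbitrary:

```python
# Optional: persist the trained weights (file name chosen just for this example)
torch.save(model.state_dict(), "ddpm_cifar10_simple.pt")
# ...and restore them later:
# model.load_state_dict(torch.load("ddpm_cifar10_simple.pt", map_location=device))
```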
Image generation: reverse diffusion sampling
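p_sample below implements one DDPM reverse step: using the predicted noise ε_θ(x_t, t), it computes the posterior mean and, for every step except t = 0, adds fresh Gaussian noise scaled by √β_t,

$$
x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{\beta_t}{\sqrt{1 - \bar{\alpha}_t}}\, \epsilon_\theta(x_t, t)\right) + \sqrt{\beta_t}\, z,
\qquad z \sim \mathcal{N}(0, I) \ \ (z = 0 \text{ at } t = 0)
$$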
```python
@torch.no_grad()
def p_sample(model, x_t, t):
    beta_t = betas[t].to(x_t.device)
    sqrt_one_minus_alpha_cumprod_t = sqrt_one_minus_alphas_cumprod[t].to(x_t.device)
    sqrt_recip_alpha_t = (1. / torch.sqrt(alphas[t])).to(x_t.device)

    predicted_noise = model(x_t, torch.tensor([t], device=x_t.device))
    model_mean = sqrt_recip_alpha_t * (x_t - beta_t / sqrt_one_minus_alpha_cumprod_t * predicted_noise)

    if t == 0:
        return model_mean
    noise = torch.randn_like(x_t)
    return model_mean + torch.sqrt(beta_t) * noise


@torch.no_grad()
def sample_images(model, n_samples=16):
    model.eval()
    x_t = torch.randn(n_samples, 3, 32, 32).to(device)
    for t in reversed(range(T)):
        x_t = p_sample(model, x_t, t)
    return x_t
```
Visualize the generated images
```python
def show_images(images):
    images = (images + 1) / 2  # un-normalize back to [0,1]
    grid = torchvision.utils.make_grid(images, nrow=4)
    plt.figure(figsize=(6, 6))
    plt.imshow(grid.permute(1, 2, 0).cpu().numpy())
    plt.axis("off")
    plt.show()

samples = sample_images(model)
show_images(samples)
```
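If you also want to write the grid to disk (optional, not in the original code), torchvision's save_image handles the grid layout for you:

```python
# Optional: save the generated samples as a PNG grid
imgs = ((samples + 1) / 2).clamp(0, 1)   # map from [-1, 1] back to [0, 1]
torchvision.utils.save_image(imgs, "ddpm_cifar10_samples.png", nrow=4)
```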
Summary of This Part
| Item | Details |
| --- | --- |
| Dataset | CIFAR-10 (three-channel color images) |
| Model architecture | Simple CNN with RGB images as input and output |
| Input image size | 32x32x3 |
| Loss function | MSE on the predicted noise |
| Visualization | Grid of generated color images |
Preview of Part 6
🚀 We will replace the plain CNN with a U-Net to train the CIFAR-10 diffusion model, building a more powerful network to further improve image quality!