用Python做有趣的AI项目5:AI 画画机器人(图像风格迁移)
这个项目将使用 PyTorch 实现图像风格迁移(Neural Style Transfer),让一张图片看起来具有另一张图片的“艺术风格”。
🔧 开发环境建议
Python 3.8+
PyTorch(pip install torch torchvision)
PIL(pip install pillow)
CUDA(可选,但建议有 GPU)
🗂️ 项目结构示例
style_transfer/
├── style.jpg # 风格图像(如:星夜)
├── content.jpg # 内容图像(如:你的自拍)
└── style_transfer.py # 主程序
✅ Step-by-step:图像风格迁移项目详解(含代码)
✅ Step 1:导入库
import copy

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
✅ Step 2:设备设置(使用 GPU 优先)
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
✅ Step 3:图像加载与预处理函数
def load_image(img_path, max_size=400):
    """Load an image file as a normalized 4-D tensor on ``device``.

    The image is converted to RGB, downscaled (never upscaled) so that its
    longer side is at most ``max_size`` pixels while keeping the aspect
    ratio, converted to a tensor, normalized with the per-channel mean/std
    listed below (the standard ImageNet statistics), and given a leading
    batch dimension.

    Args:
        img_path: Path of the image file to open.
        max_size: Upper bound, in pixels, for the longer image side.

    Returns:
        A tensor of shape (1, 3, H, W) placed on the module-level ``device``.
    """
    # convert() returns a new, fully loaded image, so the file handle can be
    # closed right away.
    with Image.open(img_path) as img:
        image = img.convert('RGB')

    # Limit the maximum size. Resize(int) would match the *shorter* side,
    # letting the longer side exceed max_size (and upscaling small images),
    # so pass an explicit (height, width) derived from the longer side.
    width, height = image.size
    scale = min(1.0, max_size / max(width, height))
    new_size = (max(1, round(height * scale)), max(1, round(width * scale)))

    in_transform = transforms.Compose([
        transforms.Resize(new_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    image = in_transform(image).unsqueeze(0)
    return image.to(device)
✅ Step 4:定义函数来展示图像
def im_convert(tensor):
    """Convert a normalized (1, 3, H, W) image tensor back into a PIL image.

    Args:
        tensor: Image tensor produced by ``load_image`` (or optimized from
            one); it may live on any device and may require gradients.

    Returns:
        A PIL image with the normalization undone and values clamped to the
        displayable [0, 1] range.
    """
    image = tensor.detach().to("cpu").clone().squeeze(0)

    # Undo the Normalize step applied in load_image. Without this (and the
    # clamp) ToPILImage receives values outside [0, 1] and the 8-bit
    # conversion produces wrong colours.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    image = (image * std + mean).clamp(0, 1)

    return transforms.ToPILImage()(image)
✅ Step 5:加载内容图像和风格图像
# load_image already returns tensors placed on `device`, so no extra .to()
# call is needed here.
content = load_image("content.jpg")
style = load_image("style.jpg")
✅ Step 6:加载预训练的 VGG 模型(VGG19)
# Only the convolutional feature extractor is needed, in eval mode.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=`; confirm against the installed version before switching.
vgg = models.vgg19(pretrained=True).features.to(device).eval()

# Freeze the network: only the target image is optimized. Leaving the weights
# trainable makes every backward() compute gradients for the whole network and
# attaches an autograd graph to the precomputed content/style targets.
for param in vgg.parameters():
    param.requires_grad_(False)
我们只取模型中的某些层用于提取风格与内容特征。
✅ Step 7:定义提取内容和风格特征的函数
def get_features(image, model, layers=None):
    """Run ``image`` through ``model`` and collect selected layer outputs.

    Args:
        image: Input tensor fed to the first child module of ``model``.
        model: Module whose direct children are applied to the input one
            after another, in registration order.
        layers: Mapping from child-module name (e.g. ``'21'``) to the key
            under which that module's output is stored. When ``None``, the
            default mapping below is used.

    Returns:
        A dict mapping the chosen keys (e.g. ``'conv4_2'``) to the tensors
        produced by the corresponding child modules.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content representation
            '28': 'conv5_1',
        }

    features = {}
    x = image
    # named_children() is the public way to walk the direct submodules in
    # order (instead of reaching into the private `_modules` dict).
    # The loop deliberately runs through every child rather than stopping
    # after the last requested layer: stored tensors are not copied, so if a
    # following layer works in place the stored value reflects that change.
    # NOTE(review): torchvision's VGG ReLUs appear to be in-place - confirm.
    for name, layer in model.named_children():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
✅ Step 8:定义 Gram 矩阵函数(风格提取核心)
def gram_matrix(tensor):
    """Return the Gram matrix of a single feature map.

    Args:
        tensor: 4-D tensor of shape (1, d, h, w).

    Returns:
        A (d, d) tensor: the feature map flattened to (d, h * w) multiplied
        by its own transpose.

    Raises:
        ValueError: If the batch dimension is not 1.
    """
    b, d, h, w = tensor.size()
    if b != 1:
        # Flattening to (d, h * w) only makes sense for a single image.
        raise ValueError(f"gram_matrix expects a batch of 1 image, got {b}")

    # reshape (unlike view) also accepts non-contiguous feature maps.
    flat = tensor.reshape(d, h * w)
    return torch.mm(flat, flat.t())
✅ Step 9:提取内容和风格特征
# The content/style targets are constants during optimization, so compute
# them without recording an autograd graph. Besides saving memory, this keeps
# backward() in the training loop from revisiting a graph that was already
# freed after the first step.
with torch.no_grad():
    content_features = get_features(content, vgg)
    style_features = get_features(style, vgg)

    # Compute a Gram matrix for every style layer
    style_grams = {
        layer: gram_matrix(feature)
        for layer, feature in style_features.items()
    }
✅ Step 10:初始化目标图像(从内容图像复制)
# Start from a copy of the content image. Move it to the device *before*
# enabling gradients so that `target` stays a leaf tensor the optimizer can
# update.
target = content.clone().detach().to(device).requires_grad_(True)
✅ Step 11:设置超参数
# Per-layer weights of the style loss; the earliest layers get the largest
# weights.
style_weights = {
    'conv1_1': 1.0,
    'conv2_1': 0.75,
    'conv3_1': 0.2,
    'conv4_1': 0.2,
    'conv5_1': 0.2,
}
content_weight = 1e4  # α: weight of the content loss
style_weight = 1e2  # β: weight of the style loss
✅ Step 12:设置优化器
# Adam updates the pixels of `target` directly; it is the only tensor handed
# to the optimizer.
optimizer = optim.Adam([target], lr=0.003)
✅ Step 13:训练模型
steps = 300

# Treat the precomputed targets as constants. If they still carried an
# autograd graph (e.g. the VGG weights require gradients), the second
# backward() call would fail because that graph is freed after the first one.
content_target = content_features['conv4_2'].detach()
style_targets = {layer: style_grams[layer].detach() for layer in style_weights}

for i in range(1, steps + 1):
    target_features = get_features(target, vgg)

    # Content loss: mean squared difference at the content layer.
    content_loss = torch.mean((target_features['conv4_2'] - content_target) ** 2)

    # Style loss: weighted mean squared difference between Gram matrices,
    # scaled by the squared channel count of each layer.
    style_loss = 0
    for layer in style_weights:
        target_feature = target_features[layer]
        target_gram = gram_matrix(target_feature)
        layer_style_loss = style_weights[layer] * torch.mean(
            (target_gram - style_targets[layer]) ** 2
        )
        style_loss += layer_style_loss / (target_feature.shape[1] ** 2)

    total_loss = content_weight * content_loss + style_weight * style_loss

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if i % 50 == 0:
        print(f"Step {i}, Total loss: {total_loss.item():.4f}")
✅ Step 14:保存并显示结果图像
# Convert the optimized tensor to a PIL image, write it to disk and open it
# in the default image viewer.
final_img = im_convert(target)
final_img.save("result.jpg")
final_img.show()
🖼️ 示例效果
将自拍(content.jpg)与《星夜》(style.jpg)结合,输出一张油画风格的人像。
附上完整代码:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import copy# 设置设备
# Select the compute device: CUDA when available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Image loading and preprocessing
def load_image(img_path, max_size=400):
    """Load an image file as a normalized 4-D tensor on ``device``.

    The image is converted to RGB, downscaled (never upscaled) so that its
    longer side is at most ``max_size`` pixels while keeping the aspect
    ratio, converted to a tensor, normalized with the per-channel mean/std
    listed below (the standard ImageNet statistics), and given a leading
    batch dimension.

    Args:
        img_path: Path of the image file to open.
        max_size: Upper bound, in pixels, for the longer image side.

    Returns:
        A tensor of shape (1, 3, H, W) placed on the module-level ``device``.
    """
    # convert() returns a new, fully loaded image, so the file handle can be
    # closed right away.
    with Image.open(img_path) as img:
        image = img.convert('RGB')

    # Resize(int) would match the *shorter* side, letting the longer side
    # exceed max_size (and upscaling small images), so pass an explicit
    # (height, width) derived from the longer side.
    width, height = image.size
    scale = min(1.0, max_size / max(width, height))
    new_size = (max(1, round(height * scale)), max(1, round(width * scale)))

    in_transform = transforms.Compose([
        transforms.Resize(new_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    image = in_transform(image).unsqueeze(0)
    return image.to(device)


# Convert back to a viewable image
def im_convert(tensor):
    """Convert a normalized (1, 3, H, W) image tensor back into a PIL image.

    Args:
        tensor: Image tensor produced by ``load_image`` (or optimized from
            one); it may live on any device and may require gradients.

    Returns:
        A PIL image with the normalization undone and values clamped to the
        displayable [0, 1] range.
    """
    image = tensor.detach().to("cpu").clone().squeeze(0)

    # Undo the Normalize step applied in load_image. Without this (and the
    # clamp) ToPILImage receives values outside [0, 1] and the 8-bit
    # conversion produces wrong colours.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    image = (image * std + mean).clamp(0, 1)

    return transforms.ToPILImage()(image)


# Feature extraction
def get_features(image, model, layers=None):
    """Run ``image`` through ``model`` and collect selected layer outputs.

    Args:
        image: Input tensor fed to the first child module of ``model``.
        model: Module whose direct children are applied to the input one
            after another, in registration order.
        layers: Mapping from child-module name (e.g. ``'21'``) to the key
            under which that module's output is stored. When ``None``, the
            default mapping below is used.

    Returns:
        A dict mapping the chosen keys (e.g. ``'conv4_2'``) to the tensors
        produced by the corresponding child modules.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content layer
            '28': 'conv5_1',
        }

    features = {}
    x = image
    # named_children() is the public way to walk the direct submodules in
    # order (instead of reaching into the private `_modules` dict).
    # The loop deliberately runs through every child rather than stopping
    # after the last requested layer: stored tensors are not copied, so if a
    # following layer works in place the stored value reflects that change.
    # NOTE(review): torchvision's VGG ReLUs appear to be in-place - confirm.
    for name, layer in model.named_children():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features


# Gram matrix
def gram_matrix(tensor):
    """Return the Gram matrix of a single feature map.

    Args:
        tensor: 4-D tensor of shape (1, d, h, w).

    Returns:
        A (d, d) tensor: the feature map flattened to (d, h * w) multiplied
        by its own transpose.

    Raises:
        ValueError: If the batch dimension is not 1.
    """
    b, d, h, w = tensor.size()
    if b != 1:
        # Flattening to (d, h * w) only makes sense for a single image.
        raise ValueError(f"gram_matrix expects a batch of 1 image, got {b}")

    # reshape (unlike view) also accepts non-contiguous feature maps.
    flat = tensor.reshape(d, h * w)
    return torch.mm(flat, flat.t())


# Program entry point
def main():
    """Stylize ``content.jpg`` with ``style.jpg`` and write ``result.jpg``.

    Loads both images, extracts VGG19 features, then optimizes a copy of the
    content image for 300 Adam steps against a weighted sum of content and
    style losses, printing the total loss every 50 steps.
    """
    # Load the images
    content = load_image("content.jpg")
    style = load_image("style.jpg")

    # Load the pretrained model
    vgg = models.vgg19(pretrained=True).features.to(device).eval()
    # Freeze the network: only the target image is optimized, so gradients
    # for the VGG weights would be wasted work.
    for param in vgg.parameters():
        param.requires_grad_(False)

    # The targets are constants; computing them without autograd tracking
    # saves memory and keeps backward() from revisiting a graph that was
    # already freed after the first step.
    with torch.no_grad():
        content_features = get_features(content, vgg)
        style_features = get_features(style, vgg)
        style_grams = {
            layer: gram_matrix(feature)
            for layer, feature in style_features.items()
        }

    # Move to the device before enabling gradients so `target` stays a leaf
    # tensor the optimizer can update.
    target = content.clone().detach().to(device).requires_grad_(True)

    # Weight settings
    style_weights = {
        'conv1_1': 1.0,
        'conv2_1': 0.75,
        'conv3_1': 0.2,
        'conv4_1': 0.2,
        'conv5_1': 0.2,
    }
    content_weight = 1e4
    style_weight = 1e2

    optimizer = optim.Adam([target], lr=0.003)

    steps = 300
    print("开始风格迁移...")
    for i in range(1, steps + 1):
        target_features = get_features(target, vgg)
        content_loss = torch.mean(
            (target_features['conv4_2'] - content_features['conv4_2']) ** 2
        )

        style_loss = 0
        for layer in style_weights:
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            style_gram = style_grams[layer]
            layer_style_loss = style_weights[layer] * torch.mean(
                (target_gram - style_gram) ** 2
            )
            # Scale by the squared channel count of the layer.
            style_loss += layer_style_loss / (target_feature.shape[1] ** 2)

        total_loss = content_weight * content_loss + style_weight * style_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if i % 50 == 0:
            print(f"Step {i}/{steps}, Total loss: {total_loss.item():.4f}")

    # Save the result
    result = im_convert(target)
    result.save("result.jpg")
    print("风格迁移完成!结果保存在 result.jpg")


# Run the main function
if __name__ == "__main__":
    main()
✅ 使用说明
🖼️ 准备:
把你的内容图命名为 content.jpg
把你的风格图命名为 style.jpg
放在与 style_transfer.py 同一个目录下
▶️ 运行:
python style_transfer.py
🖼️ 输出:
运行成功后,生成的图像将保存在:
result.jpg