一、环境准备
1.1 依赖安装
首先确保已安装Python 3.8+,然后执行以下命令安装PyTorch及依赖:
pip install torch torchvision torchmetrics
1.2 验证安装
import torch

# This guide targets PyTorch 2.0+ (see the note at the end of the document).
installed_version = torch.__version__
has_cuda = torch.cuda.is_available()  # True when a usable CUDA GPU is present
print(installed_version)
print(has_cuda)
二、ResNet核心原理
ResNet(残差网络)的核心是残差块和跳跃连接,解决了深层网络的梯度消失问题,允许训练超过100层的网络。
2.1 残差块结构
残差块由两个卷积层+跳跃连接组成,公式为:
out = F(x) + x
其中F(x)是卷积层的输出,x是输入的跳跃连接。
2.2 跳跃连接优势
- 保留梯度信息,避免深层网络梯度消失
- 简化梯度传播路径
- 加速网络训练
三、搭建ResNet步骤
3.1 导入必要库
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchmetrics import Accuracy
3.2 定义残差块(BasicBlock)
class BasicBlock(nn.Module):
    """Basic residual block for ResNet-18/34: two 3x3 convs plus a shortcut.

    Computes out = relu(F(x) + shortcut(x)) where F is conv-bn-relu-conv-bn.
    The shortcut is the identity unless the spatial size or channel count
    changes, in which case a 1x1 conv projection matches the shapes.
    """

    expansion = 1  # output channels = out_channels * expansion

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # First 3x3 conv carries the (possible) spatial downsampling.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # Second 3x3 conv keeps the spatial size.
        self.conv2 = nn.Conv2d(out_channels, out_channels * self.expansion,
                               kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels * self.expansion)
        # Identity shortcut; replaced by a 1x1 projection when shapes differ.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion),
            )

    def forward(self, x):
        # Use torch.relu instead of constructing a new nn.ReLU module on
        # every forward pass (same math, no per-call module allocation).
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)  # residual (skip) connection
        return torch.relu(out)
3.3 定义ResNet模型
class ResNet(nn.Module):
    """ResNet for 32x32 inputs (CIFAR-style stem: single 3x3 conv, no maxpool).

    Args:
        block: residual block class (e.g. BasicBlock); must expose `expansion`.
        num_blocks: four ints, blocks per stage ([2,2,2,2] -> ResNet-18).
        num_classes: size of the final classification layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_channels = 64
        # Stem: a single 3x3 conv, stride 1 (suited to small 32x32 images).
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # Four residual stages; stages 2-4 halve the spatial resolution.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        # Global average pooling followed by the classifier head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first may downsample (stride)."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:
            layers.append(block(self.in_channels, out_channels, s))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        # torch.relu avoids allocating a new nn.ReLU module on every call.
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out
3.4 实例化模型
# Pick the compute device first: GPU when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build ResNet-18 (four stages, two BasicBlocks per stage) and move it
# onto the chosen device in one step.
model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10).to(device)
四、训练与测试示例(CIFAR-10数据集)
4.1 数据加载
# Data transforms.  CIFAR-10 images are already 32x32, so the Resize is a
# no-op kept only for clarity when swapping in a different dataset.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    # Normalize with the CIFAR-10 channel statistics.  The original tutorial
    # copied the ImageNet mean/std here, which do not match this dataset.
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2470, 0.2435, 0.2616]),
])

# Download (if needed) and load the train/test splits.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Mini-batch loaders; shuffle only the training data.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)
4.2 定义损失函数和优化器
# Cross-entropy loss for 10-way classification.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and a small L2 weight decay.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=1e-4,
)
4.3 训练循环
# Training configuration.
num_epochs = 10
best_acc = 0.0

for epoch in range(num_epochs):
    # --- Training phase ---
    model.train()
    train_loss = 0.0
    # Fresh metric per epoch, so each epoch's accuracy is independent.
    train_acc = Accuracy(task='multiclass', num_classes=10).to(device)
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass + parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate loss and accuracy
        train_loss += loss.item()
        train_acc.update(outputs, labels)

    # --- Evaluation phase ---
    model.eval()
    test_acc = Accuracy(task='multiclass', num_classes=10).to(device)
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            test_acc.update(outputs, labels)

    # Compute once and reuse (original called .compute() three times).
    epoch_test_acc = test_acc.compute()

    # Report epoch results.  NOTE: the original had this f-string broken
    # across two physical lines (a syntax error); joined into one line.
    print(f'Epoch [{epoch+1}/{num_epochs}]')
    print(f'Train Loss: {train_loss/len(train_loader):.4f} | Train Acc: {train_acc.compute():.4f}')
    print(f'Test Acc: {epoch_test_acc:.4f}\n')

    # Keep the checkpoint with the best test accuracy so far.
    if epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        torch.save(model.state_dict(), 'best_resnet18.pth')
        print(f'Best model saved with acc: {best_acc:.4f}')
五、进阶优化技巧
5.1 混合精度训练
使用torch.cuda.amp加速训练:
from torch.cuda.amp import GradScaler, autocast
# NOTE(review): torch.cuda.amp is deprecated in recent PyTorch releases in
# favor of torch.amp (autocast('cuda') / GradScaler('cuda')) -- confirm the
# target version before publishing.
scaler = GradScaler()
# Inside the training loop, replace the plain forward/backward/step with:
with autocast():
    outputs = model(images)
    loss = criterion(outputs, labels)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
5.2 分布式训练
使用torch.nn.DataParallel或torch.distributed实现多GPU训练。
5.3 模型剪枝
使用torch.nn.utils.prune减少模型参数量:
import torch.nn.utils.prune as prune
# Randomly zero out 50% of conv1's weight entries.
prune.random_unstructured(model.conv1, name='weight', amount=0.5)
六、常见问题解答
Q1: 模型训练时梯度消失怎么办?
A: 确保使用残差块、合适的学习率(0.001-0.01)、BatchNorm层,或尝试混合精度训练。
Q2: 模型过拟合怎么办?
A: 增加数据增强、使用Dropout层、L2正则化、早停(Early Stopping)。
Q3: ResNet18和ResNet50有什么区别?
A: ResNet50更深(50层 vs 18层),使用Bottleneck块(1×1+3×3+1×1卷积)。Bottleneck的优势在于:相比在同样深度下直接堆叠BasicBlock,它能以更少的参数量和计算量实现;但注意ResNet50整体参数量(约25.6M)仍大于ResNet18(约11.7M)。
七、总结
ResNet是深度学习领域的经典网络,通过残差块和跳跃连接解决了深层网络的训练难题。本文详细介绍了PyTorch搭建ResNet18的完整流程,包括环境准备、模型定义、训练测试及进阶优化技巧。根据实际任务需求,可调整残差块数量、通道数等参数,实现更高效的模型训练。
注:本文基于PyTorch 2.0+版本编写,具体实现可根据实际环境调整。