PyTorch Learning Notes
1. Basic Usage
1.1 Basic Information
import torch

# Print the torch version
print(torch.__version__)
# Check whether CUDA is available
print(torch.cuda.is_available())
"""
2.7.0
False
"""
1.2 Creating Tensors
# Create a 5x3 matrix of uninitialized memory (the values are whatever happens to be
# in the allocated memory; here they print as 0.)
print("-------- empty --------")
print(torch.empty(5, 3))  # equivalent to torch.empty((5, 3))

# Create a randomly initialized 5x3 matrix, values uniformly distributed in [0, 1)
print("-------- rand --------")
rand_x = torch.rand(5, 3)  # equivalent to rand_x = torch.rand((5, 3))
print(rand_x)
print(rand_x[:, 0])       # column 0, returned as a 1-D tensor
print(rand_x[0, :])       # row 0, returned as a 1-D tensor
print(rand_x[:, 0:2])     # first two columns, returned as a 2-D tensor
print(rand_x[:, [0, 2]])  # columns 0 and 2, returned as a 2-D tensor
print(rand_x[::2])        # rows 0, 2, 4, returned as a 2-D tensor

# Create a randomly initialized 2x10 matrix drawn from the standard normal distribution
print("-------- normal --------")
normal_x = torch.normal(0, 1, size=(2, 10))
print(normal_x)

# Create a randomly initialized 1-D tensor with integer values in [0, 1000)
print("-------- randint --------")
randint_x = torch.randint(low=0, high=1000, size=(8,))
print(randint_x)

# Create a matrix filled with 0, dtype long
print("-------- zeros --------")
zero_x = torch.zeros(5, 3, dtype=torch.long)  # equivalent to zero_x = torch.zeros((5, 3), dtype=torch.long)
print(zero_x)

# Create a matrix filled with 1., dtype float
print("-------- ones --------")
one_x = torch.ones(5, 3, dtype=torch.float)  # equivalent to one_x = torch.ones((5, 3), dtype=torch.float)
print(one_x)

# Create a matrix with 1. on the diagonal, dtype float
print("-------- eye --------")
eye_x = torch.eye(5, 3, dtype=torch.float)
print(eye_x)
# Extract the diagonal elements
s = eye_x.diag()
print(s)
# Embed the vector back onto the diagonal of a matrix
t = s.diag_embed()  # equivalent to: t = torch.diag_embed(s)
print(t)

# Create a 1-D tensor with the values [5.5, 3]; one value is a float, so all elements become floats
print("-------- 1-D tensor --------")
tensor1 = torch.tensor([5.5, 3])
print(tensor1)

# Create a 2-D tensor; none of the values is a float, so all elements stay integers
print("-------- 2-D tensor --------")
tensor0 = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(tensor0)

# Based on an existing tensor, create a new tensor whose shape is given by the size argument,
# with all elements set to 1; by default it inherits the original tensor's dtype and device (CPU/GPU)
print("-------- new_ones --------")
tensor2 = tensor1.new_ones((2, 3))
print(tensor2)

# Keep the shape of an existing tensor but override its dtype
print("-------- randn_like --------")
tensor3 = torch.randn_like(tensor2, dtype=torch.float)
print(tensor3)

# Print the tensor's size
print("-------- tensor size --------")
print(tensor3.size())
print(tensor3.shape)

# Convert a single-element tensor into a Python scalar
print("-------- tensor item --------")
tensor4 = torch.Tensor([3.14])
print(tensor4.item())
"""
-------- empty --------
tensor([[0., 0., 0.],[0., 0., 0.],[0., 0., 0.],[0., 0., 0.],[0., 0., 0.]])
-------- rand --------
tensor([[0.6596, 0.3999, 0.4556],[0.2757, 0.6820, 0.7506],[0.0683, 0.1522, 0.9666],[0.7557, 0.1943, 0.2406],[0.5978, 0.7308, 0.1105]])
tensor([0.6596, 0.2757, 0.0683, 0.7557, 0.5978])
tensor([0.6596, 0.3999, 0.4556])
tensor([[0.6596, 0.3999],[0.2757, 0.6820],[0.0683, 0.1522],[0.7557, 0.1943],[0.5978, 0.7308]])
tensor([[0.6596, 0.4556],[0.2757, 0.7506],[0.0683, 0.9666],[0.7557, 0.2406],[0.5978, 0.1105]])
tensor([[0.6596, 0.3999, 0.4556],[0.0683, 0.1522, 0.9666],[0.5978, 0.7308, 0.1105]])
-------- normal --------
tensor([[ 0.3300, -0.5461, 1.3952, -1.4907, -0.4039, 0.2111, 0.4386, 0.6213,-0.9563, -0.4214],[-0.2401, -1.3838, -1.1084, 1.8060, -0.1078, -0.1417, -1.5372, -0.3526,0.2074, -1.0423]])
-------- randint --------
tensor([474, 834, 908, 552, 926, 543, 338, 452])
-------- zeros --------
tensor([[0, 0, 0],[0, 0, 0],[0, 0, 0],[0, 0, 0],[0, 0, 0]])
-------- ones --------
tensor([[1., 1., 1.],[1., 1., 1.],[1., 1., 1.],[1., 1., 1.],[1., 1., 1.]])
-------- eye --------
tensor([[1., 0., 0.],[0., 1., 0.],[0., 0., 1.],[0., 0., 0.],[0., 0., 0.]])
tensor([1., 1., 1.])
tensor([[1., 0., 0.],[0., 1., 0.],[0., 0., 1.]])
-------- 1-D tensor --------
tensor([5.5000, 3.0000])
-------- 2-D tensor --------
tensor([[1, 2],[3, 4],[5, 6]])
-------- new_ones --------
tensor([[1., 1., 1.],[1., 1., 1.]])
-------- randn_like --------
tensor([[ 0.4086, 0.6232, -0.6118],[ 0.3720, 0.0189, 1.0114]])
-------- tensor size --------
torch.Size([2, 3])
torch.Size([2, 3])
-------- tensor item --------
3.140000104904175
"""
1.3 Operations Between Tensors
a = torch.tensor([[1.0, 2, 3], [4, 5, 6]])
b = torch.tensor([[1, 2, 3], [4, 5, 6]])

# Addition
print("-------- tensor addition --------")
c = a + b
print(c)
c = torch.add(a, b)
print(c)
c = a.add(b)
print(c)
# a.add_(b)  # modifies a in place; methods ending with an underscore modify the caller
# print(a)

# Subtraction
print("-------- tensor subtraction --------")
c = a - b
print(c)
c = torch.sub(a, b)
print(c)
c = a.sub(b)
print(c)
# a.sub_(b)  # modifies a in place; methods ending with an underscore modify the caller
# print(a)

# Multiplication: Hadamard product (element-wise)
print("-------- tensor multiplication --------")
c = a * b
print(c)
c = torch.mul(a, b)
print(c)
c = a.mul(b)
print(c)
# a.mul_(b)  # modifies a in place; methods ending with an underscore modify the caller
# print(a)

# Division
print("-------- tensor division --------")
c = a / b
print(c)
c = torch.div(a, b)
print(c)
c = a.div(b)
print(c)
# a.div_(b)  # modifies a in place; a must be a floating-point tensor
# print(a)

# Matrix multiplication
print("-------- matrix multiplication --------")
a = torch.tensor([[1, 1, 1], [1, 1, 1]])
b = torch.tensor([[1, 1], [1, 1], [1, 1]])
c = torch.mm(a, b)
print(c)
c = torch.matmul(a, b)
print(c)
c = a @ b
print(c)
c = a.matmul(b)
print(c)

# Power
print("-------- tensor power --------")
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
c = torch.pow(a, 3)
print(c)
c = a.pow(3)
print(c)
c = a**3
print(c)
# a.pow_(3)  # modifies a in place; methods ending with an underscore modify the caller
# print(a)

# Square root
print("-------- tensor sqrt --------")
a = torch.tensor([[1.0, 2, 3], [4, 5, 6]])
c = a.sqrt()
print(c)
# a.sqrt_()  # modifies a in place; a must be a floating-point tensor
# print(a)

# Logarithms
print("-------- tensor logarithms --------")
a = torch.tensor([[1.0, 2, 3], [4, 5, 6]])
c = torch.log2(a)
print(c)
c = torch.log10(a)
print(c)
c = torch.log(a)  # natural log (base e)
print(c)
# a.log_()  # modifies a in place; a must be a floating-point tensor
# print(a)
"""
-------- tensor addition --------
tensor([[ 2., 4., 6.],[ 8., 10., 12.]])
tensor([[ 2., 4., 6.],[ 8., 10., 12.]])
tensor([[ 2., 4., 6.],[ 8., 10., 12.]])
-------- tensor subtraction --------
tensor([[0., 0., 0.],[0., 0., 0.]])
tensor([[0., 0., 0.],[0., 0., 0.]])
tensor([[0., 0., 0.],[0., 0., 0.]])
-------- tensor multiplication --------
tensor([[ 1., 4., 9.],[16., 25., 36.]])
tensor([[ 1., 4., 9.],[16., 25., 36.]])
tensor([[ 1., 4., 9.],[16., 25., 36.]])
-------- tensor division --------
tensor([[1., 1., 1.],[1., 1., 1.]])
tensor([[1., 1., 1.],[1., 1., 1.]])
tensor([[1., 1., 1.],[1., 1., 1.]])
-------- matrix multiplication --------
tensor([[3, 3],[3, 3]])
tensor([[3, 3],[3, 3]])
tensor([[3, 3],[3, 3]])
tensor([[3, 3],[3, 3]])
-------- tensor power --------
tensor([[ 1, 8, 27],[ 64, 125, 216]])
tensor([[ 1, 8, 27],[ 64, 125, 216]])
tensor([[ 1, 8, 27],[ 64, 125, 216]])
-------- tensor sqrt --------
tensor([[1.0000, 1.4142, 1.7321],[2.0000, 2.2361, 2.4495]])
-------- tensor logarithms --------
tensor([[0.0000, 1.0000, 1.5850],[2.0000, 2.3219, 2.5850]])
tensor([[0.0000, 0.3010, 0.4771],[0.6021, 0.6990, 0.7782]])
tensor([[0.0000, 0.6931, 1.0986],[1.3863, 1.6094, 1.7918]])
"""
1.4 Operations Between a Tensor and a Scalar
a = torch.tensor([[1.0, 2, 3], [4, 5, 6]])
b = 2

# Add, subtract, multiply, divide: every element of the tensor is combined with b
print(a + b)
print(a - b)
print(a * b)
print(a / b)
"""
tensor([[3., 4., 5.],[6., 7., 8.]])
tensor([[-1., 0., 1.],[ 2., 3., 4.]])
tensor([[ 2., 4., 6.],[ 8., 10., 12.]])
tensor([[0.5000, 1.0000, 1.5000],[2.0000, 2.5000, 3.0000]])
"""
1.5 Reshaping Tensors
print("-------- 二维张量 --------")
a = torch.tensor([[1.0, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
c = a.reshape(-1, 2) # 转化成两列的二维矩阵
print(c)
c = a.view(-1, 3) # 转化成三列的二维矩阵
print(c)
c = a.reshape(-1) # 按照行转化为一维数组
print(c)
c = a.view(-1) # 按照行转化为一维数组
print(c)
print("-------- 三维维张量 --------")
a = torch.tensor([[[1.0, 2, 3],[4, 5, 6],[7, 8, 9]],[[10, 11, 12],[13, 14, 15],[16, 17, 18]],[[20, 21, 22],[23, 24, 25],[26, 27, 28]]
])
c = a.view(-1, 3*3)
print(c)
"""
-------- 2-D tensor --------
tensor([[ 1., 2.],[ 3., 4.],[ 5., 6.],[ 7., 8.],[ 9., 10.],[11., 12.]])
tensor([[ 1., 2., 3.],[ 4., 5., 6.],[ 7., 8., 9.],[10., 11., 12.]])
tensor([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])
tensor([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])
-------- 3-D tensor --------
tensor([[ 1., 2., 3., 4., 5., 6., 7., 8., 9.],[10., 11., 12., 13., 14., 15., 16., 17., 18.],[20., 21., 22., 23., 24., 25., 26., 27., 28.]])
"""
1.6 Transposing Tensors
print("-------- 一维张量 --------")
a = torch.tensor([1, 2, 3]) # shape: (3,)
c = a.transpose(0, 0) # 无变化,仍为 [1, 2, 3]
print(c)
b = a.unsqueeze(0) # shape: (1, 3),先通过 unsqueeze() 升维至二维,再转置
c = b.transpose(0, 1) # shape: (3, 1)
print(c)print("-------- 二维张量 --------")
a = torch.tensor([[1, 2], [3, 4], [5, 6]]) # shape: (3, 2)
c = a.transpose(0, 1) # 或 a.t()
print(a)
print(c) # tensor([[1, 3, 5], [2, 4, 6]]), shape: (2, 3)
c = a.t()
print(c)print("-------- 三维张量 --------")
a = torch.arange(24).reshape(2, 3, 4) # shape: (2, 3, 4)
c = a.transpose(0, 2) # 交换第0和第2维
print(a)
print(c) # torch.Size([4, 3, 2])
"""
-------- 1-D tensor --------
tensor([1, 2, 3])
tensor([[1],[2],[3]])
-------- 2-D tensor --------
tensor([[1, 2],[3, 4],[5, 6]])
tensor([[1, 3, 5],[2, 4, 6]])
tensor([[1, 3, 5],[2, 4, 6]])
-------- 3-D tensor --------
tensor([[[ 0, 1, 2, 3],[ 4, 5, 6, 7],[ 8, 9, 10, 11]],[[12, 13, 14, 15],[16, 17, 18, 19],[20, 21, 22, 23]]])
tensor([[[ 0, 12],[ 4, 16],[ 8, 20]],[[ 1, 13],[ 5, 17],[ 9, 21]],[[ 2, 14],[ 6, 18],[10, 22]],[[ 3, 15],[ 7, 19],[11, 23]]])
"""
1.7 Joining Tensors
a = torch.tensor([[1.0, 2, 3], [4, 5, 6]])
b = torch.tensor([[7, 8, 9], [10, 11, 12]])
c = torch.stack([a, b], dim=0)
print(c)
c = torch.stack([a, b], dim=1)
print(c)
c = torch.stack([a, b], dim=2)
print(c)
"""
tensor([[[ 1., 2., 3.],[ 4., 5., 6.]],[[ 7., 8., 9.],[10., 11., 12.]]])
tensor([[[ 1., 2., 3.],[ 7., 8., 9.]],[[ 4., 5., 6.],[10., 11., 12.]]])
tensor([[[ 1., 7.],[ 2., 8.],[ 3., 9.]],[[ 4., 10.],[ 5., 11.],[ 6., 12.]]])
"""
2. Building Common Models
2.1 DNN
2.1.1 Code
# Import the required libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# DNN for the MNIST handwritten digit dataset
"""
MNIST dataset:
wget https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz
wget https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz
wget https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz
wget https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz
"""

# Set the random seed for reproducibility
torch.manual_seed(42)

# Choose the compute device (prefer the GPU when available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------- 1. Data loading and preprocessing --------------------
# Preprocessing transforms (normalization constants are the official MNIST statistics)
transform = transforms.Compose([
    transforms.ToTensor(),                      # convert PIL pixels [0, 255] to a tensor in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,))  # standardize with the MNIST mean and std
])

# Download and load the training and test sets
train_dataset = datasets.MNIST(
    root='./data',        # where to store the dataset
    train=True,           # training split
    download=True,        # download automatically if missing
    transform=transform   # apply the transforms defined above
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,          # test split
    download=True,
    transform=transform
)

# Create the data loaders (load data in batches)
train_loader = DataLoader(
    train_dataset,
    batch_size=64,        # 64 samples per batch
    shuffle=True          # shuffle the training data
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1000,      # larger batches are fine for evaluation
    shuffle=False         # no need to shuffle the test data
)

# -------------------- 2. Define the fully connected network --------------------
class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        # Network layers
        self.fc1 = nn.Linear(28 * 28, 128)   # input layer (784 pixels -> 128 units)
        self.fc2 = nn.Linear(128, 64)        # hidden layer (128 -> 64)
        self.fc3 = nn.Linear(64, 10)         # output layer (64 -> 10 classes)
        self.dropout = nn.Dropout(0.5)       # drop 50% of units to reduce overfitting
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = x.view(-1, 28 * 28)              # flatten the image into a vector
        x = self.relu(self.fc1(x))           # first layer + activation
        x = self.dropout(x)                  # dropout
        x = self.relu(self.fc2(x))           # second layer + activation
        x = self.dropout(x)                  # dropout
        x = self.fc3(x)                      # final layer output
        # Note: nn.CrossEntropyLoss already applies log-softmax internally, so this extra
        # LogSoftmax is not needed; the model still trains (see the results below), but returning
        # the raw logits with CrossEntropyLoss, or pairing LogSoftmax with nn.NLLLoss, is the standard choice.
        return self.softmax(x)

# Instantiate the model and move it to the compute device
model = DNN().to(device)

# Print the network structure
# print(model)  # print(model) shows the layers defined in __init__, but not the actual execution order
from net_structure import *
print_model_leaf_structure(model, torch.randn(64, 1, 28, 28))  # 64 images, 1 channel (grayscale), 28x28 pixels

# -------------------- 3. Loss function and optimizer --------------------
criterion = nn.CrossEntropyLoss()                     # cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.001)  # adaptive-learning-rate optimizer

# -------------------- 4. Training loop --------------------
def train(epochs):
    model.train()  # switch to training mode
    for epoch in range(epochs):
        total_loss, running_loss = 0.0, 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            # Move the batch to the compute device (CPU/GPU)
            data, target = data.to(device), target.to(device)
            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, target)
            # Backward pass and parameter update
            optimizer.zero_grad()  # clear old gradients
            loss.backward()        # compute gradients
            optimizer.step()       # update parameters
            # Track the loss
            running_loss += loss.item()
            total_loss += loss.item()
            if batch_idx % 100 == 99:  # print every 100 batches
                print(f'Epoch {epoch + 1}, Batch {batch_idx + 1}, Loss: {running_loss / 100:.3f}')
                running_loss = 0
        # Print the average loss for this epoch
        print(f'Epoch {epoch + 1}/{epochs} - Loss: {total_loss / len(train_loader):.4f}')

# Train for 5 epochs
train(epochs=5)

# -------------------- 5. Save the trained model --------------------
torch.save(model.state_dict(), 'mnist_dnn.pth')  # saving the state_dict is the recommended approach

# -------------------- 6. Evaluation --------------------
def evaluate(new_model):
    new_model.eval()  # switch to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed, saves memory
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = new_model(data)
            _, predicted = torch.max(outputs.data, 1)  # predicted class = index of the largest score
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

new_model = DNN().to(device)
# Load the saved parameters (demonstrates the loading step)
new_model.load_state_dict(torch.load('mnist_dnn.pth'))
# Run the evaluation
evaluate(new_model)
2.1.2 Results
"""
【 Linear 】Input shape: torch.Size([64, 784]) → Output shape: torch.Size([64, 128]) | Params count: 100480
【 ReLU 】Input shape: torch.Size([64, 128]) → Output shape: torch.Size([64, 128]) | Params count: 0
【 Dropout 】Input shape: torch.Size([64, 128]) → Output shape: torch.Size([64, 128]) | Params count: 0
【 Linear 】Input shape: torch.Size([64, 128]) → Output shape: torch.Size([64, 64]) | Params count: 8256
【 ReLU 】Input shape: torch.Size([64, 64]) → Output shape: torch.Size([64, 64]) | Params count: 0
【 Dropout 】Input shape: torch.Size([64, 64]) → Output shape: torch.Size([64, 64]) | Params count: 0
【 Linear 】Input shape: torch.Size([64, 64]) → Output shape: torch.Size([64, 10]) | Params count: 650
【 LogSoftmax 】Input shape: torch.Size([64, 10]) → Output shape: torch.Size([64, 10]) | Params count: 0
***Total Parameters***: 109386 = [100480 + 0 + 0 + 8256 + 0 + 0 + 650 + 0]

Epoch 1, Batch 100, Loss: 1.360
Epoch 1, Batch 200, Loss: 0.689
Epoch 1, Batch 300, Loss: 0.536
Epoch 1, Batch 400, Loss: 0.495
Epoch 1, Batch 500, Loss: 0.457
Epoch 1, Batch 600, Loss: 0.434
Epoch 1, Batch 700, Loss: 0.405
Epoch 1, Batch 800, Loss: 0.397
Epoch 1, Batch 900, Loss: 0.378
Epoch 1/5 - Loss: 0.5635
Epoch 2, Batch 100, Loss: 0.352
Epoch 2, Batch 200, Loss: 0.345
Epoch 2, Batch 300, Loss: 0.354
Epoch 2, Batch 400, Loss: 0.340
Epoch 2, Batch 500, Loss: 0.309
Epoch 2, Batch 600, Loss: 0.297
Epoch 2, Batch 700, Loss: 0.325
Epoch 2, Batch 800, Loss: 0.318
Epoch 2, Batch 900, Loss: 0.307
Epoch 2/5 - Loss: 0.3257
Epoch 3, Batch 100, Loss: 0.285
Epoch 3, Batch 200, Loss: 0.290
Epoch 3, Batch 300, Loss: 0.282
Epoch 3, Batch 400, Loss: 0.289
Epoch 3, Batch 500, Loss: 0.280
Epoch 3, Batch 600, Loss: 0.271
Epoch 3, Batch 700, Loss: 0.273
Epoch 3, Batch 800, Loss: 0.272
Epoch 3, Batch 900, Loss: 0.267
Epoch 3/5 - Loss: 0.2788
Epoch 4, Batch 100, Loss: 0.257
Epoch 4, Batch 200, Loss: 0.236
Epoch 4, Batch 300, Loss: 0.269
Epoch 4, Batch 400, Loss: 0.269
Epoch 4, Batch 500, Loss: 0.264
Epoch 4, Batch 600, Loss: 0.272
Epoch 4, Batch 700, Loss: 0.255
Epoch 4, Batch 800, Loss: 0.251
Epoch 4, Batch 900, Loss: 0.254
Epoch 4/5 - Loss: 0.2578
Epoch 5, Batch 100, Loss: 0.247
Epoch 5, Batch 200, Loss: 0.219
Epoch 5, Batch 300, Loss: 0.236
Epoch 5, Batch 400, Loss: 0.226
Epoch 5, Batch 500, Loss: 0.236
Epoch 5, Batch 600, Loss: 0.250
Epoch 5, Batch 700, Loss: 0.240
Epoch 5, Batch 800, Loss: 0.240
Epoch 5, Batch 900, Loss: 0.235
Epoch 5/5 - Loss: 0.2361
Test Accuracy: 96.37%
"""
2.1.3 Analysis of the Output
Network structure
- The parameter count of a fully connected (Linear) layer is $(C_{in}+1) \times C_{out}$, where:
  - $C_{in}$: input dimension;
  - $C_{out}$: output dimension;
  - the +1 accounts for the bias term.
- As the table shows, all of the parameters sit in the Linear (fully connected) layers (a programmatic check is sketched at the end of this section):
  - first linear layer: $(784+1) \times 128 = 100480$;
  - second linear layer: $(128+1) \times 64 = 8256$;
  - third linear layer: $(64+1) \times 10 = 650$.

Epoch, Batch, Batch_size
- Epoch: one full pass over all of the training data is one epoch;
- Batch: the training data is usually split into groups for training, and each group is one batch;
- Batch_size: the number of samples in one batch; the 64 in the network structure above is the batch size.
- Example: with 6400 training samples and 64 samples fed in per step, one epoch consists of $\frac{6400}{64} = 100$ batches, each containing 64 samples.
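To double-check the $(C_{in}+1) \times C_{out}$ formula, the per-layer counts can also be read directly off the model with numel(). A minimal sketch, assuming the DNN class from section 2.1.1 is in scope:

# Compare the formula against PyTorch's own parameter counts
model = DNN()

for name, layer in model.named_children():
    n_params = sum(p.numel() for p in layer.parameters())
    if n_params:
        print(f"{name}: {n_params}")   # fc1: 100480, fc2: 8256, fc3: 650

print("total:", sum(p.numel() for p in model.parameters()))  # 109386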
2.2 CNN
2.2.1 Code
# Import the required libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# CNN for the MNIST handwritten digit dataset
"""
MNIST dataset:
wget https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz
wget https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz
wget https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz
wget https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz
"""

# Set the random seed for reproducibility
torch.manual_seed(42)

# Choose the compute device (prefer the GPU when available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------- 1. Data loading and preprocessing --------------------
# Preprocessing transforms (normalization constants are the official MNIST statistics)
transform = transforms.Compose([
    transforms.ToTensor(),                      # convert PIL pixels [0, 255] to a tensor in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,))  # standardize with the MNIST mean and std
])

# Download and load the training and test sets
train_dataset = datasets.MNIST(
    root='./data',        # where to store the dataset
    train=True,           # training split
    download=True,        # download automatically if missing
    transform=transform   # apply the transforms defined above
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,          # test split
    download=True,
    transform=transform
)

# Create the data loaders (load data in batches)
train_loader = DataLoader(
    train_dataset,
    batch_size=64,        # 64 samples per batch
    shuffle=True          # shuffle the training data
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1000,      # larger batches are fine for evaluation
    shuffle=False         # no need to shuffle the test data
)

# -------------------- 2. Define the convolutional network --------------------
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # First conv layer: 1 input channel (grayscale), 10 output channels, 5x5 kernel
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        # Second conv layer: 10 input channels, 20 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # ReLU activation
        self.relu = nn.ReLU()
        # Max pooling, 2x2 window, stride 2
        self.pool = nn.MaxPool2d(2)
        # Fully connected layer: 320 inputs (see forward for the calculation), 10 classes (digits 0-9)
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        # Input shape: [batch_size, 1, 28, 28]
        x = self.pool(self.relu(self.conv1(x)))  # -> [64, 10, 12, 12]
        x = self.pool(self.relu(self.conv2(x)))  # -> [64, 20, 4, 4]
        x = x.view(-1, 320)                      # flatten (320 = 20 * 4 * 4)
        x = self.fc(x)                           # fully connected output
        return x

# Instantiate the model and move it to the compute device
model = CNN().to(device)

# Print the network structure
# print(model)  # print(model) shows the layers defined in __init__, but not the actual execution order
from net_structure import *
print_model_leaf_structure(model, torch.randn(64, 1, 28, 28))  # 64 images, 1 channel (grayscale), 28x28 pixels

# -------------------- 3. Loss function and optimizer --------------------
criterion = nn.CrossEntropyLoss()  # cross-entropy loss (suited to classification)
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,       # initial learning rate
    momentum=0.5   # momentum to speed up convergence
)

# -------------------- 4. Training loop --------------------
def train(epochs):
    model.train()  # switch to training mode
    for epoch in range(epochs):
        total_loss, running_loss = 0.0, 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            # Move the batch to the compute device (CPU/GPU)
            data, target = data.to(device), target.to(device)
            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, target)
            # Backward pass and parameter update
            optimizer.zero_grad()  # clear old gradients
            loss.backward()        # compute gradients
            optimizer.step()       # update parameters
            # Track the loss
            running_loss += loss.item()
            total_loss += loss.item()
            if batch_idx % 100 == 99:  # print every 100 batches
                print(f'Epoch {epoch + 1}, Batch {batch_idx + 1}, Loss: {running_loss / 100:.3f}')
                running_loss = 0
        # Print the average loss for this epoch
        print(f'Epoch {epoch + 1}/{epochs} - Loss: {total_loss / len(train_loader):.4f}')

# Train for 5 epochs
train(epochs=5)

# -------------------- 5. Save the trained model --------------------
torch.save(model.state_dict(), 'mnist_cnn.pth')  # save the model parameters

# -------------------- 6. Evaluation --------------------
def evaluate(new_model):
    new_model.eval()  # switch to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed, saves memory
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = new_model(data)
            _, predicted = torch.max(outputs.data, 1)  # predicted class = index of the largest score
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

new_model = CNN().to(device)
# Load the saved parameters (demonstrates the loading step)
new_model.load_state_dict(torch.load('mnist_cnn.pth'))
# Run the evaluation
evaluate(new_model)

# -------------------- 7. Visualize some predictions (optional) --------------------
# Take one batch from the test set
dataiter = iter(test_loader)
images, labels = next(dataiter)
images, labels = images.to(device), labels.to(device)

# Run the predictions
outputs = new_model(images)
_, preds = torch.max(outputs, 1)

# Show the first 16 images together with their predictions
fig = plt.figure(figsize=(12, 6))
for idx in range(16):
    ax = fig.add_subplot(4, 4, idx + 1)
    img = images[idx].cpu().numpy().squeeze()
    ax.imshow(img, cmap='gray_r')
    ax.set_title(f'Pred: {preds[idx]} | True: {labels[idx]}')
    ax.axis('off')
plt.tight_layout()
plt.show()
2.2.2 Results
"""
【 Conv2d 】Input shape: torch.Size([64, 1, 28, 28]) → Output shape: torch.Size([64, 10, 24, 24]) | Params count: 260
【 ReLU 】Input shape: torch.Size([64, 10, 24, 24]) → Output shape: torch.Size([64, 10, 24, 24]) | Params count: 0
【 MaxPool2d 】Input shape: torch.Size([64, 10, 24, 24]) → Output shape: torch.Size([64, 10, 12, 12]) | Params count: 0
【 Conv2d 】Input shape: torch.Size([64, 10, 12, 12]) → Output shape: torch.Size([64, 20, 8, 8]) | Params count: 5020
【 ReLU 】Input shape: torch.Size([64, 20, 8, 8]) → Output shape: torch.Size([64, 20, 8, 8]) | Params count: 0
【 MaxPool2d 】Input shape: torch.Size([64, 20, 8, 8]) → Output shape: torch.Size([64, 20, 4, 4]) | Params count: 0
【 Linear 】Input shape: torch.Size([64, 320]) → Output shape: torch.Size([64, 10]) | Params count: 3210
***Total Parameters***: 8490 = [260 + 0 + 0 + 5020 + 0 + 0 + 3210]

Epoch 1, Batch 100, Loss: 1.293
Epoch 1, Batch 200, Loss: 0.383
Epoch 1, Batch 300, Loss: 0.275
Epoch 1, Batch 400, Loss: 0.223
Epoch 1, Batch 500, Loss: 0.182
Epoch 1, Batch 600, Loss: 0.161
Epoch 1, Batch 700, Loss: 0.151
Epoch 1, Batch 800, Loss: 0.142
Epoch 1, Batch 900, Loss: 0.131
Epoch 1/5 - Loss: 0.3183
Epoch 2, Batch 100, Loss: 0.114
Epoch 2, Batch 200, Loss: 0.107
Epoch 2, Batch 300, Loss: 0.114
Epoch 2, Batch 400, Loss: 0.098
Epoch 2, Batch 500, Loss: 0.100
Epoch 2, Batch 600, Loss: 0.095
Epoch 2, Batch 700, Loss: 0.090
Epoch 2, Batch 800, Loss: 0.092
Epoch 2, Batch 900, Loss: 0.086
Epoch 2/5 - Loss: 0.0998
Epoch 3, Batch 100, Loss: 0.090
Epoch 3, Batch 200, Loss: 0.072
Epoch 3, Batch 300, Loss: 0.072
Epoch 3, Batch 400, Loss: 0.079
Epoch 3, Batch 500, Loss: 0.078
Epoch 3, Batch 600, Loss: 0.072
Epoch 3, Batch 700, Loss: 0.069
Epoch 3, Batch 800, Loss: 0.083
Epoch 3, Batch 900, Loss: 0.068
Epoch 3/5 - Loss: 0.0749
Epoch 4, Batch 100, Loss: 0.063
Epoch 4, Batch 200, Loss: 0.066
Epoch 4, Batch 300, Loss: 0.063
Epoch 4, Batch 400, Loss: 0.070
Epoch 4, Batch 500, Loss: 0.061
Epoch 4, Batch 600, Loss: 0.065
Epoch 4, Batch 700, Loss: 0.058
Epoch 4, Batch 800, Loss: 0.058
Epoch 4, Batch 900, Loss: 0.055
Epoch 4/5 - Loss: 0.0625
Epoch 5, Batch 100, Loss: 0.052
Epoch 5, Batch 200, Loss: 0.057
Epoch 5, Batch 300, Loss: 0.063
Epoch 5, Batch 400, Loss: 0.052
Epoch 5, Batch 500, Loss: 0.052
Epoch 5, Batch 600, Loss: 0.066
Epoch 5, Batch 700, Loss: 0.053
Epoch 5, Batch 800, Loss: 0.051
Epoch 5, Batch 900, Loss: 0.054
Epoch 5/5 - Loss: 0.0553
Test Accuracy: 98.30%
"""
2.2.3 Analysis of the Output
Network structure
- The parameter count of a convolutional layer is $(K_h \times K_w \times C_{in} + 1) \times C_{out}$, where:
  - $K_h, K_w$: kernel height and width;
  - $C_{in}$: number of input channels;
  - $C_{out}$: number of output channels;
  - the +1 accounts for the bias term.
- All of the parameters sit in the convolutional and linear layers (a programmatic check is sketched at the end of this section):
  - first convolutional layer: $(5 \times 5 \times 1 + 1) \times 10 = 260$;
  - second convolutional layer: $(5 \times 5 \times 10 + 1) \times 20 = 5020$;
  - final linear layer: $(320+1) \times 10 = 3210$.
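The 320 inputs of the final linear layer follow from the feature-map sizes: a 5x5 kernel with no padding and stride 1 shrinks each spatial dimension by 4, and the 2x2 max pooling halves it, so 28 → 24 → 12 → 8 → 4 and 20 channels × 4 × 4 = 320. A minimal sketch of both checks, assuming the CNN class from section 2.2.1 is in scope:

# Spatial output size of a conv/pool stage with no padding: (size - kernel) // stride + 1
def out_size(size, kernel, stride=1):
    return (size - kernel) // stride + 1

s = 28
s = out_size(s, 5)             # conv1: 28 -> 24
s = out_size(s, 2, stride=2)   # pool:  24 -> 12
s = out_size(s, 5)             # conv2: 12 -> 8
s = out_size(s, 2, stride=2)   # pool:   8 -> 4
print(20 * s * s)              # 320 inputs for the fully connected layer

# Per-layer parameter counts, for comparison with the table above
model = CNN()
for name, layer in model.named_children():
    n = sum(p.numel() for p in layer.parameters())
    if n:
        print(f"{name}: {n}")  # conv1: 260, conv2: 5020, fc: 3210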
3. Printing the Model Structure Defined in forward
Section 2 uses the functions defined below.
3.1 Definitions of the Printing Functions
# register_hooks attaches forward hooks to the network and prints its structure;
# only the outermost layers (direct children) are reported
def register_hooks(model):
    def hook_fn(module, input, output):
        layer_name = str(module).split('(')[0]
        input_shape = input[0].shape if isinstance(input, tuple) else input.shape
        output_shape = output.shape
        print(f"【{layer_name}】: Input shape: {input_shape} → Output shape: {output_shape}")
    hooks = []
    for name, layer in model.named_children():  # iterate over the direct child layers
        hook = layer.register_forward_hook(hook_fn)
        hooks.append(hook)
    return hooks

# register_leaf_hooks attaches forward hooks and prints the structure of the innermost (leaf) layers;
# requirement: the layers must be defined up front in the network's __init__
def register_leaf_hooks(model):
    # Hook function that captures the input/output shapes
    total_params_list = []  # collects the parameter count of each layer
    def hook_fn(module, input, output):
        params_count = sum(p.numel() for p in module.parameters())
        total_params_list.append(params_count)
        input_shape, output_shape = str(input[0].shape), str(output.shape)
        print(f"【{module.__class__.__name__:^15}】Input shape: {input_shape:^30} → Output shape: {output_shape:^30} | Params count: {params_count}")
    hooks = []
    # Layers created dynamically inside forward are not registered as submodules,
    # so named_modules() cannot reach them and no hook gets attached to them
    for name, module in model.named_modules():
        # Keep only leaf nodes (modules without children)
        if len(list(module.children())) == 0:
            hook = module.register_forward_hook(hook_fn)
            hooks.append(hook)
    return hooks, total_params_list

# print_model_structure prints the outermost structure of the model
def print_model_structure(model, inputs):
    hooks = []
    try:
        hooks = register_hooks(model)
        if isinstance(inputs, (list, tuple)):
            model(*inputs)
        else:
            model(inputs)
        print()
    except Exception as e:
        print(e)
    finally:
        for hook in hooks:
            hook.remove()

# print_model_leaf_structure prints the innermost (leaf) structure of the model
def print_model_leaf_structure(model, inputs):
    hooks = []
    try:
        hooks, total_params_list = register_leaf_hooks(model)
        if isinstance(inputs, (list, tuple)):
            model(*inputs)
        else:
            model(inputs)
        print(f"***Total Parameters***: {sum(total_params_list)} = [" + " + ".join([str(e) for e in total_params_list]) + "]\n")
    except Exception as e:
        print(e)
    finally:
        for hook in hooks:
            hook.remove()
3.2 Using the Printing Functions
Notes:
- You must supply an input of the right shape; in general the first dimension is the batch size and the remaining dimensions describe a single sample;
- Every layer has to be defined in __init__ and then used in forward; only layers registered this way can be captured when printing the structure. In other words, the layer definitions must be registered up front in __init__ (a counter-example is sketched at the end of this section);
- For example, for the DNN and CNN above:
# Choose the compute device (prefer the GPU when available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Instantiate the model and move it to the compute device
model = DNN().to(device)

# Print the network structure
# print(model)  # print(model) shows the layers defined in __init__, but not the actual execution order
from net_structure import *
print_model_leaf_structure(model, torch.randn(64, 1, 28, 28))  # 64 images, 1 channel (grayscale), 28x28 pixels
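To illustrate the second note above, here is a small hypothetical counter-example (BadDNN is not from the original text): the ReLU is created inside forward instead of __init__, so it never becomes a registered submodule, named_modules() cannot reach it, and the hook-based printer silently skips it. The helpers from section 3.1 are assumed to be imported.

import torch
import torch.nn as nn

class BadDNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = nn.ReLU()(self.fc1(x))   # created on the fly: not a registered submodule
        return self.fc2(x)

model = BadDNN()
# Only the two Linear layers are reported; the ReLU never appears because no hook could be attached to it
print_model_leaf_structure(model, torch.randn(64, 1, 28, 28))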