当前位置：首页 > news >正文

零基础学习书生·浦语大模型——基础岛

news 2025/7/15 4:45:44

第二关：玩转书生「多模态对话」和「AI 搜索」产品

任务一：使用MindSearch

任务二：尝试使用书生·浦语

尝试让其写一段 Self-Attention 网络模块代码：

import torch
import torch.nn as nn
import torch.nn.functional as F


class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    A single fused linear layer produces the query, key and value
    projections; attention is computed independently for each head and a
    final linear layer mixes the concatenated head outputs.
    """

    def __init__(self, embed_dim: int, num_heads: int) -> None:
        """Create the projection layers.

        :param embed_dim: Dimension of the input embeddings.
        :param num_heads: Number of attention heads.
        :raises AssertionError: If ``embed_dim`` is not divisible by
            ``num_heads``.
        """
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        # Kept as an assert so callers still see the original AssertionError.
        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"

        self.qkv = nn.Linear(embed_dim, embed_dim * 3)  # fused Query, Key, Value projection
        self.out = nn.Linear(embed_dim, embed_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply self-attention to ``x``.

        :param x: Tensor of shape ``(B, T, C)`` where ``B`` is the batch
            size, ``T`` the sequence length and ``C`` equals ``embed_dim``.
        :return: Tensor of shape ``(B, T, C)``.
        """
        B, T, C = x.size()

        # (B, T, C) -> (B, T, 3 * C) -> (B, T, 3, num_heads, head_dim)
        qkv = self.qkv(x).view(B, T, 3, self.num_heads, self.head_dim)

        # Move the Q/K/V axis to the front and the head axis before the
        # sequence axis: (3, B, num_heads, T, head_dim).  Unbinding along
        # dim 0 then yields three (B, num_heads, T, head_dim) tensors.
        # (Splitting along the sequence axis, as the code did before, both
        # mixes up tokens and leaves 5-D tensors that cannot be multiplied.)
        q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(0)

        # (B, num_heads, T, head_dim) @ (B, num_heads, head_dim, T)
        #   -> (B, num_heads, T, T)
        scale = self.head_dim ** -0.5
        attn_weights = torch.matmul(q, k.transpose(-2, -1)) * scale
        attn_weights = F.softmax(attn_weights, dim=-1)

        # (B, num_heads, T, T) @ (B, num_heads, T, head_dim)
        #   -> (B, num_heads, T, head_dim)
        attn_output = torch.matmul(attn_weights, v)

        # (B, num_heads, T, head_dim) -> (B, T, num_heads * head_dim)
        attn_output = attn_output.permute(0, 2, 1, 3).reshape(B, T, self.embed_dim)

        # Final output projection, shape stays (B, T, C).
        return self.out(attn_output)


# Example usage
if __name__ == "__main__":
    # Small smoke test: run one forward pass on random data and print the
    # resulting shape.
    embed_dim = 512
    num_heads = 8
    seq_length = 10
    batch_size = 2

    # Create a random input tensor of shape (batch, sequence, embedding).
    x = torch.randn(batch_size, seq_length, embed_dim)

    # Build the self-attention layer.
    self_attention = SelfAttention(embed_dim, num_heads)

    # Forward pass.
    output = self_attention(x)
    print(output.shape)  # expected: torch.Size([2, 10, 512])

生成的代码逻辑清晰、结构漂亮。

任务三：尝试使用InternVL