接下来为每个模型提供基本结构的代码,并对其亮点进行简要描述。以下代码是简化的示例:
1. ResNet
亮点:
残差连接: ResNet 引入了残差块,通过跳跃连接(恒等映射,或在形状不一致时使用 1×1 卷积投影)让各层只需学习残差,缓解了深层网络的退化和梯度消失问题,使得网络可以更轻松地学习恒等映射。
适应深度: 允许构建非常深的神经网络,成为深度学习中的里程碑。
import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    """Basic ResNet block: two 3x3 conv/BN stages plus an additive shortcut.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).

    The shortcut is an empty ``nn.Sequential`` (identity) unless the stride
    or the channel count changes, in which case it is a 1x1 conv followed by
    batch norm so that both summands have the same shape.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        needs_projection = stride != 1 or in_channels != out_channels

        # Main path. Sub-modules are created in the same order as they are
        # applied so parameter ordering stays predictable.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, 3, stride, 1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut path: identity, or a 1x1 projection when shapes differ.
        projection = []
        if needs_projection:
            projection = [
                nn.Conv2d(in_channels, out_channels, 1, stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        skip = self.shortcut(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += skip
        return self.relu(out)
2. MobileNetV1
亮点:
深度可分离卷积: MobileNetV1 使用深度可分离卷积层,减少模型参数和计算量,适应移动设备和嵌入式系统。
高效性能: 设计旨在在保持高准确性的同时,降低模型大小和计算复杂度,适用于资源受限的环境。
import torch
import torch.nn as nn


class DepthwiseSeparableConv(nn.Module):
    """Depthwise-separable convolution as used in MobileNetV1.

    Layout: 3x3 depthwise conv -> BN -> ReLU -> 1x1 pointwise conv -> BN -> ReLU.

    Args:
        in_channels: Number of input channels (also the depthwise group count).
        out_channels: Number of channels produced by the pointwise conv.
        stride: Stride of the depthwise convolution.

    Without the two activations, consecutive blocks would compose into a
    single affine map at inference time, so each stage is followed by
    BN + ReLU.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        self.depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,  # one filter per input channel
            bias=False,
        )
        self.bn_depthwise = nn.BatchNorm2d(in_channels)
        self.pointwise = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False
        )
        # Kept under the original attribute name: BN after the pointwise conv.
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply depthwise then pointwise convolution, each with BN + ReLU."""
        x = self.relu(self.bn_depthwise(self.depthwise(x)))
        x = self.relu(self.bn(self.pointwise(x)))
        return x


class MobileNetV1(nn.Module):
    """Truncated MobileNetV1 feature extractor (stem plus three separable blocks).

    Only the first few layers are spelled out; extend ``self.model`` with
    further ``DepthwiseSeparableConv`` blocks to build the full network.
    The module returns a feature map, not class logits.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            # Stem: regular 3x3 convolution that halves the resolution.
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            DepthwiseSeparableConv(32, 64, 1),
            DepthwiseSeparableConv(64, 128, 2),
            DepthwiseSeparableConv(128, 128, 1),
            # ... Repeat layers as needed
        )

    def forward(self, x):
        """Run the input through the sequential feature extractor."""
        return self.model(x)
3. MobileNetV2
亮点:
倒残差块: MobileNetV2 引入了倒残差块,依次包括 1×1 扩展(升维)卷积、3×3 深度卷积和不带激活函数的 1×1 线性瓶颈(降维)卷积,提高了性能。
动态调整结构: MobileNetV2 通过改变扩展比例(expand ratio)和网络宽度,可以在不同的资源和准确性要求之间进行平衡。
import torch
import torch.nn as nn


class InvertedResidual(nn.Module):
    """MobileNetV2 inverted residual block.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        stride: Stride of the 3x3 depthwise convolution.
        expand_ratio: Multiplier giving the hidden width
            ``round(in_channels * expand_ratio)``. When it equals 1 the
            1x1 expansion stage is omitted.

    The input is added back to the output only when ``stride == 1`` and the
    channel count is unchanged (``use_res_connect``).
    """

    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super().__init__()
        hidden_dim = int(round(in_channels * expand_ratio))
        self.use_res_connect = stride == 1 and in_channels == out_channels

        # Optional 1x1 expansion stage.
        expansion = []
        if expand_ratio != 1:
            expansion = [
                nn.Conv2d(in_channels, hidden_dim, kernel_size=1, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]

        # 3x3 depthwise stage (one group per channel).
        depthwise = [
            nn.Conv2d(
                hidden_dim,
                hidden_dim,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=hidden_dim,
                bias=False,
            ),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
        ]

        # Linear 1x1 projection: deliberately no activation afterwards.
        projection = [
            nn.Conv2d(hidden_dim, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
        ]

        self.conv = nn.Sequential(*expansion, *depthwise, *projection)

    def forward(self, x):
        """Return ``x + conv(x)`` when the residual applies, else ``conv(x)``."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
4. ShuffleNetV2
亮点:
通道重排: ShuffleNetV2 使用通道重排(channel shuffle)让不同分支(分组)的通道相互混合,从而以几乎为零的计算代价实现跨分支的信息交流。
通道拆分与深度卷积: ShuffleNetV2 用通道拆分(channel split)取代了 ShuffleNetV1 中的 1×1 组卷积,只保留深度卷积(每个通道自成一组的组卷积),进一步减小计算复杂度和内存访问开销。
import torch
import torch.nn as nn


class ShuffleUnit(nn.Module):
    """ShuffleNetV2 building block (channel split / two branches / shuffle).

    Args:
        in_channels: Number of input channels. Must be even when ``stride == 1``
            because the input is split into two halves.
        out_channels: Number of output channels. Must be even so the two
            concatenated groups can be shuffled.
        mid_channels: Hidden width of the right-hand branch. It is halved when
            ``stride == 1`` because that branch only sees half of the input.
        stride: 1 for the basic unit, 2 for the spatial down-sampling unit.

    Raises:
        ValueError: If ``stride`` is not 1 or 2, or the channel counts cannot
            be split / shuffled as described above.

    Behaviour:
        * ``stride == 1``: the first half of the channels is passed through
          ``branch1`` (identity), the second half through ``branch2``.
        * ``stride == 2``: both branches process the full input and both
          down-sample, so their outputs can be concatenated.
        The concatenated result is channel-shuffled with ``self.groups`` (2)
        groups so information mixes between the branches.
    """

    def __init__(self, in_channels, out_channels, mid_channels, stride):
        super().__init__()
        if stride not in (1, 2):
            raise ValueError(f"stride must be 1 or 2, got {stride}")
        if out_channels % 2 != 0:
            raise ValueError(f"out_channels must be even, got {out_channels}")

        self.stride = stride
        self.groups = 2  # number of groups used by the channel shuffle

        if stride == 1:
            if in_channels % 2 != 0:
                raise ValueError(
                    f"in_channels must be even when stride == 1, got {in_channels}"
                )
            mid_channels = mid_channels // 2
            passthrough = in_channels // 2
            branch_in = in_channels - passthrough
            branch_out = out_channels - passthrough
            if branch_out <= 0:
                raise ValueError(
                    "out_channels must exceed in_channels // 2 when stride == 1"
                )
            # Left half is forwarded untouched.
            self.branch1 = nn.Sequential()
        else:
            branch_in = in_channels
            branch_out = out_channels // 2
            # Left branch must down-sample too, otherwise the spatial sizes
            # of the two branches would not match at concatenation time.
            self.branch1 = nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    bias=False,
                ),
                nn.BatchNorm2d(in_channels),
                nn.Conv2d(in_channels, out_channels - branch_out, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channels - branch_out),
                nn.ReLU(inplace=True),
            )

        if mid_channels < 1:
            raise ValueError("mid_channels is too small for this configuration")

        # Right branch: 1x1 conv -> 3x3 depthwise conv -> 1x1 conv.
        self.branch2 = nn.Sequential(
            nn.Conv2d(branch_in, mid_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                mid_channels,
                mid_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=mid_channels,
                bias=False,
            ),
            nn.BatchNorm2d(mid_channels),
            nn.Conv2d(mid_channels, branch_out, kernel_size=1, bias=False),
            nn.BatchNorm2d(branch_out),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _channel_shuffle(x, groups):
        """Interleave the channels of ``groups`` equally sized channel groups."""
        batch, channels, height, width = x.size()
        x = x.view(batch, groups, channels // groups, height, width)
        x = x.transpose(1, 2).contiguous()
        return x.view(batch, channels, height, width)

    def forward(self, x):
        """Run both branches, concatenate on the channel axis and shuffle."""
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)
            out = torch.cat([self.branch1(x1), self.branch2(x2)], dim=1)
        else:
            out = torch.cat([self.branch1(x), self.branch2(x)], dim=1)
        return self._channel_shuffle(out, self.groups)
5. EfficientNet
亮点:
复合缩放: EfficientNet 使用复合缩放(compound scaling)策略,通过调整网络深度、宽度和分辨率来平衡准确性和计算复杂度。
MBConv结构: 使用了MBConv(Mobile Inverted Bottleneck Convolution)结构,结合了轻量级的深度可分离卷积和通道注意力机制。
import torch
import torch.nn as nn


class Swish(nn.Module):
    """Swish / SiLU activation: ``x * sigmoid(x)``."""

    def forward(self, x):
        return x * torch.sigmoid(x)


class SqueezeExcitation(nn.Module):
    """Channel attention: global pooling -> bottleneck -> per-channel gate.

    Args:
        channels: Number of channels of the tensor being re-weighted.
        squeezed_channels: Width of the bottleneck between the two 1x1 convs.
    """

    def __init__(self, channels, squeezed_channels):
        super().__init__()
        self.reduce = nn.Conv2d(channels, squeezed_channels, kernel_size=1)
        self.act = Swish()
        self.expand = nn.Conv2d(squeezed_channels, channels, kernel_size=1)

    def forward(self, x):
        # Global average over the two trailing (spatial) dimensions.
        gate = x.mean(dim=(2, 3), keepdim=True)
        gate = self.expand(self.act(self.reduce(gate)))
        return x * torch.sigmoid(gate)


class MBConvBlock(nn.Module):
    """EfficientNet MBConv block (expand -> depthwise -> optional SE -> project).

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        expand_ratio: Hidden width multiplier; hidden width is
            ``round(in_channels * expand_ratio)``.
        kernel_size: Kernel size of the depthwise convolution; padding is
            ``kernel_size // 2``.
        stride: Stride of the depthwise convolution.
        se_ratio: Squeeze-and-excitation ratio relative to ``in_channels``.
            SE is enabled only when ``0 < se_ratio <= 1``; pass ``None`` to
            disable it.

    The block input is added to the output when ``stride == 1`` and
    ``in_channels == out_channels``.
    """

    def __init__(self, in_channels, out_channels, expand_ratio, kernel_size, stride, se_ratio):
        super().__init__()
        self.use_se = se_ratio is not None and 0 < se_ratio <= 1
        self.use_residual = stride == 1 and in_channels == out_channels

        # Expansion phase
        hidden_dim = int(round(in_channels * expand_ratio))
        self.expand_conv = nn.Conv2d(in_channels, hidden_dim, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden_dim)
        self.swish = Swish()

        # Depthwise convolution phase
        self.dw_conv = nn.Conv2d(
            hidden_dim,
            hidden_dim,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=hidden_dim,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(hidden_dim)

        # Squeeze and Excitation phase
        if self.use_se:
            # The bottleneck width is derived from the block input width,
            # not from the expanded width.
            num_squeezed_channels = max(1, int(in_channels * se_ratio))
            self.se = SqueezeExcitation(hidden_dim, num_squeezed_channels)
        else:
            self.se = None

        # Output phase
        self.project_conv = nn.Conv2d(hidden_dim, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply the block; adds the saved input back when ``use_residual``."""
        # Keep a handle on the block input: ``x`` is rebound below, and the
        # residual must add the original tensor (not the builtin ``input``).
        identity = x

        # Expansion
        x = self.swish(self.bn1(self.expand_conv(x)))

        # Depthwise convolution
        x = self.swish(self.bn2(self.dw_conv(x)))

        # Squeeze and Excitation
        if self.use_se:
            x = self.se(x)

        # Output (linear projection, no activation)
        x = self.bn3(self.project_conv(x))

        # Skip connection
        if self.use_residual:
            x = x + identity
        return x
6. GhostNet
亮点:
Ghost Module: GhostNet 使用 Ghost Module 结构,先用普通卷积生成少量本征特征图,再用廉价的深度卷积由这些特征图生成其余的“幻影”特征图,最后将两者拼接,从而降低计算复杂度。
线性变换: 这些廉价操作是逐通道作用在本征特征图上的线性变换,用很少的参数和计算量补足特征图数量,减少信息的损失。
import math

import torch
import torch.nn as nn


class GhostModule(nn.Module):
    """Ghost module: a few "intrinsic" feature maps plus cheap "ghost" maps.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Kernel size of the primary (regular) convolution.
        ratio: Roughly ``out_channels / intrinsic_channels``; intended to be
            an integer >= 2. ``ceil(out_channels / ratio)`` intrinsic maps
            are computed by the primary convolution.
        dw_size: Kernel size of the depthwise "cheap operation".
        stride: Stride of the primary convolution.

    The primary convolution produces the intrinsic maps; a depthwise
    convolution derives ``(ratio - 1)`` ghost maps from each of them. Both
    sets are concatenated and truncated to exactly ``out_channels`` channels.
    Channel counts are derived from ``out_channels``, so the module works for
    any relation between ``in_channels`` and ``out_channels`` and for any
    stride.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, ratio=2, dw_size=3, stride=1):
        super().__init__()
        self.out_channels = out_channels
        init_channels = math.ceil(out_channels / ratio)
        new_channels = init_channels * (ratio - 1)

        self.primary_conv = nn.Sequential(
            nn.Conv2d(
                in_channels,
                init_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
                bias=False,
            ),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True),
        )
        self.cheap_operation = nn.Sequential(
            nn.Conv2d(
                init_channels,
                new_channels,
                kernel_size=dw_size,
                stride=1,
                padding=dw_size // 2,
                groups=init_channels,  # depthwise: each intrinsic map spawns its own ghosts
                bias=False,
            ),
            nn.BatchNorm2d(new_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Return intrinsic and ghost maps concatenated along the channel axis."""
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        # Concatenate the *intrinsic* maps (not the raw input) with the ghosts.
        out = torch.cat([x1, x2], dim=1)
        # ceil() may have produced more channels than requested; drop the extras.
        return out[:, : self.out_channels, :, :]
7. GoogLeNet (Inception)
亮点:
Inception Module: GoogLeNet 使用 Inception Module 结构,通过并行的卷积操作和池化操作来捕捉不同尺度的特征。
全局平均池化: 使用全局平均池化来减小参数量,提高计算效率。
import torch
import torch.nn as nn


class InceptionModule(nn.Module):
    """Inception block with four parallel branches concatenated on channels.

    Args:
        in_channels: Number of input channels.
        out1: Output channels of the 1x1 branch.
        mid2: Channels of the 1x1 reduction in front of the 3x3 conv.
        out2: Output channels of the 3x3 branch.
        mid3: Channels of the 1x1 reduction in front of the 5x5 conv.
        out3: Output channels of the 5x5 branch.
        out4: Output channels of the pooling branch.

    Every convolution is followed by batch norm and ReLU. All branches keep
    the spatial size, so the output has ``out1 + out2 + out3 + out4``
    channels.
    """

    @staticmethod
    def _conv_bn_relu(cin, cout, kernel_size, padding=0):
        """Return the layers of one conv -> BN -> ReLU stage as a list."""
        return [
            nn.Conv2d(cin, cout, kernel_size=kernel_size, padding=padding),
            nn.BatchNorm2d(cout),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, in_channels, out1, mid2, out2, mid3, out3, out4):
        super().__init__()
        stage = self._conv_bn_relu

        # Branch 1: plain 1x1 convolution.
        self.branch1 = nn.Sequential(*stage(in_channels, out1, 1))

        # Branch 2: 1x1 reduction, then 3x3 convolution.
        self.branch2 = nn.Sequential(
            *stage(in_channels, mid2, 1),
            *stage(mid2, out2, 3, padding=1),
        )

        # Branch 3: 1x1 reduction, then 5x5 convolution.
        self.branch3 = nn.Sequential(
            *stage(in_channels, mid3, 1),
            *stage(mid3, out3, 5, padding=2),
        )

        # Branch 4: size-preserving 3x3 max pool, then 1x1 convolution.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            *stage(in_channels, out4, 1),
        )

    def forward(self, x):
        """Evaluate the four branches in order and concatenate along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)
8. DenseNet
亮点:
密集连接: DenseNet 使用密集连接结构,将前面所有层输出的特征图在通道维度上拼接起来作为当前层的输入,促使信息充分流动并实现特征复用。
过渡层: 使用过渡层来控制特征图的维度,平衡参数量和计算复杂度。
import torch
import torch.nn as nn


class Bottleneck(nn.Module):
    """DenseNet bottleneck layer (BN-ReLU-1x1 conv, BN-ReLU-3x3 conv).

    Args:
        in_channels: Number of channels of the incoming feature map.
        growth_rate: Number of new channels this layer contributes.

    The forward pass returns the input concatenated with the new features,
    so the output has ``in_channels + growth_rate`` channels.
    """

    def __init__(self, in_channels, growth_rate):
        super().__init__()
        inner = 4 * growth_rate  # width of the 1x1 bottleneck

        self.bn1 = nn.BatchNorm2d(in_channels)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, inner, kernel_size=1, bias=False)

        self.bn2 = nn.BatchNorm2d(inner)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(inner, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``cat([x, new_features], dim=1)``."""
        squeezed = self.conv1(self.relu1(self.bn1(x)))
        new_features = self.conv2(self.relu2(self.bn2(squeezed)))
        return torch.cat([x, new_features], 1)


class DenseBlock(nn.Module):
    """Stack of ``num_layers`` bottleneck layers with growing input width.

    Args:
        in_channels: Channels entering the block.
        growth_rate: Channels added by every bottleneck layer.
        num_layers: Number of bottleneck layers.

    Layer ``i`` receives ``in_channels + i * growth_rate`` channels because
    each preceding layer appended ``growth_rate`` channels to its input.
    """

    def __init__(self, in_channels, growth_rate, num_layers):
        super().__init__()
        self.layers = nn.Sequential(
            *[
                Bottleneck(in_channels + index * growth_rate, growth_rate)
                for index in range(num_layers)
            ]
        )

    def forward(self, x):
        """Apply the bottleneck layers one after another."""
        return self.layers(x)
9. HRNet (High-Resolution Network)
亮点:
多分辨率融合: HRNet 通过保持高分辨率特征图的信息,使用多分辨率的特征融合策略,更全面地捕捉图像的细节。
适应性结构: 允许根据任务需求调整网络结构,通过增加或减少分辨率分支的数量来适应不同场景。
import torch
import torch.nn as nn


class BasicBlock(nn.Module):
    """Placeholder residual block; the real layers are intentionally omitted.

    The constructor accepts ``in_channels``, ``out_channels`` and ``stride``
    but does not use them, and ``forward`` returns its input unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # ... (detailed structure)

    def forward(self, x):
        # ... (detailed structure)
        return x


class HighResolutionBlock(nn.Module):
    """Sequential chain of ``BasicBlock`` modules for one resolution branch.

    Args:
        in_channels: Channels fed to the first block.
        out_channels: Channels used by every block after the first.
        num_modules: Accepted for interface compatibility; not used here.
        num_blocks: Total number of blocks. The first block is always
            created, so values below 1 still yield a single block.
        stride: Stride passed to the first block only.
    """

    def __init__(self, in_channels, out_channels, num_modules, num_blocks, stride):
        super().__init__()
        # The first block handles the channel/stride change ...
        chain = [BasicBlock(in_channels, out_channels, stride)]
        # ... the remaining ones keep the shape.
        for _ in range(num_blocks - 1):
            chain.append(BasicBlock(out_channels, out_channels))
        self.blocks = nn.ModuleList(chain)

    def forward(self, x):
        """Feed ``x`` through every block in order."""
        out = x
        for layer in self.blocks:
            out = layer(out)
        return out
10. MultiHeadAttention
import torch
import torch.nn as nn


class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    Args:
        embed_size: Size of the last dimension of the inputs and the output.
        num_heads: Number of attention heads; must divide ``embed_size``.

    Raises:
        ValueError: If ``embed_size`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_size, num_heads):
        super().__init__()
        if embed_size % num_heads != 0:
            raise ValueError(
                f"embed_size ({embed_size}) must be divisible by num_heads ({num_heads})"
            )
        self.embed_size = embed_size
        self.num_heads = num_heads
        self.head_dim = embed_size // num_heads

        self.values = nn.Linear(embed_size, embed_size, bias=False)
        self.keys = nn.Linear(embed_size, embed_size, bias=False)
        self.queries = nn.Linear(embed_size, embed_size, bias=False)
        self.fc_out = nn.Linear(num_heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask=None):
        """Forward pass of the attention mechanism.

        Args:
            values: Tensor viewed as ``(N, value_len, embed_size)``.
            keys: Tensor viewed as ``(N, key_len, embed_size)``.
            query: Tensor viewed as ``(N, query_len, embed_size)``.
            mask: Optional tensor broadcastable to
                ``(N, num_heads, query_len, key_len)``; positions where it
                equals 0 are excluded from attention.

        Returns:
            Tensor of shape ``(N, query_len, embed_size)``.
        """
        N = query.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

        # Project, then split the embedding dimension into heads.
        values = self.values(values).view(N, value_len, self.num_heads, self.head_dim)
        keys = self.keys(keys).view(N, key_len, self.num_heads, self.head_dim)
        queries = self.queries(query).view(N, query_len, self.num_heads, self.head_dim)

        # energy: (N, heads, query_len, key_len). Each dot product runs over
        # head_dim elements, so that is the dimension to normalise by.
        energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)
        energy = energy / (self.head_dim ** 0.5)

        if mask is not None:
            # Use the dtype's own minimum so the fill value never overflows
            # (a fixed -1e20 is not representable in float16).
            energy = energy.masked_fill(mask == 0, torch.finfo(energy.dtype).min)

        attention = torch.softmax(energy, dim=3)

        # Weighted sum of the values, then merge the heads again.
        out = torch.einsum("nhql,nlhd->nqhd", attention, values)
        out = out.reshape(N, query_len, self.num_heads * self.head_dim)
        return self.fc_out(out)