5. Classic CNN Architectures: ResNet Overview and Code Implementation (PyTorch, Fully Annotated) (Part 2)


In summary, the residual connection helps in two ways. First, for y = f(x) + x the derivative with respect to x is f'(x) + 1; even when f'(x) is very small, the gradient can still propagate through the added identity term, which mitigates the vanishing-gradient problem. Second, the expression y = f(x) + x builds in an identity mapping (when f(x) = 0, y = x), which addresses the degradation problem that appears as networks get deeper.
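To make the gradient argument concrete, here is a minimal PyTorch sketch (not from the original post): a toy residual branch f with a deliberately tiny derivative still lets a gradient of about 1 reach the input, because dy/dx = f'(x) + 1.

import torch

x = torch.tensor([2.0], requires_grad=True)

# toy residual branch f(x) with a very small derivative (0.001)
def f(x):
    return 0.001 * x

y = f(x) + x      # residual connection: y = f(x) + x
y.backward()

print(x.grad)     # tensor([1.0010]) = f'(x) + 1, so the gradient does not vanish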
4. Simple structure
Although ResNet's overall architecture resembles that of earlier CNNs, its structure is simpler and easier to modify, so it quickly came into widespread use.
IV. Code Implementation
1. model.py
import torch.nn as nn
import torch


# Residual block for ResNet-18/34: two 3x3 convolutions
class BasicBlock(nn.Module):
    # expansion indicates whether the number of kernels in the main branch changes;
    # here the channel count stays the same, so expansion = 1
    expansion = 1

    # __init__(): declares the layers used in the block
    # downsample=None corresponds to the solid-line residual structure;
    # otherwise it is the dashed-line residual structure
    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        # batch normalization
        self.bn1 = nn.BatchNorm2d(out_channel)
        # ReLU activation
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    # forward(): defines the forward pass, i.e. how the layers are connected
    def forward(self, x):
        # keep the original input for the shortcut branch
        identity = x
        # for the dashed-line residual structure, downsample the shortcut
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        # -----------------------------------------
        out = self.conv2(out)
        out = self.bn2(out)
        # add the main branch and the shortcut branch
        out += identity
        out = self.relu(out)

        return out


# Residual block for ResNet-50/101/152: 1x1 + 3x3 + 1x1 convolutions (bottleneck)
class Bottleneck(nn.Module):
    # expansion is the factor by which the channel dimension grows at the end of
    # each bottleneck block, e.g. 64 * 4 = 256;
    # i.e. the Bottleneck output channels are 4x the base channel count
    expansion = 4

    # __init__(): declares the layers used in the block
    # downsample=None corresponds to the solid-line residual structure; otherwise it is
    # the dashed-line residual structure, used to change the channel count of x
    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()

        width = int(out_channel * (width_per_group / 64.)) * groups

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)
        # batch normalization
        self.bn1 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
        # ReLU activation
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    # forward(): defines the forward pass, i.e. how the layers are connected
    def forward(self, x):
        # keep the original input for the shortcut branch
        identity = x
        # for the dashed-line residual structure, downsample the shortcut
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        # add the main branch and the shortcut branch
        out += identity
        out = self.relu(out)

        return out


# The ResNet class
class ResNet(nn.Module):
    # initialization
    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super(ResNet, self).__init__()
        self.include_top = include_top
        # the max-pool output has 64 channels, so the first residual stage takes 64 input channels
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # the first stage uses stride=1, the deeper stages use stride=2
        # block: one of the two residual block classes defined above
        # blocks_num: the number of residual blocks in each stage
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            # adaptive average pooling: specify the output (H, W); the channel count is unchanged
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            # fully connected layer
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # iterate over every layer in the network
        # self.modules() (inherited from nn.Module) returns all modules in the network
        for m in self.modules():
            # isinstance(object, type) returns True if the object is of the given type
            # here: if the module is a convolutional layer
            if isinstance(m, nn.Conv2d):
                # Kaiming normal initialization for Conv2d weights
                # mode='fan_in' preserves the variance of activations in the forward pass
                # mode='fan_out' preserves the variance of gradients in the backward pass
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    # Build one stage consisting of several residual blocks
    # block: one of the two residual block classes; channel: the base channel count of the
    # convolutions in this stage; block_num: the number of residual blocks in the stage
    def _make_layer(self, block, channel, block_num, stride=1):
        downsample = None
        # if this condition holds, the first block needs a dashed-line (projection) shortcut
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))

        # nn.Sequential chains the blocks in order to form the stage
        return nn.Sequential(*layers)

    # forward(): defines the forward pass, i.e. how the layers are connected
    def forward(self, x):
        # stem layers shared by every ResNet variant
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # stages whose depth depends on the variant
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x


# The block argument of ResNet() is either BasicBlock or Bottleneck
# blocks_num, e.g. [3, 4, 6, 3], gives the number of residual blocks in each of the four stages
# 34-layer ResNet
def resnet34(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


# 50-layer ResNet
def resnet50(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


# 101-layer ResNet
def resnet101(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
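As a quick sanity check, here is a minimal usage sketch (not part of the original post). It assumes the code above is saved as model.py; the class count of 5 and the 224x224 input size are just example values.

import torch
from model import resnet34

net = resnet34(num_classes=5)          # e.g. a 5-class classification task
dummy = torch.randn(1, 3, 224, 224)    # a batch of one 224x224 RGB image
out = net(dummy)
print(out.shape)                       # torch.Size([1, 5])

The spatial size shrinks from 224 to 7 through the stem (conv1 + max pool) and the three stride-2 stages, and adaptive average pooling then reduces it to 1x1 before the fully connected layer, so the output has one logit per class.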