import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule, build_conv_layer
from mmengine.model import BaseModule

from mmseg.registry import MODELS


class UpSampleBN(nn.Module):
    """Upsample-and-fuse decoder block.

    Bilinearly upsamples the input to the spatial size of a skip feature,
    concatenates the two along the channel dimension and fuses them with
    two 3x3 ConvModules.

    Args:
        skip_input (int): Number of input channels after concatenation
            with the skip feature.
        output_features (int): Number of output channels.
        norm_cfg (dict, optional): Config dict for the normalization
            layer. Default: dict(type='BN').
        act_cfg (dict, optional): Config dict for the activation layer.
            Default: dict(type='LeakyReLU').
    """

    def __init__(self,
                 skip_input,
                 output_features,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='LeakyReLU')):
        super().__init__()

        self._net = nn.Sequential(
            ConvModule(
                in_channels=skip_input,
                out_channels=output_features,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=True,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
            ),
            ConvModule(
                in_channels=output_features,
                out_channels=output_features,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=True,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
            ))

    def forward(self, x, concat_with):
        # Match the skip feature's spatial size before concatenating.
        up_x = F.interpolate(
            x,
            size=[concat_with.size(2),
                  concat_with.size(3)],
            mode='bilinear',
            align_corners=True)
        f = torch.cat([up_x, concat_with], dim=1)
        return self._net(f)
| 60 | + |
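# A minimal usage sketch for UpSampleBN (illustrative only; the channel
# counts are assumptions rather than values from a real encoder):
#
#     up = UpSampleBN(skip_input=64 + 32, output_features=48)
#     low = torch.rand(1, 64, 8, 8)     # coarse decoder feature
#     skip = torch.rand(1, 32, 16, 16)  # finer encoder skip feature
#     out = up(low, skip)               # -> torch.Size([1, 48, 16, 16])
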
| 61 | + |
class Encoder(nn.Module):
    """Feature extractor wrapping a timm model (e.g. efficientnet_b5).

    Args:
        basemodel_name (str): Name of the timm base model.
    """

    def __init__(self, basemodel_name):
        super().__init__()
        self.original_model = timm.create_model(
            basemodel_name, pretrained=True)
        # Replace the pooling and classification head with no-ops so the
        # model acts as a pure feature extractor.
        self.original_model.global_pool = nn.Identity()
        self.original_model.classifier = nn.Identity()

    def forward(self, x):
        # Collect the input plus the output of every top-level module; the
        # 'blocks' stage is unrolled so each sub-block contributes a feature.
        features = [x]
        for k, v in self.original_model._modules.items():
            if k == 'blocks':
                for ki, vi in v._modules.items():
                    features.append(vi(features[-1]))
            else:
                features.append(v(features[-1]))
        return features
| 85 | + |
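# For efficientnet_b5, Encoder is assumed to yield (per timm's module order
# conv_stem, bn1, seven blocks, conv_head, bn2, head): feats[0] the input,
# feats[3]..feats[9] the block outputs and feats[10] the conv_head output.
# AdabinsBackbone.forward below relies on exactly this indexing:
#
#     enc = Encoder('tf_efficientnet_b5_ap')
#     feats = enc(torch.rand(1, 3, 224, 224))
#     [f.shape[1] for f in feats[3:6]]  # -> [24, 40, 64] channel widths
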
| 86 | + |
@MODELS.register_module()
class AdabinsBackbone(BaseModule):
    """Backbone of AdaBins: a timm encoder decoded by UpSampleBN blocks.

    Args:
        basemodel_name (str): Name of the timm base model.
        num_features (int): Number of decoder feature channels.
            Default: 2048.
        num_classes (int): Number of output channels. Default: 128.
        bottleneck_features (int): Number of channels of the encoder
            bottleneck feature. Default: 2048.
        conv_cfg (dict): Config dict for the convolution layer.
            Default: dict(type='Conv').
    """

    def __init__(self,
                 basemodel_name,
                 num_features=2048,
                 num_classes=128,
                 bottleneck_features=2048,
                 conv_cfg=dict(type='Conv')):
        super().__init__()
        self.encoder = Encoder(basemodel_name)
        features = int(num_features)
        # kernel_size=1 with padding=1 follows the original AdaBins decoder:
        # it pads the bottleneck feature by one pixel per side, and the
        # UpSampleBN stages resize to the skip features afterwards.
        self.conv2 = build_conv_layer(
            conv_cfg,
            bottleneck_features,
            features,
            kernel_size=1,
            stride=1,
            padding=1)
        # The extra channel terms equal the widths of the selected
        # efficientnet_b5 skip features (176, 64, 40 and 24 channels).
        self.up1 = UpSampleBN(
            skip_input=features // 1 + 112 + 64, output_features=features // 2)
        self.up2 = UpSampleBN(
            skip_input=features // 2 + 40 + 24, output_features=features // 4)
        self.up3 = UpSampleBN(
            skip_input=features // 4 + 24 + 16, output_features=features // 8)
        self.up4 = UpSampleBN(
            skip_input=features // 8 + 16 + 8, output_features=features // 16)

        self.conv3 = build_conv_layer(
            conv_cfg,
            features // 16,
            num_classes,
            kernel_size=3,
            stride=1,
            padding=1)

    def forward(self, x):
        features = self.encoder(x)
        # Select skip features from timm's efficientnet_b5 layout: indices
        # 3, 4, 5 and 7 are block outputs, 10 is the conv_head output.
        x_block0 = features[3]
        x_block1 = features[4]
        x_block2 = features[5]
        x_block3 = features[7]
        x_block4 = features[10]
        x_d0 = self.conv2(x_block4)
        x_d1 = self.up1(x_d0, x_block3)
        x_d2 = self.up2(x_d1, x_block2)
        x_d3 = self.up3(x_d2, x_block1)
        x_d4 = self.up4(x_d3, x_block0)
        out = self.conv3(x_d4)
        return out
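

if __name__ == '__main__':
    # Minimal smoke test: a sketch, not part of the original module. It
    # assumes network access for the timm pretrained weights, that the
    # installed timm release uses the module layout the indices above rely
    # on, and the model name used by the mmseg AdaBins configs.
    backbone = AdabinsBackbone('tf_efficientnet_b5_ap')
    out = backbone(torch.rand(1, 3, 224, 224))
    # Expect num_classes channels at roughly half the input resolution,
    # e.g. torch.Size([1, 128, 112, 112]).
    print(out.shape)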