Commit e1e2c3c

[Feature]: Add UT
1 parent 729edf8 commit e1e2c3c

File tree

2 files changed: +248 -46 lines changed


mmseg/models/backbones/mae.py

Lines changed: 66 additions & 46 deletions
@@ -2,8 +2,8 @@
 import math
 import warnings
 
+import numpy as np
 import torch
-import torch.distributed as dist
 import torch.nn as nn
 from mmcv.cnn import build_norm_layer
 from mmcv.cnn.utils.weight_init import (constant_init, kaiming_init,
@@ -17,6 +17,11 @@
 from ..utils import PatchEmbed
 from .beit import BEiTTransformerEncoderLayer
 
+try:
+    from scipy import interpolate
+except ImportError:
+    interpolate = None
+
 
 @BACKBONES.register_module()
 class MAE(BaseModule):
@@ -61,8 +66,8 @@ class MAE(BaseModule):
         with_cp (bool): Use checkpoint or not. Using checkpoint will save
             some memory while slowing down the training speed. Default: False.
         pretrained (str, optional): model pretrained path. Default: None.
-        init_values (float): Initialize the values of MAEAttention and FFN
-            with learnable scaling.
+        init_values (float): Initialize the values of Attention and FFN
+            with learnable scaling. Defaults to 0.1.
         init_cfg (dict or list[dict], optional): Initialization config dict.
             Default: None.
     """
@@ -91,7 +96,7 @@ def __init__(self,
                  norm_eval=False,
                  with_cp=False,
                  pretrained=None,
-                 init_values=None,
+                 init_values=0.1,
                  init_cfg=None):
         super(MAE, self).__init__(init_cfg=init_cfg)
 
@@ -166,7 +171,7 @@ def __init__(self,
                     attn_drop_rate=attn_drop_rate,
                     drop_path_rate=dpr[i],
                     num_fcs=num_fcs,
-                    qkv_bias='qv_bias' if qv_bias else False,
+                    bias='qv_bias' if qv_bias else False,
                     act_cfg=act_cfg,
                     norm_cfg=norm_cfg,
                     window_size=window_size,
@@ -191,6 +196,57 @@ def rescale(param, layer_id):
             rescale(layer.attn.proj.weight.data, layer_id + 1)
             rescale(layer.ffn.layers[1].weight.data, layer_id + 1)
 
+    def _geometric_sequence_interpolation(self, src_size, dst_size, sequence,
+                                          num):
+        """Get a new sequence via geometric sequence interpolation.
+
+        Args:
+            src_size (int): Pos_embedding size in the pre-trained model.
+            dst_size (int): Pos_embedding size in the current model.
+            sequence (tensor): The relative position bias of the pre-trained
+                model after removing the extra tokens.
+            num (int): Number of attention heads.
+        Returns:
+            new_sequence (tensor): The pre-trained relative position bias
+                interpolated via geometric sequence interpolation to the
+                size of the current model.
+        """
+
+        def geometric_progression(a, r, n):
+            return a * (1.0 - r**n) / (1.0 - r)
+
+        # Bisection search for the geometric-progression ratio q.
+        left, right = 1.01, 1.5
+        while right - left > 1e-6:
+            q = (left + right) / 2.0
+            gp = geometric_progression(1, q, src_size // 2)
+            if gp > dst_size // 2:
+                right = q
+            else:
+                left = q
+        # The position of each interpolated point is determined
+        # by the ratio obtained from the bisection above.
+        dis = []
+        cur = 1
+        for i in range(src_size // 2):
+            dis.append(cur)
+            cur += q**(i + 1)
+        r_ids = [-_ for _ in reversed(dis)]
+        x = r_ids + [0] + dis
+        y = r_ids + [0] + dis
+        t = dst_size // 2.0
+        dx = np.arange(-t, t + 0.1, 1.0)
+        dy = np.arange(-t, t + 0.1, 1.0)
+        # Fit a 2D interpolator per head and resample it on the new grid.
+        new_sequence = []
+        for i in range(num):
+            z = sequence[:, i].view(src_size, src_size).float().numpy()
+            f = interpolate.interp2d(x, y, z, kind='cubic')
+            new_sequence.append(
+                torch.Tensor(f(dx, dy)).contiguous().view(-1, 1).to(sequence))
+        new_sequence = torch.cat(new_sequence, dim=-1)
+        return new_sequence
+
     def init_weights(self):
 
         def _init_weights(m):
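
For reference, here is a minimal standalone sketch (not part of the commit) of the resampling that _geometric_sequence_interpolation performs on a single attention head. The sizes (a 27x27 source grid resampled to 31x31) and the dummy bias table are illustrative assumptions; it uses the same scipy.interpolate.interp2d call as the backbone, which is deprecated in recent SciPy releases.

# Illustrative sketch only: resample a dummy one-head relative position bias
# table from a 27x27 grid to a 31x31 grid, mirroring the method above.
import numpy as np
import torch
from scipy import interpolate  # interp2d is deprecated in recent SciPy

src_size, dst_size, num_heads = 27, 31, 1      # assumed sizes for illustration
bias = torch.randn(src_size * src_size, num_heads)

def geometric_progression(a, r, n):
    return a * (1.0 - r**n) / (1.0 - r)

# Bisection for the ratio q whose geometric sums cover the destination radius.
left, right = 1.01, 1.5
while right - left > 1e-6:
    q = (left + right) / 2.0
    if geometric_progression(1, q, src_size // 2) > dst_size // 2:
        right = q
    else:
        left = q

# Source grid: geometrically spaced coordinates, mirrored around zero.
dis, cur = [], 1
for i in range(src_size // 2):
    dis.append(cur)
    cur += q**(i + 1)
x = [-d for d in reversed(dis)] + [0] + dis

# Destination grid: unit spacing, centred at zero.
t = dst_size // 2.0
dx = np.arange(-t, t + 0.1, 1.0)

z = bias[:, 0].view(src_size, src_size).numpy()
f = interpolate.interp2d(x, x, z, kind='cubic')
resized = torch.Tensor(f(dx, dx)).view(-1, 1)
print(resized.shape)  # torch.Size([961, 1]) == (31 * 31, 1)
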
@@ -210,51 +266,15 @@ def _init_weights(m):
             logger = get_root_logger()
             checkpoint = _load_checkpoint(
                 self.init_cfg['checkpoint'], logger=logger, map_location='cpu')
-
-            if 'state_dict' in checkpoint:
-                state_dict = checkpoint['state_dict']
-                state_dict = {
-                    key.replace('backbone.', ''): val
-                    for key, val in state_dict.items()
-                }
-            else:
-                state_dict = checkpoint
-
-            if 'pos_embed' in state_dict:
-                pos_embed_checkpoint = state_dict['pos_embed']
-                embedding_size = pos_embed_checkpoint.shape[-1]
-                num_extra_tokens = self.pos_embed.shape[-2] - self.num_patches
-                # height (== width) for the checkpoint position embedding
-                orig_size = int(
-                    (pos_embed_checkpoint.shape[-2] - num_extra_tokens)**0.5)
-                # height (== width) for the new position embedding
-                new_size = int(self.num_patches**0.5)
-                # class_token and dist_token are kept unchanged
-                if orig_size != new_size:
-                    if dist.get_rank() == 0:
-                        print('Position interpolate from %dx%d to %dx%d' %
-                              (orig_size, orig_size, new_size, new_size))
-                    extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
-                    # only the position tokens are interpolated
-                    pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
-                    pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size,
-                                                    embedding_size).permute(
-                                                        0, 3, 1, 2)
-                    pos_tokens = torch.nn.functional.interpolate(
-                        pos_tokens,
-                        size=(new_size, new_size),
-                        mode='bicubic',
-                        align_corners=False)
-                    pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
-                    new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
-                    state_dict['pos_embed'] = new_pos_embed
-
+            state_dict = self.resize_rel_pos_embed(checkpoint)
             self.load_state_dict(state_dict, False)
-
         elif self.init_cfg is not None:
             super(MAE, self).init_weights()
         else:
-            trunc_normal_(self.pos_embed, std=.02)
+            # We only implement the 'jax_impl' initialization implemented at
+            # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353  # noqa: E501
+            # Copyright 2019 Ross Wightman
+            # Licensed under the Apache License, Version 2.0 (the "License")
             trunc_normal_(self.cls_token, std=.02)
             for n, m in self.named_modules():
                 if isinstance(m, nn.Linear):
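
A hedged usage sketch (not part of the diff) of how the new 'Pretrained' branch of init_weights() is typically exercised; the checkpoint path is a placeholder assumption.

from mmseg.models.backbones.mae import MAE

# Placeholder checkpoint path; any MAE/BEiT-style checkpoint containing
# relative_position_bias_table entries would be resized on load.
model = MAE(
    img_size=(512, 512),
    init_cfg=dict(type='Pretrained', checkpoint='path/to/mae_pretrain.pth'))
# init_weights() loads the checkpoint, routes it through resize_rel_pos_embed()
# (which is expected to call _geometric_sequence_interpolation when the table
# sizes differ), then calls load_state_dict(state_dict, False).
model.init_weights()
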
New unit test file: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch

from mmseg.models.backbones.mae import MAE
from .utils import check_norm_state


def test_mae_backbone():
    with pytest.raises(TypeError):
        # pretrained must be a string path
        model = MAE()
        model.init_weights(pretrained=0)

    with pytest.raises(TypeError):
        # img_size must be int or tuple
        model = MAE(img_size=512.0)

    with pytest.raises(TypeError):
        # out_indices must be int, list or tuple
        model = MAE(out_indices=1.)

    with pytest.raises(AssertionError):
        # The length of the img_size tuple must be at most 2.
        MAE(img_size=(224, 224, 224))

    with pytest.raises(TypeError):
        # pretrained must be None or str
        MAE(pretrained=123)

    # Test img_size as a one-element tuple
    imgs = torch.randn(1, 3, 224, 224)
    model = MAE(img_size=(224, ))
    model.init_weights()
    model(imgs)

    # Test img_size as a two-element tuple
    imgs = torch.randn(1, 3, 224, 224)
    model = MAE(img_size=(224, 224))
    model(imgs)

    # Test norm_eval = True
    model = MAE(norm_eval=True)
    model.train()

    # Test MAE backbone with input size of 224 and patch size of 16
    model = MAE()
    model.init_weights()
    model.train()

    # Test qv_bias
    model = MAE(qv_bias=False)
    model.train()

    # Test out_indices = list
    model = MAE(out_indices=[2, 4, 8, 12])
    model.train()

    assert check_norm_state(model.modules(), True)

    # Test image size = (224, 224)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 14, 14)

    # Test MAE backbone with input size of 256 and patch size of 16
    model = MAE(img_size=(256, 256))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 256, 256)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 16, 16)

    # Test MAE backbone with input size of 32 and patch size of 16
    model = MAE(img_size=(32, 32))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 32, 32)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 2, 2)

    # Test unbalanced size input image
    model = MAE(img_size=(112, 224))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 112, 224)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 7, 14)

    # Test irregular input image
    model = MAE(img_size=(234, 345))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 234, 345)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 14, 21)

    # Test init_values=0
    model = MAE(init_values=0)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 14, 14)

    # Test final norm
    model = MAE(final_norm=True)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 14, 14)

    # Test patch norm
    model = MAE(patch_norm=True)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert feat[-1].shape == (1, 768, 14, 14)


def test_mae_init():
    path = 'PATH_THAT_DO_NOT_EXIST'
    # Test all combinations of pretrained and init_cfg
    # pretrained=None, init_cfg=None
    model = MAE(pretrained=None, init_cfg=None)
    assert model.init_cfg is None
    model.init_weights()

    # pretrained=None
    # init_cfg loads the pretrained weights from a non-existent file
    model = MAE(
        pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path))
    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
    # Test loading a checkpoint from a non-existent file
    with pytest.raises(OSError):
        model.init_weights()

    # Test resize_rel_pos_embed
    value = torch.randn(732, 16)
    ckpt = {
        'state_dict': {
            'layers.0.attn.relative_position_index': 0,
            'layers.0.attn.relative_position_bias_table': value
        }
    }
    model = MAE(img_size=(512, 512))
    with pytest.raises(AttributeError):
        model.resize_rel_pos_embed(ckpt)

    # pretrained=None
    # init_cfg=123, whose type is unsupported
    model = MAE(pretrained=None, init_cfg=123)
    with pytest.raises(TypeError):
        model.init_weights()

    # pretrained loads the pretrained weights from a non-existent file
    # init_cfg=None
    model = MAE(pretrained=path, init_cfg=None)
    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
    # Test loading a checkpoint from a non-existent file
    with pytest.raises(OSError):
        model.init_weights()

    # pretrained loads the pretrained weights from a non-existent file
    # init_cfg loads the pretrained weights from a non-existent file
    with pytest.raises(AssertionError):
        model = MAE(
            pretrained=path, init_cfg=dict(type='Pretrained', checkpoint=path))
    with pytest.raises(AssertionError):
        model = MAE(pretrained=path, init_cfg=123)

    # pretrained=123, whose type is unsupported
    # init_cfg=None
    with pytest.raises(TypeError):
        model = MAE(pretrained=123, init_cfg=None)

    # pretrained=123, whose type is unsupported
    # init_cfg loads the pretrained weights from a non-existent file
    with pytest.raises(AssertionError):
        model = MAE(
            pretrained=123, init_cfg=dict(type='Pretrained', checkpoint=path))

    # pretrained=123, whose type is unsupported
    # init_cfg=123, whose type is unsupported
    with pytest.raises(AssertionError):
        model = MAE(pretrained=123, init_cfg=123)
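
The check_norm_state helper imported from .utils is not part of this diff. A minimal sketch consistent with how the test calls it (it should return whether every batch-norm layer's training flag matches the expected state) might look like the following; the actual helper in the repository may differ.

from torch.nn.modules.batchnorm import _BatchNorm

def check_norm_state(modules, train_state):
    """Check that all _BatchNorm layers are in the expected train/eval state."""
    for mod in modules:
        if isinstance(mod, _BatchNorm):
            if mod.training != train_state:
                return False
    return True
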
