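"""Gather benchmarked models, logs and configs into a publishable directory.

Example invocation (the script file name and paths are illustrative only):

    python gather_models.py work_dirs/ configs/ published/ model_info.json
"""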
import argparse
import glob
import json
import os
import os.path as osp
import shutil
import subprocess

import mmcv
import torch

# look-up table of the evaluation metrics to collect from the training logs
RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc']

def process_checkpoint(in_file, out_file):
    """Strip training-only states from a checkpoint and add its sha256 tag."""
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # rstrip('.pth') would strip any trailing '.', 'p', 't' or 'h' characters,
    # so drop the extension explicitly before appending the hash tag
    final_file = osp.splitext(out_file)[0] + '-{}.pth'.format(sha[:8])
    os.rename(out_file, final_file)
    return final_file


def get_final_iter(config):
    """Parse the total iteration number from a config name,
    e.g. '..._80k_cityscapes' -> 80000."""
    iter_num = config.split('_')[-2]
    assert iter_num.endswith('k')
    return int(iter_num[:-1]) * 1000


def get_final_results(log_json_path, iter_num):
    """Collect the memory usage and the RESULTS_LUT metrics logged at the
    final iteration, or return None if that iteration is not in the log."""
    result_dict = dict()
    with open(log_json_path, 'r') as f:
        for line in f:
            log_line = json.loads(line)
            if 'mode' not in log_line:
                continue

            if log_line['mode'] == 'train' and log_line['iter'] == iter_num:
                result_dict['memory'] = log_line['memory']

            if log_line['iter'] == iter_num:
                result_dict.update({
                    key: log_line[key]
                    for key in RESULTS_LUT if key in log_line
                })
    # an empty dict means the final iteration never appeared in this log,
    # so return None to let the caller try the next log file
    return result_dict or None


def parse_args():
    parser = argparse.ArgumentParser(description='Gather benchmarked models')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'config',
        type=str,
        help='root path of benchmarked configs to be gathered')
    parser.add_argument(
        'out_dir',
        type=str,
        help='output path of gathered models to be stored')
    parser.add_argument('out_file', type=str, help='the output json file name')
    parser.add_argument(
        '--filter', type=str, nargs='+', default=[], help='config filter')
    parser.add_argument(
        '--all',
        action='store_true',
        help='whether to also copy the .py config and .log files')

    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    models_root = args.root
    models_out = args.out_dir
    config_name = args.config
    mmcv.mkdir_or_exist(models_out)

    # find all config files whose models may need to be gathered
    raw_configs = list(mmcv.scandir(config_name, '.py', recursive=True))

    # filter out configs that were not trained in the experiments dir
    used_configs = []
    for raw_config in raw_configs:
        work_dir = osp.splitext(osp.basename(raw_config))[0]
        if osp.exists(osp.join(models_root, work_dir)):
            used_configs.append((work_dir, raw_config))
    print(f'Found {len(used_configs)} models to be gathered')

    # find the final checkpoint and log file of each trained config
    # and parse the final performance
    model_infos = []
    for used_config, raw_config in used_configs:
        # apply the --filter patterns; when no filter is given, keep all
        if args.filter and not any(p in used_config for p in args.filter):
            continue
        exp_dir = osp.join(models_root, used_config)
        # check whether the experiment has finished
        final_iter = get_final_iter(used_config)
        final_model = 'iter_{}.pth'.format(final_iter)
        model_path = osp.join(exp_dir, final_model)

        # skip if the model is still training
        if not osp.exists(model_path):
            print(f'{used_config} training is not finished yet')
            continue

        # get logs
        log_json_paths = glob.glob(osp.join(exp_dir, '*.log.json'))
        if not log_json_paths:
            print(f'{used_config} has no *.log.json file, skip')
            continue
        log_json_path = log_json_paths[0]
        model_performance = None
        for _log_json_path in log_json_paths:
            model_performance = get_final_results(_log_json_path, final_iter)
            if model_performance is not None:
                log_json_path = _log_json_path
                break

        if model_performance is None:
            print(f'{used_config} model_performance is None')
            continue

        model_time = osp.split(log_json_path)[-1].split('.')[0]
        model_infos.append(
            dict(
                config=used_config,
                raw_config=raw_config,
                results=model_performance,
                iters=final_iter,
                model_time=model_time,
                log_json_path=osp.split(log_json_path)[-1]))

    # publish model for each checkpoint
    publish_model_infos = []
    for model in model_infos:
        model_publish_dir = osp.join(models_out,
                                     osp.splitext(model['raw_config'])[0])
        model_name = osp.split(model['config'])[-1].split('.')[0]

        publish_model_path = osp.join(model_publish_dir,
                                      model_name + '_' + model['model_time'])
        trained_model_path = osp.join(models_root, model['config'],
                                      'iter_{}.pth'.format(model['iters']))
        if osp.exists(model_publish_dir):
            # reuse the already published checkpoint if one exists
            for file in os.listdir(model_publish_dir):
                if file.endswith('.pth'):
                    print(f'model {file} found')
                    model['model_path'] = osp.abspath(
                        osp.join(model_publish_dir, file))
                    break
            if 'model_path' not in model:
                print(f'dir {model_publish_dir} exists, no model found')

        else:
            mmcv.mkdir_or_exist(model_publish_dir)

            # convert model
            final_model_path = process_checkpoint(trained_model_path,
                                                  publish_model_path)
            model['model_path'] = final_model_path

        new_json_path = f'{model_name}-{model["log_json_path"]}'
        # copy log
        shutil.copy(
            osp.join(models_root, model['config'], model['log_json_path']),
            osp.join(model_publish_dir, new_json_path))
        if args.all:
            # the json log name ends with '.log.json'; drop only the final
            # '.json' extension to get the plain text log name
            new_txt_path = osp.splitext(new_json_path)[0]
            shutil.copy(
                osp.join(models_root, model['config'],
                         osp.splitext(model['log_json_path'])[0]),
                osp.join(model_publish_dir, new_txt_path))

        if args.all:
            # copy config to guarantee reproducibility
            raw_config = osp.join(config_name, model['raw_config'])
            mmcv.Config.fromfile(raw_config).dump(
                osp.join(model_publish_dir, osp.basename(raw_config)))

        publish_model_infos.append(model)

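    # illustrative shape of the dumped file (keys mirror model_infos above):
    # {"models": [{"config": ..., "raw_config": ..., "results": {"mIoU": ...},
    #              "iters": ..., "model_time": ..., "log_json_path": ...,
    #              "model_path": ...}, ...]}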
    models = dict(models=publish_model_infos)
    mmcv.dump(models, osp.join(models_out, args.out_file))


if __name__ == '__main__':
    main()