[Feature] add log collector (open-mmlab#1175)

RockeyCoss · MeowZheng · Junjun2016 · web-flow · commit ba52d5045ed5 · 2022-01-14T15:19:23.000+08:00
* [Feature] add log collector

* Update .dev/log_collector/readme.md

Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com>

* Update .dev/log_collector/example_config.py

Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com>

* fix typo and so on

* modify readme

* fix some bugs and revise the readme.md

* more elegant

* Update .dev/log_collector/readme.md

Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>

Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com>
Co-authored-by: Junjun2016 <hejunjun@sjtu.edu.cn>
diff --git a/.dev/log_collector/example_config.py b/.dev/log_collector/example_config.py
@@ -0,0 +1,18 @@
+# the directory whose sub-folders hold the `.log.json` files
+work_dir = '../../work_dirs'
+# the metric used to pick the best evaluation of every experiment
+metric = 'mIoU'
+
+# specify the folders (sub-directories of `work_dir`) whose log files we
+# would like to collect in `log_items`
+log_items = [
+    'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
+    'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
+    'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
+    'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr'
+]
+# or specify `ignore_keywords` instead (never both at the same time); the
+# folders whose names contain `'segformer'` then won't be collected
+# ignore_keywords = ['segformer']
+
+# extra keys of the evaluation records to collect; should not include metric
+other_info_keys = ['mAcc']
+# paths of the output files; a missing parent directory is created
+markdown_file = 'markdowns/lr_in_trans.json.md'
+json_file = 'jsons/trans_in_cnn.json'
diff --git a/.dev/log_collector/log_collector.py b/.dev/log_collector/log_collector.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import datetime
+import json
+import os
+import os.path as osp
+from collections import OrderedDict
+
+from utils import load_config
+
+# automatically collect all the results
+
+# The structure of the directory:
+#     ├── work-dir
+#     │   ├── config_1
+#     │   │   ├── time1.log.json
+#     │   │   ├── time2.log.json
+#     │   │   ├── time3.log.json
+#     │   │   ├── time4.log.json
+#     │   ├── config_2
+#     │   │   ├── time5.log.json
+#     │   │   ├── time6.log.json
+#     │   │   ├── time7.log.json
+#     │   │   ├── time8.log.json
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='extract info from log.json')
+    parser.add_argument('config_dir')
+    args = parser.parse_args()
+    return args
+
+
+def has_keyword(name: str, keywords: list):
+    for a_keyword in keywords:
+        if a_keyword in name:
+            return True
+    return False
+
+
+def main():
+    args = parse_args()
+    cfg = load_config(args.config_dir)
+    work_dir = cfg['work_dir']
+    metric = cfg['metric']
+    log_items = cfg.get('log_items', [])
+    ignore_keywords = cfg.get('ignore_keywords', [])
+    other_info_keys = cfg.get('other_info_keys', [])
+    markdown_file = cfg.get('markdown_file', None)
+    json_file = cfg.get('json_file', None)
+
+    if json_file and osp.split(json_file)[0] != '':
+        os.makedirs(osp.split(json_file)[0], exist_ok=True)
+    if markdown_file and osp.split(markdown_file)[0] != '':
+        os.makedirs(osp.split(markdown_file)[0], exist_ok=True)
+
+    assert not (log_items and ignore_keywords), \
+        'log_items and ignore_keywords cannot be specified at the same time'
+    assert metric not in other_info_keys, \
+        'other_info_keys should not contain metric'
+
+    if ignore_keywords and isinstance(ignore_keywords, str):
+        ignore_keywords = [ignore_keywords]
+    if other_info_keys and isinstance(other_info_keys, str):
+        other_info_keys = [other_info_keys]
+    if log_items and isinstance(log_items, str):
+        log_items = [log_items]
+
+    if not log_items:
+        log_items = [
+            item for item in sorted(os.listdir(work_dir))
+            if not has_keyword(item, ignore_keywords)
+        ]
+
+    experiment_info_list = []
+    for config_dir in log_items:
+        preceding_path = os.path.join(work_dir, config_dir)
+        log_list = [
+            item for item in os.listdir(preceding_path)
+            if item.endswith('.log.json')
+        ]
+        log_list = sorted(
+            log_list,
+            key=lambda time_str: datetime.datetime.strptime(
+                time_str, '%Y%m%d_%H%M%S.log.json'))
+        val_list = []
+        last_iter = 0
+        for log_name in log_list:
+            with open(os.path.join(preceding_path, log_name), 'r') as f:
+                # ignore the info line
+                f.readline()
+                all_lines = f.readlines()
+                val_list.extend([
+                    json.loads(line) for line in all_lines
+                    if json.loads(line)['mode'] == 'val'
+                ])
+                for index in range(len(all_lines) - 1, -1, -1):
+                    line_dict = json.loads(all_lines[index])
+                    if line_dict['mode'] == 'train':
+                        last_iter = max(last_iter, line_dict['iter'])
+                        break
+
+        new_log_dict = dict(
+            method=config_dir, metric_used=metric, last_iter=last_iter)
+        for index, log in enumerate(val_list, 1):
+            new_ordered_dict = OrderedDict()
+            new_ordered_dict['eval_index'] = index
+            new_ordered_dict[metric] = log[metric]
+            for key in other_info_keys:
+                if key in log:
+                    new_ordered_dict[key] = log[key]
+            val_list[index - 1] = new_ordered_dict
+
+        assert len(val_list) >= 1, \
+            f"work dir {config_dir} doesn't contain any evaluation."
+        new_log_dict['last eval'] = val_list[-1]
+        new_log_dict['best eval'] = max(val_list, key=lambda x: x[metric])
+        experiment_info_list.append(new_log_dict)
+        print(f'{config_dir} is processed')
+
+    if json_file:
+        with open(json_file, 'w') as f:
+            json.dump(experiment_info_list, f, indent=4)
+
+    if markdown_file:
+        lines_to_write = []
+        for index, log in enumerate(experiment_info_list, 1):
+            lines_to_write.append(
+                f"|{index}|{log['method']}|{log['best eval'][metric]}"
+                f"|{log['best eval']['eval_index']}|"
+                f"{log['last eval'][metric]}|"
+                f"{log['last eval']['eval_index']}|{log['last_iter']}|\n")
+        with open(markdown_file, 'w') as f:
+            f.write(f'|exp_num|method|{metric} best|best index|'
+                    f'{metric} last|last index|last iter num|\n')
+            f.write('|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n')
+            f.writelines(lines_to_write)
+
+    print('processed successfully')
+
+
+# run the collector only when this file is executed as a script
+if __name__ == '__main__':
+    main()
diff --git a/.dev/log_collector/readme.md b/.dev/log_collector/readme.md
@@ -0,0 +1,143 @@
+# Log Collector
+
+## Function
+
+Automatically collect logs and write the result in a json file or markdown file.
+
+If there are several `.log.json` files in one folder, Log Collector assumes that every `.log.json` file other than the first one was resumed from the preceding `.log.json` file. Log Collector returns the result considering all `.log.json` files.
+
+## Usage:
+
+To use log collector, you need to write a config file to configure the log collector first.
+
+For example:
+
+example_config.py:
+
+```python
+# The work directory that contains folders that contain .log.json files.
+work_dir = '../../work_dirs'
+# The metric used to find the best evaluation.
+metric = 'mIoU'
+
+# **Don't specify the log_items and ignore_keywords at the same time.**
+# Specify the log files we would like to collect in `log_items`.
+# The folders specified should be the subdirectories of `work_dir`.
+log_items = [
+    'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
+    'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
+    'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
+    'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr'
+]
+# Or specify `ignore_keywords`. The folders whose names contain one
+# of the keywords in the `ignore_keywords` list (e.g., `'segformer'`)
+# won't be collected.
+# ignore_keywords = ['segformer']
+
+# Other log items in .log.json that you want to collect.
+# should not include metric.
+other_info_keys = ["mAcc"]
+# The output markdown file's name. (optional)
+markdown_file = 'markdowns/lr_in_trans.json.md'
+# The output json file's name. (optional)
+json_file = 'jsons/trans_in_cnn.json'
+```
+
+The structure of the work-dir directory should be like:
+
+```text
+├── work-dir
+│   ├── folder1
+│   │   ├── time1.log.json
+│   │   ├── time2.log.json
+│   │   ├── time3.log.json
+│   │   ├── time4.log.json
+│   ├── folder2
+│   │   ├── time5.log.json
+│   │   ├── time6.log.json
+│   │   ├── time7.log.json
+│   │   ├── time8.log.json
+```
+
+Then, cd to the log collector folder.
+
+Now you can run log_collector.py with the command:
+
+```bash
+python log_collector.py ./example_config.py
+```
+
+The output markdown file is like:
+
+|exp_num|method|mIoU best|best index|mIoU last|last index|last iter num|
+|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+|1|segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup|0.2776|10|0.2776|10|160000|
+|2|segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr|0.2802|10|0.2802|10|160000|
+|3|segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr|0.4943|11|0.4943|11|160000|
+|4|segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr|0.4883|11|0.4883|11|160000|
+
+The output json file is like:
+
+```json
+[
+    {
+        "method": "segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup",
+        "metric_used": "mIoU",
+        "last_iter": 160000,
+        "last eval": {
+            "eval_index": 10,
+            "mIoU": 0.2776,
+            "mAcc": 0.3779
+        },
+        "best eval": {
+            "eval_index": 10,
+            "mIoU": 0.2776,
+            "mAcc": 0.3779
+        }
+    },
+    {
+        "method": "segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr",
+        "metric_used": "mIoU",
+        "last_iter": 160000,
+        "last eval": {
+            "eval_index": 10,
+            "mIoU": 0.2802,
+            "mAcc": 0.3764
+        },
+        "best eval": {
+            "eval_index": 10,
+            "mIoU": 0.2802,
+            "mAcc": 0.3764
+        }
+    },
+    {
+        "method": "segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr",
+        "metric_used": "mIoU",
+        "last_iter": 160000,
+        "last eval": {
+            "eval_index": 11,
+            "mIoU": 0.4943,
+            "mAcc": 0.6097
+        },
+        "best eval": {
+            "eval_index": 11,
+            "mIoU": 0.4943,
+            "mAcc": 0.6097
+        }
+    },
+    {
+        "method": "segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr",
+        "metric_used": "mIoU",
+        "last_iter": 160000,
+        "last eval": {
+            "eval_index": 11,
+            "mIoU": 0.4883,
+            "mAcc": 0.6061
+        },
+        "best eval": {
+            "eval_index": 11,
+            "mIoU": 0.4883,
+            "mAcc": 0.6061
+        }
+    }
+]
+```
diff --git a/.dev/log_collector/utils.py b/.dev/log_collector/utils.py
@@ -0,0 +1,20 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# modified from https://github.dev/open-mmlab/mmcv
+import os.path as osp
+import sys
+from importlib import import_module
+
+
+def load_config(cfg_dir: str) -> dict:
+    assert cfg_dir.endswith('.py')
+    root_path, file_name = osp.split(cfg_dir)
+    temp_module = osp.splitext(file_name)[0]
+    sys.path.insert(0, root_path)
+    mod = import_module(temp_module)
+    sys.path.pop(0)
+    cfg_dict = {
+        k: v
+        for k, v in mod.__dict__.items() if not k.startswith('__')
+    }
+    del sys.modules[temp_module]
+    return cfg_dict