Commit cb2e0d3

Add blood vessel dataset processing script (open-mmlab#184)

* Add blood vessel dataset processing script
* Fix syntax error
* Fix syntax error
* Fix syntax error
* Fix bugs
* Fix bugs
* Fix bugs
* Use safe functions and expand more apis
* Use safe functions and expand more apis
* Fix hard code and verify dataset integrity

1 parent 5a76a71 commit cb2e0d3

File tree: 6 files changed, +544 −1 lines
docs/getting_started.md

Lines changed: 76 additions & 0 deletions
@@ -46,6 +46,34 @@ mmsegmentation
 │ │ │ ├── images
 │ │ │ │ ├── training
 │ │ │ │ ├── validation
+│ ├── CHASE_DB1
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+│ ├── DRIVE
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+│ ├── HRF
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
+│ ├── STARE
+│ │ ├── images
+│ │ │ ├── training
+│ │ │ ├── validation
+│ │ ├── annotations
+│ │ │ ├── training
+│ │ │ ├── validation
 
 ```
 

@@ -93,6 +121,54 @@ If you would like to use Pascal Context dataset, please install [Detail](https:/
 python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
 ```
+
+### CHASE DB1
+
+The training and validation sets of CHASE DB1 can be downloaded from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip).
+
+To convert the CHASE DB1 dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip
+```
+
+The script will generate the directory structure automatically.
+
+### DRIVE
+
+The training and validation sets of DRIVE can be downloaded from [here](https://drive.grand-challenge.org/). You need to register an account first. Currently, '1st_manual' is not officially provided for the test set.
+
+To convert the DRIVE dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip
+```
+
+The script will generate the directory structure automatically.
+
+### HRF
+
+First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip).
+
+To convert the HRF dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
+```
+
+The script will generate the directory structure automatically.
+
+### STARE
+
+First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar).
+
+To convert the STARE dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
+```
+
+The script will generate the directory structure automatically.
+
 ## Inference with pretrained models
 
 We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
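
After running any of the converters, a quick way to sanity-check the result is to confirm that each split has matching image and annotation counts. A minimal sketch, assuming the default `data/CHASE_DB1` output location (this helper is illustrative, not part of the PR):

```python
import os
import os.path as osp


def check_split(data_root, split):
    # Compare image and annotation file counts for one split.
    imgs = os.listdir(osp.join(data_root, 'images', split))
    anns = os.listdir(osp.join(data_root, 'annotations', split))
    assert len(imgs) == len(anns), \
        f'{split}: {len(imgs)} images vs {len(anns)} annotations'
    print(f'{split}: {len(imgs)} image/annotation pairs')


for split in ('training', 'validation'):
    check_split(osp.join('data', 'CHASE_DB1'), split)
```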

setup.cfg

Lines changed: 1 addition & 1 deletion
@@ -8,6 +8,6 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = setuptools
 known_first_party = mmseg
-known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch
+known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
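
Registering `cv2` under `known_third_party` keeps isort's import grouping consistent for the new converter scripts. A minimal sketch of the ordering this config produces (standard library first, then third-party):

```python
# isort groups these as: standard library, blank line, third-party.
import argparse
import os.path as osp

import cv2  # now recognized as third-party instead of an unknown module
import mmcv
```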

tools/convert_datasets/chase_db1.py

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@

```python
import argparse
import os
import os.path as osp
import tempfile
import zipfile

import mmcv

CHASE_DB1_LEN = 28 * 3
TRAINING_LEN = 60


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert CHASE_DB1 dataset to mmsegmentation format')
    parser.add_argument('dataset_path', help='path of CHASEDB1.zip')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    dataset_path = args.dataset_path
    if args.out_dir is None:
        out_dir = osp.join('data', 'CHASE_DB1')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print('Extracting CHASEDB1.zip...')
        zip_file = zipfile.ZipFile(dataset_path)
        zip_file.extractall(tmp_dir)

        print('Generating training dataset...')

        assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
            'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN)

        for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
            img = mmcv.imread(osp.join(tmp_dir, img_name))
            if osp.splitext(img_name)[1] == '.jpg':
                mmcv.imwrite(img,
                             osp.join(out_dir, 'images', 'training', img_name))
            else:
                # The annotation img should be divided by 128, because some of
                # the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. The
                # value divided by 128 is equivalent to '1 if value >= 128
                # else 0'
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'training',
                             osp.splitext(img_name)[0] + '.jpg'))

        for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
            img = mmcv.imread(osp.join(tmp_dir, img_name))
            if osp.splitext(img_name)[1] == '.jpg':
                mmcv.imwrite(
                    img, osp.join(out_dir, 'images', 'validation', img_name))
            else:
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        print('Removing the temporary files...')

    print('Done!')


if __name__ == '__main__':
    main()
```
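
Both converters binarize annotation masks with integer division by 128, which also normalizes nonstandard masks whose foreground is not exactly 255. A minimal sketch of the equivalence on toy uint8 values:

```python
import numpy as np

# For any uint8 value v, v // 128 equals (1 if v >= 128 else 0), so the
# division acts as a threshold that fixes nonstandard mask values.
values = np.array([0, 5, 127, 128, 200, 255], dtype=np.uint8)
print(values // 128)                     # [0 0 0 1 1 1]
print((values >= 128).astype(np.uint8))  # [0 0 0 1 1 1]
```

The `assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN` line is the "verify dataset integrity" step from the commit message: the constant `28 * 3` reflects 28 images plus two sets of manual annotations per image in the zip.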

tools/convert_datasets/drive.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@

```python
import argparse
import os
import os.path as osp
import tempfile
import zipfile

import cv2
import mmcv


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert DRIVE dataset to mmsegmentation format')
    parser.add_argument(
        'training_path', help='the training part of DRIVE dataset')
    parser.add_argument(
        'testing_path', help='the testing part of DRIVE dataset')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    training_path = args.training_path
    testing_path = args.testing_path
    if args.out_dir is None:
        out_dir = osp.join('data', 'DRIVE')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mmcv.mkdir_or_exist(out_dir)
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
    mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print('Extracting training.zip...')
        zip_file = zipfile.ZipFile(training_path)
        zip_file.extractall(tmp_dir)

        print('Generating training dataset...')
        now_dir = osp.join(tmp_dir, 'training', 'images')
        for img_name in os.listdir(now_dir):
            img = mmcv.imread(osp.join(now_dir, img_name))
            mmcv.imwrite(
                img,
                osp.join(out_dir, 'images', 'training',
                         osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'training', '1st_manual')
        for img_name in os.listdir(now_dir):
            cap = cv2.VideoCapture(osp.join(now_dir, img_name))
            ret, img = cap.read()
            mmcv.imwrite(
                img[:, :, 0] // 128,
                osp.join(out_dir, 'annotations', 'training',
                         osp.splitext(img_name)[0] + '.jpg'))

        print('Extracting test.zip...')
        zip_file = zipfile.ZipFile(testing_path)
        zip_file.extractall(tmp_dir)

        print('Generating validation dataset...')
        now_dir = osp.join(tmp_dir, 'test', 'images')
        for img_name in os.listdir(now_dir):
            img = mmcv.imread(osp.join(now_dir, img_name))
            mmcv.imwrite(
                img,
                osp.join(out_dir, 'images', 'validation',
                         osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'test', '1st_manual')
        if osp.exists(now_dir):
            for img_name in os.listdir(now_dir):
                cap = cv2.VideoCapture(osp.join(now_dir, img_name))
                ret, img = cap.read()
                # The annotation img should be divided by 128, because some of
                # the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. The
                # value divided by 128 is equivalent to '1 if value >= 128
                # else 0'
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        now_dir = osp.join(tmp_dir, 'test', '2nd_manual')
        if osp.exists(now_dir):
            for img_name in os.listdir(now_dir):
                cap = cv2.VideoCapture(osp.join(now_dir, img_name))
                ret, img = cap.read()
                mmcv.imwrite(
                    img[:, :, 0] // 128,
                    osp.join(out_dir, 'annotations', 'validation',
                             osp.splitext(img_name)[0] + '.jpg'))

        print('Removing the temporary files...')

    print('Done!')


if __name__ == '__main__':
    main()
```
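
`drive.py` reads the manual annotations with `cv2.VideoCapture` rather than `mmcv.imread`, presumably because DRIVE ships them as `.gif` files, which OpenCV's still-image reader does not decode; `VideoCapture` treats the GIF as a one-frame video. A minimal sketch of that trick in isolation (the file name is hypothetical):

```python
import cv2

# Grab the single frame of a GIF annotation; cv2.imread cannot decode GIFs.
cap = cv2.VideoCapture('21_manual1.gif')  # hypothetical DRIVE annotation
ret, frame = cap.read()                   # frame: BGR uint8 ndarray
assert ret, 'failed to decode GIF annotation'
label = frame[:, :, 0] // 128             # binarize to {0, 1}
```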
