Skip to content

Commit 4f7718b

Browse files
committed
Image and Annotation pre-process script
1 parent 43e4239 commit 4f7718b

File tree

1 file changed

+145
-0
lines changed

1 file changed

+145
-0
lines changed

scripts/preprocess_images.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
from PIL import Image
2+
import os
3+
from tqdm import tqdm
4+
import splitfolders
5+
import argparse
6+
import shutil
7+
8+
def parse_args():
    """Parse the command-line options of the pre-processing script.

    Returns:
        argparse.Namespace: carries ``img_path``, ``ann_path``,
        ``resized_img_path``, ``resized_ann_path``, ``split_img_path``,
        ``split_ann_path``, ``seed`` (int) and ``split_ratio`` (a list of
        floats, or ``None`` when the option is not given).
    """
    parser = argparse.ArgumentParser(description='preprocess pics')
    parser.add_argument('--img-path', required=True, help='images file path')
    parser.add_argument('--ann-path', required=True, help='annotations file path')
    parser.add_argument('--resized-img-path', required=True, help='resized image file path')
    parser.add_argument('--resized-ann-path', required=True, help='resized annotation file path')
    parser.add_argument('--split-img-path', required=True, help='split image file path')
    parser.add_argument('--split-ann-path', required=True, help='split annotation file path')
    # type=int keeps a user-supplied seed the same type as the default; without
    # it "--seed 4832" arrives as the string '4832', which is not the same seed
    # value as the integer 4832.
    parser.add_argument('--seed', type=int, default=4832, help='choose your seed!')
    # The values are forwarded to splitfolders.ratio, whose order is
    # (train, val, test).
    parser.add_argument('--split-ratio', nargs='+', type=float, help='train, val, test ratios')
    return parser.parse_args()
21+
22+
23+
def validate_size(img_path, ann_path):
    """Check that each image has the same pixel size as its annotation.

    For every regular file in ``img_path`` the file with the same name is
    opened from ``ann_path`` and the two sizes are compared.

    Args:
        img_path: directory holding the images.
        ann_path: directory holding the annotations (same file names).

    Returns:
        bool: ``False`` as soon as one pair differs in width or height,
        ``True`` otherwise (including when ``img_path`` has no files).

    Raises:
        Whatever ``Image.open`` raises when an annotation with the image's
        file name is missing or unreadable.
    """
    for item in os.listdir(img_path):
        # os.path.join instead of string concatenation: without a trailing
        # separator on img_path the concatenated name never exists, every
        # entry is skipped and the check passes without checking anything.
        img_file = os.path.join(img_path, item)
        if not os.path.isfile(img_file):
            continue
        ann_file = os.path.join(ann_path, item)
        # Context managers release the file handles of both images.
        with Image.open(img_file) as im, Image.open(ann_file) as ann:
            if im.size != ann.size:
                return False
    return True
35+
36+
def resize_images(path, output_path, target_width=600):
    """Resize every file in ``path`` to a fixed width, keeping aspect ratio.

    Each regular file in ``path`` is opened as an image, scaled to
    ``target_width`` pixels wide with a proportional height, and saved under
    the same file name in ``output_path``. Sub-directories are ignored.

    Args:
        path: directory with the source images.
        output_path: directory for the resized copies; created (including
            missing parents) when it does not exist.
        target_width: width of the resized images in pixels (default 600).

    NOTE(review): the script also runs this on the annotation directory. If
    the annotations are label masks, an interpolating filter may blend label
    values -- confirm whether nearest-neighbour is needed there.
    """
    os.makedirs(output_path, exist_ok=True)
    for item in os.listdir(path):
        input_file = os.path.join(path, item)
        if not os.path.isfile(input_file):
            continue
        with Image.open(input_file) as im:
            width, height = im.size
            # max(1, ...) guards extremely wide images whose scaled height
            # would truncate to 0, which resize() rejects.
            new_height = max(1, int((target_width * height) / width))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = im.resize((target_width, new_height), Image.LANCZOS)
        resized.save(os.path.join(output_path, item))
51+
52+
def split_train_val(all_images, all_annotations, output_dir_img, output_dir_ann, seed, split=(.8, 0.1,0.1)):
    """Split images and annotations into train/test/val folders.

    ``splitfolders.ratio`` expects an input folder that contains one folder
    per class, so each input directory is first moved inside a wrapper
    folder (``nested_img`` / ``nested_ann``, created next to it). After the
    split, the per-class folder that splitfolders creates inside each of
    ``train``/``test``/``val`` is flattened so the files sit directly in the
    subset folder.

    Args:
        all_images: directory with all (resized) images; it is MOVED into
            ``<parent>/nested_img``.
        all_annotations: directory with all (resized) annotations; it is
            MOVED into ``<parent>/nested_ann``.
        output_dir_img: root folder for the split images.
        output_dir_ann: root folder for the split annotations.
        seed: seed forwarded to ``splitfolders.ratio``; the same value is
            used for both calls.
        split: ratios forwarded to ``splitfolders.ratio``.
    """
    img_destination, img_class = _nest_under(all_images, 'nested_img')
    ann_destination, ann_class = _nest_under(all_annotations, 'nested_ann')

    subdirs = ['train', 'test', 'val']
    for out_dir in (output_dir_img, output_dir_ann):
        for s in subdirs:
            os.makedirs(os.path.join(out_dir, s), exist_ok=True)

    splitfolders.ratio(img_destination, output=output_dir_img, seed=seed, ratio=split)
    splitfolders.ratio(ann_destination, output=output_dir_ann, seed=seed, ratio=split)

    _flatten_split(output_dir_img, subdirs, img_class)
    _flatten_split(output_dir_ann, subdirs, ann_class)


def _nest_under(source_dir, wrapper_name):
    """Move ``source_dir`` into a sibling folder ``wrapper_name``.

    Returns:
        tuple: (path of the wrapper folder, name the moved directory has
        inside it).
    """
    # normpath drops a trailing separator; with one, dirname() would return
    # the directory itself and the move below would target its own inside.
    source_dir = os.path.normpath(source_dir)
    wrapper = os.path.join(os.path.dirname(source_dir), wrapper_name)
    os.makedirs(wrapper, exist_ok=True)
    shutil.move(source_dir, wrapper)
    return wrapper, os.path.basename(source_dir)


def _flatten_split(output_dir, subdirs, class_name):
    """Lift the files of ``<subset>/<class_name>`` up into ``<subset>``."""
    for s in subdirs:
        dest_path = os.path.join(output_dir, s)
        # The inner folder is named after the input directory rather than a
        # fixed 'resized_img' / 'resized_ann', so any directory name works.
        inner_path = os.path.join(dest_path, class_name)
        if not os.path.isdir(inner_path):
            # e.g. a two-way ratio produces no 'test' subset
            continue
        for name in os.listdir(inner_path):
            shutil.copy(os.path.join(inner_path, name), dest_path)
        shutil.rmtree(inner_path)
97+
98+
99+
100+
def change_jpg(path, output):
    """Convert every file in ``path`` to an RGB ``.jpg`` stored in ``output``.

    Each regular file in ``path`` is opened as an image, converted to RGB and
    saved as ``<name without extension>.jpg`` in ``output``. The source file
    is deleted afterwards unless it is the very file that was just written.

    Args:
        path: directory with the images to convert.
        output: directory receiving the ``.jpg`` files; may equal ``path``.
    """
    for item in os.listdir(path):
        input_file = os.path.join(path, item)
        if not os.path.isfile(input_file):
            continue
        # splitext strips only the last extension, so 'a.b.png' becomes
        # 'a.b.jpg' instead of being truncated to 'a.jpg'.
        stem = os.path.splitext(item)[0]
        full_output = os.path.join(output, stem + '.jpg')
        with Image.open(input_file) as im:
            rgb = im.convert('RGB')
        rgb.save(full_output)
        # When the source already is '<stem>.jpg' in the output directory,
        # the save above overwrote it in place; removing it would delete the
        # converted image.
        if not os.path.samefile(input_file, full_output):
            os.remove(input_file)
111+
112+
if __name__ == '__main__':
    # Pipeline: validate sizes -> resize images and annotations -> convert the
    # resized images to .jpg -> split both sets into train/test/val.
    args = parse_args()
    img_path = args.img_path
    ann_path = args.ann_path
    img_resize_path = args.resized_img_path
    ann_resize_path = args.resized_ann_path
    split_img_out = args.split_img_path
    split_ann_out = args.split_ann_path
    seed = args.seed
    if args.split_ratio is not None:
        split = tuple(args.split_ratio)
    else:
        split = (.8, 0.1, 0.1)

    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would silently skip the validation.
    if not validate_size(img_path, ann_path):
        raise SystemExit('image and annotation sizes do not match')
    resize_images(img_path, img_resize_path)
    resize_images(ann_path, ann_resize_path)
    # Only the images are converted to .jpg; annotations keep their format.
    change_jpg(img_resize_path, img_resize_path)
    split_train_val(img_resize_path, ann_resize_path, split_img_out, split_ann_out, seed, split)
132+
133+
134+
135+
136+
137+
138+
139+
140+
141+
142+
143+
144+
145+

0 commit comments

Comments
 (0)