@@ -57,26 +57,6 @@ def convert_anno_detection_to_segmentation(img_dir, anno_det_dir, output_anno_se
5757 cv2 .rectangle (anno_mask ,(int (left )- extend ,int (top )- extend ),(int (right )+ extend ,int (bottom )+ extend ),1 ,- 1 )
5858 cv2 .imwrite (os .path .join (output_anno_segment_dir ,img_name ),anno_mask )
5959
def convert_anno_objective2_to_segmentation(img_dir, anno_det_dir, output_anno_segment_dir, extend=-1,
                                            format_anno_det='icdar', class_list=None):
    """Rasterize per-image JSON cell boxes into binary segmentation masks.

    For every image returned by ``get_list_file_in_folder(img_dir)`` the JSON
    file with the same base name is loaded from ``anno_det_dir``; each entry of
    its ``'cellboxes'`` list is read as ``[left, top, right, bottom, ...]`` and
    drawn as a filled rectangle of value 1 on a zero-initialised ``uint8`` mask
    of the image's height and width. The mask is written to
    ``output_anno_segment_dir`` under the image's own file name.

    Args:
        img_dir: Folder containing the source images.
        anno_det_dir: Folder containing one ``<image base name>.json`` per image.
        output_anno_segment_dir: Folder receiving the masks (created if missing).
        extend: Pixels added on every side of each box before drawing; negative
            values shrink the box (the default ``-1`` shrinks it by one pixel).
        format_anno_det: Unused; kept so existing callers keep working.
        class_list: Unused; kept so existing callers keep working.

    Raises:
        FileNotFoundError: If the JSON annotation of a readable image is missing.
        KeyError: If an annotation has no ``'cellboxes'`` entry.
    """
    # Local import, hoisted out of the per-image loop where it originally lived.
    import json

    # cv2.imwrite does not create missing folders; make sure the target exists.
    os.makedirs(output_anno_segment_dir, exist_ok=True)

    for idx, img_name in enumerate(sorted(get_list_file_in_folder(img_dir))):
        print(idx, img_name)
        img = cv2.imread(os.path.join(img_dir, img_name))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('skip unreadable image:', img_name)
            continue

        height, width = img.shape[:2]
        anno_mask = np.zeros((height, width), np.uint8)

        # Swap whatever extension the image has for '.json' (covers .jpeg, .JPG, ...).
        anno_file = os.path.join(anno_det_dir, os.path.splitext(img_name)[0] + '.json')
        with open(anno_file, "r") as anno:
            annotation = json.load(anno)

        for box in annotation['cellboxes']:
            left, top, right, bottom = box[0], box[1], box[2], box[3]
            # Thickness -1 asks OpenCV for a filled rectangle; colour 1 marks foreground.
            cv2.rectangle(anno_mask,
                          (int(left) - extend, int(top) - extend),
                          (int(right) + extend, int(bottom) + extend),
                          1, -1)
        cv2.imwrite(os.path.join(output_anno_segment_dir, img_name), anno_mask)
    print('ok')
79-
8060def split_dataset (img_dir , ann_dir , img_dst_dir , ann_dst_dir , ratio = 0.5 ):
8161 list_images = get_list_file_in_folder (img_dir )
8262 random .shuffle (list_images )
@@ -260,41 +240,20 @@ def visualize_normal_format_dataset(img_dir, ann_dir):
260240
261241
262242if __name__ == '__main__' :
 # Script entry point: ad-hoc driver that calls the dataset helpers of this file with hard-coded paths.
 # NOTE(review): this block is shown as a diff - lines tagged '-' belong to the removed revision,
 # lines tagged '+' to the added one; untagged lines are shared by both.
263- #img=cv2.imread('/home/cuongnd/PycharmProjects/aicr/source/mmsegmentation/data/ade/ADEChallengeData2016/annotations/validation/ADE_val_00000012.png', cv2.IMREAD_GRAYSCALE)
264-
265- data_dir = '/data20.04/data/table recognition/from_Korea/201012_172754_pubtabnet_valid_sample_objective#2'
266- img_dir = data_dir + '/images'
267- anno_det_dir = data_dir + '/annots'
268- output_anno_segment_dir = data_dir + '/annot_seg'
269-
270-
271- #convert_anno_objective2_to_segmentation(img_dir, anno_det_dir, output_anno_segment_dir)
272- #
273243 # split_dataset(img_dir='/data4T/cuongnd/dataset/publaynet_split1/img_dir/train',
274244 # ann_dir='/data4T/cuongnd/dataset/publaynet_split1/ann_dir/train_3classes',
275245 # img_dst_dir='/data4T/cuongnd/dataset/doc_structure1/img_dir/train',
276246 # ann_dst_dir='/data4T/cuongnd/dataset/doc_structure1/ann_dir/train',
277247 # ratio=0.002)
278248
279- # del_dataset(img_dir='/data20.04/data/doc_structure/publaynet/img_dir/train',
280- # ann_dir='/data20.04/data/doc_structure/publaynet/ann_dir/train')
281-
282- src_anno_dir = '/data4T/cuongnd/dataset/publaynet_split1/ann_dir/val'
283- dst_anno_dir = '/data4T/cuongnd/dataset/publaynet_split1/ann_dir/val_3classes'
284- # refactor_classes_of_dataset(src_anno_dir, dst_anno_dir,
285- # src_classes=[1, 2, 3, 4, 5], #('text', 'title', 'list', 'table', 'figure')
286- # dst_classes=[1, 1, 3, 2, 1])
287-
 # Only active call of the added revision: hands a source and a destination folder to
 # convert_all_imgs_to_jpg (presumably re-encodes every image as JPEG - confirm against its definition).
 # NOTE(review): both paths point at 'imgs' folders although the variables are named *_anno_dir.
249+ src_anno_dir = '/data_backup/cuongnd/Viettel_freeform/MAFC/BHYT_origin/imgs/clean'
250+ dst_anno_dir = '/data_backup/cuongnd/mmseg/doc_seg/imgs/bhyt'
251+ convert_all_imgs_to_jpg (src_anno_dir ,dst_anno_dir )
288252
289- #onvert_voc_label_to_normal_format(src_anno_dir,dst_anno_dir)
290253
291- #convert_all_imgs_to_jpg(src_anno_dir,dst_anno_dir)
292- #
293- # refine_dataset(img_dir='/data4T/ntanh/publaynet/train',
294- # ann_dir='/data4T/ntanh/publaynet_gen_gt_oct2.1/train/label')
295- img_dir = '/home/duycuong/home_data/mmlab/mmseg/popular_doc/images/train'
296- ann_dir = '/home/duycuong/home_data/mmlab/mmseg/popular_doc/annotations/train'
297- visualize_normal_format_dataset (img_dir = img_dir ,
298- ann_dir = ann_dir )
 # The visualization call is kept for reference but is commented out in the added revision.
254+ # img_dir='/data_backup/cuongnd/mmseg/doc_seg_data/imgs/train'
255+ # ann_dir='/data_backup/cuongnd/mmseg/doc_seg_data/anno/train'
256+ # visualize_normal_format_dataset(img_dir=img_dir,
257+ # ann_dir=ann_dir)
299258
300259
0 commit comments