1616
1717
1818def train_test_split (chip_dfs : Dict , test_size = 0.2 , seed = 1 ) -> Tuple [Dict , Dict ]:
19- """Split chips into training and test set"""
19+ """Split chips into training and test set.
20+
21+ Args:
22+ chip_dfs: Dictionary containing key (filename of the chip) value (dataframe with
23+ geometries for that chip) pairs.
24+ test_size: Fraction of the chips to put in the test dataset. The remaining 1 - test_size share forms the
25+ training dataset.
26+ """
2027 chips_list = list (chip_dfs .keys ())
2128 random .seed (seed )
2229 random .shuffle (chips_list )
@@ -33,28 +40,62 @@ def train_test_split(chip_dfs: Dict, test_size=0.2, seed=1) -> Tuple[Dict, Dict]
3340def format_coco (chip_dfs : Dict , chip_width : int , chip_height : int ):
3441 """Format train and test chip geometries to COCO json format.
3542
36- COCO train and val set have specific ids.
43+ Args:
44+ chip_dfs: Dictionary containing key (filename of the chip) value (dataframe with
45+ geometries for that chip) pairs.
46+ chip_width: Width of the chip in pixels.
47+ chip_height: Height of the chip in pixels.
48+
49+ COCOjson example structure and instructions below. For more detailed information on building a COCO
50+ dataset see http://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch
51+
52+ cocojson = {
53+ "info": {...},
54+ "licenses": [...],
55+ "categories": [{"supercategory": "person","id": 1,"name": "person"},
56+ {"supercategory": "vehicle","id": 2,"name": "bicycle"},
57+ ...],
58+ "images": [{"file_name": "000000289343.jpg", "height": 427, "width": 640, "id": 397133},
59+ {"file_name": "000000037777.jpg", "height": 230, "width": 352, "id": 37777},
60+ ...],
61+ "annotations": [{"segmentation": [[510.66,423.01,...,510.45,423.01]], "area": 702.10, "iscrowd": 0,
62+ "image_id": 289343, "bbox": [473.07,395.93,38.65,28.67], "category_id": 18, "id": 1768},
63+ {"segmentation": [[340.32,758.01,...,134.25,875.01]], "area": 342.08, "iscrowd": 0,
64+ "image_id": 289343, "bbox": [473.07,395.93,38.65,28.67], "category_id": 18, "id": 1768},
65+ ...]
66+ }
67+
68+ - "id" in "categories" has to match "category_id" in "annotations".
69+ - "id" in "images" has to match "image_id" in "annotations".
70+ - "segmentation" in "annotations" is a polygon list for single objects (iscrowd=0), RLE only for crowd regions (iscrowd=1).
71+ - "id" in "annotations" has to be unique for each geometry, so 4370 geometries in 1000 chips need 4370 unique
72+ geometry ids. However, the ids do not have to be unique between the COCO train and validation sets.
73+ - "file_name" in "images" officially does not have to match the "image_id" in "annotations", but matching them
74+ is strongly recommended.
3775 """
3876 cocojson = {
3977 "info" : {},
4078 "licenses" : [],
4179 'categories' : [{'supercategory' : 'AgriculturalFields' ,
42- 'id' : 1 , # id needs to match category_id.
80+ 'id' : 1 , # needs to match category_id.
4381 'name' : 'agfields_singleclass' }]}
4482
45- for key_idx , key in enumerate (chip_dfs .keys ()):
46- if 'train' in key :
47- chip_id = int (key [21 :])
48- elif 'val' in key :
49- chip_id = int (key [19 :])
83+ annotation_id = 1
84+
85+ for chip_name in chip_dfs .keys ():
5086
51- key_image = ({"file_name" : f'{ key } .jpg' ,
52- "id" : int (chip_id ),
53- "height" : chip_width ,
54- "width" : chip_height })
55- cocojson .setdefault ('images' , []).append (key_image )
87+ if 'train' in chip_name :
88+ chip_id = int (chip_name [21 :])
89+ elif 'val' in chip_name :
90+ chip_id = int (chip_name [19 :])
5691
57- for row_idx , row in chip_dfs [key ]['chip_df' ].iterrows ():
92+ image = {"file_name" : f'{ chip_name } .jpg' ,
93+ "id" : int (chip_id ),
94+ "height" : chip_width ,
95+ "width" : chip_height }
96+ cocojson .setdefault ('images' , []).append (image )
97+
98+ for _ , row in chip_dfs [chip_name ]['chip_df' ].iterrows ():
5899 # Convert geometry to COCO segmentation format:
59100 # From shapely POLYGON ((x y, x1 y2, ..)) to COCO [[x, y, x1, y1, ..]].
60101 # The annotations were encoded by RLE, except for crowd region (iscrowd=1)
@@ -65,17 +106,19 @@ def format_coco(chip_dfs: Dict, chip_width: int, chip_height: int):
65106 coco_bbox = [bounds [0 ], bounds [1 ], bounds [2 ] - bounds [0 ], bounds [3 ] - bounds [1 ]]
66107 coco_bbox = [round (coords , 2 ) for coords in coco_bbox ]
67108
68- key_annotation = {"id" : key_idx ,
69- "image_id" : int (chip_id ),
70- "category_id" : 1 , # with multiple classes use "category_id" : row.reclass_id
71- "mycategory_name" : 'agfields_singleclass' ,
72- "old_multiclass_category_name" : row ['r_lc_name' ],
73- "old_multiclass_category_id" : row ['r_lc_id' ],
74- "bbox" : coco_bbox ,
75- "area" : row .geometry .area ,
76- "iscrowd" : 0 ,
77- "segmentation" : [coco_xy ]}
78- cocojson .setdefault ('annotations' , []).append (key_annotation )
109+ annotation = {"id" : annotation_id ,
110+ "image_id" : int (chip_id ),
111+ "category_id" : 1 , # with multiple classes use "category_id" : row.reclass_id
112+ "mycategory_name" : 'agfields_singleclass' ,
113+ "old_multiclass_category_name" : row ['r_lc_name' ],
114+ "old_multiclass_category_id" : row ['r_lc_id' ],
115+ "bbox" : coco_bbox ,
116+ "area" : row .geometry .area ,
117+ "iscrowd" : 0 ,
118+ "segmentation" : [coco_xy ]}
119+ cocojson .setdefault ('annotations' , []).append (annotation )
120+
121+ annotation_id += 1
79122
80123 return cocojson
81124
@@ -94,7 +137,7 @@ def move_coco_val_images(inpath_train_folder, val_chips_list):
94137
95138
96139def coco_to_shapely (inpath_json : Union [Path , str ],
97- categories : List [int ]= None ) -> Dict :
140+ categories : List [int ] = None ) -> Dict :
98141 """Transforms COCO annotations to shapely geometry format.
99142
100143 Args:
0 commit comments