Skip to content
This repository was archived by the owner on Feb 18, 2023. It is now read-only.

Commit b17fd3b

Browse files
committed
added cocojson description, fix coco unique geometry id
1 parent 8c58967 commit b17fd3b

File tree

1 file changed

+69
-26
lines changed

1 file changed

+69
-26
lines changed

utils/coco.py

Lines changed: 69 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,14 @@
1616

1717

1818
def train_test_split(chip_dfs: Dict, test_size=0.2, seed=1) -> Tuple[Dict, Dict]:
19-
"""Split chips into training and test set"""
19+
"""Split chips into training and test set.
20+
21+
Args:
22+
chip_dfs: Dictionary containing key (filename of the chip) value (dataframe with
23+
geometries for that chip) pairs.
24+
test_size: Relative number of chips to be put in the test dataset. 1-test_size is the size of the
25+
training data set.
26+
"""
2027
chips_list = list(chip_dfs.keys())
2128
random.seed(seed)
2229
random.shuffle(chips_list)
@@ -33,28 +40,62 @@ def train_test_split(chip_dfs: Dict, test_size=0.2, seed=1) -> Tuple[Dict, Dict]
3340
def format_coco(chip_dfs: Dict, chip_width: int, chip_height: int):
3441
"""Format train and test chip geometries to COCO json format.
3542
36-
COCO train and val set have specific ids.
43+
Args:
44+
chip_dfs: Dictionary containing key (filename of the chip) value (dataframe with
45+
geometries for that chip) pairs.
46+
chip_width: width of the chip in pixel size.
47+
chip_height: height of the chip in pixel size.
48+
49+
COCOjson example structure and instructions below. For more detailed information on building a COCO
50+
dataset see http://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch
51+
52+
cocojson = {
53+
"info": {...},
54+
"licenses": [...],
55+
"categories": [{"supercategory": "person","id": 1,"name": "person"},
56+
{"supercategory": "vehicle","id": 2,"name": "bicycle"},
57+
...],
58+
"images": [{"file_name": "000000289343.jpg", "height": 427, "width": 640, "id": 397133},
59+
{"file_name": "000000037777.jpg", "height": 230, "width": 352, "id": 37777},
60+
...],
61+
"annotations": [{"segmentation": [[510.66,423.01,...,510.45,423.01]], "area": 702.10, "iscrowd": 0,
62+
"image_id": 289343, "bbox": [473.07,395.93,38.65,28.67], "category_id": 18, "id": 1768},
63+
{"segmentation": [[340.32,758.01,...,134.25,875.01]], "area": 342.08, "iscrowd": 0,
64+
"image_id": 289343, "bbox": [473.07,395.93,38.65,28.67], "category_id": 18, "id": 1768},
65+
...]
66+
}
67+
68+
- "id" in "categories" has to match "category_id" in "annotations".
69+
- "id" in "images" has to match "image_id" in "annotations".
70+
- "segmentation" in "annotations" is a list of polygons for single objects (iscrowd=0); only crowd regions (iscrowd=1) are encoded in Run-Length-Encoding (RLE).
71+
- "id" in "annotations" has to be unique for each geometry, so 4370 geometries in 1000 chips > 4370 unique
72+
geometry ids. However, does not have to be unique between coco train and validation set.
73+
- "file_name" in "images" officially does not have to match the "image_id" in "annotations", but matching them is strongly
74+
recommended.
3775
"""
3876
cocojson = {
3977
"info": {},
4078
"licenses": [],
4179
'categories': [{'supercategory': 'AgriculturalFields',
42-
'id': 1, # id needs to match category_id.
80+
'id': 1, # needs to match category_id.
4381
'name': 'agfields_singleclass'}]}
4482

45-
for key_idx, key in enumerate(chip_dfs.keys()):
46-
if 'train' in key:
47-
chip_id = int(key[21:])
48-
elif 'val' in key:
49-
chip_id = int(key[19:])
83+
annotation_id = 1
84+
85+
for chip_name in chip_dfs.keys():
5086

51-
key_image = ({"file_name": f'{key}.jpg',
52-
"id": int(chip_id),
53-
"height": chip_width,
54-
"width": chip_height})
55-
cocojson.setdefault('images', []).append(key_image)
87+
if 'train' in chip_name:
88+
chip_id = int(chip_name[21:])
89+
elif 'val' in chip_name:
90+
chip_id = int(chip_name[19:])
5691

57-
for row_idx, row in chip_dfs[key]['chip_df'].iterrows():
92+
image = {"file_name": f'{chip_name}.jpg',
93+
"id": int(chip_id),
94+
"height": chip_width,
95+
"width": chip_height}
96+
cocojson.setdefault('images', []).append(image)
97+
98+
for _, row in chip_dfs[chip_name]['chip_df'].iterrows():
5899
# Convert geometry to COCO segmentation format:
59100
# From shapely POLYGON ((x y, x1 y2, ..)) to COCO [[x, y, x1, y1, ..]].
60101
# Single objects (iscrowd=0) use polygon segmentation; only crowd regions (iscrowd=1) are encoded by RLE.
@@ -65,17 +106,19 @@ def format_coco(chip_dfs: Dict, chip_width: int, chip_height: int):
65106
coco_bbox = [bounds[0], bounds[1], bounds[2] - bounds[0], bounds[3] - bounds[1]]
66107
coco_bbox = [round(coords, 2) for coords in coco_bbox]
67108

68-
key_annotation = {"id": key_idx,
69-
"image_id": int(chip_id),
70-
"category_id": 1, # with multiple classes use "category_id" : row.reclass_id
71-
"mycategory_name": 'agfields_singleclass',
72-
"old_multiclass_category_name": row['r_lc_name'],
73-
"old_multiclass_category_id": row['r_lc_id'],
74-
"bbox": coco_bbox,
75-
"area": row.geometry.area,
76-
"iscrowd": 0,
77-
"segmentation": [coco_xy]}
78-
cocojson.setdefault('annotations', []).append(key_annotation)
109+
annotation = {"id": annotation_id,
110+
"image_id": int(chip_id),
111+
"category_id": 1, # with multiple classes use "category_id" : row.reclass_id
112+
"mycategory_name": 'agfields_singleclass',
113+
"old_multiclass_category_name": row['r_lc_name'],
114+
"old_multiclass_category_id": row['r_lc_id'],
115+
"bbox": coco_bbox,
116+
"area": row.geometry.area,
117+
"iscrowd": 0,
118+
"segmentation": [coco_xy]}
119+
cocojson.setdefault('annotations', []).append(annotation)
120+
121+
annotation_id += 1
79122

80123
return cocojson
81124

@@ -94,7 +137,7 @@ def move_coco_val_images(inpath_train_folder, val_chips_list):
94137

95138

96139
def coco_to_shapely(inpath_json: Union[Path, str],
97-
categories: List[int]=None) -> Dict:
140+
categories: List[int] = None) -> Dict:
98141
"""Transforms COCO annotations to shapely geometry format.
99142
100143
Args:

0 commit comments

Comments
 (0)