Open
Description
Basically, while implementing YOLO model from scratch and following the tutorial, I figured out that the mAP score in "YOLOv3 from Scratch" is misleadingly high. get_evaluation_bboxes
function incorrectly assigns classes to predicted boxes. A lot of objects of different classes end up being in one class by mistake, which skews the AP score.
Here is the code, so you could reproduce the bug yourselves:
import config
import torch
from model import YOLOv3
from utils import (
cells_to_bboxes,
)
def get_loader():
from dataset import YOLODataset
from torch.utils.data import DataLoader, Subset
IMAGE_SIZE = config.IMAGE_SIZE
train_dataset = YOLODataset(
config.DATASET + "/train.csv",
transform=config.train_transforms,
S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
img_dir=config.IMG_DIR,
label_dir=config.LABEL_DIR,
anchors=config.ANCHORS,
)
sample = Subset(train_dataset, list(range(0, 1)))
train_loader = DataLoader(
dataset=sample,
batch_size=config.BATCH_SIZE,
num_workers=config.NUM_WORKERS,
pin_memory=config.PIN_MEMORY,
shuffle=False,
drop_last=False,
)
return train_loader
def get_evaluation_bboxes(
loader,
model,
iou_threshold,
anchors,
threshold,
box_format="midpoint",
device="cuda",
):
# make sure model is in eval before get bboxes
model.eval()
train_idx = 0
all_pred_boxes = []
all_true_boxes = []
for batch_idx, (x, labels) in enumerate(loader):
x = x.to(device)
with torch.no_grad():
predictions = model(x)
batch_size = x.shape[0]
bboxes = [[] for _ in range(batch_size)]
for i in range(3):
S = predictions[i].shape[2]
anchor = torch.tensor([*anchors[i]]).to(device) * S
boxes_scale_i = cells_to_bboxes(
predictions[i], anchor, S=S, is_preds=True
)
for idx, (box) in enumerate(boxes_scale_i):
bboxes[idx] += box
# we just want one bbox for each label, not one for each scale
true_bboxes = cells_to_bboxes(
labels[2], anchor, S=S, is_preds=False
)
for idx in range(batch_size):
nms_boxes = bboxes[idx]
for nms_box in nms_boxes:
all_pred_boxes.append([train_idx] + nms_box)
for box in true_bboxes[idx]:
if box[1] > threshold:
all_true_boxes.append([train_idx] + box)
train_idx += 1
model.train()
return all_pred_boxes, all_true_boxes
def main():
model = YOLOv3(num_classes=config.NUM_CLASSES).to(config.DEVICE)
loader = get_loader()
imgs, labels = next(iter(loader))
raw_preds = model(imgs.to(config.DEVICE))
all_pred_classes = []
for head in raw_preds:
all_pred_classes.append(torch.argmax(head[..., 6:], dim=-1).flatten())
all_pred_classes = torch.cat(all_pred_classes, dim=0)
print(all_pred_classes.shape)
pred_boxes, true_boxes = get_evaluation_bboxes(
loader,
model,
iou_threshold=config.NMS_IOU_THRESH,
anchors=config.ANCHORS,
threshold=config.CONF_THRESHOLD,
)
pred_boxes = torch.tensor([pred_box[1] for pred_box in pred_boxes])
print(torch.unique(pred_boxes,return_counts = True))
print(torch.unique(all_pred_classes, return_counts=True))
if __name__ == "__main__":
main()
The code outputs two distributions of classes. It is the same predictions object, processed by Alladin's function and my own function. The output is:
(tensor([ 1., 2., 4., 6., 7., 8., 13., 16., 19.]), tensor([ 49, 704, 169, 5408, 2704, 169, 171, 625, 648]))
(tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18], device='cuda:0'), tensor([ 269, 702, 401, 208, 457, 525, 812, 221, 255, 755, 704, 270,
779, 339, 831, 815, 811, 491, 1002], device='cuda:0'))
P.S. I removed non-max suppression to make the function faster.
Metadata
Metadata
Assignees
Labels
No labels