Skip to content

Incorrect implementation of get_evaluation_bboxes leads to misleadingly high mAP #191

Open
@vvvvvaves

Description

@vvvvvaves

While implementing a YOLO model from scratch by following the tutorial, I found that the mAP score in "YOLOv3 from Scratch" is misleadingly high. The get_evaluation_bboxes function incorrectly assigns classes to predicted boxes: many objects of different classes end up assigned to a single class by mistake, which skews the AP score.

Here is the code, so you can reproduce the bug yourself:

import config
import torch

from model import YOLOv3
from utils import (
    cells_to_bboxes,
)

def get_loader():
    """Build a DataLoader that serves only the first training sample.

    A ``YOLODataset`` is created from ``config.DATASET + "/train.csv"`` with
    three target grid sizes (``IMAGE_SIZE // 32``, ``// 16`` and ``// 8``),
    then restricted to index 0 through ``Subset``.

    Returns:
        A ``DataLoader`` over that one-element subset, with shuffling and
        ``drop_last`` disabled; batch size, worker count and pin-memory
        settings come from ``config``.
    """
    # Imported inside the function so the dependencies are only loaded
    # when a loader is actually requested.
    from dataset import YOLODataset
    from torch.utils.data import DataLoader, Subset

    image_size = config.IMAGE_SIZE
    grid_sizes = [image_size // stride for stride in (32, 16, 8)]

    # NOTE(review): the *train* transforms are used here; if they contain
    # random augmentation, two passes over this loader will not yield the
    # same tensor -- confirm against config.train_transforms.
    full_dataset = YOLODataset(
        config.DATASET + "/train.csv",
        transform=config.train_transforms,
        S=grid_sizes,
        img_dir=config.IMG_DIR,
        label_dir=config.LABEL_DIR,
        anchors=config.ANCHORS,
    )

    # Keep just the first example to make the reproduction small.
    first_sample_only = Subset(full_dataset, [0])

    return DataLoader(
        dataset=first_sample_only,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
        drop_last=False,
    )

def get_evaluation_bboxes(
    loader,
    model,
    iou_threshold,
    anchors,
    threshold,
    box_format="midpoint",
    device="cuda",
):
    """Collect predicted and ground-truth boxes for every image in ``loader``.

    The model is switched to eval mode for the duration of the call and its
    previous training/eval mode is restored afterwards, even if an exception
    is raised part-way through.

    Non-max suppression is not applied in this version: every box returned
    by ``cells_to_bboxes`` for every scale is kept as a prediction, without
    any confidence filtering. Ground-truth boxes are kept only when
    ``box[1] > threshold``.

    Args:
        loader: Iterable yielding ``(x, labels)`` batches. ``labels`` is
            indexed per scale; the entry matching the last prediction scale
            is used for the ground truth.
        model: Module called as ``model(x)``; must return one prediction
            tensor per scale (indexable, with the grid size at ``shape[2]``).
        iou_threshold: Unused in this version (kept for interface
            compatibility; there is no NMS step to consume it).
        anchors: Per-scale anchors; ``anchors[i]`` is multiplied by the grid
            size of scale ``i`` before being passed to ``cells_to_bboxes``.
        threshold: Minimum value of ``box[1]`` for a ground-truth box to be
            kept (presumably the objectness/confidence slot -- confirm
            against ``cells_to_bboxes``).
        box_format: Unused in this version (kept for interface
            compatibility).
        device: Device the inputs and anchors are moved to.

    Returns:
        A tuple ``(all_pred_boxes, all_true_boxes)``. Each entry is
        ``[image_index] + box`` where ``image_index`` counts images across
        the whole loader and ``box`` is a list produced by
        ``cells_to_bboxes``.
    """
    # Remember the caller's mode so evaluation does not silently flip an
    # eval-mode model into training mode on exit.
    was_training = model.training
    # make sure model is in eval before get bboxes
    model.eval()
    train_idx = 0
    all_pred_boxes = []
    all_true_boxes = []
    try:
        for x, labels in loader:
            x = x.to(device)

            with torch.no_grad():
                predictions = model(x)

            batch_size = x.shape[0]
            bboxes = [[] for _ in range(batch_size)]
            # One prediction tensor per scale (three for the stock model).
            num_scales = len(predictions)
            for i in range(num_scales):
                S = predictions[i].shape[2]
                anchor = torch.tensor([*anchors[i]]).to(device) * S
                boxes_scale_i = cells_to_bboxes(
                    predictions[i], anchor, S=S, is_preds=True
                )
                for idx, box in enumerate(boxes_scale_i):
                    bboxes[idx] += box

            # we just want one bbox for each label, not one for each scale;
            # ``anchor`` and ``S`` deliberately carry over from the last
            # scale processed above, matching the label grid chosen here.
            true_bboxes = cells_to_bboxes(
                labels[num_scales - 1], anchor, S=S, is_preds=False
            )

            for idx in range(batch_size):
                # No NMS / confidence filter: keep every predicted box.
                all_pred_boxes.extend(
                    [train_idx] + pred_box for pred_box in bboxes[idx]
                )
                all_true_boxes.extend(
                    [train_idx] + box
                    for box in true_bboxes[idx]
                    if box[1] > threshold
                )
                train_idx += 1
    finally:
        model.train(was_training)

    return all_pred_boxes, all_true_boxes

def main():
    """Print two class histograms for one batch so they can be compared.

    The first printed histogram comes from the class column (index 1) of the
    boxes returned by ``get_evaluation_bboxes``; the second comes from a
    direct ``argmax`` over the class channels of the raw model output.

    To make the comparison like-for-like, both code paths see the same batch
    and the model is in eval mode with gradients disabled for both.
    """
    model = YOLOv3(num_classes=config.NUM_CLASSES).to(config.DEVICE)

    loader = get_loader()

    imgs, labels = next(iter(loader))

    # get_evaluation_bboxes runs the model in eval mode under no_grad; a
    # freshly built module is in training mode, so match it explicitly or
    # the two histograms come from different forward-pass behaviour.
    model.eval()
    with torch.no_grad():
        raw_preds = model(imgs.to(config.DEVICE))

    # Class scores are taken as the trailing NUM_CLASSES channels. A fixed
    # ``6:`` slice drops one class channel when the head has
    # 5 + NUM_CLASSES outputs, which both shifts every index by one and
    # makes the highest class unreachable.
    # NOTE(review): assumes class scores are the last channels of each
    # head -- confirm against the model / cells_to_bboxes layout.
    all_pred_classes = torch.cat(
        [
            torch.argmax(head[..., -config.NUM_CLASSES:], dim=-1).flatten()
            for head in raw_preds
        ],
        dim=0,
    )

    print(all_pred_classes.shape)

    # Feed the very batch used above instead of iterating the loader a
    # second time, so both paths are guaranteed to see identical inputs
    # even if the dataset transforms are random.
    pred_boxes, true_boxes = get_evaluation_bboxes(
        [(imgs, labels)],
        model,
        iou_threshold=config.NMS_IOU_THRESH,
        anchors=config.ANCHORS,
        threshold=config.CONF_THRESHOLD,
    )

    # Entry layout is [image_index] + box, so index 1 is the first element
    # of the box (used here as the predicted class).
    pred_classes = torch.tensor([pred_box[1] for pred_box in pred_boxes])

    print(torch.unique(pred_classes, return_counts=True))
    print(torch.unique(all_pred_classes, return_counts=True))

# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    main()

The code prints two class distributions computed from the same predictions: the first is produced by Aladdin's function (get_evaluation_bboxes), the second by my own argmax over the raw model output. The output is:

(tensor([ 1.,  2.,  4.,  6.,  7.,  8., 13., 16., 19.]), tensor([  49,  704,  169, 5408, 2704,  169,  171,  625,  648]))
(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18], device='cuda:0'), tensor([ 269,  702,  401,  208,  457,  525,  812,  221,  255,  755,  704,  270,
         779,  339,  831,  815,  811,  491, 1002], device='cuda:0'))

P.S. I removed non-max suppression to make the function faster.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions