Incorrect implementation of get_evaluation_bboxes leads to misleadingly high mAP

While implementing a YOLO model from scratch by following the tutorial, I found that the mAP score reported in "YOLOv3 from Scratch" is misleadingly high. The `get_evaluation_bboxes` function assigns classes to predicted boxes incorrectly: many objects of different classes end up in the same class by mistake, which skews the AP score.

Here is the code, so you can reproduce the bug yourself:

```python
import config
import torch

from model import YOLOv3
from utils import (
    cells_to_bboxes,
)

def get_loader():
    """Build a DataLoader that serves only the first training sample.

    The full training dataset is constructed from the paths and transforms
    in ``config`` and then restricted to index 0, so every pass over the
    loader sees that one sample. Shuffling is disabled and no batch is
    dropped.

    Returns:
        A ``torch.utils.data.DataLoader`` over a one-element ``Subset``.
    """
    from dataset import YOLODataset
    from torch.utils.data import DataLoader, Subset

    image_size = config.IMAGE_SIZE
    # One grid size per prediction scale: strides 32, 16 and 8.
    grid_sizes = [image_size // stride for stride in (32, 16, 8)]

    full_train_set = YOLODataset(
        config.DATASET + "/train.csv",
        transform=config.train_transforms,
        S=grid_sizes,
        img_dir=config.IMG_DIR,
        label_dir=config.LABEL_DIR,
        anchors=config.ANCHORS,
    )

    # Keep just the first sample so the reproduction stays small.
    first_sample_only = Subset(full_train_set, [0])

    return DataLoader(
        dataset=first_sample_only,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
        drop_last=False,
    )

def get_evaluation_bboxes(
    loader,
    model,
    iou_threshold,
    anchors,
    threshold,
    box_format="midpoint",
    device="cuda",
):
    """Collect predicted and ground-truth boxes for every image in ``loader``.

    Each returned box is prefixed with a running image index so boxes from
    different images can be told apart later. Predicted boxes are returned
    unfiltered: this variant performs no non-max suppression and does not
    apply ``threshold`` to predictions.

    The model is switched to eval mode for the duration of the call and is
    put back into whatever mode it was in beforehand, even if an error is
    raised while iterating.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches. ``labels[2]``
            is the entry decoded as ground truth.
        model: Network whose output is indexable per scale (indices 0-2 are
            read). Must expose ``eval()``, ``train(mode)`` and ``training``.
        iou_threshold: Not used by this variant; kept so existing call sites
            keep working.
        anchors: Per-scale anchors, indexed as ``anchors[i]``.
        threshold: A ground-truth box is kept only if ``box[1] > threshold``.
        box_format: Not used by this variant; kept so existing call sites
            keep working.
        device: Device the images and anchors are moved to.

    Returns:
        Tuple ``(all_pred_boxes, all_true_boxes)``; each element is a list of
        ``[image_index, *box]`` lists.
        NOTE(review): the box layout is presumably
        ``[class, score, x, y, w, h]`` - confirm against ``cells_to_bboxes``.
    """
    # Remember the caller's mode so an evaluation-only caller is not silently
    # left in training mode afterwards.
    was_training = model.training
    model.eval()

    train_idx = 0
    all_pred_boxes = []
    all_true_boxes = []
    try:
        for x, labels in loader:
            x = x.to(device)

            with torch.no_grad():
                predictions = model(x)

            batch_size = x.shape[0]
            bboxes = [[] for _ in range(batch_size)]
            for i in range(3):
                S = predictions[i].shape[2]
                anchor = torch.tensor([*anchors[i]]).to(device) * S
                boxes_scale_i = cells_to_bboxes(
                    predictions[i], anchor, S=S, is_preds=True
                )
                # Merge the boxes of this scale into each image's list.
                for idx, boxes_for_image in enumerate(boxes_scale_i):
                    bboxes[idx] += boxes_for_image

            # We just want one bbox for each label, not one for each scale.
            # ``S`` and ``anchor`` still hold the values of the last scale
            # (index 2), which is the scale ``labels[2]`` belongs to.
            true_bboxes = cells_to_bboxes(
                labels[2], anchor, S=S, is_preds=False
            )

            for idx in range(batch_size):
                all_pred_boxes.extend(
                    [[train_idx] + pred_box for pred_box in bboxes[idx]]
                )
                all_true_boxes.extend(
                    [
                        [train_idx] + box
                        for box in true_bboxes[idx]
                        if box[1] > threshold
                    ]
                )
                train_idx += 1
    finally:
        model.train(was_training)

    return all_pred_boxes, all_true_boxes

def main():
    """Print the class distribution of one batch computed in two ways.

    The first printed distribution comes from ``get_evaluation_bboxes``; the
    second is a direct argmax over the class scores of the raw model output.
    Both are computed from the very same batch with the model in eval mode,
    so any difference between them is caused by the box-decoding code and
    not by the model mode or by data augmentation.
    """
    model = YOLOv3(num_classes=config.NUM_CLASSES).to(config.DEVICE)

    loader = get_loader()

    # Draw the batch exactly once. Iterating the loader a second time would
    # re-run the training transforms and could hand a different augmented
    # image to the second code path.
    imgs, labels = next(iter(loader))

    # get_evaluation_bboxes evaluates the model in eval mode, so the reference
    # forward pass has to use eval mode too; otherwise mode-dependent layers
    # make the two sets of predictions differ.
    model.eval()
    with torch.no_grad():
        raw_preds = model(imgs.to(config.DEVICE))

    # Class scores start at index 5 of the last dimension.
    # NOTE(review): assumes the head layout [obj, x, y, w, h, class scores...]
    # - confirm against cells_to_bboxes. Slicing from 6 drops the first class
    # and shifts every remaining class index down by one.
    all_pred_classes = torch.cat(
        [torch.argmax(head[..., 5:], dim=-1).flatten() for head in raw_preds],
        dim=0,
    )

    print(all_pred_classes.shape)

    # A one-element list is a valid "loader" here: the function only iterates
    # over (images, labels) pairs.
    pred_boxes, _ = get_evaluation_bboxes(
        [(imgs, labels)],
        model,
        iou_threshold=config.NMS_IOU_THRESH,
        anchors=config.ANCHORS,
        threshold=config.CONF_THRESHOLD,
        device=config.DEVICE,
    )

    # Index 0 is the image index added by get_evaluation_bboxes; index 1 is
    # the first field of the decoded box, read here as the predicted class.
    pred_classes = torch.tensor([pred_box[1] for pred_box in pred_boxes])

    print(torch.unique(pred_classes, return_counts=True))
    print(torch.unique(all_pred_classes, return_counts=True))


# Run the reproduction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

```
The code prints two class distributions. The predictions are the same in both cases, processed once by Aladdin's function (first tuple below) and once by my own code (second tuple below). The output is:

```
(tensor([ 1.,  2.,  4.,  6.,  7.,  8., 13., 16., 19.]), tensor([  49,  704,  169, 5408, 2704,  169,  171,  625,  648]))
(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18], device='cuda:0'), tensor([ 269,  702,  401,  208,  457,  525,  812,  221,  255,  755,  704,  270,
         779,  339,  831,  815,  811,  491, 1002], device='cuda:0'))
```

P.S. I removed non-max suppression to make the function faster.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Incorrect implementation of get_evaluation_bboxes leads to misleadingly high mAP #191

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Incorrect implementation of get_evaluation_bboxes leads to misleadingly high mAP #191

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions