Skip to content

Commit b4b5cf5

Browse files
authored
Add np.int32 dtype support for GPTJ output (mlcommons#1437)
1 parent 6a6a105 commit b4b5cf5

File tree

1 file changed

+95
-87
lines changed

1 file changed

+95
-87
lines changed

language/gpt-j/evaluation.py

Lines changed: 95 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -1,87 +1,95 @@
1-
from dataset import Dataset
2-
import os
3-
import time
4-
import numpy as np
5-
import json
6-
import nltk
7-
import array
8-
import torch
9-
from torch.nn.functional import pad
10-
from torch.utils.data import DataLoader
11-
import evaluate
12-
import argparse
13-
import nltk
14-
from transformers import AutoModelForCausalLM, AutoTokenizer
15-
16-
17-
def get_args(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the original zero-argument call did.

    Returns:
        argparse.Namespace exposing ``mlperf_accuracy_file``,
        ``dataset_file`` and ``verbose``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--mlperf-accuracy-file", required=True,
                        help="path to mlperf_log_accuracy.json")
    parser.add_argument("--dataset-file", required=True,
                        help="path to cnn_eval.json")
    parser.add_argument("--verbose", action="store_true",
                        help="verbose messages")
    # Forwarding argv lets callers (and tests) drive the parser without
    # touching sys.argv; parse_args(None) falls back to sys.argv[1:].
    return parser.parse_args(argv)
25-
26-
def postprocess_text(preds, targets):
    """Normalize predictions and references before ROUGE scoring.

    Every string is stripped of surrounding whitespace and then rewritten
    with one sentence per line, using ``nltk.sent_tokenize`` to find the
    sentence boundaries.

    Args:
        preds: Iterable of predicted summary strings.
        targets: Iterable of reference summary strings.

    Returns:
        A ``(preds, targets)`` tuple of two new lists of strings.
    """
    stripped_preds = []
    for text in preds:
        stripped_preds.append(text.strip())

    stripped_targets = []
    for text in targets:
        stripped_targets.append(text.strip())

    # rougeLSum expects newline after each sentence
    split_preds = []
    for text in stripped_preds:
        split_preds.append("\n".join(nltk.sent_tokenize(text)))

    split_targets = []
    for text in stripped_targets:
        split_targets.append("\n".join(nltk.sent_tokenize(text)))

    return split_preds, split_targets
35-
36-
37-
def main():
    """Compute ROUGE for an MLPerf accuracy log and print the scores.

    Reads the command-line options via ``get_args``, decodes each logged
    prediction (hex-encoded int64 token ids) with the GPT-J tokenizer,
    pairs it with the dataset target selected by ``qsl_idx`` and prints a
    dict of mean ROUGE scores (scaled by 100, rounded to 4 places) plus
    ``gen_len`` and ``gen_num``.
    """
    args = get_args()

    rouge = evaluate.load("rouge")
    nltk.download('punkt')

    tokenizer = AutoTokenizer.from_pretrained(
        "EleutherAI/gpt-j-6B",
        model_max_length=2048,
        padding_side="left",
        use_fast=False,
    )
    tokenizer.pad_token = tokenizer.eos_token

    dataset = Dataset(args.dataset_file)
    all_targets = dataset.targets

    with open(args.mlperf_accuracy_file, "r") as log_file:
        log_entries = json.load(log_file)

    references = []
    token_id_arrays = []
    for entry in log_entries:
        # qsl_idx selects the dataset target that belongs to this entry.
        references.append(all_targets[entry['qsl_idx']])
        # 'data' holds the output token ids as a hex string of int64 values.
        raw_bytes = bytes.fromhex(entry['data'])
        token_id_arrays.append(np.frombuffer(raw_bytes, np.int64))

    decoded_preds = tokenizer.batch_decode(
        token_id_arrays, skip_special_tokens=True)

    preds, targets = postprocess_text(decoded_preds, references)

    # use_aggregator=False returns per-sample scores; they are averaged below.
    per_sample_scores = rouge.compute(
        predictions=preds,
        references=targets,
        use_stemmer=True,
        use_aggregator=False,
    )
    result = {}
    for score_name, values in per_sample_scores.items():
        result[score_name] = round(np.mean(values) * 100, 4)

    # gen_len sums the string lengths of the post-processed predictions.
    result["gen_len"] = np.sum([len(pred) for pred in preds])
    result["gen_num"] = len(preds)
    print("\nResults\n")
    print(result)


if __name__ == "__main__":
    main()
1+
from dataset import Dataset
2+
import os
3+
import time
4+
import numpy as np
5+
import json
6+
import nltk
7+
import array
8+
import torch
9+
from torch.nn.functional import pad
10+
from torch.utils.data import DataLoader
11+
import evaluate
12+
import argparse
13+
import nltk
14+
from transformers import AutoModelForCausalLM, AutoTokenizer
15+
16+
17+
def get_args(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the original zero-argument call did.

    Returns:
        argparse.Namespace exposing ``mlperf_accuracy_file``,
        ``dataset_file``, ``verbose`` and ``dtype`` (``"int32"`` or
        ``"int64"``, defaulting to ``"int64"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--mlperf-accuracy-file", required=True,
                        help="path to mlperf_log_accuracy.json")
    parser.add_argument("--dataset-file", required=True,
                        help="path to cnn_eval.json")
    parser.add_argument("--verbose", action="store_true",
                        help="verbose messages")
    parser.add_argument("--dtype", default="int64",
                        help="dtype of the accuracy log",
                        choices=["int32", "int64"])
    # Forwarding argv lets callers (and tests) drive the parser without
    # touching sys.argv; parse_args(None) falls back to sys.argv[1:].
    return parser.parse_args(argv)
30+
31+
32+
def postprocess_text(preds, targets):
    """Strip each text and put every sentence on its own line.

    Args:
        preds: Iterable of predicted summary strings.
        targets: Iterable of reference summary strings.

    Returns:
        A ``(preds, targets)`` tuple of two new lists in which each string
        has been stripped and its sentences (as found by
        ``nltk.sent_tokenize``) joined with newlines.
    """
    def _one_sentence_per_line(texts):
        # rougeLSum expects newline after each sentence
        return ["\n".join(nltk.sent_tokenize(text)) for text in texts]

    trimmed_preds = [text.strip() for text in preds]
    trimmed_targets = [text.strip() for text in targets]

    return (
        _one_sentence_per_line(trimmed_preds),
        _one_sentence_per_line(trimmed_targets),
    )
41+
42+
43+
def main():
    """Compute ROUGE for an MLPerf accuracy log and print the scores.

    Reads the command-line options via ``get_args``, decodes each logged
    prediction (hex-encoded token ids stored as int32 or int64, selected
    by ``--dtype``) with the GPT-J tokenizer, pairs it with the dataset
    target selected by ``qsl_idx`` and prints a dict of mean ROUGE scores
    (scaled by 100, rounded to 4 places) plus ``gen_len`` and ``gen_num``.
    """
    args = get_args()

    rouge = evaluate.load("rouge")
    nltk.download('punkt')

    tokenizer = AutoTokenizer.from_pretrained(
        "EleutherAI/gpt-j-6B",
        model_max_length=2048,
        padding_side="left",
        use_fast=False,
    )
    tokenizer.pad_token = tokenizer.eos_token

    dataset = Dataset(args.dataset_file)
    all_targets = dataset.targets

    with open(args.mlperf_accuracy_file, "r") as log_file:
        log_entries = json.load(log_file)

    # Token ids are read as int64 unless --dtype int32 was requested.
    token_dtype = np.int32 if args.dtype == "int32" else np.int64

    # qsl_idx selects the dataset target that belongs to each log entry.
    references = [all_targets[entry['qsl_idx']] for entry in log_entries]
    # 'data' holds the output token ids as a hex string of raw integers.
    token_id_arrays = [
        np.frombuffer(bytes.fromhex(entry['data']), token_dtype)
        for entry in log_entries
    ]

    decoded_preds = tokenizer.batch_decode(
        token_id_arrays, skip_special_tokens=True)

    preds, targets = postprocess_text(decoded_preds, references)

    # use_aggregator=False returns per-sample scores; they are averaged below.
    per_sample_scores = rouge.compute(
        predictions=preds,
        references=targets,
        use_stemmer=True,
        use_aggregator=False,
    )
    result = {
        score_name: round(np.mean(values) * 100, 4)
        for score_name, values in per_sample_scores.items()
    }

    # gen_len sums the string lengths of the post-processed predictions.
    result["gen_len"] = np.sum([len(pred) for pred in preds])
    result["gen_num"] = len(preds)
    print("\nResults\n")
    print(result)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)