Skip to content

Commit 0910c6e

Browse files
pgmpablo157321, github-actions[bot], arjunsuresh
authored
Add v5.1 submission checker (mlcommons#2204)
* Add v5.1 submission checker
* [Automated Commit] Format Codebase
* Restore resnet for edge
* Update auto-update-dev.yml
* Update generate_final_report.py
* Update submission_checker.py
* Add whisper configuration to checker
* [Automated Commit] Format Codebase
* Update auto-update-dev.yml
* [Automated Commit] Format Codebase
* Update auto-update-dev.yml
* Only require one of server or interactive
* Add v5.1 seeds
* Fix typo, add commas

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Arjun Suresh <[email protected]>
1 parent b0ce3fe commit 0910c6e

File tree

7 files changed

+315
-244
lines changed

7 files changed

+315
-244
lines changed

.github/workflows/auto-update-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ on:
44
push:
55
branches:
66
- master # Trigger workflow on commits to 'master' branch
7+
78

89
jobs:
910
update-dev:

language/llama3.1-8b/ref_eval.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ def rouge(label, pred):
1717

1818

1919
def niah_em(label, pred):
20-
label_uuids = re.findall(r'[\w]{8}-[\w]{4}-[\w]{4}-[\w]{4}-[\w]{12}', label)
20+
label_uuids = re.findall(
21+
r'[\w]{8}-[\w]{4}-[\w]{4}-[\w]{4}-[\w]{12}', label)
2122
pred_uuids = re.findall(r'[\w]{8}-[\w]{4}-[\w]{4}-[\w]{4}-[\w]{12}', pred)
2223

2324
# https://github.com/hsiehjackson/RULER/blob/main/scripts/eval/synthetic/constants.py#L28
@@ -43,7 +44,8 @@ def qa_em(label, pred):
4344
return {'exact_match': 100.0}
4445

4546
normalized_answer = re.sub(r'\s+', '', answer_substring).lower()
46-
label_entries = [re.sub(r'\s+', '', entry).lower() for entry in label.split('|')]
47+
label_entries = [re.sub(r'\s+', '', entry).lower()
48+
for entry in label.split('|')]
4749

4850
match_found = any(entry in normalized_answer for entry in label_entries)
4951
return {'exact_match': 100.0 if match_found else 0.0}
@@ -63,7 +65,12 @@ def process_row(row):
6365

6466
def run_evaluation(df):
6567
with Pool(cpu_count()) as pool:
66-
accuracies = list(tqdm(pool.imap(process_row, df.to_dict('records')), total=len(df)))
68+
accuracies = list(
69+
tqdm(
70+
pool.imap(
71+
process_row,
72+
df.to_dict('records')),
73+
total=len(df)))
6774

6875
df['accuracy'] = accuracies
6976
return df
@@ -74,10 +81,10 @@ def run_evaluation(df):
7481
df = pd.read_pickle(fname)
7582

7683
df = run_evaluation(df)
77-
#df.to_pickle(str(fname).replace(".pkl", "_eval.pkl"))
84+
# df.to_pickle(str(fname).replace(".pkl", "_eval.pkl"))
7885
print(f"WROTE: {str(fname).replace('.pkl', '_eval.pkl')}")
7986

8087
accuracy = df.accuracy.apply(pd.Series)
8188
print(df.dataset.value_counts())
8289
print(accuracy.describe())
83-
print(df.describe())
90+
print(df.describe())

loadgen/mlperf.conf

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,9 @@ whisper.*.performance_sample_count_override = 1633
2727
3d-unet.*.performance_sample_count_override = 0
2828

2929
# Set seeds.
30-
*.*.qsl_rng_seed = 6023615788873153749
31-
*.*.sample_index_rng_seed = 15036839855038426416
32-
*.*.schedule_rng_seed = 9933818062894767841
30+
*.*.qsl_rng_seed = 1780908523862526354
31+
*.*.sample_index_rng_seed = 14771362308971278857
32+
*.*.schedule_rng_seed = 18209322760996052031
3333

3434
# Set seeds for TEST_05 (not needed from v5.0 onwards)
3535
*.*.test05_qsl_rng_seed = 7975553102935885558

text_to_image/tools/sample_ids.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ def get_args():
2020
default=10,
2121
help="Dataset download location")
2222
parser.add_argument(
23-
"--seed", "-s", type=int, default=265673710, help="Dataset download location"
23+
"--seed", "-s", type=int, default=927722784, help="Dataset download location"
2424
)
2525
args = parser.parse_args()
2626
return args

text_to_image/tools/sample_ids.txt

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
1-
716
2-
2387
3-
3694
4-
273
5-
1221
6-
2942
7-
2047
8-
1691
9-
1510
10-
4399
1+
2747
2+
2235
3+
2165
4+
1515
5+
1538
6+
1367
7+
2419
8+
4629
9+
3657
10+
4532

tools/submission/generate_final_report.py

Lines changed: 33 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -135,26 +135,24 @@ def main():
135135
[
136136
"resnet",
137137
"retinanet",
138-
"3d-unet-99",
139-
"3d-unet-99.9",
140-
"rnnt",
141138
"bert-99",
142139
"bert-99.9",
143140
"dlrm-v2-99",
144141
"dlrm-v2-99.9",
145-
"gptj-99",
146-
"gptj-99.9",
147-
"stable-diffusion-xl",
142+
"3d-unet-99",
143+
"3d-unet-99.9",
144+
"llama3.1-8b",
148145
"llama2-70b-99",
149146
"llama2-70b-99.9",
150-
"llama2-70b-interactive-99",
151-
"llama2-70b-interactive-99.9",
152-
"llama3.1-405b",
147+
"stable-diffusion-xl",
153148
"mixtral-8x7b",
154-
"pointpainting",
149+
"llama3.1-405b",
155150
"rgat",
151+
"pointpainting",
152+
"deepseek-r1",
153+
"whisper"
156154
],
157-
["SingleStream", "MultiStream", "Server", "Offline"],
155+
["SingleStream", "MultiStream", "Server", "Offline", "Interactive"],
158156
[
159157
"Latency (ms)",
160158
"Samples/s",
@@ -165,14 +163,13 @@ def main():
165163
],
166164
]
167165

168-
if args.version == "4.1":
166+
if args.version == "5.0":
169167
filter_scenarios = {
170168
"datacenter": {
171169
"resnet": ["Server", "Offline"],
172170
"retinanet": ["Server", "Offline"],
173-
"rnnt": ["Server", "Offline"],
174-
"bert-99": ["Server", "Offline"],
175-
"bert-99.9": ["Server", "Offline"],
171+
"bert-99": [],
172+
"bert-99.9": [],
176173
"dlrm-v2-99": ["Server", "Offline"],
177174
"dlrm-v2-99.9": ["Server", "Offline"],
178175
"3d-unet-99": ["Offline"],
@@ -182,21 +179,32 @@ def main():
182179
"stable-diffusion-xl": ["Server", "Offline"],
183180
"llama2-70b-99": ["Server", "Offline"],
184181
"llama2-70b-99.9": ["Server", "Offline"],
182+
"llama2-70b-interactive-99": ["Server", "Offline"],
183+
"llama2-70b-interactive-99.9": ["Server", "Offline"],
185184
"mixtral-8x7b": ["Server", "Offline"],
185+
"rgat": ["Offline"],
186+
"llama3.1-405b": ["Offline", "Server"],
187+
"pointpainting": []
186188
},
187189
"edge": {
188190
"resnet": ["SingleStream", "MultiStream", "Offline"],
189191
"retinanet": ["SingleStream", "MultiStream", "Offline"],
190-
"rnnt": ["SingleStream", "Offline"],
191192
"bert-99": ["SingleStream", "Offline"],
192-
"bert-99.9": [],
193+
"bert-99.9": ["SingleStream", "Offline"],
193194
"dlrm-v2-99": [],
194195
"dlrm-v2-99.9": [],
195196
"3d-unet-99": ["SingleStream", "Offline"],
196197
"3d-unet-99.9": ["SingleStream", "Offline"],
198+
"llama2-70b-99": [],
199+
"llama2-70b-99.9": [],
200+
"llama2-70b-interactive-99": [],
201+
"llama2-70b-interactive-99.9": [],
202+
"llama3.1-405b": [],
197203
"gptj-99": ["SingleStream", "Offline"],
198204
"gptj-99.9": ["SingleStream", "Offline"],
205+
"rgat": [],
199206
"stable-diffusion-xl": ["SingleStream", "Offline"],
207+
"pointpainting": ["SingleStream"],
200208
},
201209
}
202210
else:
@@ -210,17 +218,16 @@ def main():
210218
"dlrm-v2-99.9": ["Server", "Offline"],
211219
"3d-unet-99": ["Offline"],
212220
"3d-unet-99.9": ["Offline"],
213-
"gptj-99": ["Server", "Offline"],
214-
"gptj-99.9": ["Server", "Offline"],
215221
"stable-diffusion-xl": ["Server", "Offline"],
216-
"llama2-70b-99": ["Server", "Offline"],
217-
"llama2-70b-99.9": ["Server", "Offline"],
218-
"llama2-70b-interactive-99": ["Server", "Offline"],
219-
"llama2-70b-interactive-99.9": ["Server", "Offline"],
222+
"llama2-70b-99": ["Server", "Offline", "Interactive"],
223+
"llama2-70b-99.9": ["Server", "Offline", "Interactive"],
220224
"mixtral-8x7b": ["Server", "Offline"],
221225
"rgat": ["Offline"],
222226
"llama3.1-405b": ["Offline", "Server"],
223-
"pointpainting": []
227+
"pointpainting": [],
228+
"llama3.1-8b": ["Server", "Offline"],
229+
"deepseek-r1": ["Server", "Offline"],
230+
"whisper": ["Offline"],
224231
},
225232
"edge": {
226233
"resnet": ["SingleStream", "MultiStream", "Offline"],
@@ -233,14 +240,12 @@ def main():
233240
"3d-unet-99.9": ["SingleStream", "Offline"],
234241
"llama2-70b-99": [],
235242
"llama2-70b-99.9": [],
236-
"llama2-70b-interactive-99": [],
237-
"llama2-70b-interactive-99.9": [],
238243
"llama3.1-405b": [],
239-
"gptj-99": ["SingleStream", "Offline"],
240-
"gptj-99.9": ["SingleStream", "Offline"],
241244
"rgat": [],
242245
"stable-diffusion-xl": ["SingleStream", "Offline"],
243246
"pointpainting": ["SingleStream"],
247+
"llama3.1-8b": ["SingleStream", "Offline"],
248+
"whisper": ["Offline"],
244249
},
245250
}
246251

0 commit comments

Comments
 (0)