Commit 63bbb8d

Merge branch 'master' of https://github.com/krai/axs2mlperf
2 parents: a348087 + 01c2367
1 file changed: 63 additions, 29 deletions

submitter/code_axs.py

Lines changed: 63 additions & 29 deletions
@@ -36,7 +36,35 @@ def scenarios_from_sut_type_and_task(sut_system_type, task):
     return scenarios
 
 
-def list_experiment_entries( power, sut_name, sut_system_type, task, division, experiment_tags, framework, device, loadgen_dataset_size, loadgen_buffer_size, scenarios, model_name=None, mlperf_model_name=None, generate=False, infer_from_ss=False, extra_common_attributes=None, per_scenario_attributes=None, require_compliance=None, __entry__=None):
+def list_experiment_entries( power, sut_name, sut_system_type, task, division, experiment_tags, framework, device, loadgen_dataset_size, loadgen_buffer_size, scenarios, model_name=None, mlperf_model_name=None, generate=False, infer_from_ss=False, extra_common_attributes=None, per_scenario_attributes=None, require_compliance=None, substitution_map=None, __entry__=None):
+    """Generate a list of entries that are expected to be used for a particular submission.
+
+        --generate+ enforces immediate creation of those entries (off by default)
+
+        --division=closed among other things requires compliance tests to be run ; --require_compliance- overrides this.
+        --division=open among other things does not require compliance tests ; --require_compliance+ overrides this.
+
+Usage examples:
+
+            # This submission is supposed to be complete on its own: {[nonInteractive_]Server,Offline} x {Accuracy,Performance,TEST06}
+            axs byname submitter , list_experiment_entries --framework=openai --task=llama2 --division=open --require_compliance+ --sut_name=xd670_h200_x8_sglang --program_name=llama2_using_openai_loadgen --sut_system_type=datacenter --submitter=Krai --submission_entry_name=laid_out_sglang --extra_common_attributes,::=mlperf_model_name:llama3_1-70b-fp8_pre
+
+            # While this one has 3 explicit experiments: {[Interactive_]Server} x {Accuracy,Performance,TEST06}, and we want to Infer (import with substitution) Offline from the previous group.
+            axs byname submitter , list_experiment_entries --framework=openai --task=llama2 --division=open --require_compliance+ --sut_name=xd670_h200_x8_sglang --program_name=llama2_using_openai_loadgen --sut_system_type=datacenter --submitter=Krai --submission_entry_name=laid_out_sglang --extra_common_attributes,::=mlperf_model_name:llama3_1-70b-interactive-fp8_pre
+
+            # We use --substitution_map to infer Offline experiments from another mlperf_model_name while using "own" Server experiments:
+            axs byname submitter , list_experiment_entries --framework=openai --task=llama2 --division=open --require_compliance+ --sut_name=xd670_h200_x8_sglang --program_name=llama2_using_openai_loadgen --sut_system_type=datacenter --submitter=Krai --submission_entry_name=laid_out_sglang --extra_common_attributes,::=mlperf_model_name:llama3_1-70b-interactive-fp8_pre ---substitution_map='{"mlperf_model_name":{"llama3_1-70b-interactive-fp8_pre":"llama3_1-70b-fp8_pre"}}'
+    """
+
+    if infer_from_ss:
+        substitution_map = {
+            "loadgen_scenario": {
+                "Offline": "SingleStream",
+                "MultiStream": "SingleStream"
+            }
+        }
+    elif substitution_map is None:
+        substitution_map = {}
 
     common_attributes = {
         "framework": framework,
         "task": task,
@@ -102,21 +130,34 @@ def list_experiment_entries( power, sut_name, sut_system_type, task, division, e
         joined_query = ','.join( list_query )
 
         candidate_entry = __entry__.get_kernel().byquery(joined_query, False)
-        inferrable_case = infer_from_ss and sc!="SingleStream"
+
+        inferrable_case, inferred_entry = False, False
+        for substituted_param in substitution_map:
+            for target_value in substitution_map[substituted_param]:
+                if f"{substituted_param}={target_value}" in joined_query:
+                    source_value = substitution_map[substituted_param][target_value]
+                    inferred_query = joined_query.replace(f"{substituted_param}={target_value}",f"{substituted_param}={source_value}")
+                    inferrable_case = [ substituted_param, target_value, source_value, inferred_query ]    # 4-tuple
+
+        if inferrable_case:
+            [ substituted_param, target_value, source_value, inferred_query ] = inferrable_case
+            inferred_entry = __entry__.get_kernel().byquery(inferred_query, False)
+            inferrable_case.append( inferred_entry )    # extended to 5-tuple ( still adding None if not found )
 
         if generate:
            if inferrable_case and not candidate_entry:
                 print(f"Entry {joined_query} is missing, but INFERRABLE, adding it as a mapping\n")
-                ss_entry = __entry__.get_kernel().byquery(joined_query.replace(f"loadgen_scenario={sc}","loadgen_scenario=SingleStream"), True)
-                experiment_entries.append( [ss_entry, sc] )
+                [ substituted_param, target_value, source_value, inferred_query, inferred_entry ] = inferrable_case
+                candidate_entry = inferred_entry or __entry__.get_kernel().byquery(inferred_query, True)
+                candidate_entry[substituted_param] = target_value
            else:
                 if candidate_entry:
                     print(f"Entry {joined_query} was already PRESENT, adding it to the list\n")
                 else:
                     print(f"Entry {joined_query} was MISSING and not inferrable, generating it now\n")
                 candidate_entry = candidate_entry or __entry__.get_kernel().byquery(joined_query, True)    # now generating for real
 
-                experiment_entries.append( candidate_entry )
+            experiment_entries.append( candidate_entry )
 
         else:
             if candidate_entry:
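
The nested loop above performs a purely textual rewrite of the query: wherever the joined query mentions the target value of a substituted parameter, that value is swapped for the source value, and the resulting inferred_query is looked up instead. A self-contained sketch of that rewriting step with hypothetical input values (byquery() lookups and the entry re-labelling are omitted):

    # Hypothetical inputs, mirroring the third usage example in the docstring above.
    substitution_map = {"mlperf_model_name": {"llama3_1-70b-interactive-fp8_pre": "llama3_1-70b-fp8_pre"}}
    joined_query     = "task=llama2,loadgen_scenario=Offline,mlperf_model_name=llama3_1-70b-interactive-fp8_pre"

    inferrable_case = False
    for substituted_param in substitution_map:
        for target_value in substitution_map[substituted_param]:
            if f"{substituted_param}={target_value}" in joined_query:
                source_value   = substitution_map[substituted_param][target_value]
                inferred_query = joined_query.replace(f"{substituted_param}={target_value}",
                                                      f"{substituted_param}={source_value}")
                inferrable_case = [substituted_param, target_value, source_value, inferred_query]

    # inferred_query now points at the "source" experiment:
    #   task=llama2,loadgen_scenario=Offline,mlperf_model_name=llama3_1-70b-fp8_pre
    # list_experiment_entries() looks this query up with byquery(); under --generate+ the found
    # entry is re-labelled with the target value (candidate_entry[substituted_param] = target_value).
    print(inferrable_case)
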
@@ -128,7 +169,9 @@ def list_experiment_entries( power, sut_name, sut_system_type, task, division, e
 
             print(f"[{presence_msg}]\t\taxs byquery {joined_query}")
             if candidate_entry:
-                print(f"Location:\t\t{candidate_entry.get_path()}")
+                print(f"Present Location:\t\t{candidate_entry.get_path()}")
+            elif inferred_entry:
+                print(f"Inferred Location:\t\t{inferred_entry.get_path()}")
             print("")
 
     return experiment_entries
@@ -157,7 +200,7 @@ def get_testing_entry(experiment_entry):
     return testing_entry
 
 
-def lay_out(experiment_entries, division, submitter, log_truncation_script_path, submission_checker_path, sut_path, compliance_path, scenarios, power=False, infer_from_ss=False, model_meta_data=None, submitted_tree_path=None, model_mapping_path=None, __entry__=None):
+def lay_out(experiment_entries, division, submitter, log_truncation_script_path, submission_checker_path, sut_path, compliance_path, scenarios, power=False, model_meta_data=None, submitted_tree_path=None, model_mapping_path=None, __entry__=None):
 
     submitter_path = make_local_dir( [ division, submitter ], submitted_tree_path)
     code_path = make_local_dir( [ division, submitter, 'code'], submitted_tree_path)
@@ -177,11 +220,7 @@ def lay_out(experiment_entries, division, submitter, log_truncation_script_path,
 
     for experiment_entry in experiment_entries:
 
-        if type(experiment_entry)==list:
-            experiment_entry, target_scenario = experiment_entry    # unpacking a pair to infer target_scenario
-        else:
-            target_scenario = experiment_entry['loadgen_scenario']
-
+        target_scenario = experiment_entry['loadgen_scenario']
         scenario = target_scenario.lower()
 
         experiment_parameters = []
@@ -406,13 +445,21 @@ def run_checker(submitted_tree_path, division, submitter, submission_checker_pat
         logfile.write(result_checker)
 
 
-def full_run(experiment_entries, division, submitter, log_truncation_script_path, submission_checker_path, checker_log_path, sut_path, compliance_path, scenarios, power=False, infer_from_ss=False, model_meta_data=None, submitted_tree_path=None, model_mapping_path=None, __entry__=None):
+def full_run(experiment_entries, division, submitter, log_truncation_script_path, submission_checker_path, checker_log_path, sut_path, compliance_path, scenarios, power=False, model_meta_data=None, submitted_tree_path=None, model_mapping_path=None, __entry__=None):
+    """First run lay_out() to build the submission tree, then run_checker() to check its integrity.
+
+Usage examples:
+
+            # Here we use --substitution_map to infer Offline experiments from another mlperf_model_name while using "own" Server experiments.
+            # In order to pacify submission_checker we pass in --model_mapping_path that maps all unrecognized "open" models onto "closed" ones known by the script.
+            axs byname submitter , full_run --framework=openai --task=llama2 --division=open --require_compliance+ --sut_name=xd670_h200_x8_sglang --program_name=llama2_using_openai_loadgen --sut_system_type=datacenter --submitter=Krai --submission_entry_name=laid_out_sglang --extra_common_attributes,::=mlperf_model_name:llama3_1-70b-interactive-fp8_pre ---substitution_map='{"mlperf_model_name":{"llama3_1-70b-interactive-fp8_pre":"llama3_1-70b-fp8_pre"}}' --model_mapping_path=$HOME/work_collection/sglang_collection/model_mapping.json
+    """
 
     if os.path.exists(submitted_tree_path):
         print("The path " + submitted_tree_path + " exists, skipping lay_out()")
     else:
         print("Run lay_out in {submitted_tree_path} ...")
-        lay_out(experiment_entries, division, submitter, log_truncation_script_path, submission_checker_path, sut_path, compliance_path, scenarios, power, infer_from_ss, model_meta_data, submitted_tree_path, model_mapping_path, __entry__)
+        lay_out(experiment_entries, division, submitter, log_truncation_script_path, submission_checker_path, sut_path, compliance_path, scenarios, power, model_meta_data, submitted_tree_path, model_mapping_path, __entry__)
 
     print("Run checker...")
     run_checker(submitted_tree_path, division, submitter, submission_checker_path, checker_log_path, __entry__)
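
The --model_mapping_path file mentioned in the docstring tells the MLPerf submission_checker which known "closed" model each unrecognized "open" model name corresponds to. A hypothetical sketch of producing such a file follows; the flat {open_model: closed_model} JSON shape and the closed-side names used here are assumptions for illustration, not taken from this commit:

    import json

    # Hypothetical mapping: each "open" model name used in this submission (left) is mapped onto
    # a "closed" model name that submission_checker already recognizes (right).  The right-hand
    # names below are illustrative; the correct ones depend on the MLPerf Inference round.
    model_mapping = {
        "llama3_1-70b-fp8_pre":             "llama2-70b-99",
        "llama3_1-70b-interactive-fp8_pre": "llama2-70b-99",
    }

    with open("model_mapping.json", "w") as handle:
        json.dump(model_mapping, handle, indent=4)
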
@@ -422,17 +469,9 @@ def generate_readmes_for_measurements(experiment_entries, division, submitter, s
 
     readme_template_path = __entry__.get_path("README_template.md")
 
-    target_scenario = None
-
     for experiment_entry in experiment_entries:
 
-        if type(experiment_entry)==list:
-            experiment_entry, target_scenario = experiment_entry    # unpacking a pair to infer target_scenario
-        else:
-            target_scenario = experiment_entry['loadgen_scenario']
-
-        scenario = target_scenario.lower()
-
+        scenario = experiment_entry['loadgen_scenario'].lower()
 
         src_dir = experiment_entry.get_path("")
         sut_name = experiment_entry.get('sut_name')
@@ -534,12 +573,7 @@ def generate_table(experiment_entries, division, submitter, power, __entry__):
     table_data = []
     for experiment_entry in experiment_entries:
 
-        if type(experiment_entry)==list:
-            experiment_entry, target_scenario = experiment_entry    # unpacking a pair to infer target_scenario
-        else:
-            target_scenario = experiment_entry['loadgen_scenario']
-
-        scenario = target_scenario
+        scenario = experiment_entry['loadgen_scenario']
 
         entry_path = experiment_entry.get_path("")
         if power:
