Skip to content

Commit d82d7d4

Browse files
authored
Fix ds-r1 acc checker output format not captured by submission checker (mlcommons#2285)
1 parent a817ac5 commit d82d7d4

File tree

1 file changed

+4
-6
lines changed

1 file changed

+4
-6
lines changed

language/deepseek-r1/eval_accuracy.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -770,14 +770,12 @@ def print_evaluation_results(df_evaluated: pd.DataFrame,
770 770   # 'evaluated': int(evaluated),
771 771   # 'correct': int(correct),
772 772   'exact_match': float(accuracy),
773     - 'TOKENS_PER_SAMPLE': mean_output_len,
    773 + 'tokens_per_sample': mean_output_len,
774 774   'num-samples': len(df_evaluated),
775 775   }
776     -
777     - result_str = json.dumps(results, indent=2)
778     - print(f"\nEvaluation Results: {result_str}")
779     -
780     - return results
    776 +
    777 + print("\nResults\n")
    778 + print(results)
781 779
782 780
783 781   def process_and_save_dataframe(df: pd.DataFrame,

0 commit comments

Comments
 (0)