Skip to content

Commit b115921

Browse files
authored
Merge pull request #65 from krai/llama3.1-8b-cnndm
Add support to run Llama3.1-8B model on CNNDM dataset
2 parents fa84382 + 4da25a8 commit b115921

File tree

4 files changed

+161
-1
lines changed

4 files changed

+161
-1
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import json
2+
3+
from transformers import AutoTokenizer
4+
5+
def get_accuracy_dict(accuracy_dict_full):
    """Filter a full accuracy report down to the headline metrics.

    Parameters
    ----------
    accuracy_dict_full : dict
        Complete accuracy report (e.g. rouge scores, gen_len, ...).

    Returns
    -------
    dict
        Only the "rougeL", "exact_match" and "tokens_per_sample" entries
        that are actually present in the input; missing keys are skipped.
    """
    # Dict comprehension replaces the manual .keys() loop — same key order,
    # same membership filter, idiomatic single pass over items.
    wanted = ("rougeL", "exact_match", "tokens_per_sample")
    return {k: v for k, v in accuracy_dict_full.items() if k in wanted}
11+
12+
def _decode_hex_tokens(hex_str):
    """Decode a hex string of packed little-endian 4-byte token IDs into ints.

    Each token occupies 8 hex characters (4 bytes, little-endian) — this
    matches the "int32" accuracy-log dtype used by the loadgen experiment.
    """
    return [
        int.from_bytes(bytes.fromhex(hex_str[i : i + 8]), byteorder="little")
        for i in range(0, len(hex_str), 8)
    ]


def parse_tokens(
    tokenised_accuracy_log_path: str, output_log_path: str
):
    """Convert an MLPerf tokenised accuracy log into a plain token-ID log.

    Reads the JSON log at *tokenised_accuracy_log_path* (a list of entries,
    each with a hex-encoded "data" field of packed int32 token IDs), decodes
    every entry into a list of integer token IDs, and writes the resulting
    list-of-lists as JSON to *output_log_path*.

    Returns
    -------
    str
        *output_log_path*, for convenient chaining.
    """
    with open(tokenised_accuracy_log_path) as f:
        log = json.load(f)

    # One decoded token list per log entry; decoding shared via helper so
    # the hex-unpacking logic lives in exactly one place.
    output_log = [_decode_hex_tokens(item["data"]) for item in log]

    with open(output_log_path, "w") as f:
        json.dump(output_log, f, indent=2)
    return output_log_path
30+
31+
def detokenise(
    checkpoint_path: str, tokenised_accuracy_log_path: str, output_log_path: str
):
    """Decode an MLPerf tokenised accuracy log back into readable text.

    Loads the HuggingFace tokeniser from *checkpoint_path*, reads the JSON
    accuracy log at *tokenised_accuracy_log_path* (each entry carries token
    IDs packed as little-endian 4-byte values in a hex "data" string),
    replaces each entry's "data" with the decoded text, and writes the new
    log as JSON to *output_log_path*.

    Returns *output_log_path* for convenient chaining.
    """
    tokeniser = AutoTokenizer.from_pretrained(checkpoint_path)

    with open(tokenised_accuracy_log_path) as log_file:
        entries = json.load(log_file)

    decoded_entries = []
    for entry in entries:
        packed = entry["data"]
        # Each token is 4 bytes -> 8 hex characters, little-endian.
        token_ids = [
            int.from_bytes(bytes.fromhex(packed[pos : pos + 8]), byteorder="little")
            for pos in range(0, len(packed), 8)
        ]
        decoded_entries.append({
            "seq_id" : entry["seq_id"],
            "qsl_idx" : entry["qsl_idx"],
            "data": tokeniser.decode(token_ids),
            "token_count" : entry["token_count"]
        })

    with open(output_log_path, "w") as out_file:
        json.dump(decoded_entries, out_file, indent=2)
    return output_log_path
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{
2+
"_parent_entries": [ [ "^", "byname", "base_loadgen_experiment" ] ],
3+
4+
"transformers_query": [ "python_package", "package_name=transformers", ["desired_python_version", ["^", "kernel_python_major_dot_minor"]] ],
5+
6+
"_BEFORE_CODE_LOADING": [ "^^", "execute", [[
7+
[ "get_kernel" ],
8+
[ "byquery", [[ "^^", "get", "transformers_query" ]] ],
9+
[ "use" ]
10+
]] ],
11+
12+
"desired_python_version": "3.10",
13+
14+
"mlperf_inference_git_entry": [ "^", "byquery", "git_repo,repo_name=mlperf_inference_git" ],
15+
16+
"abs_script_path": [ "^^", "execute", [[
17+
[ "get", "mlperf_inference_git_entry" ],
18+
[ "get_path_of", "llama3_1_8b_cnndm_accuracy_script" ]
19+
]] ],
20+
21+
"accuracy_log_path": ["^^", "get_path", "mlperf_log_accuracy.json"],
22+
23+
"dataset_name": "cnndm",
24+
"model_family": "llama3_1",
25+
"model_variant": "8b",
26+
27+
"dataset_query": [ "downloaded", [ "^^", "substitute", "dataset_name=#{dataset_name}#,model_family=#{model_family}#,variant=#{model_variant}#" ]],
28+
"dataset_entry": [ "^", "byquery", [[ "^^", "get", "dataset_query" ]], {}, ["dataset_query"] ],
29+
30+
"dataset_path": [ "^^", "execute", [[
31+
[ "get", "dataset_entry" ],
32+
[ "get_path" ],
33+
[ "__add__", "/cnn_eval.json" ]
34+
]] ],
35+
36+
"checkpoint_path_query": [ "^^", "substitute", "downloaded,hf_tokeniser,model_family=#{model_family}#,variant=#{model_variant}#" ],
37+
"checkpoint_path": [ "^^", "execute", [[
38+
[ "get_kernel" ],
39+
[ "byquery", [[ "^^", "get", "checkpoint_path_query" ]] ],
40+
[ "get_path" ]
41+
]] ],
42+
43+
"accuracy_log_dtype": "int32",
44+
45+
"extract_accuracy_report": [ "^^", "execute", [[
46+
[ "get_kernel" ],
47+
[ "byname", "python_script" ],
48+
[ "run", [], {
49+
"python_deps": [
50+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=protobuf" ],
51+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=torch" ],
52+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=transformers" ],
53+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=nltk" ],
54+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=rouge_score" ],
55+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=sentencepiece" ],
56+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=pillow" ],
57+
[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=evaluate" ]
58+
],
59+
"abs_script_path": ["^^", "get", "abs_script_path"],
60+
"script_extra_params": [ "^^", "substitute", "--mlperf-accuracy-file #{accuracy_log_path}# --dataset-file #{dataset_path}# --dtype #{accuracy_log_dtype}#" ],
61+
"desired_python_version": ["^", "kernel_python_major_dot_minor"],
62+
"capture_output": true
63+
} ],
64+
0,
65+
[ "func", [ "ufun.rematch", "(\\{.*\\})" ] ],
66+
0,
67+
[ "denumpify_dict" ],
68+
0,
69+
[ "func", "str" ]
70+
]], {} ],
71+
72+
"accuracy_dict_full": [ "^^", "execute", [[
73+
["get", "accuracy_report" ],
74+
0,
75+
[ "func", "eval" ]
76+
]], {} ],
77+
"accuracy_dict": [ "^^", "get_accuracy_dict" ],
78+
"rouge1": [ "^^" , "dig","accuracy_dict.rouge1" ],
79+
"rouge2": [ "^^" , "dig","accuracy_dict.rouge2" ],
80+
"rougeL": [ "^^" , "dig","accuracy_dict.rougeL" ],
81+
"rougeLsum": [ "^^" , "dig","accuracy_dict.rougeLsum" ],
82+
"gen_len": [ "^^" , "dig","accuracy_dict.gen_len" ],
83+
"gen_num": [ "^^" , "dig","accuracy_dict.gen_num" ],
84+
"tokens_per_sample": [ "^^" , "dig","accuracy_dict.tokens_per_sample" ],
85+
86+
"tokenised_accuracy_log_path": [ "^^", "get_path", "mlperf_log_accuracy.json" ],
87+
"output_log_path": [ "^^", "get_path", "detokenised_mlperf_log.json" ],
88+
89+
"detokenised_log": [ "^^", "detokenise" ]
90+
}

data_axs.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"model_pytorch_resnet50": "model_pytorch_resnet50",
4141
"mlperf_power_git_recipe": "mlperf_power_git_recipe",
4242
"dataset_cnndm_mlperf_recipe": "dataset_cnndm_mlperf_recipe",
43+
"dataset_small_llm_cnndm_mlperf_recipe": "dataset_small_llm_cnndm_mlperf_recipe",
4344
"dataset_lambada_recipe": "dataset_lambada_recipe",
4445
"dataset_coco2014_images_recipe": "dataset_coco2014_images_recipe",
4546
"gptj_reference_loadgen": "gptj_reference_loadgen",
@@ -74,7 +75,8 @@
7475
"base_llama3_1_loadgen_experiment": "base_llama3_1_loadgen_experiment",
7576
"dataset_llrg_mlperf_recipe": "dataset_llrg_mlperf_recipe",
7677
"convert_openorca": "convert_openorca",
77-
"quantize_quark_recipe": "quantize_quark_recipe"
78+
"quantize_quark_recipe": "quantize_quark_recipe",
79+
"base_small_llm_loadgen_experiment": "base_small_llm_loadgen_experiment"
7880
},
7981
"repo_name": "axs2mlperf",
8082
"checkout": null,
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"_producer_rules": [
3+
[ [ "downloaded", "dataset_name=cnndm", "model_family=llama3_1", "variant=8b" ], [["get_kernel"],["byname","downloader"],["download"]], {
4+
"downloading_tool_query": "shell_tool,can_download_url_from_rclone",
5+
"url": "mlc-inference:mlcommons-inference-wg-public/llama3.1_8b/cnn_eval.json",
6+
"downloading_tool_params": {
7+
"rclone_remote_name": "mlc-inference"
8+
},
9+
"newborn_entry_name": "downloaded_mlc_cnndm_llama3_1_8b",
10+
"file_path": "llama3_1_8b"
11+
}, [] ]
12+
]
13+
}

0 commit comments

Comments
 (0)