pytorch · tugsbayasgalan · Mar 6, 2025 · Mar 7, 2025 · Mar 7, 2025 · Mar 11, 2025
diff --git a/torchbenchmark/models/hf_Qwen2/__init__.py b/torchbenchmark/models/hf_Qwen2/__init__.py
@@ -0,0 +1,46 @@
+import torch
+from torchbenchmark.tasks import NLP
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel
+from transformers import AutoTokenizer, DynamicCache, AutoModelForCausalLM
+
+
+class Model(HuggingFaceModel):
+    """TorchBench entry for ``hf_Qwen2``: evaluation only, ``train()`` is not implemented."""
+
+    task = NLP.LANGUAGE_MODELING
+    DEFAULT_EVAL_BSIZE = 1
+    DEFAULT_EVAL_CUDA_PRECISION = "fp16"
+
+    def __init__(self, test="inference", device="cuda", batch_size=None, extra_args=[]):
+        """Initialise the base model, then tokenize one fixed prompt as the example inputs."""
+        # NOTE(review): userbenchmark/export_new_models/run.py passes test="eval"; confirm the
+        # base class accepts the "inference" default.
+        super().__init__(
+            name="hf_Qwen2", test=test, device=device, batch_size=batch_size, extra_args=extra_args
+        )
+        tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B")
+        inputs = tokenizer("What is the best way to debug python script?", return_tensors="pt")
+        # Inputs follow the configured device (like the model below) instead of a hard-coded CUDA.
+        self.example_inputs = {
+            "input_ids": inputs.input_ids.to(self.device),
+            "attention_mask": inputs.attention_mask.to(self.device),
+            "past_key_values": DynamicCache(),
+            "use_cache": True,
+        }
+        self.model.to(self.device)
+
+    def train(self):
+        """Always raise ``NotImplementedError``; this benchmark has no training path."""
+        raise NotImplementedError("Training is not implemented.")
+
+    def get_module(self):
+        """Return ``(model, example_inputs)`` where ``example_inputs`` is a keyword-argument dict."""
+        return self.model, self.example_inputs
+
+    def eval(self):
+        """Run one forward pass with a fresh ``DynamicCache`` and return the outputs as a tuple."""
+        self.model.eval()
+        inputs = self.example_inputs
+        out = self.model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"],
+                         past_key_values=DynamicCache(), use_cache=True)
+        return out if isinstance(out, tuple) else (out.logits,)
diff --git a/torchbenchmark/models/hf_Qwen2/install.py b/torchbenchmark/models/hf_Qwen2/install.py
@@ -0,0 +1,11 @@
+import os
+
+from torchbenchmark.util.framework.huggingface.patch_hf import (
+    cache_model,
+    patch_transformers,
+)
+
+if __name__ == "__main__":
+    patch_transformers()
+    # The model name is the name of the directory containing this file (here "hf_Qwen2").
+    cache_model(os.path.basename(os.path.dirname(os.path.abspath(__file__))))
diff --git a/torchbenchmark/models/hf_Qwen2/metadata.yaml b/torchbenchmark/models/hf_Qwen2/metadata.yaml
@@ -0,0 +1,11 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 1
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+not_implemented:
+- device: cpu
+- test: train
+train_benchmark: false
+train_deterministic: false
diff --git a/torchbenchmark/models/hf_Qwen2/requirements.txt b/torchbenchmark/models/hf_Qwen2/requirements.txt
@@ -0,0 +1 @@
+transformers
diff --git a/torchbenchmark/util/framework/huggingface/basic_configs.py b/torchbenchmark/util/framework/huggingface/basic_configs.py
@@ -13,6 +13,12 @@
         'AutoConfig.from_pretrained("gpt2")',
         "AutoModelForCausalLM",
     ),
+    "hf_Qwen2": (
+        512,
+        32768,
+        'AutoConfig.from_pretrained("Qwen/Qwen2-7B")',
+        "AutoModelForCausalLM"
+    ),
     "hf_GPT2_large": (
         512,
         1024,

diff --git a/userbenchmark/export_new_models/__init__.py b/userbenchmark/export_new_models/__init__.py
@@ -0,0 +1 @@
+BM_NAME = "export_new_models"
diff --git a/userbenchmark/export_new_models/run.py b/userbenchmark/export_new_models/run.py
@@ -0,0 +1,54 @@
+import torch
+import importlib 
+import sys
+import pprint
+#import torchbenchmark
+from pathlib import Path
+
+# Put the repository root on sys.path so the torchbenchmark/userbenchmark imports resolve.
+repo = Path(__file__).parent.parent.parent
+sys.path.append(str(repo))
+
+from userbenchmark.utils import dump_output
+from userbenchmark.export_new_models import BM_NAME
+
+models = [  # names of the torchbenchmark.models.<name> packages whose export run() attempts
+    "hf_Qwen2"
+]
+
+def get_model(name):
+    """Import ``torchbenchmark.models.<name>`` and instantiate its ``Model`` class
+    with ``device="cuda"`` and ``test="eval"``."""
+    module = importlib.import_module(f"torchbenchmark.models.{name}")
+    return module.Model(device="cuda", test="eval")
+
+def run():
+    """Try to export every model in ``models``; print and dump the success rate and errors."""
+    errors = {}
+    count_success = 0
+    for model_name in models:
+        module, example_inputs = get_model(model_name).get_module()
+        try:
+            # example_inputs is passed as kwargs (positional args are empty). The result is
+            # discarded: only whether export and .module() succeed is measured.
+            torch.export.export(module, (), example_inputs, strict=False).module()
+        except Exception as e:
+            # Best-effort benchmark: record the failure text and move on to the next model.
+            errors[model_name] = str(e)
+        else:
+            count_success += 1
+
+    result = {
+        "name": BM_NAME,
+        "environ": {"pytorch_git_version": torch.version.git_version},
+        "metrics": {
+            # Guard the division so an empty ``models`` list reports 0.0 instead of raising.
+            "success_rate": count_success / len(models) if models else 0.0,
+            "errors": errors,
+        },
+    }
+    pprint.pprint(result)
+    dump_output(BM_NAME, result)
+
+if __name__ == "__main__":
+    run()  # export every entry in `models`, then print and dump the result