Skip to content

Commit cb00b43

Browse files
authored
Support Chat Template Override for Unsupported Models (#947)
1 parent 63c4a4d commit cb00b43

File tree

4 files changed

+27
-10
lines changed

4 files changed

+27
-10
lines changed

onnxruntime_extensions/pp_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ def tokenize(self, text, add_special_tokens = True):
5858
def detokenize(self, tokens):
5959
return batch_detokenize(self.tokenizer, [tokens])
6060

61-
def apply_chat_template(self, chat, add_generation_prompt=True, tokenize=False):
61+
def apply_chat_template(self, chat, template="", add_generation_prompt=True, tokenize=False):
6262
result = _apply_chat_template(
63-
self.tokenizer, "", chat, add_generation_prompt, tokenize)
63+
self.tokenizer, template, chat, add_generation_prompt, tokenize)
6464
return tensor_result_get_at(result, 1 if tokenize else 0)
6565

6666
def __del__(self):

shared/api/chat_template.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,10 @@ OrtxStatus TokenizerImpl::LoadChatTemplate() {
1111
try {
1212
chat_template_root_ = minja::Parser::parse(chat_template, {});
1313
} catch (const std::runtime_error& e) {
14-
chat_template_parsing_status = std::string(e.what());
1514
return OrtxStatus(kOrtxOK, "Warning: The chat template for this model is not yet supported, trying to apply chat template will cause an error.");
1615
}
1716
}
1817

19-
chat_template_parsing_status = "Success";
2018
return OrtxStatus(kOrtxOK, "Loaded chat template.");
2119
}
2220

@@ -722,9 +720,9 @@ void TokenizerImpl::InitializeChatParameters(const char* template_str,
722720
// ApplyChatTemplate method to choose the template logic based on chat_template
723721
OrtxStatus TokenizerImpl::ApplyChatTemplate(const TokenizerImpl::MessageList& message_list, std::string& output,
724722
bool add_generation_prompt) const {
725-
if (chat_template_parsing_status != "Success"){
726-
return OrtxStatus(kOrtxErrorInvalidArgument, "Failed to parse chat template: " + chat_template_parsing_status);
727-
}
723+
// Note: The official chat template from this model's config file may not be supported.
724+
// However, we do not throw an error until checking model_to_template_map as the user
725+
// may pass in a template string in our supported list to override the model config template.
728726

729727
// Find the chat_template string for this model if it is supported
730728
auto it = model_to_template_map.find(chat_template);

shared/api/tokenizer_impl.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,6 @@ class TokenizerImpl : public OrtxObjectImpl {
5858
std::string chat_template;
5959
mutable MessageList messages;
6060

61-
std::string chat_template_parsing_status;
62-
6361
std::string bos_token;
6462
std::string eos_token;
6563
std::vector<std::string> custom_tools;

test/test_pp_api.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,32 @@ def test_OLMa_tokenizer(self):
234234
np.testing.assert_array_equal(ortx_inputs, inputs)
235235

236236
def test_Qwen_QVQ_tokenizer(self):
237-
model_id = "Qwen/QVQ-72B-Preview"
237+
model_id = "Qwen/Qwen3-0.6B-FP8"
238238
test_sentence = [self.tokenizer_test_sentence]
239239
hf_enc = AutoTokenizer.from_pretrained(model_id)
240240
inputs = hf_enc(test_sentence)["input_ids"]
241241
tokenizer = pp_api.Tokenizer(model_id)
242+
243+
# Note: we simply check if chat template override works here, as Qwen/Qwen3-0.6B-FP8 is not a
244+
# supported chat template model, but we do not compare the output of apply_chat_template
245+
# with HF, as it is not supported in Extensions yet.
246+
messages = [
247+
{
248+
"role": "user",
249+
"content": [
250+
{
251+
"type": "image",
252+
"url": "https://huggingface.co/spaces/big-vision/paligemma-hf/resolve/main/examples/password.jpg",
253+
},
254+
{"type": "text", "text": "What is the password?"},
255+
],
256+
}
257+
]
258+
message_json = json.dumps(messages)
259+
templ = """{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"""
260+
prompt = tokenizer.apply_chat_template(chat=message_json, template=templ)
261+
262+
# Continue tokenizer test
242263
ortx_inputs = tokenizer.tokenize(test_sentence)
243264
np.testing.assert_array_equal(ortx_inputs, inputs)
244265

0 commit comments

Comments (0)