Commit a90a5e8

fix: check model is loaded before starting (#2206)
* fix: check model is loaded before starting

* chore: e2e test

---------

Co-authored-by: sangjanai <[email protected]>
1 parent f192287 commit a90a5e8

5 files changed, +32 -8 lines

engine/config/yaml_config.cc

+1 -1

@@ -48,7 +48,7 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
   if (!yaml_node_["mmproj"]) {
     auto s = nomalize_path(file_path);
     auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
-    CTL_DBG("mmproj: " << abs_path);
+    CTL_TRC("mmproj: " << abs_path);
     auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
     if (std::filesystem::exists(abs_path)) {
       yaml_node_["mmproj"] = rel_path.string();
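The only change here demotes the mmproj path message from debug (CTL_DBG) to trace (CTL_TRC) level, so the probe for a sibling mmproj.gguf no longer appears at debug verbosity.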

engine/e2e-test/cli/engines/test_cli_engine_uninstall.py

+4 -1

@@ -24,7 +24,10 @@ def setup_and_teardown(self):
 
     @pytest.mark.asyncio
     async def test_engines_uninstall_llamacpp_should_be_successfully(self):
-        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+        data = {"version": "b5371"}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
         await wait_for_websocket_download_success_event(timeout=None)
         exit_code, output, error = run(
             "Uninstall engine", ["engines", "uninstall", "llama-cpp"]

engine/e2e-test/cli/model/test_cli_model.py

+1

@@ -36,6 +36,7 @@ def setup_and_teardown(self):
         run("Delete model", ["models", "delete", "tinyllama:1b"])
         stop_server()
 
+    @pytest.mark.skipif(platform.system() == "Windows", reason="Skip test for Windows")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
             "Pull model",

engine/extensions/local-engine/local_engine.cc

+25 -4

@@ -80,6 +80,11 @@ std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
         res.push_back("--no-mmap");
       }
       continue;
+    } else if (member == "ignore_eos") {
+      if (root[member].asBool()) {
+        res.push_back("--ignore_eos");
+      }
+      continue;
     }
 
     res.push_back("--" + member);

@@ -502,6 +507,23 @@ void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
 
 void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
                             http_callback&& callback) {
+  auto model_id = json_body->get("model", "").asString();
+  if (model_id.empty()) {
+    CTL_WRN("Model is empty");
+  }
+  if (server_map_.find(model_id) != server_map_.end()) {
+    CTL_INF("Model " << model_id << " is already loaded");
+    Json::Value error;
+    error["error"] = "Model " + model_id + " is already loaded";
+    Json::Value status;
+    status["is_done"] = true;
+    status["has_error"] = true;
+    status["is_stream"] = false;
+    status["status_code"] = 409;
+    callback(std::move(status), std::move(error));
+    return;
+  }
+
   CTL_INF("Start loading model");
   auto wait_for_server_up = [this](const std::string& model,
                                    const std::string& host, int port) {

@@ -524,10 +546,7 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   };
 
   LOG_DEBUG << "Start to spawn llama-server";
-  auto model_id = json_body->get("model", "").asString();
-  if (model_id.empty()) {
-    CTL_WRN("Model is empty");
-  }
+
   server_map_[model_id].host = "127.0.0.1";
   server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
   auto& s = server_map_[model_id];

@@ -545,6 +564,8 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   params.push_back("--pooling");
   params.push_back("mean");
 
+  params.push_back("--jinja");
+
   std::vector<std::string> v;
   v.reserve(params.size() + 1);
   auto engine_dir = engine_service_.GetEngineDirPath(kLlamaRepo);
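This file carries the headline fix: model_id is now resolved and checked against server_map_ before a llama-server is spawned, so a second load of an already-running model answers with a 409 instead of starting a duplicate process (the hunk also maps the ignore_eos JSON option to an engine flag and passes --jinja). The guard is observable from a client. Below is a minimal sketch in the style of the repo's e2e tests; it assumes the server listens on port 3928 (the port used in the tests above), that POST /v1/models/start with a "model" field is the route that reaches LocalEngine::LoadModel, that "tinyllama:1b" is already downloaded, and that the HTTP layer maps the callback's status_code field to the response status. None of those assumptions are confirmed by this diff.

    import requests

    BASE = "http://localhost:3928"  # port taken from the e2e tests above
    MODEL = "tinyllama:1b"          # hypothetical model id for illustration

    # Assumed route: POST /v1/models/start -> LocalEngine::LoadModel.
    first = requests.post(f"{BASE}/v1/models/start", json={"model": MODEL})
    assert first.status_code == 200, first.text

    # Loading the same model again should no longer spawn a second
    # llama-server; the new guard returns 409 with an error payload.
    second = requests.post(f"{BASE}/v1/models/start", json={"model": MODEL})
    assert second.status_code == 409
    assert "already loaded" in second.json().get("error", "")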

engine/services/model_source_service.cc

+1 -2

@@ -433,8 +433,7 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoRepo(
 
   auto author = hub_author;
   auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-  if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-      model_author.has_value() && !model_author.value().empty()) {
+  if (model_author.has_value() && !model_author.value().empty()) {
     author = model_author.value();
   }
 