Commit a90a5e8

fix: check model is loaded before starting (#2206)
* fix: check model is loaded before starting

* chore: e2e test

---------

Co-authored-by: sangjanai <[email protected]>
1 parent f192287 commit a90a5e8

5 files changed, +32 -8 lines

engine/config/yaml_config.cc

+1 -1

@@ -48,7 +48,7 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
   if (!yaml_node_["mmproj"]) {
     auto s = nomalize_path(file_path);
     auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
-    CTL_DBG("mmproj: " << abs_path);
+    CTL_TRC("mmproj: " << abs_path);
     auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
     if (std::filesystem::exists(abs_path)) {
       yaml_node_["mmproj"] = rel_path.string();
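The only change here demotes the mmproj path message from debug (CTL_DBG) to trace (CTL_TRC) level, so the probe for a sibling mmproj.gguf no longer appears at debug verbosity.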

engine/e2e-test/cli/engines/test_cli_engine_uninstall.py

+4 -1

@@ -24,7 +24,10 @@ def setup_and_teardown(self):
 
     @pytest.mark.asyncio
     async def test_engines_uninstall_llamacpp_should_be_successfully(self):
-        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+        data = {"version": "b5371"}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
         await wait_for_websocket_download_success_event(timeout=None)
         exit_code, output, error = run(
             "Uninstall engine", ["engines", "uninstall", "llama-cpp"]

engine/e2e-test/cli/model/test_cli_model.py

+1

@@ -36,6 +36,7 @@ def setup_and_teardown(self):
         run("Delete model", ["models", "delete", "tinyllama:1b"])
         stop_server()
 
+    @pytest.mark.skipif(platform.system() == "Windows", reason="Skip test for Windows")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
             "Pull model",

engine/extensions/local-engine/local_engine.cc

+25 -4

@@ -80,6 +80,11 @@ std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
         res.push_back("--no-mmap");
       }
       continue;
+    } else if (member == "ignore_eos") {
+      if (root[member].asBool()) {
+        res.push_back("--ignore_eos");
+      }
+      continue;
     }
 
     res.push_back("--" + member);

@@ -502,6 +507,23 @@ void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
 
 void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
                             http_callback&& callback) {
+  auto model_id = json_body->get("model", "").asString();
+  if (model_id.empty()) {
+    CTL_WRN("Model is empty");
+  }
+  if (server_map_.find(model_id) != server_map_.end()) {
+    CTL_INF("Model " << model_id << " is already loaded");
+    Json::Value error;
+    error["error"] = "Model " + model_id + " is already loaded";
+    Json::Value status;
+    status["is_done"] = true;
+    status["has_error"] = true;
+    status["is_stream"] = false;
+    status["status_code"] = 409;
+    callback(std::move(status), std::move(error));
+    return;
+  }
+
   CTL_INF("Start loading model");
   auto wait_for_server_up = [this](const std::string& model,
                                    const std::string& host, int port) {

@@ -524,10 +546,7 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   };
 
   LOG_DEBUG << "Start to spawn llama-server";
-  auto model_id = json_body->get("model", "").asString();
-  if (model_id.empty()) {
-    CTL_WRN("Model is empty");
-  }
+
   server_map_[model_id].host = "127.0.0.1";
   server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
   auto& s = server_map_[model_id];

@@ -545,6 +564,8 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   params.push_back("--pooling");
   params.push_back("mean");
 
+  params.push_back("--jinja");
+
   std::vector<std::string> v;
   v.reserve(params.size() + 1);
   auto engine_dir = engine_service_.GetEngineDirPath(kLlamaRepo);
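This file carries the headline fix: model_id is now resolved and checked against server_map_ before a llama-server is spawned, so a second load of an already-running model answers with a 409 instead of starting a duplicate process (the hunk also maps the ignore_eos JSON option to an engine flag and passes --jinja). The guard is observable from a client. Below is a minimal sketch in the style of the repo's e2e tests; it assumes the server listens on port 3928 (the port used in the tests above), that POST /v1/models/start with a "model" field is the route that reaches LocalEngine::LoadModel, that "tinyllama:1b" is already downloaded, and that the HTTP layer maps the callback's status_code field to the response status. None of those assumptions are confirmed by this diff.

    import requests

    BASE = "http://localhost:3928"  # port taken from the e2e tests above
    MODEL = "tinyllama:1b"          # hypothetical model id for illustration

    # Assumed route: POST /v1/models/start -> LocalEngine::LoadModel.
    first = requests.post(f"{BASE}/v1/models/start", json={"model": MODEL})
    assert first.status_code == 200, first.text

    # Loading the same model again should no longer spawn a second
    # llama-server; the new guard returns 409 with an error payload.
    second = requests.post(f"{BASE}/v1/models/start", json={"model": MODEL})
    assert second.status_code == 409
    assert "already loaded" in second.json().get("error", "")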

engine/services/model_source_service.cc

+1 -2

@@ -433,8 +433,7 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoRepo(
 
   auto author = hub_author;
   auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-  if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-      model_author.has_value() && !model_author.value().empty()) {
+  if (model_author.has_value() && !model_author.value().empty()) {
     author = model_author.value();
   }
 