@@ -32,6 +32,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN2VL,     "qwen2vl"    },
     { LLM_ARCH_QWEN3,       "qwen3"      },
     { LLM_ARCH_QWEN3MOE,    "qwen3moe"   },
+    { LLM_ARCH_QWEN3VL,     "qwen3vl"    },
+    { LLM_ARCH_QWEN3VLMOE,  "qwen3vlmoe" },
     { LLM_ARCH_PHI2,        "phi2"       },
     { LLM_ARCH_PHI3,        "phi3"       },
     { LLM_ARCH_PHIMOE,      "phimoe"     },
@@ -103,6 +105,9 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_SEED_OSS,     "seed_oss"       },
     { LLM_ARCH_GROVEMOE,     "grovemoe"       },
     { LLM_ARCH_APERTUS,      "apertus"        },
+    { LLM_ARCH_MINIMAX_M2,   "minimax-m2"     },
+    { LLM_ARCH_COGVLM,       "cogvlm"         },
+    { LLM_ARCH_PANGU_EMBED,  "pangu-embedded" },
     { LLM_ARCH_UNKNOWN,      "(unknown)"      },
 };
 
@@ -145,6 +150,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERTS_PER_GROUP,      "%s.experts_per_group"      },
     { LLM_KV_MOE_EVERY_N_LAYERS,     "%s.moe_every_n_layers"     },
     { LLM_KV_NEXTN_PREDICT_LAYERS,   "%s.nextn_predict_layers"   },
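+    // number of DeepStack layers (used by Qwen3-VL's vision feature fusion)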
+    { LLM_KV_NUM_DEEPSTACK_LAYERS,   "%s.n_deepstack_layers"     },
     { LLM_KV_POOLING_TYPE,           "%s.pooling_type"           },
     { LLM_KV_LOGIT_SCALE,            "%s.logit_scale"            },
     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
@@ -779,6 +785,45 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
         },
     },
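+    // Qwen3-VL (dense): same per-layer tensor layout as Qwen3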
+    {
+        LLM_ARCH_QWEN3VL,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,  "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT,      "output" },
+            { LLM_TENSOR_ATTN_NORM,   "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,      "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,      "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,      "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,    "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,    "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,    "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,    "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,      "blk.%d.ffn_up" },
+        },
+    },
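+    // Qwen3-VL MoE: same per-layer tensor layout as Qwen3-MoE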
+    {
+        LLM_ARCH_QWEN3VLMOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,    "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,   "output_norm" },
+            { LLM_TENSOR_OUTPUT,        "output" },
+            { LLM_TENSOR_ATTN_NORM,     "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,        "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,   "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,        "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,   "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,        "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,      "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,      "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,  "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,   "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_PHI2,
         {
@@ -2312,6 +2357,64 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_CHEXPS,   "blk.%d.ffn_up_chexps" },
         },
     },
+    {
+        LLM_ARCH_MINIMAX_M2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
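+            // per-expert routing probability bias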
+            { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
+        },
+    },
+    {
+        LLM_ARCH_PANGU_EMBED,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,  "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT,      "output" },
+            { LLM_TENSOR_ATTN_NORM,   "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,      "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,      "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,      "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,    "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,    "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,    "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,    "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,      "blk.%d.ffn_up" },
+        },
+    },
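+    // CogVLM: fused text QKV plus a parallel set of vision-expert (VISEXP) weights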
+    {
+        LLM_ARCH_COGVLM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_VISEXP_ATTN_QKV, "blk.%d.vis_attn_qkv" },
+            { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
+            { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
+            { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
+            { LLM_TENSOR_VISEXP_FFN_UP,   "blk.%d.vis_up" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -2488,6 +2591,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SHORTCONV_CONV,    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SHORTCONV_INPROJ,  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
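+    // CogVLM vision-expert tensors: repeated per layer, consumed by mat-mul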
+    {LLM_TENSOR_VISEXP_ATTN_QKV,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_VISEXP_ATTN_OUT,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_VISEXP_FFN_GATE,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_VISEXP_FFN_DOWN,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_VISEXP_FFN_UP,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     // NextN/MTP tensors are currently ignored (reserved for future MTP support)
     // These tensors only exist in the last layer(s) and are treated as output tensors
     {LLM_TENSOR_NEXTN_EH_PROJ,     {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},