@@ -1023,7 +1023,7 @@ ggml_tensor * llm_graph_context::build_inp_pos_bucket_dec() const {
1023
1023
1024
1024
auto inp = std::make_unique<llm_graph_input_pos_bucket_kv>(hparams, kv_self);
1025
1025
1026
- const auto n_kv = kv_self->n_base () ;
1026
+ const auto n_kv = kv_self->n ;
1027
1027
1028
1028
auto & cur = inp->pos_bucket ;
1029
1029
@@ -1240,7 +1240,7 @@ llm_graph_input_attn_kv_unified * llm_graph_context::build_attn_inp_kv_unified()
1240
1240
auto inp = std::make_unique<llm_graph_input_attn_kv_unified>(hparams, cparams, kv_self);
1241
1241
1242
1242
{
1243
- const auto n_kv = kv_self->n_base () ;
1243
+ const auto n_kv = kv_self->n ;
1244
1244
1245
1245
inp->self_kq_mask = ggml_new_tensor_2d (ctx0, GGML_TYPE_F32, n_kv, GGML_PAD (n_tokens, GGML_KQ_MASK_PAD));
1246
1246
// cb(inp->self_kq_mask, "KQ_mask", -1);
@@ -1252,7 +1252,7 @@ llm_graph_input_attn_kv_unified * llm_graph_context::build_attn_inp_kv_unified()
1252
1252
if (hparams.n_swa_pattern > 1 ) {
1253
1253
GGML_ASSERT (hparams.n_swa > 0 );
1254
1254
1255
- const auto n_kv = kv_self->n_swa () ;
1255
+ const auto n_kv = kv_self->n ;
1256
1256
1257
1257
inp->self_kq_mask_swa = ggml_new_tensor_2d (ctx0, GGML_TYPE_F32, n_kv, GGML_PAD (n_tokens, GGML_KQ_MASK_PAD));
1258
1258
// cb(inp->self_kq_mask_swa, "KQ_mask_swa", -1);
@@ -1297,9 +1297,9 @@ ggml_tensor * llm_graph_context::build_attn(
1297
1297
1298
1298
// store to KV cache
1299
1299
{
1300
- const auto kv_head = kv_layer. cells ->head ;
1300
+ const auto kv_head = kv_self ->head ;
1301
1301
1302
- GGML_ASSERT (kv_layer. cells ->size == n_ctx);
1302
+ GGML_ASSERT (kv_self ->size == n_ctx);
1303
1303
1304
1304
ggml_tensor * k_cache_view = ggml_view_1d (ctx0, kv_layer.k , n_tokens*n_embd_k_gqa, ggml_row_size (kv_layer.k ->type , n_embd_k_gqa)*kv_head);
1305
1305
// cb(k_cache_view, "k_cache_view", il);
@@ -1331,7 +1331,7 @@ ggml_tensor * llm_graph_context::build_attn(
1331
1331
1332
1332
const auto & kq_mask = is_swa ? inp->get_kq_mask_swa () : inp->get_kq_mask ();
1333
1333
1334
- const auto n_kv = kv_layer. cells ->n ;
1334
+ const auto n_kv = kv_self ->n ;
1335
1335
1336
1336
const auto & n_embd_head_k = hparams.n_embd_head_k ;
1337
1337
const auto & n_embd_head_v = hparams.n_embd_head_v ;
0 commit comments