Skip to content

Commit 7bef684

Browse files
authored
models : move build_inp_out_ids outside loop (#17151)
* move build_inp_out_ids outside loop
* realign
1 parent 395e286 commit 7bef684

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

src/models/ernie4-5.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
#include "models.h"
22

3-
4-
53
llm_build_ernie4_5::llm_build_ernie4_5(const llama_model & model, const llm_graph_params & params) :
64
llm_graph_context(params) {
75
const int64_t n_embd_head = hparams.n_embd_head_v;
@@ -19,6 +17,8 @@ llm_build_ernie4_5::llm_build_ernie4_5(const llama_model & model, const llm_grap
1917

2018
auto * inp_attn = build_attn_inp_kv();
2119

20+
ggml_tensor * inp_out_ids = build_inp_out_ids();
21+
2222
for (int il = 0; il < n_layer; ++il) {
2323
ggml_tensor * inpSA = inpL;
2424

@@ -67,9 +67,8 @@ llm_build_ernie4_5::llm_build_ernie4_5(const llama_model & model, const llm_grap
6767
}
6868
if (il == n_layer - 1) {
6969
// skip computing output for unused tokens
70-
ggml_tensor * inp_out_ids = build_inp_out_ids();
71-
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
72-
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
70+
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
71+
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
7372
}
7473
ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
7574
cb(ffn_inp, "ffn_inp", il);

src/models/openai-moe-iswa.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@ llm_build_openai_moe_iswa::llm_build_openai_moe_iswa(const llama_model & model,
1111

1212
auto * inp_attn = build_attn_inp_kv_iswa();
1313

14+
ggml_tensor * inp_out_ids = build_inp_out_ids();
15+
1416
for (int il = 0; il < n_layer; ++il) {
1517
ggml_tensor * inpSA = inpL;
1618

@@ -69,7 +71,6 @@ llm_build_openai_moe_iswa::llm_build_openai_moe_iswa(const llama_model & model,
6971
}
7072
if (il == n_layer - 1) {
7173
// skip computing output for unused tokens
72-
ggml_tensor * inp_out_ids = build_inp_out_ids();
7374
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
7475
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
7576
}

0 commit comments

Comments
 (0)