
Commit ffca1bb

token healing : change argument order

committed · 1 parent 010662e · commit ffca1bb

4 files changed, +9 −9 lines changed


common/sampling.cpp (+1 −1)

@@ -128,8 +128,8 @@ static llama_token_healing_output llama_token_healing_get_prefix(
 
 llama_token_healing_output llama_token_healing_rollback(
         const llama_context * ctx_main,
-        llama_token_healing_type th_type,
         std::vector<llama_token> & tokens,
+        llama_token_healing_type th_type,
         int max_to_remove) {
     // NB. To avoid returning empty `tokens`, at least 1 token will remain in `tokens` after rolling back.
     // It is the caller's responsibility to add BOS to the start of the prompt if they want to roll back the whole prompt.

common/sampling.h (+1 −1)

@@ -189,8 +189,8 @@ struct llama_token_healing_output {
 // Call `llama_token_healing_set_prefix` with the returned prefix before the first sampling.
 llama_token_healing_output llama_token_healing_rollback(
         const llama_context * ctx_main,
-        llama_token_healing_type th_type,
         std::vector<llama_token> & tokens,
+        llama_token_healing_type th_type,
         int max_to_remove = -1);
 
 void llama_token_healing_set_prefix(llama_sampling_context * ctx_sampling, const std::string & prefix);
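For reference, a minimal caller-side sketch of the reordered declaration above, loosely following the pattern used in examples/main/main.cpp. The surrounding variables (ctx, ctx_sampling, sparams, embd_inp) are assumed to already exist as they do there; this only illustrates the new argument order, not a drop-in snippet.

    // Hedged sketch: `tokens` now comes before `th_type`; `max_to_remove` keeps its default of -1.
    llama_token_healing_output token_healing_out{};
    if (sparams.token_healing.enabled) {
        token_healing_out = llama_token_healing_rollback(
                ctx,                            // const llama_context *
                embd_inp,                       // std::vector<llama_token> &, rolled back in place
                sparams.token_healing.type,     // llama_token_healing_type
                sparams.token_healing.n_rollback);
        // Seed the sampler with the healed prefix before the first sampling call.
        llama_token_healing_set_prefix(ctx_sampling, token_healing_out.prefix);
    }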

examples/main/main.cpp (+3 −3)

@@ -282,8 +282,8 @@ int main(int argc, char ** argv) {
     }
     llama_token_healing_output token_healing_out{};
     if (!params.interactive_first && sparams.token_healing.enabled) {
-        token_healing_out = llama_token_healing_rollback(ctx, sparams.token_healing.type, embd_inp,
-                                                         sparams.token_healing.n_rollback);
+        token_healing_out = llama_token_healing_rollback(ctx, embd_inp,
+                                                         sparams.token_healing.type, sparams.token_healing.n_rollback);
     }
 
     // Should not run without any tokens
@@ -917,7 +917,7 @@
                 const int max_to_remove = sparams.token_healing.n_rollback < 0
                     ? n_new_tokens
                     : std::min(sparams.token_healing.n_rollback, n_new_tokens);
-                token_healing_out = llama_token_healing_rollback(ctx, sparams.token_healing.type, embd_inp, max_to_remove);
+                token_healing_out = llama_token_healing_rollback(ctx, embd_inp, sparams.token_healing.type, max_to_remove);
                 n_bytes_to_skip = token_healing_out.prefix.size();
             }
 
examples/server/server.cpp (+4 −4)

@@ -2076,8 +2076,8 @@ struct server_context {
 
                 if (slot.sparams.token_healing.enabled) {
                     // For FIM roll back only the prefix part (i.e. cursor location)
-                    token_healing_out = llama_token_healing_rollback(ctx, slot.sparams.token_healing.type,
-                                                                     prefix_tokens, slot.sparams.token_healing.n_rollback);
+                    token_healing_out = llama_token_healing_rollback(ctx, prefix_tokens,
+                                                                     slot.sparams.token_healing.type, slot.sparams.token_healing.n_rollback);
                 }
 
                 auto embd_inp = params.spm_infill ? suffix_tokens : prefix_tokens;
@@ -2097,8 +2097,8 @@
                 prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
 
                 if (slot.sparams.token_healing.enabled) {
-                    token_healing_out = llama_token_healing_rollback(ctx, slot.sparams.token_healing.type,
-                                                                     prompt_tokens, slot.sparams.token_healing.n_rollback);
+                    token_healing_out = llama_token_healing_rollback(ctx, prompt_tokens,
+                                                                     slot.sparams.token_healing.type, slot.sparams.token_healing.n_rollback);
                 }
             }

0 commit comments