File tree: 3 files changed, +3 -20 lines changed
Original file line number | Diff line number | Diff line change
@@ -59,13 +59,7 @@ int main(int argc, char ** argv) {
5959 }
6060
6161 params.cpuparams_batch .n_threads = params.speculative .cpuparams_batch .n_threads ;
62-
63- // Apply tensor overrides for draft model
64- if (!params.speculative .tensor_buft_overrides .empty ()) {
65- params.tensor_buft_overrides = params.speculative .tensor_buft_overrides ;
66- } else {
67- params.tensor_buft_overrides .clear ();
68- }
62+ params.tensor_buft_overrides = params.speculative .tensor_buft_overrides ;
6963
7064 common_init_result llama_init_dft = common_init_from_params (params);
7165
Original file line number | Diff line number | Diff line change
@@ -85,13 +85,7 @@ int main(int argc, char ** argv) {
8585 }
8686
8787 params.cpuparams_batch .n_threads = params.speculative .cpuparams_batch .n_threads ;
88-
89- // Apply tensor overrides for draft model
90- if (!params.speculative .tensor_buft_overrides .empty ()) {
91- params.tensor_buft_overrides = params.speculative .tensor_buft_overrides ;
92- } else {
93- params.tensor_buft_overrides .clear ();
94- }
88+ params.tensor_buft_overrides = params.speculative .tensor_buft_overrides ;
9589
9690 common_init_result llama_init_dft = common_init_from_params (params);
9791
Original file line number | Diff line number | Diff line change
@@ -2011,12 +2011,7 @@ struct server_context {
20112011 params_dft.cache_type_k = params_base.speculative .cache_type_k ;
20122012 params_dft.cache_type_v = params_base.speculative .cache_type_v ;
20132013
2014- // Apply tensor overrides for draft model
2015- if (!params_base.speculative .tensor_buft_overrides .empty ()) {
2016- params_dft.tensor_buft_overrides = params_base.speculative .tensor_buft_overrides ;
2017- } else {
2018- params_dft.tensor_buft_overrides .clear (); // ensure no main overrides leak in
2019- }
2014+ params_dft.tensor_buft_overrides = params_base.speculative .tensor_buft_overrides ;
20202015
20212016 llama_init_dft = common_init_from_params (params_dft);
20222017
You can’t perform that action at this time.
0 commit comments