1 file changed, +2 −2 lines changed

```diff
@@ -442,8 +442,8 @@ void llama_kv_cache_unified::defrag_sched(float thold) {
 void llama_kv_cache_unified::set_full() {
     n = size;
 
-    // when simulating a full KV cache, the specific value of the "head" pointer is not important because we are not
-    // going to write any data - we just want to measure the memory needed by the graph in such state.
+    // when simulating a full KV cache, the specific value of the "head" pointer is not important because it does not
+    // affect the shapes of the tensors in the compute graph - it only affects the offsets of the K/V views.
     // we should only guarantee that the head position won't cause out-of-bounds view of the K, V tensors, so
     // setting it to 0 is the simplest way to achieve that
     // ref: https://github.com/ggml-org/llama.cpp/issues/13359
```
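To make the reasoning in the updated comment concrete, here is a small, self-contained C++ sketch of the idea. It is not llama.cpp/ggml code: `ToyKVCache`, `make_k_view`, and the offset arithmetic are simplified assumptions used only to illustrate that the view's shape depends on `n` alone, while `head` only shifts the view's offset, so pinning `head` to 0 when `n = size` keeps the view inside the backing buffer.

```cpp
// Toy model of the K-cache view logic described above.
// NOT llama.cpp code; names and layout are simplified assumptions.
#include <cassert>
#include <cstdio>

struct KView {
    int    n_embd;   // elements per cell (part of the shape)
    int    n;        // number of cells covered by the view (part of the shape)
    size_t offset;   // element offset into the backing buffer
};

struct ToyKVCache {
    int size;        // total number of cells in the cache
    int n_embd;      // embedding size per cell
    int head;        // first cell of the current slot
    int n;           // number of cells the compute graph will see

    // The view's shape depends only on n_embd and n; head only moves the offset.
    KView make_k_view() const {
        return { n_embd, n, (size_t) head * (size_t) n_embd };
    }

    // In bounds iff offset + n*n_embd <= size*n_embd, i.e. head + n <= size.
    bool view_in_bounds(const KView & v) const {
        return v.offset + (size_t) v.n * (size_t) v.n_embd
               <= (size_t) size * (size_t) n_embd;
    }

    // Simulate a full cache for worst-case graph measurement, mirroring set_full():
    // n = size gives the view its largest shape, head = 0 keeps it in bounds.
    void set_full() {
        n    = size;
        head = 0;
    }
};

int main() {
    ToyKVCache cache { /*size=*/8, /*n_embd=*/4, /*head=*/5, /*n=*/2 };

    cache.set_full();
    KView v = cache.make_k_view();

    // The view spans the whole cache with offset 0, so no out-of-bounds access.
    std::printf("view: n = %d, offset = %zu, in_bounds = %d\n",
                v.n, v.offset, (int) cache.view_in_bounds(v));
    assert(cache.view_in_bounds(v));

    // With head != 0 and n == size, the same-shaped view would run past the
    // end of the buffer, which is why set_full() sets head to 0.
    ToyKVCache bad = cache;
    bad.head = 3;
    assert(!bad.view_in_bounds(bad.make_k_view()));
    return 0;
}
```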