Skip to content

Commit 79a74e8

Browse files
Added prefix stats for capture and visualization
1 parent 462cd1c commit 79a74e8

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

harness/harness/base_harness.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,9 @@ def initialize_metrics(self):
515515
'vllm:generation_tokens_total',
516516
'vllm:prompt_tokens_total',
517517
'vllm:kv_cache_usage_perc',
518-
'vllm:time_to_first_token_seconds'
518+
'vllm:time_to_first_token_seconds',
519+
'vllm:prefix_cache_queries_total',
520+
'vllm:prefix_cache_hits_total'
519521
],
520522
collection_interval=self.metrics_interval,
521523
timeout=30,
@@ -677,6 +679,16 @@ def _generate_metrics_visualizations(self):
677679
'metric': 'vllm:kv_cache_usage_perc',
678680
'title': 'KV cache usage percentage over time',
679681
'filename': f'kv_cache_usage_perc_{timestamp}.png'
682+
},
683+
{
684+
'metric': 'vllm:prefix_cache_queries_total',
685+
'title': 'Prefix cache queries total over time',
686+
'filename': f'prefix_cache_queries_total_{timestamp}.png'
687+
},
688+
{
689+
'metric': 'vllm:prefix_cache_hits_total',
690+
'title': 'Prefix cache hits total over time',
691+
'filename': f'prefix_cache_hits_total_{timestamp}.png'
680692
}
681693
]
682694

harness/metrics/metrics_info.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ vllm:generation_tokens_total:COUNTER
1515
vllm:request_input_tokens:GAUGE
1616
vllm:request_output_tokens:COUNTER
1717

18+
# Prefix Cache Metrics
19+
vllm:prefix_cache_queries_total:COUNTER
20+
vllm:prefix_cache_hits_total:COUNTER
21+
1822
# Success/Failure Metrics
1923
vllm:request_success_total:COUNTER
2024
vllm:request_failure_total:COUNTER

0 commit comments

Comments
 (0)