Skip to content

Commit 86f45a8

Browse files
Adding a few more stats
1 parent cd3ae2c commit 86f45a8

File tree

2 files changed

+14
-17
lines changed

2 files changed

+14
-17
lines changed

harness/harness_llama3.1_8b.py

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -391,12 +391,8 @@ def initialize_metrics(self):
391391
metrics_to_collect=[
392392
'vllm:num_requests_running',
393393
'vllm:generation_tokens_total',
394-
'vllm:request_success_total',
395-
'vllm:request_failure_total',
396-
'vllm:request_latency',
397-
'vllm:gpu_utilization',
398-
'vllm:gpu_memory_used',
399-
'vllm:kv_cache_usage_ratio'
394+
'vllm:prompt_tokens_total',
395+
'vllm:kv_cache_usage_perc'
400396
],
401397
collection_interval=self.metrics_interval,
402398
timeout=30,
@@ -701,32 +697,32 @@ def _generate_metrics_visualizations(self):
701697
# Generate visualizations for metrics that are available
702698
visualization_configs = [
703699
{
704-
'metric': 'vllm:gpu_utilization',
705-
'title': 'GPU Utilization Over Time',
706-
'filename': f'gpu_utilization_{timestamp}.png'
700+
'metric': 'vllm:generation_tokens_total',
701+
'title': 'Generation Tokens Total over Time',
702+
'filename': f'generation_tokens_total_{timestamp}.png'
707703
},
708704
{
709705
'metric': 'vllm:num_requests_running',
710706
'title': 'Running Requests Over Time',
711707
'filename': f'requests_running_{timestamp}.png'
712708
},
713709
{
714-
'metric': 'vllm:request_latency',
715-
'title': 'Request Latency Over Time',
716-
'filename': f'request_latency_{timestamp}.png'
710+
'metric': 'vllm:prompt_tokens_total',
711+
'title': 'Prompt tokens total over time',
712+
'filename': f'prompt_tokens_total_{timestamp}.png'
717713
},
718714
{
719-
'metric': 'vllm:gpu_memory_used',
720-
'title': 'GPU Memory Usage Over Time',
721-
'filename': f'gpu_memory_{timestamp}.png'
715+
'metric': 'vllm:kv_cache_usage_perc',
716+
'title': 'KV cache usage percentage over time',
717+
'filename': f'kv_cache_usage_perc_{timestamp}.png'
722718
}
723719
]
724720

725721
successful_viz = 0
726722
for viz in visualization_configs:
727723
# Check if metric is available before trying to plot
728724
if available_metrics and viz['metric'] not in available_metrics:
729-
self.logger.debug(f"Metric {viz['metric']} not available in metrics file, skipping")
725+
self.logger.info(f"Metric {viz['metric']} not available in metrics file, skipping")
730726
continue
731727

732728
try:

harness/metrics/metrics_info.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ vllm:request_itl:HISTOGRAM
3232
vllm:request_latency:HISTOGRAM
3333

3434
# Cache Metrics
35-
vllm:kv_cache_usage_ratio:GAUGE
35+
vllm:kv_cache_usage_perc:GAUGE
3636
vllm:kv_cache_used:GAUGE
3737
vllm:kv_cache_total:GAUGE
3838

@@ -45,5 +45,6 @@ vllm:cpu_utilization:GAUGE
4545
vllm:memory_used:GAUGE
4646
vllm:memory_total:GAUGE
4747

48+
4849
# Histogram Components (automatically detected by suffix)
4950
# _bucket, _count, _sum suffixes are handled as histogram components

0 commit comments

Comments
 (0)