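Collect prompt_tokens_total and kv_cache_usage_perc in place of the request/GPU metrics; update plots and metrics_info.txt to match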

naveenmiriyaluredhat · naveenmiriyaluredhat · commit 86f45a85f782 · 2025-11-01T19:39:41.000Z
diff --git a/harness/harness_llama3.1_8b.py b/harness/harness_llama3.1_8b.py
@@ -391,12 +391,8 @@ def initialize_metrics(self):
                 metrics_to_collect=[
                     'vllm:num_requests_running',
                     'vllm:generation_tokens_total',
-                    'vllm:request_success_total',
-                    'vllm:request_failure_total',
-                    'vllm:request_latency',
-                    'vllm:gpu_utilization',
-                    'vllm:gpu_memory_used',
-                    'vllm:kv_cache_usage_ratio'
+                    'vllm:prompt_tokens_total',
+                    'vllm:kv_cache_usage_perc'
                 ],
                 collection_interval=self.metrics_interval,
                 timeout=30,
@@ -701,32 +697,32 @@ def _generate_metrics_visualizations(self):
             # Generate visualizations for metrics that are available
             visualization_configs = [
                 {
-                    'metric': 'vllm:gpu_utilization',
-                    'title': 'GPU Utilization Over Time',
-                    'filename': f'gpu_utilization_{timestamp}.png'
+                    'metric': 'vllm:generation_tokens_total',
+                    'title': 'Generation Tokens Total over Time',
+                    'filename': f'generation_tokens_total_{timestamp}.png'
                 },
                 {
                     'metric': 'vllm:num_requests_running',
                     'title': 'Running Requests Over Time',
                     'filename': f'requests_running_{timestamp}.png'
                 },
                 {
-                    'metric': 'vllm:request_latency',
-                    'title': 'Request Latency Over Time',
-                    'filename': f'request_latency_{timestamp}.png'
+                    'metric': 'vllm:prompt_tokens_total',
+                    'title': 'Prompt tokens total over time',
+                    'filename': f'prompt_tokens_total_{timestamp}.png'
                 },
                 {
-                    'metric': 'vllm:gpu_memory_used',
-                    'title': 'GPU Memory Usage Over Time',
-                    'filename': f'gpu_memory_{timestamp}.png'
+                    'metric': 'vllm:kv_cache_usage_perc',
+                    'title': 'KV cache usage percentage over time',
+                    'filename': f'kv_cache_usage_perc_{timestamp}.png'
                 }
             ]
             
             successful_viz = 0
             for viz in visualization_configs:
                 # Check if metric is available before trying to plot
                 if available_metrics and viz['metric'] not in available_metrics:
-                    self.logger.debug(f"Metric {viz['metric']} not available in metrics file, skipping")
+                    self.logger.info(f"Metric {viz['metric']} not available in metrics file, skipping")
                     continue
                 
                 try:
diff --git a/harness/metrics/metrics_info.txt b/harness/metrics/metrics_info.txt
@@ -32,7 +32,7 @@ vllm:request_itl:HISTOGRAM
 vllm:request_latency:HISTOGRAM
 
 # Cache Metrics
-vllm:kv_cache_usage_ratio:GAUGE
+vllm:kv_cache_usage_perc:GAUGE
 vllm:kv_cache_used:GAUGE
 vllm:kv_cache_total:GAUGE
 
@@ -45,5 +45,6 @@ vllm:cpu_utilization:GAUGE
 vllm:memory_used:GAUGE
 vllm:memory_total:GAUGE
 
+
 # Histogram Components (automatically detected by suffix)
 # _bucket, _count, _sum suffixes are handled as histogram components