Skip to content

[RFC] Remove/hook unused metrics #2901 #5182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ test_results/*
/resources/linux
/resources/x86_64
/resources/aarch64
/src/check_metrics.py
8 changes: 0 additions & 8 deletions src/vmm/src/devices/virtio/net/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ pub struct NetDeviceMetrics {
pub rx_queue_event_count: SharedIncMetric,
/// Number of events associated with the rate limiter installed on the receiving path.
pub rx_event_rate_limiter_count: SharedIncMetric,
/// Number of RX partial writes to guest.
pub rx_partial_writes: SharedIncMetric,
/// Number of RX rate limiter throttling events.
pub rx_rate_limiter_throttled: SharedIncMetric,
/// Number of events received on the associated tap.
Expand Down Expand Up @@ -191,8 +189,6 @@ pub struct NetDeviceMetrics {
pub tx_count: SharedIncMetric,
/// Number of transmitted packets.
pub tx_packets_count: SharedIncMetric,
/// Number of TX partial reads from guest.
pub tx_partial_reads: SharedIncMetric,
/// Number of events associated with the transmitting queue.
pub tx_queue_event_count: SharedIncMetric,
/// Number of events associated with the rate limiter installed on the transmitting path.
Expand Down Expand Up @@ -233,8 +229,6 @@ impl NetDeviceMetrics {
.add(other.rx_queue_event_count.fetch_diff());
self.rx_event_rate_limiter_count
.add(other.rx_event_rate_limiter_count.fetch_diff());
self.rx_partial_writes
.add(other.rx_partial_writes.fetch_diff());
self.rx_rate_limiter_throttled
.add(other.rx_rate_limiter_throttled.fetch_diff());
self.rx_tap_event_count
Expand All @@ -256,8 +250,6 @@ impl NetDeviceMetrics {
self.tx_count.add(other.tx_count.fetch_diff());
self.tx_packets_count
.add(other.tx_packets_count.fetch_diff());
self.tx_partial_reads
.add(other.tx_partial_reads.fetch_diff());
self.tx_queue_event_count
.add(other.tx_queue_event_count.fetch_diff());
self.tx_rate_limiter_event_count
Expand Down
15 changes: 0 additions & 15 deletions src/vmm/src/logger/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,19 +354,13 @@ pub struct ApiServerMetrics {
pub process_startup_time_us: SharedStoreMetric,
/// Measures the cpu's startup time in microseconds.
pub process_startup_time_cpu_us: SharedStoreMetric,
/// Number of failures on API requests triggered by internal errors.
pub sync_response_fails: SharedIncMetric,
/// Number of timeouts during communication with the VMM.
pub sync_vmm_send_timeout_count: SharedIncMetric,
}
impl ApiServerMetrics {
/// Const default construction.
///
/// Initialises every field through its metric type's own `new()` constructor.
/// Because this is a `const fn`, it can be evaluated in constant contexts
/// (for example when initialising a `static`).
pub const fn new() -> Self {
Self {
process_startup_time_us: SharedStoreMetric::new(),
process_startup_time_cpu_us: SharedStoreMetric::new(),
sync_response_fails: SharedIncMetric::new(),
sync_vmm_send_timeout_count: SharedIncMetric::new(),
}
}
}
Expand Down Expand Up @@ -508,15 +502,12 @@ impl PatchRequestsMetrics {
pub struct DeprecatedApiMetrics {
/// Total number of calls to deprecated HTTP endpoints.
pub deprecated_http_api_calls: SharedIncMetric,
/// Total number of calls to deprecated CMD line parameters.
pub deprecated_cmd_line_api_calls: SharedIncMetric,
}
impl DeprecatedApiMetrics {
/// Const default construction.
///
/// Initialises both deprecated-API call counters through `SharedIncMetric::new()`.
/// Because this is a `const fn`, it can be evaluated in constant contexts
/// (for example when initialising a `static`).
pub const fn new() -> Self {
Self {
deprecated_http_api_calls: SharedIncMetric::new(),
deprecated_cmd_line_api_calls: SharedIncMetric::new(),
}
}
}
Expand All @@ -530,8 +521,6 @@ pub struct LoggerSystemMetrics {
pub metrics_fails: SharedIncMetric,
/// Number of misses on logging human readable content.
pub missed_log_count: SharedIncMetric,
/// Number of errors while trying to log human readable content.
pub log_fails: SharedIncMetric,
}
impl LoggerSystemMetrics {
/// Const default construction.
Expand All @@ -540,7 +529,6 @@ impl LoggerSystemMetrics {
missed_metrics_count: SharedIncMetric::new(),
metrics_fails: SharedIncMetric::new(),
missed_log_count: SharedIncMetric::new(),
log_fails: SharedIncMetric::new(),
}
}
}
Expand Down Expand Up @@ -811,16 +799,13 @@ impl VcpuMetrics {
/// Metrics specific to the machine manager as a whole.
///
/// The struct derives `Serialize`, so it can be emitted by a serde serializer.
#[derive(Debug, Default, Serialize)]
pub struct VmmMetrics {
/// Number of device-related events received for a VM.
pub device_events: SharedIncMetric,
/// Metric signaling that a panic has occurred.
pub panic_count: SharedStoreMetric,
}
impl VmmMetrics {
/// Const default construction.
pub const fn new() -> Self {
Self {
device_events: SharedIncMetric::new(),
panic_count: SharedStoreMetric::new(),
}
}
Expand Down
7 changes: 0 additions & 7 deletions tests/host_tools/fcmetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def validate_fc_metrics(metrics):
"event_fails",
"rx_queue_event_count",
"rx_event_rate_limiter_count",
"rx_partial_writes",
"rx_rate_limiter_throttled",
"rx_tap_event_count",
"rx_bytes_count",
Expand All @@ -118,7 +117,6 @@ def validate_fc_metrics(metrics):
"tx_fails",
"tx_count",
"tx_packets_count",
"tx_partial_reads",
"tx_queue_event_count",
"tx_rate_limiter_event_count",
"tx_rate_limiter_throttled",
Expand All @@ -131,8 +129,6 @@ def validate_fc_metrics(metrics):
"api_server": [
"process_startup_time_us",
"process_startup_time_cpu_us",
"sync_response_fails",
"sync_vmm_send_timeout_count",
],
"balloon": [
"activate_fails",
Expand All @@ -145,7 +141,6 @@ def validate_fc_metrics(metrics):
"block": block_metrics,
"deprecated_api": [
"deprecated_http_api_calls",
"deprecated_cmd_line_api_calls",
],
"get_api_requests": [
"instance_info_count",
Expand Down Expand Up @@ -177,7 +172,6 @@ def validate_fc_metrics(metrics):
"missed_metrics_count",
"metrics_fails",
"missed_log_count",
"log_fails",
],
"mmds": [
"rx_accepted",
Expand Down Expand Up @@ -241,7 +235,6 @@ def validate_fc_metrics(metrics):
{"exit_mmio_write_agg": latency_agg_metrics_fields},
],
"vmm": [
"device_events",
"panic_count",
],
"uart": [
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/functional/test_pause_resume.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def verify_net_emulation_paused(metrics):
"""Verify net emulation is paused based on provided metrics."""
net_metrics = metrics["net"]
assert net_metrics["rx_queue_event_count"] == 0
assert net_metrics["rx_partial_writes"] == 0
assert net_metrics["rx_tap_event_count"] == 0
assert net_metrics["rx_bytes_count"] == 0
assert net_metrics["rx_packets_count"] == 0
Expand Down
3 changes: 3 additions & 0 deletions tools/ab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@
{"fio_engine": "libaio", "metric": "clat_write"},
# boot time metrics
{"performance_test": "test_boottime", "metric": "resume_time"},
# block throughput on m8g
{"fio_engine": "libaio", "vcpus": 2, "instance": "m8g.metal-24xl"},
{"fio_engine": "libaio", "vcpus": 2, "instance": "m8g.metal-48xl"},
]


Expand Down