Skip to content

fix: Add support to customize ignorable statuses for maintenance windows #4817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {
NumberInput,
Select,
SelectItem,
MultiSelect,
MultiSelectItem,
} from "@tremor/react";
import { FormEvent, useEffect, useState } from "react";
import { toast } from "react-toastify";
Expand All @@ -20,12 +22,19 @@ import "react-datepicker/dist/react-datepicker.css";
import { useRouter } from "next/navigation";
import { useApi } from "@/shared/lib/hooks/useApi";
import { showErrorToast } from "@/shared/ui";
import { Status } from "@/entities/alerts/model";
import { capitalize } from "@/utils/helpers";

/** Props accepted by the CreateOrUpdateMaintenanceRule form component. */
interface Props {
// Rule currently being edited; `null` means the form is not in edit mode.
maintenanceToEdit: MaintenanceRule | null;
// Receives a rule or `null` — presumably sets/clears the rule under edit in the parent; confirm against the caller.
editCallback: (rule: MaintenanceRule | null) => void;
}

// Initial selection of the "ignore statuses" multi-select for a new rule.
// NOTE(review): presumably meant to mirror the backend's default ignore list — keep the two in sync.
const DEFAULT_IGNORE_STATUSES = [
"resolved",
"acknowledged",
]

export default function CreateOrUpdateMaintenanceRule({
maintenanceToEdit,
editCallback,
Expand All @@ -40,6 +49,7 @@ export default function CreateOrUpdateMaintenanceRule({
const [intervalType, setIntervalType] = useState<string>("minutes");
const [enabled, setEnabled] = useState<boolean>(true);
const [suppress, setSuppress] = useState<boolean>(false);
const [ignoreStatuses, setIgnoreStatuses] = useState<string[]>(DEFAULT_IGNORE_STATUSES);
const editMode = maintenanceToEdit !== null;
const router = useRouter();
useEffect(() => {
Expand All @@ -50,6 +60,7 @@ export default function CreateOrUpdateMaintenanceRule({
setStartTime(new Date(maintenanceToEdit.start_time));
setSuppress(maintenanceToEdit.suppress);
setEnabled(maintenanceToEdit.enabled);
setIgnoreStatuses(maintenanceToEdit.ignore_statuses);
if (maintenanceToEdit.duration_seconds) {
setEndInterval(maintenanceToEdit.duration_seconds / 60);
}
Expand All @@ -64,6 +75,7 @@ export default function CreateOrUpdateMaintenanceRule({
setEndInterval(5);
setSuppress(false);
setEnabled(true);
setIgnoreStatuses([]);
router.replace("/maintenance");
};

Expand Down Expand Up @@ -99,6 +111,7 @@ export default function CreateOrUpdateMaintenanceRule({
duration_seconds: calculateDurationInSeconds(),
suppress: suppress,
enabled: enabled,
ignore_statuses: ignoreStatuses,
});
clearForm();
mutate();
Expand All @@ -123,6 +136,7 @@ export default function CreateOrUpdateMaintenanceRule({
duration_seconds: calculateDurationInSeconds(),
suppress: suppress,
enabled: enabled,
ignore_statuses: ignoreStatuses,
});
exitEditMode();
mutate();
Expand Down Expand Up @@ -178,6 +192,14 @@ export default function CreateOrUpdateMaintenanceRule({
showSqlImport={false}
/>
</div>

<div className="mt-2.5">
<MultiSelect value={ignoreStatuses} onValueChange={setIgnoreStatuses}>
{Object.values(Status).map((value) => {
return <MultiSelectItem key={value} value={value}>{capitalize(value)}</MultiSelectItem>
})}
</MultiSelect>
</div>
<div className="mt-2.5">
<Text>
Start At<span className="text-red-500 text-xs">*</span>
Expand Down
2 changes: 2 additions & 0 deletions keep-ui/app/(keep)/maintenance/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export interface MaintenanceRule {
updated_at?: Date;
suppress: boolean;
enabled: boolean;
ignore_statuses: string[];
}

export interface MaintenanceRuleCreate {
Expand All @@ -20,4 +21,5 @@ export interface MaintenanceRuleCreate {
end_time?: Date;
duration_seconds?: number;
enabled: boolean;
ignore_statuses: string[];
}
19 changes: 7 additions & 12 deletions keep/api/bl/maintenance_windows_bl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@

class MaintenanceWindowsBl:

ALERT_STATUSES_TO_IGNORE = [
AlertStatus.RESOLVED.value,
AlertStatus.ACKNOWLEDGED.value,
]

def __init__(self, tenant_id: str, session: Session | None) -> None:
self.logger = logging.getLogger(__name__)
self.tenant_id = tenant_id
Expand All @@ -42,17 +37,17 @@
)
return False

if alert.status in self.ALERT_STATUSES_TO_IGNORE:
self.logger.debug(
"Alert status is set to be ignored, ignoring maintenance windows",
extra={"tenant_id": self.tenant_id},
)
return False

self.logger.info("Checking maintenance window for alert", extra=extra)
env = celpy.Environment()

for maintenance_rule in self.maintenance_rules:
if alert.status in maintenance_rule.ignore_statuses:
self.logger.debug(

Check warning on line 45 in keep/api/bl/maintenance_windows_bl.py

View check run for this annotation

Codecov / codecov/patch

keep/api/bl/maintenance_windows_bl.py#L44-L45

Added lines #L44 - L45 were not covered by tests
"Alert status is set to be ignored, ignoring maintenance windows",
extra={"tenant_id": self.tenant_id},
)
continue

Check warning on line 49 in keep/api/bl/maintenance_windows_bl.py

View check run for this annotation

Codecov / codecov/patch

keep/api/bl/maintenance_windows_bl.py#L49

Added line #L49 was not covered by tests

if maintenance_rule.end_time <= datetime.datetime.now():
# This should never happen: the query in __init__ is expected to exclude expired rules.
self.logger.error(
Expand Down
11 changes: 10 additions & 1 deletion keep/api/models/db/maintenance_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@
from typing import Optional

from pydantic import BaseModel
from sqlalchemy import DateTime
from sqlalchemy import DateTime, JSON

# third-parties
from sqlmodel import Column, Field, Index, SQLModel, func

from keep.api.models.alert import AlertStatus

# Alert statuses a maintenance rule ignores when the caller does not supply its
# own `ignore_statuses`; used as the field default on the API models in this module.
DEFAULT_ALERT_STATUSES_TO_IGNORE = [
AlertStatus.RESOLVED.value,
AlertStatus.ACKNOWLEDGED.value,
]

class MaintenanceWindowRule(SQLModel, table=True):
id: Optional[int] = Field(default=None, primary_key=True)
Expand All @@ -29,6 +35,7 @@ class MaintenanceWindowRule(SQLModel, table=True):
)
suppress: bool = False
enabled: bool = True
ignore_statuses: list = Field(sa_column=Column(JSON), default_factory=list)

__table_args__ = (
Index("ix_maintenance_rule_tenant_id", "tenant_id"),
Expand All @@ -44,6 +51,7 @@ class MaintenanceRuleCreate(BaseModel):
duration_seconds: Optional[int] = None
suppress: bool = False
enabled: bool = True
ignore_statuses: list[str] = DEFAULT_ALERT_STATUSES_TO_IGNORE


class MaintenanceRuleRead(BaseModel):
Expand All @@ -58,3 +66,4 @@ class MaintenanceRuleRead(BaseModel):
updated_at: Optional[datetime]
suppress: bool = False
enabled: bool = True
ignore_statuses: list[str] = DEFAULT_ALERT_STATUSES_TO_IGNORE
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Add ignore_statuses to MaintenanceWindowRule

Revision ID: aa167915c4d6
Revises: bedb5f07417b
Create Date: 2025-05-16 14:33:29.828572

"""

import sqlalchemy as sa
from alembic import op
from sqlmodel import Session

from keep.api.models.db.maintenance_window import DEFAULT_ALERT_STATUSES_TO_IGNORE

# revision identifiers, used by Alembic.
revision = "aa167915c4d6"
down_revision = "bedb5f07417b"
branch_labels = None
depends_on = None

# Standalone, minimal description of the table for this migration: it declares
# only the columns that the backfill in populate_db() reads or writes.
migration_metadata = sa.MetaData()

mwr_table = sa.Table(
'maintenancewindowrule',
migration_metadata,
sa.Column('id', sa.Integer, primary_key=True),
sa.Column('ignore_statuses', sa.JSON)
)

def populate_db():
    """Backfill ``ignore_statuses`` on every existing maintenance window rule.

    Each row is set to ``DEFAULT_ALERT_STATUSES_TO_IGNORE`` so that rules
    created before this migration start out with the default ignore list.
    """
    # Issue the UPDATE through Alembic's own migration connection instead of
    # an ad-hoc Session that is never committed or closed; the statement then
    # takes part in the transaction Alembic manages for this migration.
    op.execute(
        sa.update(mwr_table).values(
            ignore_statuses=DEFAULT_ALERT_STATUSES_TO_IGNORE
        )
    )


def upgrade() -> None:
    """Add the nullable ``ignore_statuses`` JSON column and backfill it."""
    ignore_statuses_column = sa.Column("ignore_statuses", sa.JSON(), nullable=True)

    with op.batch_alter_table("maintenancewindowrule", schema=None) as batch_op:
        batch_op.add_column(ignore_statuses_column)

    # Backfill only after the batch block has been left.
    populate_db()


def downgrade() -> None:
    """Revert the migration by dropping the ``ignore_statuses`` column."""
    batch_context = op.batch_alter_table("maintenancewindowrule", schema=None)
    with batch_context as batch_op:
        batch_op.drop_column("ignore_statuses")
38 changes: 38 additions & 0 deletions tests/test_maintenance_windows_bl.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@ def mock_session():
return MagicMock()


@pytest.fixture
def active_maintenance_window_rule_custom_ignore():
    """Return an enabled, currently-open rule that ignores only FIRING alerts."""
    # The window opened an hour ago and stays open for another day.
    window_start = datetime.utcnow() - timedelta(hours=1)
    window_end = datetime.utcnow() + timedelta(days=1)

    rule_fields = dict(
        id=1,
        name="Active maintenance_window",
        tenant_id="test-tenant",
        cel_query='source == "test-source"',
        start_time=window_start,
        end_time=window_end,
        enabled=True,
        ignore_statuses=[AlertStatus.FIRING.value],
    )
    return MaintenanceWindowRule(**rule_fields)


@pytest.fixture
def active_maintenance_window_rule():
return MaintenanceWindowRule(
Expand All @@ -23,6 +37,7 @@ def active_maintenance_window_rule():
start_time=datetime.utcnow() - timedelta(hours=1),
end_time=datetime.utcnow() + timedelta(days=1),
enabled=True,
ignore_statuses=[AlertStatus.RESOLVED.value, AlertStatus.ACKNOWLEDGED.value],
)


Expand Down Expand Up @@ -198,3 +213,26 @@ def test_alert_with_missing_cel_field(mock_session, active_maintenance_window_ru

# Should return False because the field doesn't exist
assert result is False


def test_alert_not_ignored_due_to_custom_status(
    mock_session, active_maintenance_window_rule_custom_ignore, alert_dto
):
    """A rule's own ``ignore_statuses`` decides which alert statuses it skips."""
    # The mocked query chain yields a single rule that ignores only FIRING.
    filtered_query = (
        mock_session.query.return_value
        .filter.return_value
        .filter.return_value
        .filter.return_value
    )
    filtered_query.all.return_value = [active_maintenance_window_rule_custom_ignore]

    bl = MaintenanceWindowsBl(tenant_id="test-tenant", session=mock_session)

    # FIRING is in the rule's ignore list, so the alert is not in maintenance.
    alert_dto.status = AlertStatus.FIRING.value
    firing_result = bl.check_if_alert_in_maintenance_windows(alert_dto)
    assert firing_result is False

    # RESOLVED is not in the ignore list, so the rule is expected to apply.
    alert_dto.status = AlertStatus.RESOLVED.value
    resolved_result = bl.check_if_alert_in_maintenance_windows(alert_dto)
    assert resolved_result is True