Skip to content

Commit 12847a1

Browse files
committed
Add tests for fleet manager program
Signed-off-by: Luca Carrogu <[email protected]>
1 parent 869efd5 commit 12847a1

File tree

7 files changed

+182
-2
lines changed

7 files changed

+182
-2
lines changed

src/slurm_plugin/fleet_status_manager.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,13 @@ def _get_config(self, config_file_path):
6060
self.region = config.get("slurm_fleet_status_manager", "region")
6161
self.cluster_name = config.get("slurm_fleet_status_manager", "cluster_name")
6262
self.terminate_max_batch_size = config.getint(
63-
"slurm_fleet_status_manager", "terminate_max_batch_size", fallback=self.DEFAULTS.get("terminate_max_batch_size")
63+
"slurm_fleet_status_manager",
64+
"terminate_max_batch_size",
65+
fallback=self.DEFAULTS.get("terminate_max_batch_size"),
66+
)
67+
self._boto3_retry = config.getint(
68+
"slurm_fleet_status_manager", "boto3_retry", fallback=self.DEFAULTS.get("max_retry")
6469
)
65-
self._boto3_retry = config.getint("slurm_fleet_status_manager", "boto3_retry", fallback=self.DEFAULTS.get("max_retry"))
6670
self._boto3_config = {"retries": {"max_attempts": self._boto3_retry, "mode": "standard"}}
6771
proxy = config.get("slurm_fleet_status_manager", "proxy", fallback=self.DEFAULTS.get("proxy"))
6872
if proxy != "NONE":
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with
4+
# the License. A copy of the License is located at
5+
#
6+
# http://aws.amazon.com/apache2.0/
7+
#
8+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
9+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
13+
import os
14+
from types import SimpleNamespace
15+
16+
import botocore
17+
import pytest
18+
import slurm_plugin
19+
from assertpy import assert_that
20+
from slurm_plugin.clustermgtd import ComputeFleetStatus
21+
from slurm_plugin.fleet_status_manager import (
22+
SlurmFleetManagerConfig,
23+
_get_computefleet_status,
24+
_manage_fleet_status_transition,
25+
_start_partitions,
26+
_stop_partitions,
27+
)
28+
from slurm_plugin.slurm_resources import PartitionStatus
29+
30+
31+
@pytest.fixture()
def boto3_stubber_path(monkeypatch):
    """Return the import path of the boto3 module that the tests in this file stub.

    The Boto3ClientFactory requires a region to be set in the environment. The variable is set through
    ``monkeypatch`` so that it is restored when the test ends instead of leaking into the tests that run afterwards.
    """
    monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-2")
    return "slurm_plugin.instance_manager.boto3"
36+
37+
38+
@pytest.mark.parametrize(
    ("config_file", "expected_attributes"),
    [
        (
            "default.conf",
            {
                "cluster_name": "test",
                "region": "us-east-2",
                "terminate_max_batch_size": 1000,
                "_boto3_config": {"retries": {"max_attempts": 5, "mode": "standard"}},
                "logging_config": os.path.join(
                    os.path.dirname(slurm_plugin.__file__),
                    "logging",
                    "parallelcluster_fleet_status_manager_logging.conf",
                ),
            },
        ),
        (
            "all_options.conf",
            {
                "cluster_name": "test_again",
                "region": "us-east-1",
                "terminate_max_batch_size": 50,
                "_boto3_config": {
                    "retries": {"max_attempts": 10, "mode": "standard"},
                    "proxies": {"https": "my.resume.proxy"},
                },
                "logging_config": "/path/to/fleet_status_manager_logging/config",
            },
        ),
    ],
)
def test_fleet_status_manager_config(config_file, expected_attributes, test_datadir):
    """Check that SlurmFleetManagerConfig exposes the expected attribute values for each config file.

    :param config_file: name of the configuration file, resolved against test_datadir
    :param expected_attributes: mapping of attribute name to the value the loaded config must hold
    :param test_datadir: directory containing the data files for this test
    """
    fleet_manager_config = SlurmFleetManagerConfig(test_datadir / config_file)
    for attribute, expected_value in expected_attributes.items():
        # getattr fails loudly when the attribute is missing instead of silently comparing against None
        assert_that(getattr(fleet_manager_config, attribute)).is_equal_to(expected_value)
74+
75+
76+
@pytest.mark.parametrize(
    ("computefleet_status_data_path", "status", "action"),
    [
        ("path_to_file_1", ComputeFleetStatus.STOPPED, None),
        ("path_to_file_2", ComputeFleetStatus.RUNNING, None),
        ("path_to_file_3", ComputeFleetStatus.STOPPING, None),
        ("path_to_file_4", ComputeFleetStatus.STARTING, None),
        ("path_to_file_5", ComputeFleetStatus.STOP_REQUESTED, "stop"),
        ("path_to_file_6", ComputeFleetStatus.START_REQUESTED, "start"),
        ("path_to_file_7", ComputeFleetStatus.PROTECTED, None),
    ],
)
def test_fleet_status_manager(mocker, test_datadir, computefleet_status_data_path, status, action):
    """Verify which partition action, if any, is triggered for each compute fleet status.

    :param computefleet_status_data_path: path handed to the (mocked) status reader
    :param status: status returned by the mocked status reader
    :param action: "start", "stop" or None, i.e. the partition action expected for the status
    """
    # Arrange: the status reader and both partition actions are replaced by mocks
    fleet_config = SimpleNamespace(some_key_1="some_value_1", some_key_2="some_value_2")
    status_reader = mocker.patch(
        "slurm_plugin.fleet_status_manager._get_computefleet_status",
        return_value=status,
    )
    stop_action = mocker.patch("slurm_plugin.fleet_status_manager._stop_partitions")
    start_action = mocker.patch("slurm_plugin.fleet_status_manager._start_partitions")

    # Act
    _manage_fleet_status_transition(fleet_config, computefleet_status_data_path)

    # Assert: the status is read exactly once and only the expected action is invoked
    status_reader.assert_called_once_with(computefleet_status_data_path)

    if action == "start":
        start_action.assert_called_once()
    else:
        start_action.assert_not_called()

    if action == "stop":
        stop_action.assert_called_once_with(fleet_config)
    else:
        stop_action.assert_not_called()
110+
111+
112+
@pytest.mark.parametrize(
    ("config_file", "expected_status"),
    [
        ("correct_status.json", ComputeFleetStatus.RUNNING),
        ("no_status.json", Exception),
        ("malformed_status.json", Exception),
        ("wrong_status.json", Exception),
        (None, Exception),
    ],
)
def test_get_computefleet_status(test_datadir, config_file, expected_status):
    """Check the status read from a status data file, or that an unusable file makes the read fail.

    :param test_datadir: directory containing the data files for this test
    :param config_file: name of the status data file, or None to pass no path at all
    :param expected_status: the expected ComputeFleetStatus, or Exception when the read must fail
    """
    # The path is built outside of pytest.raises so that only an error raised by _get_computefleet_status itself
    # satisfies the expectation; `test_datadir / None` would raise a TypeError before the function is even called.
    status_data_path = None if config_file is None else test_datadir / config_file

    if expected_status is Exception:
        with pytest.raises(Exception):
            _get_computefleet_status(status_data_path)
    else:
        status = _get_computefleet_status(status_data_path)
        assert_that(status).is_equal_to(expected_status)
129+
130+
131+
def test_start_partitions(mocker):
    """Check that starting the fleet sets all partitions to UP and resumes the powering down nodes."""
    # mocks
    resume_nodes = mocker.patch("slurm_plugin.fleet_status_manager.resume_powering_down_nodes")
    update_partitions = mocker.patch("slurm_plugin.fleet_status_manager.update_all_partitions")

    # method to test
    _start_partitions()

    # assertions
    resume_nodes.assert_called_once()
    update_partitions.assert_called_once_with(PartitionStatus.UP, reset_node_addrs_hostname=False)
139+
140+
141+
def test_stop_partitions(mocker):
    """Check that stopping the fleet sets all partitions to INACTIVE and terminates all compute nodes."""
    # mocks
    config = SimpleNamespace(
        # the real configuration parses this option with getint, so an integer is used here as well
        terminate_max_batch_size=3,
        region="us-east-1",
        cluster_name="test",
        boto3_config=botocore.config.Config(),
    )
    update_all_partitions_mocked = mocker.patch("slurm_plugin.fleet_status_manager.update_all_partitions")

    # `autospec` (not `auto_spec`, which would only set a stray attribute on a plain MagicMock) makes the mock
    # enforce the signature of the real method.
    terminate_all_compute_nodes_mocked = mocker.patch.object(
        slurm_plugin.instance_manager.InstanceManager, "terminate_all_compute_nodes", autospec=True
    )

    # method to test
    _stop_partitions(config)

    # assertions
    update_all_partitions_mocked.assert_called_once_with(PartitionStatus.INACTIVE, reset_node_addrs_hostname=True)
    # an autospecced method patched on the class records the instance as first argument, hence mocker.ANY
    terminate_all_compute_nodes_mocked.assert_called_once_with(mocker.ANY, config.terminate_max_batch_size)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[slurm_fleet_status_manager]
2+
cluster_name = test_again
3+
region = us-east-1
4+
proxy = my.resume.proxy
5+
boto3_retry = 10
6+
terminate_max_batch_size = 50
7+
logging_config = /path/to/fleet_status_manager_logging/config
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"status": "RUNNING",
3+
"lastStatusUpdatedTime": "2022-01-26T11:08:18.000Z"
4+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
RUNNING
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"lastStatusUpdatedTime": "2022-01-26T11:08:18.000Z"
3+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"status": "NO_EXIST",
3+
"lastStatusUpdatedTime": "2022-01-26T11:08:18.000Z"
4+
}

0 commit comments

Comments
 (0)