2
2
# This file is part of pycloudlib. See LICENSE file for license information.
3
3
"""Basic examples of various lifecycle with an OCI instance."""
4
4
5
- from datetime import datetime
6
- import json
7
5
import logging
8
- import sys
6
+ import threading
9
7
import time
8
+ from datetime import datetime
9
+ from typing import Generator
10
10
11
11
import pytest
12
12
13
13
import pycloudlib
14
14
from pycloudlib .oci .instance import OciInstance
15
- from typing import Generator
16
- import logging
17
- import threading
18
15
19
16
logger = logging .getLogger (__name__ )
20
17
21
- EXISTING_INSTANCE_IDS = [
18
+ EXISTING_INSTANCE_IDS : list [ str ] = [
22
19
# add the OCIDs of the instances you want to use for testing here
23
20
]
24
21
22
+
25
23
# change this to either "class" or "module" as you see fit
26
24
@pytest .fixture (scope = "module" )
27
25
def cluster () -> Generator [list [OciInstance ], None , None ]:
28
26
"""
29
- Launch a cluster of BM instances and yield them.
30
- """
27
+ Launch a cluster of BM instances.
31
28
29
+ Yields:
30
+ list[OciInstance]: The created or retrieved cluster instances.
31
+ """
32
32
with pycloudlib .OCI (
33
33
"pycl-oracle-cluster-test" ,
34
34
# use the already created "mofed-vcn" for cluster testing
35
- vcn_name = "mofed-vcn" # THIS WILL OVERRIDE THE VCN_NAME IN THE CONFIG FILE
35
+ vcn_name = "mofed-vcn" , # THIS WILL OVERRIDE THE VCN_NAME IN THE CONFIG FILE
36
36
) as client :
37
37
if EXISTING_INSTANCE_IDS :
38
38
instances = [client .get_instance (instance_id ) for instance_id in EXISTING_INSTANCE_IDS ]
@@ -42,33 +42,49 @@ def cluster() -> Generator[list[OciInstance], None, None]:
42
42
# so once this function returns, the instances are ready
43
43
instances = client .create_compute_cluster (
44
44
# if you create a custom image, specify its OCID here
45
- image_id = client .released_image ("noble" ),
45
+ image_id = client .released_image ("noble" ),
46
46
instance_count = 2 ,
47
47
)
48
48
yield instances
49
49
50
50
51
51
class TestOracleClusterBasic:
    """Connectivity checks that only need the plain cluster fixture."""

    def test_basic_ping_on_private_ips(self, cluster: list[OciInstance]):  # pylint: disable=W0621
        """
        Ping every other cluster member from each instance over its private IP.

        Each (source, target) pair is attempted once; the test fails on the
        first ping command whose result is not ``ok``.

        Args:
            cluster (list[OciInstance]): Instances in the cluster.
        """
        # gather all addresses first so every source iterates the same target list
        addresses = [member.private_ip for member in cluster]

        for source in cluster:
            for target_ip in addresses:
                # never ping an instance from itself
                if target_ip == source.private_ip:
                    continue
                logger.info("Pinging %s from %s", target_ip, source.private_ip)
                # -c 1 sends a single echo request, -W 5 gives up after 5 seconds
                result = source.execute(f"ping -c 1 -W 5 {target_ip}")
                assert result.ok, f"Failed to ping {target_ip} from {source.private_ip}"
                logger.info("Successfully pinged %s from %s", target_ip, source.private_ip)
67
72
68
73
69
74
def setup_mofed_iptables_rules (instance : OciInstance ):
75
+ """
76
+ Set up IPTABLES rules for RDMA usage.
77
+
78
+ Args:
79
+ instance (OciInstance): Target instance to configure.
80
+
81
+ Returns:
82
+ OciInstance: The same instance after configuration.
83
+ """
70
84
# Update the cloud.cfg file to set preserve_hostname to true
71
- instance .execute ("sed -i 's/preserve_hostname: false/preserve_hostname: true/' /etc/cloud/cloud.cfg" )
85
+ instance .execute (
86
+ "sed -i 's/preserve_hostname: false/preserve_hostname: true/' /etc/cloud/cloud.cfg"
87
+ )
72
88
# Backup the existing iptables rules
73
89
backup_file = f"/etc/iptables/rules.v4.bak.{ datetime .now ().strftime ('%F-%T' )} "
74
90
instance .execute (f"cp -v /etc/iptables/rules.v4 { backup_file } " )
@@ -101,23 +117,31 @@ def setup_mofed_iptables_rules(instance: OciInstance):
101
117
102
118
103
119
def ensure_image_is_rdma_ready(instance: OciInstance):
    """
    Skip the calling test when the instance's image is not RDMA ready.

    Runs ``ibstatus`` on the instance. If the command produces no stdout or
    its result is not ``ok``, the captured stdout/stderr are logged and
    ``pytest.skip`` is invoked; otherwise the function returns normally.

    Args:
        instance (OciInstance): The instance to verify.
    """
    r = instance.execute("ibstatus")
    if r.stdout and r.ok:
        return

    # Pass stdout/stderr as lazy logging arguments instead of concatenating
    # them: the rendered message is the same, but a non-string value can no
    # longer raise a TypeError before the skip is reached.
    logger.info("Infiniband status: %s\n%s", r.stdout, r.stderr)
    pytest.skip("The image being used is not RDMA ready")
108
130
109
131
110
132
class TestOracleClusterRdma :
133
+ """Test RDMA functionalities of Oracle Cluster."""
134
+
111
135
@pytest .fixture (scope = "class" )
112
- def mofed_cluster (self , cluster : list [OciInstance ]) -> Generator [list [OciInstance ], None , None ]:
136
+ def mofed_cluster (
137
+ self ,
138
+ cluster : list [OciInstance ], # pylint: disable=W0621
139
+ ) -> Generator [list [OciInstance ], None , None ]:
113
140
"""
114
- Custom fixture to configure the instances in the cluster for RDMA testing.
115
-
116
- This fixture will:
117
- - Ensure the image being used is RDMA ready
118
- - Create a secondary VNIC on the private subnet for each instance in the cluster
119
- - Configure the secondary VNIC for RDMA usage
120
- - Set up the necessary iptables rules for RDMA usage on each instance's secondary NIC
141
+ Configure cluster for RDMA testing.
142
+
143
+ Yields:
144
+ list[OciInstance]: RDMA-ready cluster instances.
121
145
"""
122
146
ensure_image_is_rdma_ready (cluster [0 ])
123
147
for instance in cluster :
@@ -130,38 +154,54 @@ def mofed_cluster(self, cluster: list[OciInstance]) -> Generator[list[OciInstanc
130
154
# create a secondary VNIC on the 2nd vnic on the private subnet for RDMA usage
131
155
instance .add_network_interface (
132
156
nic_index = 1 ,
133
- subnet_name = "private subnet-mofed-vcn" , # use the private subnet for mofed testing
157
+ subnet_name = "private subnet-mofed-vcn" , # use the private subnet for mofed testing
134
158
)
135
159
instance .configure_secondary_vnic ()
136
160
setup_mofed_iptables_rules (instance )
137
-
161
+
138
162
yield cluster
139
-
140
- def test_basic_ping_on_new_rdma_ips (
141
- self ,
142
- mofed_cluster : list [OciInstance ],
143
- ):
144
- # get the private ips of the instances
163
+
164
def test_basic_ping_on_new_rdma_ips(self, mofed_cluster: list[OciInstance]):
    """
    Ping every other instance over its secondary VNIC private IP.

    Each (source, target) pair is attempted once; the test fails on the
    first ping command whose result is not ``ok``.

    Args:
        mofed_cluster (list[OciInstance]): RDMA-enabled cluster instances.
    """
    # addresses of the secondary VNICs, collected once for all sources
    secondary_ips = [member.secondary_vnic_private_ip for member in mofed_cluster]

    for source in mofed_cluster:
        for target_ip in secondary_ips:
            # never ping an instance from itself
            if target_ip == source.secondary_vnic_private_ip:
                continue
            logger.info(
                "Pinging %s from %s",
                target_ip,
                source.secondary_vnic_private_ip,
            )
            # -c 1 sends a single echo request, -W 5 gives up after 5 seconds
            result = source.execute(f"ping -c 1 -W 5 {target_ip}")
            assert (
                result.ok
            ), f"Failed to ping {target_ip} from {source.secondary_vnic_private_ip}"
            logger.info(
                "Successfully pinged %s from %s",
                target_ip,
                source.secondary_vnic_private_ip,
            )
192
+
193
+ def test_rping (self , mofed_cluster : list [OciInstance ]):
194
+ """
195
+ Test rping between two instances.
196
+
197
+ Args:
198
+ mofed_cluster (list[OciInstance]): RDMA-enabled cluster instances
199
+ """
160
200
server_instance = mofed_cluster [0 ]
161
201
client_instance = mofed_cluster [1 ]
162
202
163
203
def start_server ():
164
- # start the rping server on the first instance
204
+ """Start the rping server on the "server_instance"."""
165
205
server_instance .execute (f"rping -s -a { server_instance .secondary_vnic_private_ip } -v &" )
166
206
167
207
server_thread = threading .Thread (target = start_server )
@@ -170,74 +210,88 @@ def start_server():
170
210
# Wait for rping server to start
171
211
time .sleep (5 )
172
212
# start the rping client on the second instance (only send 10 packets so it doesn't hang)
173
- r = client_instance .execute (f"rping -c -a { server_instance .secondary_vnic_private_ip } -C 10 -v" )
213
+ r = client_instance .execute (
214
+ f"rping -c -a { server_instance .secondary_vnic_private_ip } -C 10 -v"
215
+ )
174
216
logger .info ("rping output: %s" , r .stdout )
175
217
assert r .ok , "Failed to run rping"
176
218
177
- def test_ucmatose (
178
- self ,
179
- mofed_cluster : list [OciInstance ],
180
- ):
219
def test_ucmatose(self, mofed_cluster: list[OciInstance]):
    """
    Run a ucmatose server on the first instance and a client on the second.

    The test passes when the client command's result is ``ok``.

    Args:
        mofed_cluster (list[OciInstance]): RDMA-enabled cluster instances
    """
    server_node, client_node = mofed_cluster[0], mofed_cluster[1]

    # issue the server command from its own thread so this test can carry on
    # to the client side without waiting for that call to return
    listener = threading.Thread(target=server_node.execute, args=("ucmatose &",))
    listener.start()

    # fixed pause to give the server time to come up
    time.sleep(5)

    # point the client at the server's secondary VNIC address
    result = client_node.execute(f"ucmatose -s {server_node.secondary_vnic_private_ip}")
    logger.info("ucmatose output: %s", result.stdout)
    assert result.ok, "Failed to run ucmatose"
197
242
198
- def test_ucx_perftest_lat_one_node (
199
- self ,
200
- mofed_cluster : list [OciInstance ],
201
- ):
243
def test_ucx_perftest_lat_one_node(self, mofed_cluster: list[OciInstance]):
    """
    Run the ucx_perftest ``tag_lat`` test with server and client on one node.

    The test passes when the client command's result is ``ok``.

    Args:
        mofed_cluster (list[OciInstance]): RDMA-enabled cluster instances
    """
    # ucx_perftest only works within a single instance on all MOFED stacks right now, so
    # using index 0 for both roles is intentional. (Will adjust if Oracle provides config
    # info to resolve this)
    server_node = client_node = mofed_cluster[0]

    # issue the server command from its own thread so this test can carry on
    # to the client side without waiting for that call to return
    listener = threading.Thread(target=server_node.execute, args=("ucx_perftest -c 0 &",))
    listener.start()

    # fixed pause to give the server time to come up
    time.sleep(5)

    # run the client against the server's secondary VNIC address
    client_cmd = f"ucx_perftest {server_node.secondary_vnic_private_ip} -t tag_lat -c 1"
    result = client_node.execute(client_cmd)
    logger.info("ucx_perftest output: %s", result.stdout)
    assert result.ok, "Failed to run ucx_perftest"
220
270
271
def test_ucx_perftest_bw_one_node(self, mofed_cluster: list[OciInstance]):
    """
    Run the ucx_perftest ``tag_bw`` test with server and client on one node.

    The test passes when the client command's result is ``ok``.

    Args:
        mofed_cluster (list[OciInstance]): RDMA-enabled cluster instances
    """
    # ucx_perftest only works within a single instance on all MOFED stacks right now, so
    # using index 0 for both roles is intentional. (Will adjust if Oracle provides config
    # info to resolve this)
    server_node = client_node = mofed_cluster[0]

    # issue the server command from its own thread so this test can carry on
    # to the client side without waiting for that call to return
    listener = threading.Thread(target=server_node.execute, args=("ucx_perftest -c 0 &",))
    listener.start()

    # fixed pause to give the server time to come up
    time.sleep(5)

    # run the client against the server's secondary VNIC address
    client_cmd = f"ucx_perftest {server_node.secondary_vnic_private_ip} -t tag_bw -c 1"
    result = client_node.execute(client_cmd)
    logger.info("ucx_perftest output: %s", result.stdout)
    assert result.ok, "Failed to run ucx_perftest"
0 commit comments