Skip to content

Commit f10cd73

Browse files
authored
Release 1.5.2
2 parents 3acdd12 + 3ba4e33 commit f10cd73

File tree

4 files changed

+38
-26
lines changed

4 files changed

+38
-26
lines changed

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,17 @@ cfncluster-node CHANGELOG
33

44
This file is used to list changes made in each version of the cfncluster-node package.
55

6+
1.5.2
7+
-----
8+
9+
Bug fixes/minor improvements:
10+
11+
- Fixed Slurm behavior to add CPU slots so multiple jobs can be scheduled on a single node; this also sets CPU as a consumable resource
12+
613
1.5.1
714
-----
815

916
Bug fixes/minor improvements:
1017

11-
- Fixed Torque behaviour when scaling up from an empty cluster
18+
- Fixed Torque behavior when scaling up from an empty cluster
1219
- Avoid Torque server restart when adding and removing compute nodes

requirements26.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
paramiko==2.3.1
1+
paramiko==2.3.2

setup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,18 @@
1919
def read(fname):
2020
return open(os.path.join(os.path.dirname(__file__), fname)).read()
2121

22-
console_scripts = ['sqswatcher = sqswatcher.sqswatcher:main',
22+
console_scripts = ['sqswatcher = sqswatcher.sqswatcher:main',
2323
'nodewatcher = nodewatcher.nodewatcher:main']
24-
version = "1.5.1"
24+
version = "1.5.2"
2525
requires = ['boto>=2.48.0', 'python-dateutil>=2.6.1']
2626

2727
if sys.version_info[:2] == (2, 6):
2828
# For python2.6 we have to require argparse since it
2929
# was not in stdlib until 2.7.
3030
requires.append('argparse>=1.4')
31-
requires.append('paramiko==2.3.1')
31+
requires.append('paramiko==2.3.2')
3232
else:
33-
requires.append('paramiko>=2.3.1')
33+
requires.append('paramiko>=2.3.2')
3434

3535
setup(
3636
name = "cfncluster-node",

sqswatcher/plugins/slurm.py

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -83,52 +83,57 @@ def __readNodeList():
8383
return nodes
8484

8585

86-
def __writeNodeList(node_list):
86+
def __writeNodeList(node_list, slots=0):
8787
_config = "/opt/slurm/etc/slurm.conf"
8888
fh, abs_path = mkstemp()
8989
with open(abs_path,'w') as new_file:
9090
with open(_config) as slurm_config:
9191
for line in slurm_config:
9292
if line.startswith('#PARTITION'):
93-
partition = line.split(':')[1].rstrip()
93+
# Involved slurm.conf section
94+
# #PARTITION:compute
95+
# NodeName=dummy-compute Procs=2048 State=UNKNOWN
96+
# NodeName=ip-172-31-6-43,ip-172-31-7-230 Procs=1 State=UNKNOWN
97+
# PartitionName=compute Nodes=dummy-compute,ip-172-31-6-43,ip-172-31-7-230 Default=YES MaxTime=INFINITE State=UP
98+
partition_name = line.split(':')[1].rstrip()
9499
new_file.write(line)
95-
dummy_node = slurm_config.next()
96-
new_file.write(dummy_node)
97-
node_names = slurm_config.next()
98-
partitions = slurm_config.next()
99-
items = node_names.split(' ')
100-
node_line = items[0].split('=')
101-
if len(node_list[partition]) > 0:
102-
new_file.write('NodeName=' + ','.join(node_list[partition]) + " " + ' '.join(items[1:]))
100+
dummy_node_line = slurm_config.next()
101+
new_file.write(dummy_node_line)
102+
node_names_line = slurm_config.next()
103+
partitions_line = slurm_config.next()
104+
node_names_line_items = node_names_line.split(' ')
105+
if slots == 0:
106+
slots = node_names_line_items[1].split('=')[1].strip()
107+
if len(node_list[partition_name]) > 0:
108+
new_file.write('NodeName=' + ','.join(node_list[partition_name]) + ' Procs=%s' % slots + ' ' + ' '.join(node_names_line_items[2:]))
103109
else:
104-
new_file.write("#NodeName= Procs=1 State=UNKNOWN\n")
105-
items = partitions.split(' ')
106-
node_line = items[1].split('=')
107-
new_file.write(items[0] + " " + node_line[0] + '=dummy-' + partition + ',' + ','.join(node_list[partition]) + " " + ' '.join(items[2:]))
110+
new_file.write('#NodeName= Procs=%s State=UNKNOWN\n' % slots)
111+
partitions_line_items = partitions_line.split(' ')
112+
new_file.write(partitions_line_items[0] + ' Nodes=dummy-' + partition_name + ',' + ','.join(node_list[partition_name]) + " " + ' '.join(partitions_line_items[2:]))
108113
else:
109114
new_file.write(line)
110115
os.close(fh)
111-
#Remove original file
116+
# Remove original file
112117
os.remove(_config)
113-
#Move new file
118+
# Move new file
114119
move(abs_path, _config)
115-
#Update permissions on new file
120+
# Update permissions on new file
116121
os.chmod(_config, 0744)
117122

118123

119124
def addHost(hostname, cluster_user, slots):
120-
log.info('Adding %s' % hostname)
125+
log.info('Adding %s with %s slots' % (hostname, slots))
121126

122127
# Get the current node list
123128
node_list = __readNodeList()
124129

125130
# Add new node
126131
node_list['compute'].append(hostname)
127-
__writeNodeList(node_list)
132+
__writeNodeList(node_list, slots)
128133

129134
# Restart slurmctl locally
130135
restartMasterNodeSlurm()
131-
136+
132137
# Restart slurmctl on host
133138
__restartSlurm(hostname, cluster_user)
134139

0 commit comments

Comments
 (0)