@@ -83,52 +83,57 @@ def __readNodeList():
83
83
return nodes
84
84
85
85
86
def __writeNodeList(node_list, slots=0):
    """Rewrite the node lists of every partition section in slurm.conf.

    Each partition section of the configuration file is expected to be four
    consecutive lines of the following form::

        #PARTITION:compute
        NodeName=dummy-compute Procs=2048 State=UNKNOWN
        NodeName=ip-172-31-6-43,ip-172-31-7-230 Procs=1 State=UNKNOWN
        PartitionName=compute Nodes=dummy-compute,ip-172-31-6-43,ip-172-31-7-230 Default=YES MaxTime=INFINITE State=UP

    The marker line and the dummy node line are copied unchanged; the real
    node line and the partition line are regenerated from ``node_list``.
    Every line outside a partition section is copied verbatim.

    :param node_list: dict mapping a partition name to the list of host
        names that belong to it. Every partition found in the file must be
        present as a key (a missing key raises ``KeyError``).
    :param slots: value written as ``Procs=`` on the regenerated node line.
        ``0`` (the default) keeps the value already present in the file for
        that partition.
    """
    _config = "/opt/slurm/etc/slurm.conf"

    # Build the replacement next to the real file so the final rename stays
    # on one filesystem and the config is never missing or half written.
    fh, abs_path = mkstemp(dir=os.path.dirname(_config))
    replaced = False
    try:
        with os.fdopen(fh, 'w') as new_file:
            with open(_config) as slurm_config:
                for line in slurm_config:
                    new_file.write(line)
                    if not line.startswith('#PARTITION'):
                        continue

                    partition_name = line.split(':')[1].rstrip()
                    nodes = node_list[partition_name]

                    # The dummy node line is passed through untouched.
                    new_file.write(next(slurm_config))
                    node_names_line_items = next(slurm_config).split(' ')
                    partitions_line_items = next(slurm_config).split(' ')

                    # Resolve Procs per partition. Rebinding ``slots`` here
                    # would leak the first partition's value into all the
                    # following ones.
                    procs = slots
                    if procs == 0:
                        procs = node_names_line_items[1].split('=')[1].strip()

                    if nodes:
                        new_file.write('NodeName=%s Procs=%s %s' % (
                            ','.join(nodes),
                            procs,
                            ' '.join(node_names_line_items[2:])))
                    else:
                        # Keep a commented placeholder so the section keeps
                        # its four-line layout and its Procs value.
                        new_file.write(
                            '#NodeName= Procs=%s State=UNKNOWN\n' % procs)

                    # Joining a single list avoids a trailing comma after
                    # the dummy node when the partition has no real nodes.
                    partition_nodes = ['dummy-' + partition_name] + list(nodes)
                    new_file.write('%s Nodes=%s %s' % (
                        partitions_line_items[0],
                        ','.join(partition_nodes),
                        ' '.join(partitions_line_items[2:])))

        # Set permissions before the file becomes visible under its real name
        os.chmod(abs_path, 0o744)
        # Atomically replace the original file (POSIX rename semantics)
        os.rename(abs_path, _config)
        replaced = True
    finally:
        # On any failure leave the original config intact and drop the
        # partially written temporary file.
        if not replaced and os.path.exists(abs_path):
            os.remove(abs_path)
117
122
118
123
119
124
def addHost (hostname , cluster_user , slots ):
120
- log .info ('Adding %s' % hostname )
125
+ log .info ('Adding %s with %s slots ' % ( hostname , slots ) )
121
126
122
127
# Get the current node list
123
128
node_list = __readNodeList ()
124
129
125
130
# Add new node
126
131
node_list ['compute' ].append (hostname )
127
- __writeNodeList (node_list )
132
+ __writeNodeList (node_list , slots )
128
133
129
134
# Restart slurmctl locally
130
135
restartMasterNodeSlurm ()
131
-
136
+
132
137
# Restart slurmctl on host
133
138
__restartSlurm (hostname , cluster_user )
134
139
0 commit comments