#!/usr/bin/python
#
# (c) Copyright 2013 Cray Inc.  All Rights Reserved.
#
# slurmconfgen.py
#
# A script to generate a slurm configuration file automatically. Should be
# run from a service node on the system to be configured. Reads a template
# file from the Slurm configuration directory (/etc/opt/slurm) and
# writes the filled-in template to stdout.
#
# The code below is written to run unchanged on Python 2.7 and Python 3:
# integer arithmetic uses '//' and print is called with a single argument.

import subprocess
import os
import shutil
import sys
import datetime
import tempfile
import stat
import re
import time
import socket


#######################################
# sdb_query
#######################################
def sdb_query(query):
    """ Query the SDB by piping an SQL statement to isql.

    query -- SQL text written to isql's standard input.

    Returns everything isql wrote to standard output; fields are space
    separated (isql is started with -x0x20).

    Raises Exception carrying isql's stderr text if isql wrote anything
    to standard error. """
    # Set correct path for isql
    os.environ['ODBCSYSINI'] = '/etc/opt/cray/sysadm/'

    # Call isql. universal_newlines=True puts the pipes in text mode so
    # that a str query is accepted (and a str is returned) on Python 3 too.
    isql = subprocess.Popen(["isql", "XTAdmin", "-b", "-v", "-x0x20"],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)

    # Execute the query
    out, err = isql.communicate(query)
    if err:
        raise Exception(err)

    return out


#######################################
# get_nodes
#######################################
def get_nodes():
    """ Get the compute nodes from the SDB.

    Returns a list of tuples of ints with entries for
    nodeid, memory, cores, sockets, compute units, gpus, mics. """
    # Query the SDB for the info
    out = sdb_query(
        "SELECT nodeid,availmem,numcores,sockets,cu, "
        "IF(type='GPU',avail,0) AS gpu, "
        "IF(type='MIC',avail,0) AS mic "
        "FROM attributes LEFT JOIN processor ON nodeid=processor_id "
        "LEFT JOIN gpus ON nodeid=node_id "
        "WHERE processor_type='compute' ORDER BY nodeid;")

    # Now out should contain all the compute node information
    nodes = []
    for line in out.splitlines():
        fields = line.split()
        # A blank line would otherwise yield an empty tuple, which cannot
        # be unpacked into the seven node fields later on.
        if fields:
            nodes.append(tuple([int(x) for x in fields]))

    return nodes


#######################################
# split_nodes
#######################################
# Test data from opal-p2:
# [(24,32768,40,2,20,0,0), (25,32768,40,2,20,0,0), (26,32768,40,2,20,0,0),
#  (27,32768,40,2,20,0,0), (32,32768,16,1,8,1,0), (33,32768,16,1,8,1,0),
#  (34,32768,16,1,8,1,0), (35,32768,16,1,8,1,0), (48,65536,32,2,16,0,0),
#  (49,65536,32,2,16,0,0), (50,65536,32,2,16,0,0), (51,65536,32,2,16,0,0)]
def split_nodes(nodelist):
    """ Group nodes that are identical in everything but their node id.

    nodelist -- list of node tuples as returned by get_nodes.

    Returns a tuple (class_reps, class_nodes): class_reps holds the first
    node tuple seen for each equivalence class and class_nodes[i] is the
    list of node ids belonging to class_reps[i]. Classes appear in order
    of first occurrence. """
    class_reps = []
    class_nodes = []
    class_index = {}    # node attributes (all but the id) -> class position

    for node in nodelist:
        key = tuple(node[1:])
        idx = class_index.get(key)
        if idx is None:
            # We didn't find a matching equivalence class, make a new one
            class_index[key] = len(class_reps)
            class_reps.append(node)
            class_nodes.append([node[0]])
        else:
            # It matches, add to the nodes for this class
            class_nodes[idx].append(node[0])

    return class_reps, class_nodes


######################################
# range_str
######################################
def range_str(range_start, range_end, field_width):
    """ Returns a string representation of the given inclusive range,
    zero-padding each number to the given field width.

    A single value gives "N", two adjacent values give "N,M" and anything
    longer gives "N-M".

    Raises Exception if range_end is before range_start. """
    if range_end < range_start:
        raise Exception('Range end before range start')
    elif range_start == range_end:
        return "{0:0{1}d}".format(range_end, field_width)
    elif range_start + 1 == range_end:
        return "{0:0{2}d},{1:0{2}d}".format(range_start, range_end,
                                            field_width)

    return "{0:0{2}d}-{1:0{2}d}".format(range_start, range_end,
                                        field_width)


######################################
# rli_compress
######################################
def rli_compress(nidlist):
    """ Given a list of node ids, rli compress them into a slurm hostlist
    (ex. list [1,2,3,5] becomes string nid0000[1-3,5])

    Raises ValueError if nidlist is empty and Exception if the highest
    nid has more than five digits. """
    if not nidlist:
        raise ValueError('Cannot compress an empty nid list')

    # Determine number of digits in the highest nid number
    numdigits = len(str(max(nidlist)))
    if numdigits > 5:
        raise Exception('Nid number too high')

    ranges = []
    range_start = nidlist[0]
    range_end = nidlist[0]
    for nid in nidlist:
        # If nid does not extend the current run (a gap, or a step
        # backwards), close the run and start a fresh one at nid
        if nid > range_end + 1 or nid < range_end:
            ranges.append(range_str(range_start, range_end, numdigits))
            range_start = nid

        range_end = nid

    # Append the last range
    ranges.append(range_str(range_start, range_end, numdigits))

    # The fixed prefix pads with zeros so that prefix plus the bracketed
    # digits always make up five digits
    return "nid" + ('0' * (5 - numdigits)) + '[' + ",".join(ranges) + ']'


#######################################
# scale_mem
#######################################
def scale_mem(mem):
    """ Since the SDB memory value differs from /proc/meminfo on the
    compute nodes, we must scale all memory values.

    Returns 98% of mem, rounded down to an integer. """
    return mem * 98 // 100


#######################################
# format_nodes
#######################################
def format_nodes(class_reps, class_nodes):
    """ Given a list of class representatives and lists of nodes in those
    classes, formats a string in slurm.conf format, one line per class
    (ex. NodeName=nid000[24-27] Sockets=2 CoresPerSocket=10
    ThreadsPerCore=2 Gres=craynetwork:4 # RealMemory=32768)

    class_reps  -- representative node tuples as returned by split_nodes
    class_nodes -- per-class node id lists as returned by split_nodes """
    lines = []
    for rep, nids in zip(class_reps, class_nodes):
        nodeid, memory, cores, sockets, cu, gpu, mic = rep

        # KNC nodes only have half network resources
        if mic > 0:
            gres = "craynetwork:2,mic:{0}".format(mic)
        else:
            gres = "craynetwork:4"

        if gpu > 0:
            gres += ",gpu:{0}".format(gpu)

        # Floor division keeps the per-socket and per-core counts integral
        lines.append(
            "NodeName={0} Sockets={1} CoresPerSocket={2} "
            "ThreadsPerCore={3} Gres={5} # RealMemory={4} \n".format(
                rli_compress(nids), sockets, cu // sockets, cores // cu,
                memory, gres))

    return "".join(lines)


#######################################
# get_gres_types
#######################################
def get_gres_types(class_reps):
    """ Searches the class representatives for different gres
    (gpu and mic) and returns them in a comma-separated string.

    "craynetwork" is always listed first; an empty class_reps yields
    just "craynetwork". """
    gres = "craynetwork"
    if any(rep[5] > 0 for rep in class_reps):
        gres += ",gpu"
    if any(rep[6] > 0 for rep in class_reps):
        gres += ",mic"
    return gres


#######################################
# get_mem_per_cpu
#######################################
def get_mem_per_cpu(nodes):
    """ Given a list of nodes formatted as in get_nodes, determines the
    default memory per cpu (smallest memory // cores over all nodes) and
    the max memory per cpu (largest node memory).

    Both values are passed through scale_mem and returned as a tuple
    (defmem, maxmem). An empty list gives (0, 0). """
    defmem = 0
    maxmem = 0
    for node in nodes:
        if node[1] > maxmem:
            maxmem = node[1]

        mem_per_thread = node[1] // node[2]
        if defmem == 0 or mem_per_thread < defmem:
            defmem = mem_per_thread

    return (scale_mem(defmem), scale_mem(maxmem))


#######################################
# get_cluster_name
#######################################
def get_cluster_name():
    """ Gets the cluster name from the first line of /etc/xthostname """
    with open("/etc/xthostname", "r") as xthostname:
        return xthostname.readline().rstrip()


#######################################
# get_slurm_conf
#######################################
def get_slurm_conf(infile, replace):
    """ Reads from infile, replaces following the given dictionary,
    and returns the result as a string.

    infile  -- path of the template file
    replace -- dictionary mapping placeholder text to replacement text """
    with open(infile, "r") as template:
        text = template.read()

    # Using replace is less elegant than using string.format,
    # but avoids KeyErrors if the user removes keys from
    # the template file
    for placeholder, value in replace.items():
        text = text.replace(placeholder, value)

    return text


#######################################
# main
#######################################
def main():
    """ Queries the SDB, fills in the slurm.conf template and prints the
    result to stdout. """
    # Some constant file names
    sysconfdir = "@sysconfdir@"
    slurmconf = "/slurm.conf"
    slurmconf_template = slurmconf + ".template"

    # Get nodes using isql
    nodes = get_nodes()

    # Split them into equivalence classes
    class_reps, class_nodes = split_nodes(nodes)

    # Determine the min and max memory per cpu
    defmem, maxmem = get_mem_per_cpu(nodes)

    # Create the replacement dictionary
    replace = {
        '{sysconfdir}': sysconfdir,
        '{defmem}': str(defmem),
        '{maxmem}': str(maxmem),
        '{clustername}': get_cluster_name(),
        '{computenodes}': format_nodes(class_reps, class_nodes),
        '{nodelist}': rli_compress([node[0] for node in nodes]),
        '{grestypes}': get_gres_types(class_reps),
    }

    # Read and format the template
    text = get_slurm_conf(sysconfdir + slurmconf_template, replace)

    # Just print to stdout
    print(text)


if __name__ == "__main__":
    main()