#!/bin/sh
# Name: pestat
# ------------
# Torque resource manager utility script: Print a 1-line summary of jobs on each node.
# Usage: Run "pestat -h" for help information.
#
# Colors may be used in the output (also controlled by the PESTAT_COLOR environment variable).
# The printout at the end may be customized if needed.
#
# Netload information:
# --------------------
# The Torque pbs_mom records network load information as the sum of transmit+receive
# of all interfaces.
# The "netload" information is defined in the source file ./src/resmom/linux/mom_mach.c
# as the sum of bytes on all network interfaces since boot time, read from /proc/net/dev.
# The pestat command (from version 2.9) prints delta-netload information when run twice
# with some time interval in between. The file $NETLOADFILE stores recorded information.
#
# The baseline netload information may be generated from cron, say, every 10 minutes
# by this crontab entry:
#   */10 * * * * /usr/local/bin/pestat -C > /dev/null
# Otherwise the default netload file is the user-specific file netload.$USER.
# Subsequent pestat commands will use the baseline netload.

# Author: Ole.H.Nielsen@fysik.dtu.dk
# URL: ftp://ftp.fysik.dtu.dk/pub/Torque/pestat
VERSION="pestat version 2.13. Date: 6 February 2013"

# Locations of commands and directories
PBSNODES=/usr/local/bin/pbsnodes
PBSCONFIG=/usr/local/bin/pbs-config
QSTAT=/usr/local/bin/qstat
AWK=/bin/awk

# Node names have different lengths at different sites,
# so configure this printf string to accommodate your longest node name + 1 (>= 5 chars)
NODENAMEFORMAT="%-20s"

# The pestat status directory (must be a secure location for root)
PESTAT_LIBDIR=/var/lib/pestat
NETLOAD_CRON=$PESTAT_LIBDIR/netload.cron
# Only root creates the status directory
if [ ! -d "$PESTAT_LIBDIR" ] && [ "$USER" = "root" ]; then
  mkdir -v "$PESTAT_LIBDIR"
fi

# Minimum age of NETLOADFILE (seconds):
# If less than this value the netload information may not be reliable.
NETLOADFILE_MINAGE=10

# Command usage: print the option summary on stdout.
# NOTE(review): the synopsis line and the -f/-d/-c/-n descriptions were rebuilt from the
# getopts handling below; confirm the wording against the upstream pestat script.
usage() {
  cat <<EOF
Usage: pestat [-f] [-d] [-c|-n] [-u username | -g groupname] [-j jobs] [-C] [-h] [-V]
where:
  -f: List only nodes that are flagged by *
  -d: List also nodes that are down
  -c/-n: Color/no color output
  -u username: Print only user <username> (do not use with the -g flag)
  -g groupname: Print only users in group <groupname>
  -j jobs: List only nodes with at least <jobs> running jobs
  -C: Use with cron: Netload file will be saved as $NETLOAD_CRON
  -h: Print this help information
  -V: Version information
EOF
}

#
# Netload information from Torque pbs_mom will be printed
#
netloadprint=1
# Temporary file for Netload information:
if [ -s "$NETLOAD_CRON" ]; then
  # If NETLOADFILE has been generated by a cron-job as $NETLOAD_CRON we use this file
  NETLOADFILE=$NETLOAD_CRON
  NETLOADWRITE=0
else
  # Default: Per-user netload file
  NETLOADFILE=/var/tmp/netload.$USER
  NETLOADWRITE=1
fi

# Scaling of the network load:
# Default value:
NETLOADSCALE=1
# If you use Linux port bonding, each network byte is counted twice by Torque:
# bond0 plus ethX devices, therefore you need to scale down the netload by a factor of 2:
# NETLOADSCALE=2

# Netload threshold above which we flag this node:
# Netload > 2000 Mbit/s is flagged (we have dual-Gigabit Ethernet)
# Gigabit Ethernet full-duplex is 2*1000=2000 Mbit/s
NETLOADTHRES=2000

#
# Default parameter values
#

# Omit down nodes from the flagged list because we do not wish to see them
# (Use "pbsnodes -l" to list down nodes).
listdownnodes=0

# List only nodes with >= minjobs running jobs (default: minjobs=0)
# This is useful for selecting those nodes that run multiple jobs.
minjobs=0

# Selections are set only by the -u/-g flags; start empty so that variables of the
# same name exported in the environment cannot leak in.
username=
groupname=

# Age of the netload file in seconds (only meaningful when netloadprint=1)
netloadage=0

# Colored output by default
colors=1
# Check user environment variable PESTAT_COLOR for color
if [ "$PESTAT_COLOR" = "0" ]; then
  colors=0
fi
# Check if output is NOT a terminal: Turn colors off (can be overruled by "-c" flag).
FD=1 # File Descriptor no. 1 = stdout
if [ ! -t "$FD" ]; then
  colors=0
fi

#
# Process command arguments
#
listflagged=0
while getopts "fdcnVu:g:j:Ch" options; do
  case $options in
  f)
    listflagged=1
    echo "Listing only nodes that are flagged by *"
    ;;
  d)
    # Listing also down nodes
    listdownnodes=1
    ;;
  c)
    # Force the use of colors in output
    colors=1
    ;;
  n)
    # Do not use colors in output
    colors=0
    ;;
  u)
    username=$OPTARG
    echo "Select only user $username"
    ;;
  g)
    groupname=$OPTARG
    echo "Select only users in group $groupname"
    ;;
  j)
    minjobs=$OPTARG
    echo "List only nodes with at least $minjobs running jobs"
    ;;
  C)
    NETLOADFILE=$NETLOAD_CRON
    NETLOADWRITE=1
    ;;
  V)
    echo "$VERSION"
    exit 0
    ;;
  h | \?)
    usage
    exit 1
    ;;
  *)
    usage
    exit 1
    ;;
  esac
done

# Test for extraneous command line arguments
if [ $# -gt $((OPTIND - 1)) ]; then
  echo "ERROR: Too many command line arguments: $*"
  usage
  exit 1
fi

if [ -n "$username" ] && [ -n "$groupname" ]; then
  echo "ERROR: Do not select both username and groupname"
  usage
  exit 1
fi

if [ -s "$NETLOADFILE" ]; then
  # NETLOADFILE file age in seconds:
  filetime=$(stat -c "%Y" "$NETLOADFILE")
  now=$(date "+%s")
  netloadage=$((now - filetime))
  # Print age (strip the fractional seconds and time zone from the "stat" output)
  echo "Netload file $NETLOADFILE age: $netloadage seconds, dated $(stat -c "%y" "$NETLOADFILE" | "$AWK" -F. '{print $1}')"
  if [ "$netloadage" -lt "$NETLOADFILE_MINAGE" ]; then
    echo "Netload file age should be at least $NETLOADFILE_MINAGE seconds, please try again..."
    exit 0
  fi
else
  echo "Netload file $NETLOADFILE does not exist: Cannot print netload until next time."
  echo "You have to run this command again after some time in order to calculate current network load."
  netloadprint=0
fi

#
# Heading for printout showing:
#
# node:         Node hostname
# state:        Torque state
# load:         CPU load average
# pmem:         Physical memory
# ncpu:         Number of CPUs
# mem:          Physical+virtual memory
# resi:         Resident (used) memory
# usrs:         Number of sessions / Number of users
# Netload:      Number of network I/O bytes
# jobs:         Number of jobs
# jobids/users: Jobids and corresponding usernames of Torque jobs on this node

#
# Show the Torque node status and parse the results
#
"$PBSNODES" -a | "$AWK" \
  -v listflagged="$listflagged" -v listdownnodes="$listdownnodes" \
  -v colors="$colors" -v username="$username" -v groupname="$groupname" \
  -v minjobs="$minjobs" \
  -v NODENAMEFORMAT="$NODENAMEFORMAT" \
  -v QSTAT="$QSTAT" -v PBSCONFIG="$PBSCONFIG" \
  -v netloadprint="$netloadprint" -v NETLOADFILE="$NETLOADFILE" \
  -v NETLOADWRITE="$NETLOADWRITE" \
  -v netloadage="$netloadage" -v NETLOADTHRES="$NETLOADTHRES" \
  -v NETLOADSCALE="$NETLOADSCALE" '
BEGIN {
  # Define terminal colors for the output if requested
  if (colors != 0) {
    # See http://en.wikipedia.org/wiki/ANSI_escape_code#Colors
    RED = "\033[1;31m"
    GREEN = "\033[1;32m"
    NORMAL = "\033[0m"
  }

  # Conversion factor of netload (bytes) to Mbit/sec
  MBITSEC = 1000000 * NETLOADSCALE / 8

  # Get the Torque version (thanks to Stefan Becuwe)
  # since 2.5.x, output of array jobs in qstat has changed
  # <  2.5  jobid-arrayid ... (one line per task)
  # >= 2.5  jobid[]           (just one line)
  pbsmajor = 2
  pbsminor = 5
  PBSVERSION = PBSCONFIG " --version"
  while ((PBSVERSION | getline) > 0) {
    split($1, ver, ".")
  }
  close(PBSVERSION)
  # Compare major first, then minor, so that e.g. 3.0 and 4.2 count as >= 2.5
  if ((ver[1] + 0) > pbsmajor || ((ver[1] + 0) == pbsmajor && (ver[2] + 0) >= pbsminor)) {
    recentpbs = 1
  } else {
    recentpbs = 0
  }

  # Get the list of jobids versus usernames from qstat
  QSTAT = QSTAT " -r"   # Append -r flag (running jobs) to qstat.
  while ((QSTAT | getline) > 0) {   # Parse lines from qstat -r
    if (++line > 5) {               # Skip first 5 header lines
      split($1, b, ".")             # Jobid is b[1]
      jobuser[b[1]] = $2            # Username of this jobid
    }
  }
  close(QSTAT)

  if (netloadprint == 1) {
    # Read previously recorded netload file (format: nodename network-bytes list-of-jobids)
    while ((getline < NETLOADFILE) > 0) {
      oldnetload[$1] = $2
      for (i = 3; i <= NF; i++) oldjobidlist[$1] = oldjobidlist[$1] " " $i
    }
    if (NETLOADWRITE == 1) {
      # Truncate NETLOADFILE
      close(NETLOADFILE)
      print "" > NETLOADFILE
    }
  }

  if (username != "") {
    userselect = 1   # We select only this username
    userfound = 0
    # Scan the passwd lines for the requested username.
    # The "> 0" test ends the loop on a getline error (-1) as well as on end of input.
    while (("getent passwd" | getline) > 0) {
      split($0, b, ":")   # Split password line into fields
      if (username == b[1]) userfound = 1
    }
    close("getent passwd")
    if (userfound != 1) {
      print RED "ERROR: No such username:" NORMAL, username
      exit 1
    }
  } else if (groupname != "") {
    groupselect = 1   # We have to select users in this groupname
    groupfound = 0
    # Get the list of group names
    while (("getent group" | getline) > 0) {
      split($0, b, ":")    # Split group line into fields
      group[b[3]] = b[1]   # Group name b[1] of this GID (b[3])
      if (groupname == b[1]) groupfound = 1
    }
    close("getent group")
    if (groupfound != 1) {
      print RED "ERROR: No such groupname:" NORMAL, groupname
      exit 1
    }
    # Map each username to the name of its primary group
    while (("getent passwd" | getline) > 0) {
      split($0, b, ":")             # Split password line into fields
      gidname[b[1]] = group[b[4]]   # Group name of this GID (numeric group id)
    }
    close("getent passwd")
  }

  # Print a header line
  printf(NODENAMEFORMAT, "Node")
  print "state load pmem ncpu mem resi usrs tasks NetMbit jobids/users"
}
#
# Parse the output of pbsnodes
#
NF==1 {
  node = $1               # 1st line is nodename
  nodename[node] = node   # Node name
  getline                 # Get the next input line
  numjobs[node] = 0       # Torque jobs on the node
  numtasks[node] = 0      # Number of tasks started by Torque on the node
  listnode = 0            # Set to > 0 if this node gets flagged
  userusesnode = 0        # If this node is used by the selected user
  groupusesnode = 0       # If this node is used by a user in the selected group
  while (NF >= 3) {       # Read a number of non-blank lines
    if ($1 == "state") {
      if ($3 == "job-exclusive") state[node] = "excl"
      else if ($3 == "job-exclusive,busy") state[node] = "busy"
      else if ($3 == "busy") state[node] = "busy"
      else if ($3 == "free") state[node] = "free"
      else if ($3 == "offline") state[node] = "offl"
      else if ($3 == "offline,job-exclusive") state[node] = "offl"
      else if ($3 == "offline,job-exclusive,busy") state[node] = "offl"
      else if ($3 == "offline,busy") state[node] = "offl"
      else if ($3 == "down") state[node] = "down"
      else if ($3 == "down,offline") state[node] = "down"
      else if ($3 == "down,job-exclusive") state[node] = "down"
      else if ($3 == "down,offline,job-exclusive") state[node] = "down"
      else if ($3 == "down,offline,busy") state[node] = "down"
      else if ($3 == "down,offline,job-exclusive,busy") state[node] = "down"
      else if ($3 == "UNKN") state[node] = "UNKN"
    }
    else if ($1 == "np") np[node] = $3
    else if ($1 == "properties") properties[node] = $3
    else if ($1 == "ntype") ntype[node] = $3
    else if ($1 == "jobs") numtasks[node] = NF - 2
    else if ($1 == "status") {
      # Get the node status subfields
      split(substr($0, 15), a, ",")   # Remove leading "status =", split subfields separated by ","
      for (field in a) {              # Process individual status subfields
        split(a[field], b, "=")       # Split var=value fields
        if (b[1] == "arch") arch[node] = b[2]
        else if (b[1] == "opsys") opsys[node] = b[2]
        else if (b[1] == "sessions") sessions[node] = b[2]
        else if (b[1] == "nsessions") nsessions[node] = int(b[2])
        else if (b[1] == "nusers") nusers[node] = b[2]
        else if (b[1] == "idletime") idletime[node] = b[2]
        else if (b[1] == "totmem") totmem[node] = b[2]
        else if (b[1] == "availmem") availmem[node] = b[2]
        else if (b[1] == "physmem") physmem[node] = b[2]
        else if (b[1] == "ncpus") ncpus[node] = b[2]
        else if (b[1] == "loadave") loadave[node] = b[2]
        else if (b[1] == "netload") netload[node] = b[2]
        else if (b[1] == "size") size[node] = b[2]
        else if (b[1] == "jobs") {
          # Get the list of jobids/users for this node
          if (b[2] == "? 0") b[2] = ""   # Fix for a bug in pbsnodes ?
          numjobs[node] = split(b[2], c)
          for (i = 1; i <= numjobs[node]; i++) {
            split(c[i], d, ".")   # Get jobid and username
            jobid = d[1]
            tjobid = jobid
            if (recentpbs == 1) {
              # For Torque version >= 2.5
              sub(/\[[0-9]+\]/, "[]", tjobid)
            }
            user = jobuser[tjobid]
            # Case where the node pbs_mom has a (dead job) jobid unknown to pbs_server:
            if (length(user) == 0) {
              # Flag non-existent username
              user = "NONE*"
              usercolor = RED
              listnode++
            } else
              usercolor = NORMAL
            # Append jobid and username to the job list
            jobiduserlist[node] = jobiduserlist[node] " " usercolor jobid " " user NORMAL
            jobidlist[node] = jobidlist[node] " " jobid
            # If this node is used by the selected user
            if (userselect == 1 && user == username) userusesnode = 1
            # If this node is used by a user in the selected group
            if (groupselect == 1 && gidname[user] == groupname) groupusesnode = 1
          }
        }
        else if (b[1] == "rectime") rectime[node] = b[2]
      }
    }
    # Get the next input line. At end of input getline leaves NF unchanged,
    # so stop here instead of looping forever on the last record.
    if ((getline) <= 0) break
  }

  if (NETLOADWRITE == 1) {
    # Save netload information to file (format: nodename network-bytes list-of-jobids)
    print nodename[node], netload[node], jobidlist[node] >> NETLOADFILE
  }
  if (netloadprint == 1 && oldnetload[node] > 0) {
    netloadflag = " "
    # Calculate delta-netload
    netload[node] = netload[node] - oldnetload[node]
    if (netload[node] < 0) {
      netload[node] = -1   # Negative values are bad (could be due to recent node reboot)
      netloadflag = "*"
    }
    netmbit = netload[node] / (netloadage * MBITSEC)   # Convert netload to Mbit/sec
    if (netmbit > NETLOADTHRES) {
      netmbitcolor = RED   # Netload > NETLOADTHRES is flagged
      listnode++
    } else {
      netmbitcolor = NORMAL
    }
    if (jobidlist[node] != oldjobidlist[node]) {
      netloadflag = "!"   # List of jobids has changed: flag this
      # print "Old: ", oldjobidlist[node], "Current: ", jobidlist[node]
    }
  } else {
    netload[node] = 0   # No data available
    netloadflag = "*"
    # Reset the printed value and color so that this node does not show the
    # numbers left over from the previously processed node
    netmbit = 0
    netmbitcolor = NORMAL
  }

  # Is this node used by the selected user? Otherwise skip printout.
  if (userselect == 1 && userusesnode == 0) next
  # Is this node used by a user in the selected group? Otherwise skip printout.
  if (groupselect == 1 && groupusesnode == 0) next
  # If this node runs less than minjobs jobs
  if (numjobs[node] < minjobs) next

  # Print out values that we are interested in. Flag unexpected values with a "*".

  # Flag nodes with status busy, down, offline or unknown
  if (state[node] == "busy" || state[node] == "down" || state[node] == "offl" || state[node] == "UNKN") {
    stateflag = "*"
    statecolor = RED
    listnode++
  } else {
    stateflag = " "
    statecolor = NORMAL
  }

  # Flag unexpected CPU load average
  loaddiff = loadave[node] - numtasks[node]
  if (loaddiff > 0.5 || loaddiff < -0.5) {
    loadflag = "*"
    loadcolor = RED
    cpucolor = GREEN
    listnode++
  } else {
    loadflag = " "
    loadcolor = NORMAL
    cpucolor = NORMAL
  }

  # Remove "kb" unit from memory sizes
  sub("kb", "", totmem[node])
  sub("kb", "", availmem[node])
  sub("kb", "", physmem[node])

  # Resident memory
  resi = (totmem[node] - availmem[node]) / 1024
  if (resi > 50 && resi > physmem[node] / 1024 - 50) {
    # High memory usage
    resiflag = "*"
    resicolor = RED
    pmemcolor = GREEN
    listnode++
  } else {
    resiflag = " "
    resicolor = NORMAL
    pmemcolor = NORMAL
  }

  # Flag unexpected number of processes or users
  if (nsessions[node] > 2 * ncpus[node] + 1) {
    # More than 2 sessions per job
    sessflag = "*"
    sesscolor = RED
    listnode++
  } else if (nusers[node] > ncpus[node]) {
    # More users than nCPUs is bad
    sessflag = "*"
    sesscolor = RED
    listnode++
  } else {
    sessflag = " "
    sesscolor = NORMAL
  }

  # Flag unexpected number of jobs
  if (numjobs[node] > numtasks[node]) {
    # Should be at least 1 task per job
    jobflag = "*"
    jobcolor = RED
    listnode++
  } else {
    jobflag = " "
    jobcolor = NORMAL
  }

  # Listing of down nodes?
  if (listdownnodes == 0 && state[node] == "down") listnode = 0

  # Print a 1-line list for this node
  if (!listflagged || listnode > 0) {
    printf (NODENAMEFORMAT, node)
    printf (" %s%s%1s%s", statecolor, state[node], stateflag, NORMAL)
    printf (" %s%4.2g%1s%s", loadcolor, loadave[node], loadflag, NORMAL)
    printf (" %s%6d%s", pmemcolor, physmem[node]/1024, NORMAL)
    printf (" %s%3d%s", cpucolor, ncpus[node], NORMAL)
    printf (" %6d", totmem[node]/1024)
    printf (" %s%6d%1s%s", resicolor, resi, resiflag, NORMAL)
    printf (" %s%1d/%1d%1s%s", sesscolor, nsessions[node], nusers[node], sessflag, NORMAL)
    printf (" %s%3d%1s%s", jobcolor, numtasks[node], jobflag, NORMAL)
    if (netloadprint == 1) {
      printf (" %s%5d%1s%s", netmbitcolor, netmbit, netloadflag, NORMAL)
    } else
      printf (" - ")
    printf (" %s\n", jobiduserlist[node])
  }
}'