#!/usr/bin/env expect
############################################################################
# Purpose: Establish global state information for Slurm test suite
#
# To define site-specific state information, set the values in a file
# named 'globals.local'. Those values will override any specified here.
# for example:
#
# $ cat globals.local
# set slurm_dir  "/usr/local"
# set build_dir  "/home/mine/SLURM/build_smd"
# set src_dir    "/home/mine/SLURM/slurm.git"
# set mpicc      "/usr/local/bin/mpicc"
#
# If you want to have more than one test going at the same time for multiple
# installs you can have multiple globals.local files and set the
# SLURM_LOCAL_GLOBALS_FILE env var, and have that set to the correct
# globals.local file for your various installs.  The file can be named anything,
# not just globals.local.
#
############################################################################
# Copyright (C) 2002-2007 The Regents of the University of California.
# Copyright (C) 2008-2010 Lawrence Livermore National Security.
# Portions Copyright (C) 2010-2018 SchedMD LLC.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# Additions by Joseph Donaghy <donaghy1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the supplied file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################

# Avoid sourcing this file multiple times
if {[info procs exit] eq "exit"} {
	return
}

#
# Include private functions and variables
#
source ./globals_private

global sacctmgr sacct salloc sattach sbatch sbcast scancel scontrol sinfo
global smd squeue sreport srun sstat strigger

################################################################
#
# NAME
#	cset - conditional set
#
# SYNOPSIS
#	cset name value
#
# DESCRIPTION
#	Conditional set.  Only set variable if variable does not yet exist.
#
# Input: name  -- name of the variable to set
#	 value -- value to set to 'name'
#
################################################################

proc cset {name value} {
	if {![uplevel 1 info exists $name]} {
		upvar $name tmp
		set tmp $value
	}
}


#
# Defining colors here to be able to use them in globals.local.
# By default, these colors are bold
#
set COLOR_RED         "\033\[1;31m"
set COLOR_RED_NORMAL  "\033\[31m"
set COLOR_ORANGE      "\033\[1;38;5;208m"
set COLOR_YELLOW      "\033\[1;33m"
set COLOR_GREEN       "\033\[1;32m"
set COLOR_BLUE        "\033\[1;34m"
set COLOR_MAGENTA     "\033\[1;35m"
set COLOR_CYAN        "\033\[1;36m"
set COLOR_NONE        "\033\[0m"

cset local_globals_file "./globals.local"

# Log level "enum"
# Define log levels here so they are available in globals.local
set LOG_LEVEL_QUIET   0
set LOG_LEVEL_FATAL   1
set LOG_LEVEL_ERROR   2
set LOG_LEVEL_WARNING 3
set LOG_LEVEL_INFO    4
set LOG_LEVEL_DEBUG   5
set LOG_LEVEL_TRACE   6

if {[info exists env(SLURM_LOCAL_GLOBALS_FILE)]} {
	set local_globals_file $env(SLURM_LOCAL_GLOBALS_FILE)
}

if [file exists $local_globals_file] {
	source $local_globals_file
}

#
# Specify the slurm install directory.
# Used to locate binaries, libraries, and header files.
#
cset slurm_dir   "/usr"
cset build_dir   "../../"
cset src_dir     "../../"
cset config_h    "${build_dir}/config.h"
cset sacctmgr    "${slurm_dir}/bin/sacctmgr"
cset sacct       "${slurm_dir}/bin/sacct"
cset salloc      "${slurm_dir}/bin/salloc"
cset sattach     "${slurm_dir}/bin/sattach"
cset sbatch      "${slurm_dir}/bin/sbatch"
cset sbcast      "${slurm_dir}/bin/sbcast"
cset scancel     "${slurm_dir}/bin/scancel"
cset scontrol    "${slurm_dir}/bin/scontrol"
cset sdiag       "${slurm_dir}/bin/sdiag"
cset sgather     "${slurm_dir}/bin/sgather"
cset sh5util     "${slurm_dir}/bin/sh5util"
cset sinfo       "${slurm_dir}/bin/sinfo"
cset smd         "${slurm_dir}/bin/smd"
cset sprio       "${slurm_dir}/bin/sprio"
cset squeue      "${slurm_dir}/bin/squeue"
cset srun        "${slurm_dir}/bin/srun"
cset sreport     "${slurm_dir}/bin/sreport"
cset sshare      "${slurm_dir}/bin/sshare"
cset sstat       "${slurm_dir}/bin/sstat"
cset strigger    "${slurm_dir}/bin/strigger"

cset slurmd      "${slurm_dir}/sbin/slurmd"

cset pbsnodes    "${slurm_dir}/bin/pbsnodes"
cset qdel        "${slurm_dir}/bin/qdel"
cset qstat       "${slurm_dir}/bin/qstat"
cset qsub        "${slurm_dir}/bin/qsub"
cset qalter      "${slurm_dir}/bin/qalter"
cset qrerun      "${slurm_dir}/bin/qrerun"

cset lsid      	 "${slurm_dir}/bin/lsid"
cset bjobs     	 "${slurm_dir}/bin/bjobs"
cset bkill     	 "${slurm_dir}/bin/bkill"
cset bsub     	 "${slurm_dir}/bin/bsub"

# If using MPICH-2 or other version of MPI requiring pmi libary, use this
#cset mpicc	"/home/jette/mpich2-install/bin/mpicc"
#cset use_pmi	1
# OR for other versions of MPICH, use this
cset mpicc	"/usr/local/bin/mpicc"
cset nvcc	"/usr/bin/nvcc"
cset use_pmi	0
#cset upcc       "/usr/local/bin/upcc"
cset upcc       "/usr/bin/xlupc"
cset oshcc      "/usr/local/bin/oshcc"

cset mpirun	"mpirun"
cset totalviewcli	"/usr/local/bin/totalviewcli"

# Set if using "--enable-memory-leak-debug" configuration option
cset enable_memory_leak_debug 0

# Pattern to match your shell prompt
#cset prompt {(%|#|\$|\]) *$}
cset prompt "(%|#|\\\$|]|\[^>]>) *(|\[^ ]* *)$"

#
# Specify locations of other executable files used
# Only the shell names (e.g. bin_bash) must be full pathnames
#
cset bin_awk	"awk"
cset bin_bash   [exec which bash | tail -n 1]
cset bin_cat	"cat"
cset bin_cc	"gcc"
cset bin_chmod	"chmod"
cset bin_cmp	"cmp"
cset bin_cp	"cp"
cset bin_date	"date"
cset bin_diff	"diff"
cset bin_echo	"echo"
cset bin_env	"env"
cset bin_file	"file"
cset bin_id	"id"
cset bin_grep   "grep"
cset bin_head   "head"
cset bin_ln     "ln"
cset bin_perldoc "/usr/bin/perldoc"

# Don't user $bin_hostname unless on a front-end system that
# doesn't fully use the slurmd, use $bin_printenv SLURMD_NODENAME
cset bin_hostname "hostname"

cset bin_kill	"kill"
cset bin_make	"make"
cset bin_mv	"mv"
cset bin_od	"od"
cset bin_pkill	"pkill"
cset bin_printenv "printenv"
cset bin_ps	"ps"
cset bin_pwd	"pwd"
cset bin_rm	"rm"
cset bin_sed	"sed"
cset bin_sleep  "sleep"
cset bin_sort	"sort"
cset bin_sum	"sum"
cset bin_touch	"touch"
cset bin_uname	"uname"
cset bin_uniq	"uniq"
cset bin_wc	"wc"

#
# Let the commands complete without expect timing out waiting for a
# response. Single node jobs submitted to the default partition should
# be initiated within this number of seconds.
# for interactive slurm jobs: cset timeout $max_job_delay
#
cset max_job_delay 120

#
# Files must be propagated between nodes within this number of seconds.
# The delay may be due to NFS.
#
cset max_file_delay 90

#
# Desired job state must be reached within this number of seconds.
#
cset max_job_state_delay 360

#
# Max number of iterations that wait_for_all_jobs can use
#
cset wait_for_all_jobs_iterations 600

#
# Specify the maximum number of tasks to use in the stress tests.
#
cset max_stress_tasks 4

#
# The error message that the "sleep" command prints when we run "sleep aaa".
#
cset sleep_error_message "(invalid time interval)|(bad character in argument)|(usage: sleep seconds)"

# Force LANG, as the expect tests aren't localized
set ::env(LANG)          "en_US.UTF-8"

# Testsuite level variables
cset testsuite_shared_dir "[$bin_pwd]"

#
# The poll interval (how many seconds to sleep between polls in functions like
# wait_for_file and wait_for_job)
#
cset testsuite_poll_interval 1

# Testsuite log variables
cset testsuite_log_level $LOG_LEVEL_DEBUG
cset testsuite_log_format "\[%{timestamp}s.%{msecs}03d] \(%{filename}s:%{lineno}d) %{loglevel}-7s %{message}s"
cset testsuite_time_format "%Y-%m-%d %H:%M:%S"

# Default to using color if writing to a terminal and not if writing to a file
cset testsuite_colorize        [dict exists [fconfigure stdout] -mode]
cset testsuite_color_fatal     $COLOR_RED
cset testsuite_color_error     $COLOR_RED_NORMAL
cset testsuite_color_warn      $COLOR_ORANGE
cset testsuite_color_info      $COLOR_YELLOW
cset testsuite_color_debug     $COLOR_BLUE
cset testsuite_color_trace     $COLOR_MAGENTA
cset testsuite_color_header    $COLOR_NONE
cset testsuite_color_success   $COLOR_GREEN
cset testsuite_color_failure   $COLOR_RED
cset testsuite_color_skipped   $COLOR_ORANGE

# To automatically call cleanup or not when ending the test
cset testsuite_cleanup_on_failure false
if {[info exists env(SLURM_TESTSUITE_CLEANUP_ON_FAILURE)]} {
	set testsuite_cleanup_on_failure $env(SLURM_TESTSUITE_CLEANUP_ON_FAILURE)
}

# Other common variables
set re_word_str          "\\S+"
set digit                "\\d"
set eol                  "\r?\n"
set float                "\\d+\\.?\\d*"
set number               "\\d+"
set format_time          "\\d+\\:\\d+\\:\\d+"
set number_with_suffix   "\\d+\[KM\]*"
set slash                "/"
set whitespace		 "\\s+"
set controlmachine_regex "\\S+"
# Any characters except ( , : newline
set no_delim             "\[^(,:\r\n\]"
set no_delim_slash       "\[^(,:/\r\n\]"
# The first group matches GRES name
# The second **optional** group matches GRES type.
# The third group matches GRES count.
# Test out the regex here: https://regex101.com/r/FlNYKM/7
set gres_regex "($no_delim_slash*):($no_delim*)?:?($no_delim*)"

#
# Cache SlurmUser to check for SuperUser requests
#
cset super_user     0
cset super_user_set 0

#
# Global variable used in multiple functions in "globals" file
#
set gpu_sock_list {}

################################################################
#
# NAME
#	get_test_name - gets the name of the invoking source script
#
# SYNOPSIS
#	get_test_name
#
# RETURN VALUE
#	The name of the originally called script
#
################################################################

proc get_test_name { } {

	set test_name unknown
	set frame_level 1
	while { $frame_level <= [info frame] } {
		if { [dict get [info frame $frame_level] type] == "source" } {
			set test_name [file tail [dict get [info frame $frame_level] file]]
			break
		}
		incr frame_level
	}

	return $test_name
}


#
# Name of the originally invoked test script, e.g. test1.1
#
set test_name [get_test_name]

#
# Suffix of the test script, e.g. 1.1
#
set test_id   [string map {test ""} $test_name]


################################################################
#
# NAME
#	fail - fails a test
#
# SYNOPSIS
#	fail message
#
# DESCRIPTION
#	To be used when an error is fatal for the test. This routine
#	prints the specified error message, optionally cleans up, prints
#	a final test failure message, and exits the test with exit code 1.
#
# ENVIRONMENT
#	Whether or not the cleanup procedure is called depends on the setting
#	of the $testsuite_cleanup_on_failure set in the globals.local file or
#	overridden with the SLURM_TESTSUITE_CLEANUP_ON_FAILURE environment
#	variable.
#
# NOTE
#	DO NOT call this within your local cleanup procedure.
#
################################################################

proc fail { message } {

	# Avoid recursive calls from within cleanup
	if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} {
		log_error "Local cleanup shouldn't call pass, fail or skip"
		return
	}

	log_fatal $message

	# _test_fini will handle cleanup and print the failure message.
	_test_fini 1
}


################################################################
#
# NAME
#	skip - skips a test
#
# SYNOPSIS
#	skip message
#
# DESCRIPTION
#	To be used when a precondition for the test fails and the test
#	should be skipped. This routine prints the specified warning message,
#	calls the cleanup procedure if defined, prints a final test skipped
#	message, and exits the test with exit code -1 (aka 255).
#
# NOTE
#	DO NOT call this within your local cleanup procedure.
#
################################################################

proc skip { message } {

	# Avoid recursive calls from within cleanup
	if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} {
		log_error "Local cleanup shouldn't call pass, fail or skip"
		return
	}

	log_warn $message

	# _test_fini will handle cleanup and print the skipped message.
	_test_fini -1
}


################################################################
#
# NAME
#	pass - passes a test
#
# SYNOPSIS
#	pass
#
# DESCRIPTION
#	To be used when a test passes and should complete with success.
#	This routine calls the cleanup procedure if defined, prints a final
#	test success message, and exits with exit code 0.
#
# NOTE
#	DO NOT call this within your local cleanup procedure.
#
################################################################

proc pass { } {

	# Avoid recursive calls from within cleanup
	if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} {
		log_error "Local cleanup shouldn't call pass, fail or skip"
		return
	}

	# _test_fini will handle cleanup and print the success message.
	_test_fini 0
}


################################################################
#
# NAME
#	fail_on_error - fails if exit_code is non-zero
#
# SYNOPSIS
#	fail_on_error message
#
# DESCRIPTION
#	If exit_code global variable is non-zero it calls fail with the
#	message.
#
# NOTE
#	DO NOT call this within your local cleanup procedure.
#
################################################################

proc fail_on_error { message } {
	global exit_code

	if {$exit_code != 0} {
		fail "$message"
	}
}


################################################################
#
# NAME
#	print_time - prints the current date and time
#
# SYNOPSIS
#	print_time
#
################################################################

proc print_time { } {
	global bin_date

	spawn $bin_date
	expect {
		eof {
			wait
		}
	}

	return
}

################################################################
#
# NAME
#	dict_getdef - 'dict get' with ability to specify the default value
#
# SYNOPSIS
#	dict_getdef dictionary_value key default_value
#
# DESCRIPTION
#	Tcl < 8.7 lacks a built in 'dict get' with ability to specify the
#	default value. Tcl 8.7 adds a dict getdef.
#	This proc returns the value from the dictionary corresponding to the
#	keys if it exists, or the default value otherwise.
#
# EXAMPLE
#	dict_getdef $option_dict action "warn"
#
# SOURCE
#	https://core.tcl-lang.org/tips/doc/trunk/tip/342.md
#	https://core.tcl-lang.org/tcl/tktview/2370575
#
################################################################

proc dict_getdef {D args} {
	if {[dict exists $D {*}[lrange $args 0 end-1]]} then {
		dict get $D {*}[lrange $args 0 end-1]
	} else {
		lindex $args end
	}
}


################################################################
#
# NAME
#	run_command - executes a command and returns a dictionary result
#
# SYNOPSIS
#	run_command ?options? command
#
# DESCRIPTION
#	Executes a command and returns a dictionary that includes the output,
#	exit code, etc. An action can be taken (fail, warn, none)  if the
#	command's exit code or timeout is unexpected. By default, the action
#	will be applied if the command fails. If the -xfail option is
#	specified, the behavior will be reversed to apply the action if the
#	command ran successfully.
#
# OPTIONS
#	-fail
#		If the exit code or timeout is unexpected, the action that will
#		be taken is to fail the test
#	-warn
#		If the exit code or timeout is unexpected, the action that will
#		be taken is to log a warning (this is the default)
#	-none
#		If the exit code or timeout is unexpected, no action will be
#		taken
#	-xfail
#		If the command exits with zero and does not time out, the
#		action will be applied. Without this option, the action will
#		be applied if the command exits with a non-zero exit code or
#		times out.
#	-timeout <float_number>
#		Time in seconds to wait for the command to complete before
#		timing out (default is 60.0)
#	-nolog
#		Logging for this command will occur at trace threshold only
#
# ARGUMENTS
#	command
#		a string containing the command and arguments to execute
#
# RETURN VALUE
#	A dictionary containing the following elements:
#		command    - The command that was invoked
#		exit_code  - Exit code
#		output     - The combined standard output and standard error
#		start_time - The time (epoch with milliseconds) the command was
#		             executed
#		duration   - The duration (seconds and milliseconds) the
#		             command took to run
#
################################################################
proc run_command args {
	global bin_bash

	set exit_status 0
	set output ""
	set action "warn"
	set timeout 60
	set expect_failure false
	set log_at_trace_level false
	while {[llength $args]} {
		switch -glob -- [lindex $args 0] {
			-fail    {set action "fail"; set args [lrange $args 1 end]}
			-none    {set action "none"; set args [lrange $args 1 end]}
			-timeout {set args [lassign $args - timeout]}
			-warn    {set action "warn"; set args [lrange $args 1 end]}
			-xfail   {set expect_failure true; set args [lrange $args 1 end]}
			-nolog   {set log_at_trace_level true; set args [lrange $args 1 end]}
			-*       {fail "Unknown option: [lindex $args 0]"}
			default  break
		}
	}
	if {[llength $args] == 1} {
		lassign $args command
	} else {
		fail "Invalid number of arguments [llength $args]: $args"
	}

	if {$log_at_trace_level} {
		interp alias {} log_alias {} log_trace
	} else {
		interp alias {} log_alias {} log_debug
	}

	set orig_log_user [log_user -info]
	log_user 0

	log_alias "Invoking command \"$command\""
	set start_time [format "%.3f" [expr [clock milliseconds] / 1000.000]]
	set stty_init raw ; # Prevent the terminal from inserting \r
	set expect_pid [spawn -noecho $bin_bash -c "$command"]
	expect {
		-re "(.+)" {
			append output $expect_out(1,string)
			exp_continue
		}
		timeout {
			set message "Command \"$command\" timed out after $timeout seconds"
			slow_kill $expect_pid
			set exit_status 1
		}
		eof {
			lassign [wait] pid spawnid os_error_flag errno
			set exit_status [expr $errno > 128 ? $errno - 256 : $errno]
		}
	}
	set end_time [format "%.3f" [expr [clock milliseconds] / 1000.000]]
	set duration [format "%.3f" [expr $end_time - $start_time]]

	log_alias "Command Results:"
	log_alias "	Start Time: $start_time"
	log_alias "	Duration: $duration"
	log_alias "	Exit Code: $exit_status"
	if {[info exists output]} {
		log_alias "	Output: $output"
	}

	if {! $expect_failure && $exit_status != 0} {
		cset message "Command \"$command\" failed with rc=$exit_status"
		if {[info exists output] && $output != ""} {
			append message ": [string trimright $output]"
		}
		if {$action eq "warn"} {
			log_warn $message
		} elseif {$action eq "fail"} {
			fail $message
		}
	} elseif {$expect_failure && $exit_status == 0} {
		cset message "Command \"$command\" was expected to fail but succeeeded"
		if {$action eq "warn"} {
			log_warn $message
		} elseif {$action eq "fail"} {
			fail $message
		}
	}
	log_user $orig_log_user

	dict set result command    $command
	dict set result exit_code  $exit_status
	dict set result output     $output
	dict set result start_time $start_time
	dict set result duration   $duration

	return $result
}


################################################################
#
# NAME
#	run_command_output - executes a command and returns the output
#
# SYNOPSIS
#	run_command_output ?options? command
#
# DESCRIPTION
#	Executes a command and returns a dictionary that includes the output,
#	exit code, etc. An action can be taken (fail, warn, none)  if the
#	command's exit code or timeout is unexpected. By default, the action
#	will be applied if the command fails. If the -xfail option is
#	specified, the behavior will be reversed to apply the action if the
#	command ran successfully.
#
# OPTIONS
#	-fail
#		if the exit code or timeout is unexpected, the action that will
#		be taken is to fail the test
#	-warn
#		if the exit code or timeout is unexpected, the action that will
#		be taken is to log a warning (this is the default)
#	-none
#		if the exit code or timeout is unexpected, no action will be
#		taken
#	-xfail
#		if the command exits with zero and does not time out, the
#		action will be applied. Without this option, the action will
#		be applied if the command exits with a non-zero exit code or
#		times out.
#	-timeout <float_number>
#		time in seconds to wait for the command to complete before
#		timing out (default is 60.0)
#
# ARGUMENTS
#	command
#		a string containing the command and arguments to execute
#
# RETURN VALUE
#	A string containing the combined standard output and standard error
#
################################################################
proc run_command_output args {
	global exit_code

	set result [run_command {*}$args]

	if [dict exists $result output] {
		return [dict get $result output]
	} else {
		return ""
	}
}


################################################################
#
# NAME
#	run_command_status - executes a command and returns the exit code
#
# SYNOPSIS
#	run_command_status ?options? command
#
# DESCRIPTION
#	Executes a command and returns a dictionary that includes the output,
#	exit code, etc. An action can be taken (fail, warn, none)  if the
#	command's exit code or timeout is unexpected. By default, the action
#	will be applied if the command fails. If the -xfail option is
#	specified, the behavior will be reversed to apply the action if the
#	command ran successfully.
#
# OPTIONS
#	-fail
#		if the exit code or timeout is unexpected, the action that will
#		be taken is to fail the test
#	-warn
#		if the exit code or timeout is unexpected, the action that will
#		be taken is to log a warning (this is the default)
#	-none
#		if the exit code or timeout is unexpected, no action will be
#		taken
#	-xfail
#		if the command exits with zero and does not time out, the
#		action will be applied. Without this option, the action will
#		be applied if the command exits with a non-zero exit code or
#		times out.
#	-timeout <float_number>
#		time in seconds to wait for the command to complete before
#		timing out (default is 60.0)
#
# ARGUMENTS
#	command
#		a string containing the command and arguments to execute
#
# RETURN VALUE
#	The exit code for the invoked command
#
################################################################
proc run_command_status args {
	global exit_code

	set result [run_command {*}$args]

	return [dict get $result exit_code]
}


################################################################
#
# NAME
#	cancel_job - cancels the specified job
#
# SYNOPSIS
#	cancel_job job_id ?het_job?
#
# ARGUMENTS
#	job_id
#		The Slurm job id of a job we want to cancel
#	het_job
#		For a het job confirm each component job has completed
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc cancel_job { job_id {het_job 0}} {
	global scancel bin_sleep

	if {$job_id == 0} {
		return 1
	}

	log_debug "Cancelling $job_id"
	set status [catch [exec $scancel -Q $job_id] result]
	exec $bin_sleep 1
	return [wait_for_job $job_id "DONE" $het_job]
}


################################################################
#
# NAME
#	get_line_cnt - returns the size of the specified file
#
# SYNOPSIS
#	get_line_cnt file_name
#
# RETURN VALUE
#	Number of lines in the specified file.
#
################################################################

proc get_line_cnt { file_name } {
	global bin_wc number
	set lines 0
	spawn $bin_wc -l $file_name
	expect {
		-re "($number) " {
			set lines $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	return $lines
}


################################################################
#
# NAME
#	slow_kill - kills a process slowly
#
# SYNOPSIS
#	slow_kill pid
#
# DESCRIPTION
#	Kill a process slowly, first trying SIGINT, pausing for
#       a second, then sending SIGKILL.
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc slow_kill { pid } {
	global bin_kill

	catch {exec $bin_kill -INT $pid}
	catch {exec $bin_kill -INT $pid}
	sleep  1
	catch {exec $bin_kill -KILL $pid}

	return 0
}


################################################################
#
# NAME
#	get_my_id - gets the id from the running user
#
# SYNOPSIS
#	get_my_id
#
# RETURN VALUE
#	output of id
#
################################################################

proc get_my_id {} {

	global bin_id number
	set login_info -1

	log_user 0

	spawn $bin_id
	expect {
		-re "(uid=.*\n)" {
			set login_info $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1

	if {$login_info == -1} {
		fail "Unable to get user info"
	}

	return $login_info
}


################################################################
#
# NAME
#	get_my_user_name - gets the name from the running user
#
# SYNOPSIS
#	get_my_user_name
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc get_my_user_name {  } {
	global bin_id re_word_str

	set user_name -1

	log_user 0
	spawn $bin_id -nu
	expect {
		-re "($re_word_str)" {
			set user_name $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	if {$user_name == -1} {
		fail "Unable to get user name"
	}

	return $user_name
}


################################################################
#
# NAME
#	get_my_uid - gets the uid from the running user
#
# SYNOPSIS
#	get_my_uid
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc get_my_uid {  } {
	global bin_id number

	set uid -1

	log_user 0
	spawn $bin_id -u
	expect {
		-re "($number)" {
			set uid $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $uid
}


################################################################
#
# NAME
#	get_my_gid - gets the gid from the running user
#
# SYNOPSIS
#	get_my_gid
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc get_my_gid {  } {
	global bin_id number

	set gid -1

	log_user 0
	spawn $bin_id -g
	expect {
		-re "($number)" {
			set gid $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $gid
}


################################################################
#
# NAME
#	kill_salloc - kills all salloc commands associated with this user
#
# SYNOPSIS
#	kill_salloc
#
# DESCRIPTION
#	Kill all salloc commands associated with this user.
#	Issue two SIGINT, sleep 1 and a SIGKILL
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
# NOTE
#	Use slow_kill instead of kill_salloc if you can capture
#	the process id
#
################################################################

proc kill_salloc {  } {
	global bin_id bin_pkill bin_sleep number

	set uid [get_my_uid]
	catch {exec $bin_pkill -INT -u $uid salloc}
	catch {exec $bin_pkill -INT -u $uid salloc}
	sleep  1
	catch {exec $bin_pkill -KILL -u $uid salloc}

	return 0
}


################################################################
#
# NAME
#	kill_srun - kills all srun commands associated with this user
#
# SYNOPSIS
#	kill_srun
#
# DESCRIPTION
#	Kill all srun commands associated with this user.
#	Issue two SIGINT, sleep 1 and a SIGKILL
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
# NOTE
#	Use slow_kill instead of kill_srun if you can capture
#	the process id
#
################################################################

proc kill_srun {  } {
	global bin_id bin_pkill bin_sleep number

	set uid [get_my_uid]
	catch {exec $bin_pkill -INT -u $uid srun}
	catch {exec $bin_pkill -INT -u $uid srun}
	sleep  1
	catch {exec $bin_pkill -KILL -u $uid srun}

	return 0
}


################################################################
#
# NAME
#	wait_for - generic wait utility
#
# SYNOPSIS
#	wait_for ?options? condition body
#
# DESCRIPTION
#	Generic wait utility allowing you to repeatedly execute a generic block
#	of code until a specified boolean expression is met. The code block and
#	condition check occur every poll interval until a timeout is reached.
#
# OPTIONS
#	-timeout <float_number>
#		time in seconds to wait for the command to complete before
#		timing out (default is 60.0)
#	-pollinterval <floa_number>
#		time in seconds between each loop execution and condition check
#		(defaults to 1.0)
#
# ARGUMENTS
#	condition
#		The boolean expression to test
#	body
#		A block of code to evaluate in the invoking stack frame
#
# RETURN VALUE
#	true if the condition is met before the timeout occurs
#	false if the timeout occurs before the condition is met
#
################################################################
proc wait_for args {
	set timeout 60
	set poll_interval 1
	while {[llength $args]} {
		switch -glob -- [lindex $args 0] {
			-time*  {set args [lassign $args - timeout]}
			-poll*  {set args [lassign $args - poll_interval]}
			-*      {fail "Unknown option: [lindex $args 0]"}
			default break
		}
	}
	if {[llength $args] == 2} {
		lassign $args condition body
	} else {
		fail "Invalid number of arguments [llength $args]: $args"
	}

	set start_time [format "%.3f" [expr [clock milliseconds] / 1000.000]]

	log_debug "Waiting for $condition starting at [clock format [expr int($start_time)] -format %Y-%m-%dT%X].[lindex [split $start_time '.'] 1]"

	while {1} {
		# Evaluate code block
		log_trace "Evaluating code block ([string trim $body])"
		set ret [catch {uplevel $body} result]

		# Check condition
		if {[uplevel expr [format "{%s}" $condition]]} {
			set now [format "%.3f" [expr [clock milliseconds] / 1000.000]]
			log_debug "Condition ($condition) was met at [clock format [expr int($now)] -format %Y-%m-%dT%X].[lindex [split $now '.'] 1]"
			return true
		} else {
			log_trace "Condition ($condition) was not met"
		}

		# Sleep poll interval
		log_trace "Sleeping for $poll_interval seconds"
		after [expr {int($poll_interval * 1000)}]

		# Check if we have surpassed our timeout
		set now [format "%.3f" [expr [clock milliseconds] / 1000.000]]
		log_trace "Checking whether the current time ([clock format [expr int($now)] -format %Y-%m-%dT%X].[lindex [split $now '.'] 1]) is greater than the start time plus the timeout ([clock format [expr int($start_time + $timeout)] -format %Y-%m-%dT%X].[lindex [split [expr $start_time + $timeout] '.'] 1])"
		if {$now > $start_time + $timeout} {
			log_warn "Condition ($condition) did not occur before timeout ($timeout) seconds"
			return false
		}
	}
}

################################################################
#
# NAME
#	wait_for_command - waits for command output to match a pattern
#
# SYNOPSIS
#	wait_for_command command args regex ?matches_in? ?or_more? ?matches_out? ?max_delay?
#
# DESCRIPTION
#	Executes a command every testsuite_poll_interval until a regex
#	pattern is matched in the output, or timeout after max_delay.
#
# ARGUMENTS
#	command
#		The command to run via spawn.
#	args
#		The arguments to the command, as a single string.
#	regex
#		The regex pattern to search for in the command
#		output. Can be a simple string.
#	matches_in
#		The number of times to match the regex. Defaults to 1.
#	or_more
#		If 1, allow for matching the regex pattern match_cnt *or more*
#		times, instead of exactly match_cnt times. Defaults to 0.
#	matches_out
#		The upvar (a variable name to "pass by reference" in TCL)
#		to set/return the number of matches found.
#		Useful if or_more is 1 and the caller wants the	matches found.
#	max_delay
#		The timeout seconds to execute the command and wait on the
#		output before giving up. Defaults to global max_command_delay.
#
# RETURN VALUE
#	0 on success and 1 on failure. On failure, an error is logged to
#	the output. If matches_out is specified, the number of matches will
#	be returned via the reference/upvar matches_out.
#
################################################################

proc wait_for_command { command args regex {matches_in 1} {or_more 0}
                        {matches_out ""} {max_delay 120} } {
	global bin_sleep testsuite_poll_interval

	if {$matches_out != ""} {
		upvar $matches_out matches
	}

	set delay 0
	while {$delay < $max_delay} {
		set matches 0
		# `{*}` breaks apart a string into individual pieces
		spawn $command {*}$args
		expect {
			-re $regex {
				incr matches
				exp_continue
			}
			timeout {
				log_error "$command not responding after $delay seconds polling"
				# Return failure
				return 1
			}
			eof {
				wait
			}
		}

		if {($matches == $matches_in) || ($or_more == 1 &&
		                                 $matches >= $matches_in)} {
			# Return success
			return 0
		}

		log_info "[lindex [info level 0] 0] polled $matches matches of '$regex', but expecting $matches_in"
		exec $bin_sleep $testsuite_poll_interval
		incr delay $testsuite_poll_interval

	}
	if {$or_more == 1} {
		set match_str "$matches_in or more times"
	} elseif {$matches_in == 1} {
		set match_str "exactly $matches_in time"
	} else {
		set match_str "exactly $matches_in times"
	}
	log_error "Failed to match regex `$regex` $match_str after $max_delay seconds for command `$command $args`."
	# Return failure
	return 1
}


################################################################
#
# NAME
#	wait_for_file - waits for a file to exist with non-zero size
#
# SYNOPSIS
#	wait_for_file file_name
#
# DESCRIPTION
#	Wait for the specified file to exist and have a non-zero size.
#	Note that if JobFileAppend=0 is configured, a file can exist and
#	be purged then be re-created. Polls every $testsuite_poll_interval
#	seconds.
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc wait_for_file { file_name } {
	global bin_sleep max_file_delay testsuite_poll_interval

	for {set my_delay 0} {$my_delay <= $max_file_delay} \
	                     {set my_delay [expr $my_delay + $testsuite_poll_interval]} {
		if {[file exists $file_name]} {
#			Add small delay for I/O buffering
			exec $bin_sleep 1
			return 0
		}
		exec $bin_sleep $testsuite_poll_interval
#
#		Expect may fail to load current NFS info.
#		Use the ls command to load current info.
#
		set slash_pos [string last $file_name "/"]
		if {$slash_pos < 1} {
			set dir_name "."
		} else {
			decr slash_pos
			set dir_name [string $file_name 0 $slash_pos]
		}
		exec /bin/ls $dir_name
	}
	log_error "Timeout waiting for file $file_name"
	return 1
}


################################################################
#
# NAME
#	wait_for_job - waits for job to be in desired state
#
# SYNOPSIS
#	wait_for_job job_id desired_state ?het_job?
#
# DESCRIPTION
#	Wait for job to be in desired state. Can handle het job components.
#
# ARGUMENTS
#	job_id
#		The Slurm job id of a job we want to wait for.
#	desired_state
#		The state you want the job to attain before returning.
#		Currently supports:
#			DONE any terminated state
#			PENDING job is pending
#			RUNNING job is running
#			SPECIAL_EXIT
#			SUSPENDED job is suspended
#	het_job
#		If set, checks the state of each component job if the job
#		is a het one.
#
# RETURN VALUE
#	A non-zero return code indicates a failure
#
# SEE ALSO
#	_wait_for_single_job
#
################################################################

proc wait_for_job { job_id desired_state {het_job 0}} {
	set jid_list ""

	if { $het_job } {
		# get component job ids
		set jid_list [get_het_job_ids $job_id 1]
	}

	set rc 0
	if { $jid_list == "" } {
		# non-het job
		set jid_list $job_id
	}

	foreach jid $jid_list {
		set rc [_wait_for_single_job $jid $desired_state]
		if { $rc } {
			# bail out on first failure
			break
		}
	}
	return $rc
}


################################################################
#
# NAME
#	wait_for_account_done - cancels and waits on jobs in specified accounts
#
# SYNOPSIS
#	wait_for_account_done accounts
#
# DESCRIPTION
#	Cancel jobs on and wait for them to be finished in account(s) given.
#	Polls every $testsuite_poll_interval seconds.
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
# NOTE
#	We sleep for two seconds before replying that a job is
#	done to give time for I/O completion (stdout/stderr files)
#
################################################################

proc wait_for_account_done { accounts } {
	global scancel squeue max_job_state_delay re_word_str testsuite_poll_interval

	if { $accounts == "" } {
		log_error "wait_for_account_done: no account given"
		return 1
	}

	log_user 0
	set account_list [split $accounts ","]
	foreach item $account_list {
		spawn $scancel -A $item
		expect {
			timeout {
				log_warn "No response from scancel"
			}
			eof {
				wait
			}
		}
	}

	set my_delay    0
	while 1 {
		set found 0
		spawn $squeue -o Account=%a -h -A$accounts
		expect {
			-re "Account=($re_word_str)" {
				set found 1
				exp_continue;
			}
			eof {
				wait
			}
		}

		if { !$found } {
			log_debug "Account(s) $accounts is/are empty"
			break;
		}

		if { $my_delay > $max_job_state_delay } {
			log_error "Timeout waiting for account(s) '$accounts' to be finished"
			log_user 1
			return 1
		}

		exec sleep $testsuite_poll_interval
		set my_delay [expr $my_delay + $testsuite_poll_interval]
	}
	log_user 1
	return 0
}


################################################################
#
# NAME
#	wait_for_part_done - cancels and waits on jobs in specified partition
#
# SYNOPSIS
#	wait_for_part_done partition
#
# DESCRIPTION
#	Cancel jobs on and wait for them to be finished in partition given.
#	Polls every $testsuite_poll_interval seconds.
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
# NOTE
#	We sleep for two seconds before replying that a job is
#	done to give time for I/O completion (stdout/stderr files)
#
################################################################

proc wait_for_part_done { part } {
	global scancel squeue max_job_state_delay re_word_str testsuite_poll_interval

	if { $part == "" } {
		log_error "wait_for_part_done: no partition given"
		return 1
	}

	log_user 0
	spawn $scancel -p $part
	expect {
		timeout {
			log_error "wait_for_part_done: No response from scancel"
		}
		eof {
			wait
		}
	}

	set my_delay    0
	while 1 {
		set found 0
		spawn $squeue -o Part=%P -h -p$part
		expect {
			-re "Part=($re_word_str)" {
				set found 1
				exp_continue;
			}
			eof {
				wait
			}
		}

		if { !$found } {
			log_debug "Partition $part is empty"
			break;
		}

		if { $my_delay > $max_job_state_delay } {
			log_error "wait_for_part_done: Timeout waiting for partition '$part' to be finished"
			log_user 1
			return 1
		}

		exec sleep $testsuite_poll_interval
		set my_delay [expr $my_delay + $testsuite_poll_interval]
	}
	log_user 1
	return 0
}


################################################################
#
# NAME
#	wait_for_step - waits for a job step to be found
#
# SYNOPSIS
#	wait_for_step step_id
#
# DESCRIPTION
#	Wait for a job step to be found.
#	Polls every $testsuite_poll_interval seconds.
#
# RETURN VALUE
#	A non-zero return code indicates a failure.
#
################################################################

proc wait_for_step { step_id } {
	global scontrol max_job_state_delay testsuite_poll_interval
	set my_delay    0
	while 1 {
		set fd [open "|$scontrol -o show step $step_id"]
		gets $fd line
		catch {close $fd}
		if {[regexp {Nodes=} $line foo] == 1} {
			return 0
		}
		if {[regexp {MidplaneList=} $line foo] == 1} {
			return 0
		}
		if { $my_delay > $max_job_state_delay } {
			log_error "Timeout waiting for job step"
			return 1
		}

		log_info "Step $step_id not done yet. Waiting for $testsuite_poll_interval seconds"
		exec sleep $testsuite_poll_interval
		set my_delay [expr $my_delay + $testsuite_poll_interval]
	}
}


################################################################
#
# NAME
#	wait_for_all_jobs - waits for jobs to finish having a specified name
#
# SYNOPSIS
#	wait_for_all_jobs job_name
#
# DESCRIPTION
#	Wait for previously submitted Slurm jobs to finish of a
#	certain name. Iterates every $testsuite_poll_interval seconds.
#
# ARGUMENTS
#	job_name
#		The name of job to wait for
#
# RETURN VALUE
#	-1 on failure, 0 if all jobs are done, and the remaining job count
#	if not all jobs are done after $wait_for_all_jobs_iterations
#	iterations.
#
################################################################

proc wait_for_all_jobs { job_name } {
	global scancel squeue bin_sleep wait_for_all_jobs_iterations testsuite_poll_interval

	set matches 0
	set timeout 30

	log_info "Waiting for all jobs to terminate"
	for {set inx 0} {$inx < $wait_for_all_jobs_iterations} {incr inx} {
		log_user 0
		set matches 0
		spawn $squeue -o %j
		expect {
			-re "$job_name" {
				incr matches
				exp_continue
			}
			-re "error" {
				set matches -1
			}
			timeout {
				log_warn "No response from squeue"
				set matches -1
			}
			eof {
				wait
			}
		}
		log_user 1
		if {$matches == 0} {
			log_info "All jobs complete"
			break
		}
		if {$matches > 0} {
			log_info "  $matches jobs remaining"
			exec sleep $testsuite_poll_interval
		}
		if {$matches == -1} {
			break
		}
	}
	if {$matches != 0} {
		spawn $scancel -n $job_name
		expect {
			timeout {
				log_warn "No response from scancel"
			}
			eof {
				wait
			}
		}
	}
	return $matches
}


################################################################
#
# NAME
#	wait_job_reason - waits for a desired job state and reason
#
# SYNOPSIS
#	wait_job_reason job_id ?desired_state? ?desired_reason_list?
#
# DESCRIPTION
#	Wait until the job is in desired state and reason is one
#	of the desired ones. Or to max_job_state_delay.
#
# ARGUMENTS
#	job_id
#		The job to wait for
#	desired_state
#		Desired state.
#	desired_reason_list
#		List of desired reasons. Empty list means that any reason
#		is ok.
#
# RETURN VALUE
#	0 when job is in the desired state and reason is one
#	of the desired ones, or 1 otherwise.
#
################################################################

proc wait_job_reason { job_id {desired_state "PENDING"}
		       {desired_reason_list ""} } {
	global scontrol re_word_str max_job_state_delay testsuite_poll_interval
	set final_state "COMPLETED CANCELLED FAILED TIMEOUT DEADLINE
	                 OUT_OF_MEMORY"

	set log_user_prev [log_user -info]
	log_user 0

	set my_delay 0
	while true {
		set pending 0
		set has_reason 1
		spawn $scontrol show job $job_id
		expect {
			-re "JobState=($re_word_str) Reason=(\\S+)" {
				set job_state $expect_out(1,string)
				set job_reason $expect_out(2,string)
			}
			timeout {
				log_error "No response from scontrol show job"
				set rc 1
				break
			}
		}

		# Check if both state and reason are the desired ones
		if {$job_state == $desired_state} {
			set found 0
			if {$desired_reason_list == ""} {
				set found 1
			}
			foreach desired_reason $desired_reason_list {
				if {$job_reason == $desired_reason } {
					set found 1
				}
			}
			if {$found} {
				set rc 0
				break
			}
		} elseif {[lsearch -exact final_state $job_state] >= 0} {
			# Job is in final step no need to wait longer
			log_error [format "Job in final state/reason '%s' / '%s' instead of the desired '%s' / '%s'." \
			                  $job_state $job_reason \
			                  $desired_state $desired_reason_list]
			set rc 1
			break
		}

		# Check if this was the last poll
		if {$my_delay > $max_job_state_delay} {
			log_error "wait_job_reason timeout"
			set rc 1
			break
		}
		set remamining_sec [expr $max_job_state_delay - $my_delay]
		log_info [format "Job in state/reason '%s' / '%s' instead of the desired '%s' / '%s'." \
		                 $job_state $job_reason \
		                 $desired_state $desired_reason_list]
		log_info [format "Polling again in %ss, %ss to timeout." \
		                 $testsuite_poll_interval $remamining_sec]

		sleep $testsuite_poll_interval
		set my_delay [expr $my_delay + $testsuite_poll_interval]
	}

	log_user $log_user_prev
	return $rc
}


################################################################
#
# NAME
#	test_config_overrides - returns value of FastSchedule from slurm.conf
#
# SYNOPSIS
#	test_config_overrides
#
# RETURN VALUE
#	Returns if SlurmdParameters=config_overrides is configured
#
################################################################

proc test_config_overrides { } {
	global scontrol re_word_str

	log_user 0
	set config_overrides 0
	spawn $scontrol show config
	expect {
		-re "SlurmdParameters *= *($re_word_str)" {
			if { [string first "config_overrides" $expect_out(1,string)] != -1} {
				set config_overrides 1
			} else {
				set config_overrides 0
			}
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $config_overrides
}


################################################################
#
# NAME
#	test_assoc_enforced - determines if we need an association to run a job
#
# SYNOPSIS
#	test_assoc_enforced
#
# DESCRIPTION
#	Determine if we need an association to run a job. This is based upon
#	the value of AccountingStorageEnforce in the slurm.conf.
#
# RETURN VALUE
#	Returns level of association enforcement, 0 if none
#
################################################################

proc test_assoc_enforced { } {
	global scontrol number

	log_user 0
	set assoc_enforced 0
	spawn $scontrol show config
	expect {
		-re "AccountingStorageEnforce *= associations" {
			set assoc_enforced 1
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $assoc_enforced
}


################################################################
#
# NAME
#	test_limits_enforced - check if AccountingStorageEnforce limits is set
#
# SYNOPSIS
#	test_limits_enforced
#
# RETURN VALUE
#	Returns 1 if limits is set, else 0
#
################################################################

proc test_limits_enforced { } {
	global scontrol

	log_user 0
	set enforced 0
	spawn $scontrol show config
	expect {
		-re "AccountingStorageEnforce *= (\[a-z]+),limits" {
			set enforced 1
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $enforced
}


################################################################
#
# NAME
#	test_enforce_part_limits - returns value of EnforcePartLimits
#
# SYNOPSIS
#	test_enforce_part_limits
#
# RETURN VALUE
#	Returns EnforcePartLimits value (ALL, ANY, or NO)
#
################################################################

proc test_enforce_part_limits { } {
	global re_word_str scontrol

	log_user 0
	set enforced "UNKNOWN"
	spawn $scontrol show config
	expect {
		-re "EnforcePartLimits *= ($re_word_str)" {
			set enforced $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $enforced
}


################################################################
#
# NAME
#	test_gang - determines if gang scheduling is configured
#
# SYNOPSIS
#	test_gang
#
# RETURN VALUE
#	Returns level of association enforcement, 0 if none
#
################################################################

proc test_gang { } {
	global scontrol

	log_user 0
	set gang 0
	spawn $scontrol show config
	expect {
		-re "PreemptMode *= .*GANG" {
			set gang 1
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $gang
}


################################################################
#
# NAME
#	test_power_save - checks whether power save mode is enabled
#
# SYNOPSIS
#	test_power_save
#
# RETURN VALUE
#	Return 1 if power save mode is enabled, 0 otherwise
#
################################################################

proc test_power_save { } {
	global scontrol number

	log_user 0
	set suspend_time 0
	spawn $scontrol show config
	expect {
		-re "SuspendTime *= ($number)" {
			set suspend_time $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	if {$suspend_time == 0} {
		set power_save 0
	} else {
		set power_save 1
	}
	return $power_save
}


################################################################
#
# NAME
#	slurmd_user_root - checks whether SlurmUser is root
#
# SYNOPSIS
#	slurmd_user_root
#
# RETURN VALUE
#	Return 1 if the SlurmdUser is root, 0 otherwise
#
################################################################

proc slurmd_user_root { } {
	global scontrol

	log_user 0
	set rc 0
	spawn $scontrol show config
	expect {
		-re "SlurmdUser *= root" {
			set rc 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $rc
}


################################################################
#
# NAME
#	test_topology - determines if system is topology aware
#
# SYNOPSIS
#	test_topology
#
# RETURN VALUE
#	Returns level of association enforcement, 0 if none
#
################################################################

proc test_topology { } {
	global scontrol

	log_user 0
	set have_topology 1
	spawn $scontrol show config
	expect {
		-re "TopologyPlugin *= *topology/none" {
			set have_topology 0
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $have_topology
}


################################################################
#
# NAME
#	get_config - returns a dictionary of slurm configuration parameters

# SYNOPSIS
#	get_config
#
# DESCRIPTION
#	Uses `scontrol show config` to return the slurm configuration as
#	a dictionary of parameter values.
#
################################################################

proc get_config { } {
	global scontrol

	set output [run_command_output -fail -nolog "$scontrol show config"]

	foreach line [split $output "\n"] {
		if {[regexp {^(\w+) += (.*)$} $line {} param_name param_value] == 1} {
			dict set config_dict $param_name $param_value
		}
	}

	return $config_dict
}


################################################################
#
# NAME
#	get_config_param - returns a slurm configuration parameter
#
# SYNOPSIS
#	get_config_param parameter_name
#
# DESCRIPTION
#	Returns a specific configuration parameter value.
#
# RETURN VALUE
#	Returns the value of the specified parameter or MISSING if it does not
#	exist.
#
################################################################

proc get_config_param { parameter_name } {

	set config_dict [get_config]

	if [dict exists $config_dict $parameter_name] {
		return [dict get $config_dict $parameter_name]
	} else {
		return "MISSING"
	}
}


################################################################
#
# NAME
#	get_affinity_types - gets the task plugins running with task/ stripped
#
# SYNOPSIS
#	get_affinity_types
#
# RETURN VALUE
#	Returns comma separated list of task plugins running without the task/
#
################################################################

proc get_affinity_types { } {
	global scontrol re_word_str

	log_user 0
	set affinity ""
	spawn $scontrol show config
	expect {
		-re "TaskPlugin *= ($re_word_str)" {
			set parts [split $expect_out(1,string) ",/"]
			while 1 {
				set task_found [lsearch $parts "task"]
				if { $task_found == -1 } break
				set parts [lreplace $parts $task_found $task_found]
			}
			set affinity [join $parts ","]
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $affinity
}


################################################################
#
# NAME
#	get_affinity_params - gets the task plugin parameters
#
# SYNOPSIS
#	get_affinity_params
#
# RETURN VALUE
#	Returns value of TaskPluginParam
#
################################################################

proc get_affinity_params { } {
	global scontrol re_word_str

	log_user 0
	set params ""
	spawn $scontrol show config
	expect {
		-re "TaskPluginParam *= ($re_word_str)" {
			set params $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $params
}


################################################################
#
# NAME
#	test_constrain_devices - determines if devices are constrained by cgroup
#
# SYNOPSIS
#	test_constrain_devices
#
# DESCRIPTION
#	Determine if devices are constrained by cgroup.
#	the value of ConstrainDevices in the gres.conf.
#
# RETURN VALUE
#	Returns 1 if constrained, 0 otherwise
#
################################################################

proc test_constrain_devices { } {
	global scontrol number

	log_user 0
	set constrain_devices 0
	spawn $scontrol show config
	expect {
		-re "ConstrainDevices *= yes" {
			set constrain_devices 1
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $constrain_devices
}


################################################################
#
# NAME
#	get_mps_count_by_index - gets the count of a specific gres/mps device
#
# SYNOPSIS
#	get_mps_count_by_index index hostname
#
# RETURN VALUE
#	Returns the Count of a specific gres/mps device
#
################################################################

proc get_mps_count_by_index { index hostname } {
	global slurmd number re_word_str

	log_user 0
	set count 0
	spawn $slurmd -G -N $hostname
	expect {
		-re "Gres Name=mps Type=$re_word_str Count=($number) Index=$index" {
			set count $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $count
}


################################################################
#
# NAME
#	get_bb_emulate - determines if Cray burst buffers API is emulated
#
# SYNOPSIS
#	get_bb_emulate
#
# DESCRIPTION
#	Determine if Cray burst buffers API is emulated
#
# RETURN VALUE
#	1 if true, 0 if false
#
################################################################

proc get_bb_emulate { } {
	global scontrol

	log_user 0
	set bb_emulate 0
	spawn $scontrol show burst
	expect {
		-re "EmulateCray" {
			set bb_emulate 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $bb_emulate
}


################################################################
#
# NAME
#	get_bb_persistent - determines if persistent burst buffers can be created by users
#
# SYNOPSIS
#	get_bb_persistent
#
# DESCRIPTION
#	Determine if persistent burst buffers can be created by users
#
# RETURN VALUE
#	1 if true, 0 if false
#
################################################################

proc get_bb_persistent { } {
	global scontrol

	log_user 0
	set bb_persistent 0
	spawn $scontrol show burst
	expect {
		-re "EnablePersistent" {
			set bb_persistent 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $bb_persistent
}


################################################################
#
# NAME
#	get_bb_types - gets the burst buffer plugins running with task/ stripped
#
# SYNOPSIS
#	get_bb_types
#
# RETURN VALUE
#	Returns comma separated list of task plugins running without the task/
#
################################################################

proc get_bb_types { } {
	global scontrol re_word_str

	log_user 0
	set bb_types ""
	spawn $scontrol show config
	expect {
		-re "BurstBufferType *= ($re_word_str)" {
			set parts [split $expect_out(1,string) ",/"]
			while 1 {
				set task_found [lsearch $parts "burst_buffer"]
				if { $task_found == -1 } break
				set parts [lreplace $parts $task_found $task_found]
			}
			set bb_types [join $parts ","]
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $bb_types
}


################################################################
#
# NAME
#	get_cpu_governors - gets the CpuFreqGovernor configuration parameter
#
# SYNOPSIS
#	get_cpu_governors
#
# RETURN VALUE
#	Returns comma separated list of available CPU governor's
#
################################################################

proc get_cpu_governors { } {
	global scontrol re_word_str

	log_user 0
	set governors ""
	spawn $scontrol show config
	expect {
		-re "CpuFreqGovernors *= ($re_word_str)" {
			set governors $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $governors
}


################################################################
#
# NAME
#	test_cpu_affinity - determines if system is using the task/affinity plugin
#
# SYNOPSIS
#	test_cpu_affinity
#
# RETURN VALUE
#	Returns 1 if enforcing, 0 if none
#
################################################################

proc test_cpu_affinity { } {
	log_user 0

	set affinity 0
	set parts [split [get_affinity_types] ","]

	if { [lsearch $parts "affinity"] != -1 } {
		set affinity 1
	}

	log_user 1
	return $affinity
}


################################################################
#
# NAME
#	test_cpu_affinity_or_cgroup - determines if system is enforcing CPU affinity
#
# SYNOPSIS
#	test_cpu_affinity_or_cgroup
#
# DESCRIPTION
#	Determine if system is enforcing CPU affinity (using
#	either the task/affinity and/or task/cgroup plugin)
#
# RETURN VALUE
#	Returns 1 if enforcing, 0 if none
#
################################################################

proc test_cpu_affinity_or_cgroup { } {
	global scontrol re_word_str

	log_user 0

	set affinity 0
	set parts [split [get_affinity_types] ","]

	if { [lsearch $parts "affinity"] != -1 } {
		set affinity 1
	} elseif { [lsearch $parts "cgroup"] != -1 } {
		spawn $scontrol show config
		expect {
			-re "TaskAffinity *= yes" {
				set affinity 1
				exp_continue
			}
			eof {
				wait
			}
		}
	}

	log_user 1
	return $affinity
}


################################################################
#
# NAME
#	test_mem_affinity - determines if system is enforcing memory affinity
#
# SYNOPSIS
#	test_mem_affinity
#
# RETURN VALUE
#	Returns 1 if enforcing, 0 if none
#
################################################################

proc test_mem_affinity { } {
	global scontrol re_word_str

	log_user 0

	set affinity 0
	set parts [split [get_affinity_types] ","]

	if { [lsearch $parts "affinity"] != -1 } {
		set affinity 1
	}

	log_user 1
	return $affinity
}


################################################################
#
# NAME
#	test_track_wckey_slurmctld - determines if we track wckeys
#
# SYNOPSIS
#	test_track_wckey_slurmctld
#
# DESCRIPTION
#	Determine if we track workload characterization keys.
#	This is based upon the value of TrackWCKey in the slurm.conf.
#
# RETURN VALUE
#	Returns value of TrackWCKey
#
################################################################

proc test_track_wckey_slurmctld { } {
	global scontrol number

	log_user 0
	set track_wckey 0
	spawn $scontrol show config
	expect {
		-re "TrackWCKey *= Yes" {
			set track_wckey 1
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $track_wckey
}


################################################################
#
# NAME
#	test_account_storage - determines if we are using a usable accounting storage plugin
#
# SYNOPSIS
#	test_account_storage
#
# DESCRIPTION
#	Determine if we are using a usable accounting storage package.
#	This is based upon the value of AccountingStorageType in the slurm.conf.
#
# RETURN VALUE
#	Returns 1 if the system is running an accounting storage type
#	that is complete, 0 otherwise
#
################################################################

proc test_account_storage { } {
	global scontrol

	log_user 0
	set account_storage 0
	spawn $scontrol show config
	expect {
		-re "(accounting_storage/slurmdbd|accounting_storage/mysql|accounting_storage/pgsql)" {
			set account_storage 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $account_storage
}


################################################################
#
# NAME
#	test_enforce_limits - determines if resource limits are enforced
#
# SYNOPSIS
#	test_enforce_limits
#
# DESCRIPTION
#	Determine whether resouce limits are enforced. This is based upon
#	the value of AccountingStorageEnforce in the slurm.conf.
#
# RETURN VALUE
#	Returns 1 if the system is enforcing limits, 0 otherwise
#
################################################################

proc test_enforce_limits { } {
	global re_word_str scontrol

	log_user 0
	set enforce_limits 0
	spawn $scontrol show config
	expect {
		-re "AccountingStorageEnforce *= ($re_word_str)" {
			if {[string first "safe" $expect_out(1,string)] != -1 } {
				set enforce_limits 1
			}
			if {[string first "limits" $expect_out(1,string)] != -1 } {
				set enforce_limits 1
			}
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $enforce_limits
}


################################################################
#
# NAME
#	test_allow_spec_resources - returns the value of AllowSpecResources
#
# SYNOPSIS
#	test_allow_spec_resources
#
# RETURN VALUE
#	Returns 1 if AllowSpecResources is set, 0 if not and 2 in case of error
#
# ENVIRONMENT
#	It also sets the global variable exit_code to 1 in case of error.
#
################################################################

proc test_allow_spec_resc { } {
	global exit_code re_word_str scontrol

	log_user 0
	set allow_spec_resc 2
	spawn $scontrol show config
	expect {
		-re "AllowSpecResourcesUsage *= ($re_word_str)" {
			if {[string equal $expect_out(1,string) Yes]} {
				set allow_spec_resc 1
			} elseif {[string equal $expect_out(1,string) No]} {
				set allow_spec_resc 0
			}
			exp_continue
		}
		timeout {
			log_error "scontrol show config time out"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$allow_spec_resc == 2} {
		log_error "AllowSpecResourcesUsage not found in scontrol show config"
		set exit_code 1
	}

	log_user 1
	return $allow_spec_resc
}


################################################################
#
# NAME
#	test_enforce_safe_set - determines if AccountingStorageEnforce=safe is set
#
# SYNOPSIS
#	test_enforce_safe_set
#
# DESCRIPTION
#	Determine if AccountingStorageEnforce=safe is set in the slurm.conf.
#
# RETURN VALUE
#	Returns 1 if the system is running with safe limits, 0 otherwise
#
################################################################

proc test_enforce_safe_set { } {
	global re_word_str scontrol

	log_user 0
	set enforce_limits 0
	spawn $scontrol show config
	expect {
		-re "AccountingStorageEnforce *= ($re_word_str)" {
			if {[string first "safe" $expect_out(1,string)] != -1 } {
				set enforce_limits 1
			}
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $enforce_limits
}


################################################################
#
# NAME
#	test_enforce_qos_set - determines if AccountingStorageEnforce=qos is set
#
# SYNOPSIS
#	test_enforce_qos_set
#
# DESCRIPTION
#	Determine if AccountingStorageEnforce=qos is set in the slurm.conf.
#
# RETURN VALUE
#	Returns 1 if the system is running with safe limits, 0 otherwise
#
################################################################

proc test_enforce_qos_set { } {
	global re_word_str scontrol

	log_user 0
	set enforce_limits 0
	spawn $scontrol show config
	expect {
		-re "AccountingStorageEnforce *= ($re_word_str)" {
			if {[string first "qos" $expect_out(1,string)] != -1 } {
				set enforce_limits 1
			}
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $enforce_limits
}


################################################################
#
# NAME
#	test_using_slurmdbd - checks whether using slurmdbd
#
# SYNOPSIS
#	test_using_slurmdbd
#
# DESCRIPTION
#	Since there is a lag at which the slurmdbd processes a job start we
#	need to wait a bit to make sure the data has been set before proceeding.
#	This is based upon the value of AccountingStorageType in the slurm.conf.
#
# RETURN VALUE
#	Returns 1 if the system is running with slurmdbd, 0 otherwise
#
################################################################

proc test_using_slurmdbd { } {
	global scontrol

	log_user 0
	set account_storage 0
	spawn $scontrol show config
	expect {
		-re "(accounting_storage/slurmdbd)" {
			set account_storage 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $account_storage
}


################################################################
#
# NAME
#	priority_type - gets the priority plugin type
#
# SYNOPSIS
#	priority_type
#
# DESCRIPTION
#	Use scontrol to determine the priority plugin
#
# RETURN VALUE
#	Name of priority type
#
################################################################

proc priority_type {} {
	global scontrol

	log_user 0
	set name ""
	set fd [open "|$scontrol show config"]
	while {[gets $fd line] != -1} {
		if {[regexp {^PriorityType *= priority/(\w+)} $line frag name]
				== 1} {
			break
		}
	}
	catch {close $fd}
	log_user 1

	if {[string length $name] == 0} {
		log_error "Could not identify the Priority Type"
	}

	return $name
}


################################################################
#
# NAME
#	get_min_job_age - determines the MinJobAge
#
# SYNOPSIS
#	get_min_job_age
#
# DESCRIPTION
#	Use scontrol to determine the MinJobAge
#
# RETURN VALUE
#	MinJobAge value
#
################################################################

proc get_min_job_age {} {
	global scontrol number

	set age 0
	log_user 0
	spawn $scontrol show config
	expect {
		-re "MinJobAge *= ($number)" {
			set age $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	if {$age == 0} {
		log_error "Could not identify the MinJobAge"
	}
	return $age
}


################################################################
#
# NAME
#	get_default_acct - gets user's default account
#
# SYNOPSIS
#	get_default_acct user
#
# RETURN VALUE
#	Returns name of default account if exists, NULL otherwise
#
################################################################

proc get_default_acct { user } {
	global sacctmgr re_word_str bin_id

	log_user 0
	set def_acct ""

	if { !$user } {
		set user [get_my_user_name]
	}

	spawn $sacctmgr -n list -P user $user format="DefaultAccount"
	expect {
		-re "($re_word_str)" {
			set def_acct $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $def_acct
}


################################################################
#
# NAME
#	test_front_end - checks if execution host executes on a front-end node
#
# SYNOPSIS
#	test_front_end
#
# DESCRIPTION
#	Determine if the execution host is one in which the
#	slurmd daemon executes on a front-end node rather than the
#	compute hosts (e.g. Blue Gene systems).
#
# RETURN VALUE
#	Returns 1 if the system uses a front-end, 0 otherwise
#
################################################################

proc test_front_end { } {
	global enable_front_end scontrol

	log_user 0
	set front_end 0
	spawn $scontrol show frontend
	expect {
		"FrontendName=" {
			set front_end 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $front_end
}


################################################################
#
# NAME
#	test_multiple_slurmd - checks if running multiple slurmds per node
#
# SYNOPSIS
#	test_multiple_slurmd
#
# RETURN VALUE
#	Returns 1 if running multiple slurmd per node
#
################################################################

proc test_multiple_slurmd { } {
	global scontrol

	log_user 0
	set multiple_slurmd 0
	spawn $scontrol show config
	expect {
		"MULTIPLE_SLURMD" {
			set multiple_slurmd 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $multiple_slurmd
}


################################################################
#
# NAME
#	test_configless_slurmd - checks if configless slurmd enabled
#
# SYNOPSIS
#	test_configless_slurmd
#
# RETURN VALUE
#	Returns 1 if configless slurmd enabled
#
################################################################

proc test_configless_slurmd { } {
	global scontrol bin_bash bin_grep

	log_user 0
	set configless 0
	spawn -noecho $bin_bash -c "exec $scontrol show config |\
				    $bin_grep SlurmctldParameters"
	expect {
		"enable_configless" {
			set configless 1
			exp_continue
		}
		timeout {
			log_error("timeout checing if configless is set")
		}
		eof {
			wait
		}
	}
	log_user 1

	return $configless
}

################################################################
#
# NAME
#	test_cray - determine if the system is a native cray system
#
# SYNOPSIS
#	test_cray
#
# RETURN VALUE
#	Returns 1 if the system is a cray, 0 otherwise
#
################################################################

proc test_cray { } {
	global scontrol bin_bash bin_grep

	log_user 0
	set cray 0
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SwitchType"
	expect {
		"switch/cray" {
			set cray 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $cray
}


################################################################
#
# NAME
#	test_launch_type - determines launch type plugin
#
# SYNOPSIS
#	test_launch_type
#
# RETURN VALUE
#	Returns the launch plugin type
#
################################################################

proc test_launch_type { } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set type ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep LaunchType"
	expect {
		-re "launch/($re_word_str)" {
			set type $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $type
}


################################################################
#
# NAME
#	test_launch_test_exec - determines launch type plugin
#
# SYNOPSIS
#	test_launch_test_exec
#
# RETURN VALUE
#	Returns the launch plugin type
#
################################################################

proc test_launch_test_exec { } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set test_exec 0
	set type ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep LaunchParameters"
	expect {
		-re "test_exec" {
			set test_exec 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $test_exec
}


################################################################
#
# NAME
#	test_node_features_plugin - determines NodeFeaturesPlugin type
#
# SYNOPSIS
#	test_node_features_plugin
#
# RETURN VALUE
#	Returns the NodeFeaturesPlugin type
#
################################################################

proc test_node_features_plugin { } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set type ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep NodeFeaturesPlugins"
	expect {
		-re "node_features/($re_word_str)" {
			set type $expect_out(1,string)
			exp_continue
		}
		-re "null" {
			exp_continue
		}
		-re "($re_word_str)" {
			set type $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $type
}


################################################################
#
# NAME
#	test_emulated - determines if the system is emulated
#
# SYNOPSIS
#	test_emulated
#
# DESCRIPTION
#	Determine if the system is emulated (not running on
#	actual Cray or Bluegene hardware
#
# RETURN VALUE
#	Returns 1 if the system is emulated otherwise
#
################################################################

proc test_emulated { } {
	global scontrol bin_bash

	log_user 0
	set emulated 0
	spawn -noecho $bin_bash -c "exec $scontrol show config"
	expect {
		"Emulated * = yes" {
			set emulated 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $emulated
}


################################################################
#
# NAME
#	test_killonbadexit - determines if KillOnBadExit is configured to be 1
#
# SYNOPSIS
#	test_killonbadexit
#
# DESCRIPTION
#	Determine if KillOnBadExit is configured to be 1.
#
# RETURN VALUE
#	Returns 1 if KillOnBadExit is 1.
#
################################################################

proc test_killonbadexit { } {
	global scontrol bin_bash bin_grep

	log_user 0
	set killonbadexit 0
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep KillOnBadExit"
	expect {
		-re "KillOnBadExit *= *1" {
			set killonbadexit 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $killonbadexit
}


################################################################
#
# NAME
#	get_cycle_count - get desired iteration count
#
# SYNOPSIS
#	get_cycle_count
#
# DESCRIPTION
#	For tests with iteration counts (e.g. test9.1, test9.2)
#	return the desired iteration count
#
# RETURN VALUE
#	Returns desired iteration count
#
################################################################

proc get_cycle_count { } {
	global enable_memory_leak_debug

	if {$enable_memory_leak_debug != 0} {
		return 2
	}
	return 100
}


################################################################
#
# NAME
#	test_select_type - determine which select plugin is being used
#
# SYNOPSIS
#	test_select_type
#
# RETURN VALUE
#	Returns name of select plugin
#
################################################################

proc test_select_type { } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set type ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectType"
	expect {
		-re "select/($re_word_str)" {
			set type $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $type
}


################################################################
#
# NAME
#	get_select_type_params - determines SelectTypeParameters being used for a given partition
#
# SYNOPSIS
#	get_select_type_params ?partition?
#
# DESCRIPTION
#	Determine SelectTypeParameters being used for a given partition.
#	If the partition is not specified, the default partition will be used.
#
# RETURN VALUE
#	Returns a string containing SelectTypeParameters
#
################################################################

proc get_select_type_params { {partition ""} } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set params ""

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	if {[string compare $partition ""]} {
		spawn -noecho $bin_bash -c "exec $scontrol show part $partition | $bin_grep SelectTypeParameters"
		expect {
			-re "SelectTypeParameters *= *NONE" {
				exp_continue
			}
			-re "SelectTypeParameters *= *($re_word_str)" {
				set params $expect_out(1,string)
				exp_continue
			}
			eof {
				wait
			}
		}
	}
	if { [string compare params ""] } {
		spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectTypeParameters"
		expect {
			-re "SelectTypeParameters *= *($re_word_str)" {
				set params $expect_out(1,string)
				exp_continue
			}
			eof {
				wait
			}
		}
	}
	log_user 1

	return $params
}


################################################################
#
# NAME
#	test_linear - determines if system is configured with linear plugin
#
# SYNOPSIS
#	test_linear
#
# RETURN VALUE
#	Returns 1 if configured, 0 otherwise
#
################################################################

proc test_linear { } {
	global scontrol number

	log_user 0
	set select_type [test_select_type]

	if {![string compare $select_type "linear"]} {
		return 1
	} elseif {![string compare $select_type "cray_aries"] &&
		  ![test_select_type_params "other_cons_res"] &&
		  ![test_select_type_params "other_cons_tres"]} {
		return 1
	}
	log_user 1

	return 0
}


################################################################
#
# NAME
#	test_cons_res - determines if system is configured with cons_res plugin
#
# SYNOPSIS
#	test_cons_res
#
# RETURN VALUE
#	Returns 1 if configured, 0 otherwise
#
################################################################

proc test_cons_res { } {
	global scontrol number

	log_user 0
	set select_type [test_select_type]

	if {![string compare $select_type "cons_res"]} {
		return 1
	} elseif {![string compare $select_type "cray_aries"] &&
		  [test_select_type_params "other_cons_res"]} {
		return 1
	}
	log_user 1

	return 0
}


################################################################
#
# NAME
#	test_cons_tres - determines if system is configured with cons_tres plugin
#
# SYNOPSIS
#	test_cons_tres
#
# DESCRIPTION
#	Determine if system is configured with cons_tres plugin.
#
# RETURN VALUE
#	Returns 1 if configured, 0 otherwise
#
################################################################

proc test_cons_tres { } {
	global scontrol number

	log_user 0
	set select_type [test_select_type]

	if {![string compare $select_type "cons_tres"]} {
		return 1
	} elseif {![string compare $select_type "cray_aries"] &&
		  [test_select_type_params "other_cons_tres"]} {
		return 1
	}
	log_user 1

	return 0
}


################################################################
#
# NAME
#	get_total_cpus - gets the total amount of CPUs on the default partition
#
# SYNOPSIS
#	get_total_cpus
#
# RETURN VALUE
#	The total amount of CPUs on the default partition.
#
# NOTE
#	CoreSpecCount are not part of the total.
#
################################################################

proc get_total_cpus {} {
	global sinfo scontrol re_word_str exit_code

	set partition [default_partition]
	set cpu_cnt 0

	set re_with_cs    "CPUTot=(\\d+).*CoreSpecCount=(\\d+).*ThreadsPerCore=(\\d+)"
	set re_without_cs "CPUTot=(\\d+)"

	spawn $sinfo -h -o "%P %N" -p $partition --state=idle
	expect {
		-re "$partition\\* ($re_word_str)" {
			set def_hostlist $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "sinfo not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	set fd [open "|$scontrol --oneliner show node $def_hostlist"]
	while {[gets $fd line] != -1} {
		if {[regexp $re_with_cs $line frag tmp_cpu_cnt core_spec_cnt threads_per_core] == 1} {
			set cpu_cnt [expr $cpu_cnt + $tmp_cpu_cnt - $core_spec_cnt * $threads_per_core]
			continue
		}
		if {[regexp $re_without_cs $line frag tmp_cpu_cnt] == 1} {
			set cpu_cnt [expr $cpu_cnt + $tmp_cpu_cnt]
			continue
		}
	}
	return $cpu_cnt
}


################################################################
#
# NAME
#	test_scheduler_params - tests SchedulerParameters being used
#
# SYNOPSIS
#	test_scheduler_params type
#
# DESCRIPTION
#	Test SchedulerParameters being used
#
# RETURN VALUE
#	Returns 1 if "type" (input) is found, 0 otherwise
#
################################################################

proc test_scheduler_params { type } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set ret 0
	set params ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SchedulerParameters"
	expect {
		-re "SchedulerParameters *= *($re_word_str)" {
			set params $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	# Since string first doesn't have any case
	# distinction just make it always be upper.
	set type [string toupper $type]
	set params [string toupper $params]
	set params [split $params ,]

	# We have to search with the '*' since some options have an =value
	# on them.
	if { [lsearch $params "$type*"] != -1 } {
		set ret 1
	}
	log_user 1

	return $ret
}


################################################################
#
#
# NAME
#	test_dependency_params - tests DependencyParameters being used
#
# SYNOPSIS
#	test_dependency_params type
#
# DESCRIPTION
#	Tests DependencyParameters being used
#
# RETURN VALUE
#	Returns 1 if "type" (input) is found, 0 otherwise
#
################################################################

proc test_dependency_params { type } {
	global bin_bash scontrol bin_grep re_word_str

	log_user 0
	set ret 0
	set params ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep DependencyParameters"
	expect {
		-re "DependencyParameters *= *($re_word_str)" {
			set params $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	# Since string first doesn't have any case
	# distinction just make it always be upper.
	set type [string toupper $type]
	set params [string toupper $params]
	set params [split $params ,]

	# We have to search with the '*' since some options have an =value
	# on them.
	if { [lsearch $params "$type*"] != -1 } {
		set ret 1
	}
	log_user 1

	return $ret
}

################################################################
#
# NAME
#	test_select_type_params - determines SelectTypeParameters being used
#
# SYNOPSIS
#	test_select_type_params type
#
# RETURN VALUE
#	Returns 1 if "type" (input) is found, 0 otherwise
#
################################################################

proc test_select_type_params { type } {
	global scontrol bin_bash bin_grep re_word_str

	log_user 0
	set ret 0
	set params ""
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectTypeParameters"
	expect {
		-re "SelectTypeParameters *= *($re_word_str)" {
			set params $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	# Since string first doesn't have any case
	# distinction just make it always be upper.
	set type [string toupper $type]
	set params [string toupper $params]
	set params [split $params ,]

	foreach item $params {
		# If argument is "MEMORY" then search for "_MEMORY"
		if {![string compare $type "MEMORY"] &&
		    [string first "_MEMORY" $item] != -1} {
			set ret 1
			break
		# i.e. Check for CR_CORE_MEMORY or CR_CORE
		} elseif {[string first "MEMORY" $item] != -1 &&
		          [string first $type $item] != -1} {
			set ret 1
			break
		} elseif {![string compare $type $item]} {
			set ret 1
			break
		}
	}
	log_user 1

	return $ret
}


################################################################
#
# NAME
#	test_root_user - determines if user is root
#
# SYNOPSIS
#	test_root_user
#
################################################################

proc test_root_user { } {
	global super_user super_user_set
	set uid [get_my_uid]
	if {$uid == 0} {
		set super_user 1
		set super_user_set 1
		return 1
	}
	return 0
}


################################################################
#
# NAME
#	test_super_user - determines if user is a Slurm super user
#
# SYNOPSIS
#	test_super_user
#
# DESCRIPTION
#	Determine if user is a Slurm super user (i.e. user
#	root or configured SlurmUser)
#
################################################################

proc test_super_user { } {
	global re_word_str bin_id number scontrol super_user super_user_set

	if {$super_user_set != 0} {
		return $super_user
	}

#
#	Check if user root
#
	set uid [get_my_uid]
	if {$uid == 0} {
		set super_user 1
		set super_user_set 1
		return $super_user
	}

#
#	Check if SlurmUser
#
	set user [get_my_user_name]

	log_user 0
	spawn $scontrol show config
	set slurm_user ""
	expect {
		-re "SlurmUser *= ($re_word_str)\[\(\]($number)\[\)\]" {
			set slurm_user $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	if {[string compare $user $slurm_user] == 0} {
		set super_user 1
	}
	set super_user_set 1
	log_user 1
	return $super_user
}


################################################################
#
# NAME
#	dec2hex - creates a 32 bit hex number from a signed decimal number
#
# SYNOPSIS
#	dec2hex value
#
# DESCRIPTION
#	Create a 32 bit hex number from a signed decimal number
#
# RETURN VALUE
#	32 bit hex version of input 'value'
#
# SOURCE
#	Courtesy of Chris Cornish
#	http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982
#
################################################################
# Replace all non-decimal characters

proc dec2hex {value} {
	regsub -all {[^0-x\.-]} $value {} newtemp
	set value [string trim $newtemp]
	if {$value < 2147483647 && $value > -2147483648} {
		set tempvalue [format "%#010X" [expr $value]]
		return [string range $tempvalue 2 9]
	} elseif {$value < -2147483647} {
		return "80000000"
	} else {
		return "7FFFFFFF"
	}
}


################################################################
#
# NAME
#	uint2hex - creates a 32 bit hex number from an unsigned decimal
#
# SYNOPSIS
#	uint2hex value
#
# DESCRIPTION
#	Create a 32 bit hex number from an unsigned decimal number.
#
# ARGUMENTS
#	value
#		unsigneddecimal number to convert
#
# RETURN VALUE
#	32 bit hex version of input 'value'
#
# SOURCE
#	Courtesy of Chris Cornish
#	http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982
#
################################################################
# Replace all non-decimal characters

proc uint2hex {value} {
	regsub -all {[^0-x\.-]} $value {} newtemp
	set value [string trim $newtemp]
	if {$value <= 4294967295 && $value >= 0} {
		set tempvalue [format "%#010X" [expr $value]]
		return [string range $tempvalue 2 9]
	} else {
		return "FFFFFFFF"
	}
}


################################################################
#
# NAME
#	available_nodes - returns number of available nodes
#
# SYNOPSIS
#	available_nodes ?state? ?partition?
#
# DESCRIPTION
#	Check to see if a given partition has a at least "num_nodes" number
#	of nodes in the alloc, idle, or comp state.  This can be used to
#	avoid launching a job that will never run because nodes are in the
#	"drained" state or otherwise unavailable.
#	If the partition is not specified, the default partition will be used.
#
# RETURN VALUE
#	Returns the number of available nodes in the partition, or
#	-1 on failure.
#
################################################################

proc available_nodes { {state ""} {partition ""} } {
	global sinfo

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	if {[string length $state] == 0} {
		set state "idle,alloc,comp"
	}

	set available -1
	set fd [open "|$sinfo --noheader --partition $partition --state $state --format %D"]
	gets $fd line
	catch {close $fd}
	regexp {\d+} $line available
	if {[string match *K $line]} {
		set available [expr $available * 1024]
	} elseif {[string match *M $line]} {
		set available [expr $available * 1048576]
	}


	return $available
}


################################################################
#
# NAME
#	partition_oversubscribe - determines the oversubscribe configuration of the specified partition
#
# SYNOPSIS
#	partition_oversubscribe ?partition?
#
# DESCRIPTION
#	Determine the oversubscribe configuration of the specified partition.
#	If the partition is not specified, the default partition will be used.
#
# RETURN VALUE
#	Return the oversubscribe configuration of the specified partition.
#
################################################################

proc partition_oversubscribe { {partition ""} } {
	global sinfo

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	set oversubscribe "NO"
	log_debug "$sinfo --noheader --partition $partition --format %h"
	set fd [open "|$sinfo --noheader --partition $partition --format %h"]
	gets $fd line
	catch {close $fd}
	regexp {[a-zA-Z]+} $line oversubscribe
	return $oversubscribe
}


################################################################
#
# NAME
#	default_partition - determines the name of the default partition
#
# SYNOPSIS
#	default_partition
#
# DESCRIPTION
#	Use scontrol to determine the name of the default partition
#
# RETURN VALUE
#	Name of the current default partition
#
################################################################

proc default_partition {} {
	global scontrol

	set name ""
	set fd [open "|$scontrol --all --oneliner show partition"]
	while {[gets $fd line] != -1} {
		if {[regexp {^PartitionName=([^ ]*).*Default=YES} $line frag name]
				== 1} {
			break
		}
	}
	catch {close $fd}

	if {[string length $name] == 0} {
		log_error "Could not identify the default partition"
	}

	return $name
}


################################################################
#
# NAME
#	default_part_exclusive - determines if the default partition allocates whole nodes to jobs
#
# SYNOPSIS
#	default_part_exclusive
#
# DESCRIPTION
#	Use scontrol to determine if the default partition
#	allocates whole nodes to jobs
#
# RETURN VALUE
#	Name of the current default partition
#
################################################################

proc default_part_exclusive {} {
	set def_part [default_partition]
	set oversubscribe [partition_oversubscribe $def_part]
	if {[string compare $oversubscribe "EXCLUSIVE"] == 0} {
		return 1
	} else {
		return 0
	}
}


################################################################
#
# NAME
#	switch_type - determines the switch type
#
# SYNOPSIS
#	switch_type
#
# DESCRIPTION
#	Use scontrol to determine the switch type
#
# RETURN VALUE
#	Name of SwitchType
#
################################################################

proc switch_type {} {
	global scontrol

	set name ""
	set fd [open "|$scontrol show config"]
	while {[gets $fd line] != -1} {
		if {[regexp {^SwitchType *= switch/(\w+)} $line frag name]
				== 1} {
			break
		}
	}
	catch {close $fd}

	if {[string length $name] == 0} {
		log_error "Could not identify the switch type"
	}

	return $name
}


################################################################
#
# NAME
#	make_bash_script - creates a bash script
#
# SYNOPSIS
#	make_bash_script script_name script_contents
#
# DESCRIPTION
#	Create a bash script of name "script_name", and
#	make the body of the script "script_contents".
#	make_bash_script removes the file if it already exists,
#	then generates the #! line, and then dumps "script_contents"
#	to the file.  Finally, it makes certain that the script
#	is executable.
#
# ARGUMENTS
#	script_name
#		file name for the bash script
#	script_contents
#		body of the script, not including the initial #! line.
#
# RETURN VALUE
#	Nothing.
#
################################################################

proc make_bash_script { script_name script_contents } {
	global bin_bash bin_chmod

	file delete $script_name
	set fd [open $script_name "w"]
	puts $fd "#!$bin_bash"
	puts $fd $script_contents
	close $fd
	exec $bin_chmod 700 $script_name
}


################################################################
#
# NAME
#	get_suffix - given a hostname, returns it's numeric suffix
#
# SYNOPSIS
#	get_suffix hostname
#
# DESCRIPTION
#	Given a hostname, return it's numeric suffix
#
# RETURN VALUE
#	numerical suffix for input 'hostname' or -1 if not a number
#
################################################################

proc get_suffix { hostname } {
	set host_len [string length $hostname]
	set host_inx [expr $host_len-1]
	set host_char [string index $hostname $host_inx]
	if {[string compare $host_char "0"] < 0 || [string compare $host_char "9"] > 0} {
		return -1
	}

	for {set host_inx [expr $host_len-1]} {$host_inx >= 0} {incr host_inx -1} {
		set host_char [string index $hostname $host_inx]
		if {[string compare $host_char "0"] < 0} { break }
		if {[string compare $host_char "9"] > 0} { break }
	}
	incr host_inx

	if {$host_inx == $host_len} {
		log_warn "Hostname lacks a suffix: $hostname"
		return "-1"
	}

#	Strip off leading zeros to avoid doing octal arithmetic
	set suffix [string range $hostname $host_inx $host_len]
	set suffix_len [string length $suffix]
	for {set suffix_inx 0} {$suffix_inx < [expr $suffix_len - 1]} {incr suffix_inx} {
		set suffix_char [string index $suffix $suffix_inx]
		if {[string compare $suffix_char "0"] != 0} { break }
	}

	return [string range $suffix $suffix_inx $suffix_len]
}


################################################################
#
# NAME
#	is_super_user - checks if we are user root or SlurmUser
#
# SYNOPSIS
#	is_super_user
#
# DESCRIPTION
#	Check if we are user root or SlurmUser
#
# RETURN VALUE
#	1 if true, 0 if false
#
################################################################

proc is_super_user { } {
	global re_word_str bin_id scontrol

	log_user 0
	set user_name [get_my_user_name]
	if {[string compare $user_name "root"] == 0} {
		log_user 1
		return 1
	}

	set found_user 0
	spawn $scontrol show config
	expect {
		-re "SlurmUser *= $user_name" {
			set found_user 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $found_user
}


################################################################
#
# NAME
#	check_acct_associations - checks associations
#
# SYNOPSIS
#	check_acct_associations
#
# DESCRIPTION
#	Use sacctmgr to check associations
#
# RETURN VALUE
#	0 on any error
#
################################################################

proc check_acct_associations { } {
	global sacctmgr number re_word_str exit_code

	set rc 1
	log_user 0
	log_debug "Sanity-Checking Associations"
	#
	# Use sacctmgr to check associations
	#
	set s_pid [spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt,cluster]
	expect {
	       -re "($number)\\|($number)\\|($re_word_str)\\|" {
		      # Here we are checking if we have duplicates and
		      # setting up an array to check for holes later

		      set cluster $expect_out(3,string)
		      if { ![info exists c_min($cluster)] } {
			      set c_min($cluster) -1
			      set c_max($cluster) -1
		      }

		      set num1 $expect_out(1,string)
		      set num2 $expect_out(2,string)
		      set first [info exists found($cluster,$num1)]
		      set sec [info exists found($cluster,$num2)]
		      #log_debug "$first=$num1 $sec=$num2"
		      if { $first } {
			     log_error "$cluster found lft $num1 again"
			     set rc 0
		      } elseif { $sec } {
			     log_error "$cluster found rgt $num2 again"
			     set rc 0
		      } else {
			     set found($cluster,$num1) 1
			     set found($cluster,$num2) 1
			     if { $c_min($cluster) == -1
				  || $c_min($cluster) > $num1 } {
				    set c_min($cluster) $num1
			     }
			     if { $c_max($cluster) == -1
				  || $c_max($cluster) < $num2 } {
				    set c_max($cluster) $num2
			     }
		      }
		      exp_continue
	       }
	       timeout {
			log_error "sacctmgr add not responding"
			slow_kill $s_pid
			set exit_code 1
	       }
	       eof {
		      wait
	       }
	}

	foreach cluster [array names c_min] {
		# Here we are checking for holes in the list from above
		for {set inx $c_min($cluster)} {$inx < $c_max($cluster)} {incr inx} {
			if { ![info exists found($cluster,$inx)] } {
				log_error "$cluster No index at $inx"
				set rc 0
			}
		}
	}
	log_user 1
	return $rc
}


################################################################
#
# NAME
#	get_job_acct_freq - gets the value of the job account gather frequency
#
# SYNOPSIS
#	get_job_acct_freq
#
# RETURN VALUE
#	job account gather frequency
#
################################################################

proc get_job_acct_freq { } {
	global scontrol number

	log_user 0
	set freq_val 0

	spawn $scontrol show config
	expect {
		-re "JobAcctGatherFrequency *= ($number)" {
			set freq_val $expect_out(1,string)
			if {$freq_val == 0} {
				set freq_val 0
			}
		}
		-re "JobAcctGatherFrequency *= task=($number)" {
			set freq_val $expect_out(1,string)
			if {$freq_val == 0} {
				set freq_val 0
			}
		}
		eof {
			wait
		}
	}

	log_user 1
	return $freq_val
}


################################################################
#
# NAME
#	get_job_acct_type - gets the value of JobAcctGatherType
#
# SYNOPSIS
#	get_job_acct_type
#
# RETURN VALUE
#	JobAcctGatherType value
#
################################################################

proc get_job_acct_type { } {
	global scontrol re_word_str

	log_user 0
	set gather_type "none"

	spawn $scontrol show config
	expect {
		-re "JobAcctGatherType *= jobacct_gather/($re_word_str)" {
			set gather_type $expect_out(1,string)
			exp_continue
		}
		-re "JobAcctGatherType *= ($re_word_str)" {
			set gather_type $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	log_user 1
	return $gather_type
}


################################################################
#
# NAME
#	check_accounting_admin_level - gets the admin_level for the current user
#
# SYNOPSIS
#	check_accounting_admin_level
#
# RETURN VALUE
#	admin_level for the current user
#
################################################################

proc check_accounting_admin_level { } {
	global sacctmgr re_word_str re_word_str bin_id exit_code

	set admin_level ""
	set user_name ""

	log_user 0

	if { [test_super_user] } {
		return "Administrator"
	}

	set user_name [get_my_user_name]

	if { ![string length $user_name] } {
		log_error "No name returned from id"
		return ""
	}

	#
	# Use sacctmgr to check admin_level
	#
	set s_pid [spawn $sacctmgr -n -P list user $user_name format=admin]
	expect {
		-re "($re_word_str)" {
		      set admin_level $expect_out(1,string)
		      exp_continue
	       }
	       timeout {
		      slow_kill $s_pid
		      fail "sacctmgr add not responding"
	       }
	       eof {
		      wait
	       }
	}

	log_user 1
	return $admin_level
}


################################################################
#
# NAME
#	get_cluster_name - gets the cluster name
#
# SYNOPSIS
#	get_cluster_name
#
# RETURN VALUE
#	name of the cluster
#
################################################################

proc get_cluster_name { } {
	global scontrol re_word_str exit_code
	#
	# Use scontrol to find the cluster name
	#
	log_user 0
	set cluster_name ""
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "ClusterName *= ($re_word_str)" {
			set cluster_name $expect_out(1,string)
			exp_continue
		}
		timeout {
			slow_kill $scon_pid
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}

	log_user 1
	return $cluster_name
}


################################################################
#
# NAME
#	get_control_machine - gets the ControlMachine parameter
#
# SYNOPSIS
#	get_control_machine
#
# RETURN VALUE
#	ControlMachine value
#
################################################################

proc get_control_machine { } {
	global scontrol re_word_str exit_code
	#
	# Use scontrol to find the ControlMachine
	#
	log_user 0
	set control_machine ""
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		# We need to handle two possible outputs of SlurmctldHost[0]:
		# a) hostname
		# b) hostname(IP)
		-re "SlurmctldHost.0. *= ($re_word_str)" {
			set par_idx [expr [string last "(" $expect_out(1,string)] -1]
			if { $par_idx == -2 } {
				set control_machine $expect_out(1,string)
			} else {
				set control_machine [string range $expect_out(1,string) 0 $par_idx]
			}
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	log_user 1
	return $control_machine
}


################################################################
#
# NAME
#	get_node_cnt - determines how many nodes are on the system
#
# SYNOPSIS
#	get_node_cnt
#
# RETURN VALUE
#	Returns count of nodes on system or 0 if unknown
#
################################################################

proc get_node_cnt { } {
	global scontrol exit_code

	log_user 0
	set node_cnt 0
	set scon_pid [spawn -noecho $scontrol show nodes]
	expect {
		-re "NodeName=" {
			incr node_cnt
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	return $node_cnt
}


################################################################
#
# NAME
#	get_node_cnt_in_part - determines how many nodes are in a given partition
#
# SYNOPSIS
#	get_node_cnt_in_part ?partition?
#
# DESCRIPTION
#	Determine how many nodes are in a given partition.
#	If the partition is not specified, the default partition will be used.
#
# RETURN VALUE
#	Returns count of nodes in a partition or 0 if unknown.
#
################################################################

proc get_node_cnt_in_part { {partition ""} } {
	global scontrol number

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	log_user 0
	set node_cnt 0
	set scon_pid [spawn -noecho $scontrol show partition $partition]
	expect {
		-re "not found" {
			log_error "Partition $partition doesn't exist"
		}
		-re "TotalNodes=($number)" {
			set node_cnt $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
		}
		eof {
		}
	}
	log_user 1

	return $node_cnt
}


################################################################
#
# NAME
#	get_idle_node_in_part - gets an idle node in a given partition
#
# SYNOPSIS
#	get_idle_node_in_part ?partition?
#
# DESCRIPTION
#	Get an idle node in a given partition.
#	If the partition is not specified, the default partition will be used.
#
# RETURN VALUE
#	Returns name of node in a partition or "" if unknown.
#
################################################################

proc get_idle_node_in_part { {partition ""} } {
	global scontrol sinfo re_word_str

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	log_user 0
	set host_list ""
	spawn $sinfo -oNAME=%N -h -p$partition --state=idle
	expect {
		-re "not found" {
			log_error "Partition $partition doesn't exist"
		}
		-re "NAME=($re_word_str)" {
			set host_list $expect_out(1,string)
		}
		timeout {
			log_error "sinfo not responding"
		}
		eof {
			wait
		}
	}

	set node_name ""
	spawn $scontrol show hostname $host_list
	expect {
		-re "($re_word_str)" {
			set node_name $expect_out(1,string)
		}
		timeout {
			log_error "scontrol not responding"
		}
		eof {
			wait
		}
	}
	log_user 1

	return $node_name
}


################################################################
#
# NAME
#	get_array_config - determines the MaxArraySize
#
# SYNOPSIS
#	get_array_config
#
# DESCRIPTION
#	Use scontrol to determine the MaxArraySize
#
# RETURN VALUE
#	MaxArraySize value
#
################################################################

proc get_array_config { } {
	global scontrol number

	log_user 0
	set array_size 1
	spawn $scontrol show config
	expect {
		-re "MaxArraySize *= ($number)" {
			set array_size $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $array_size
}


################################################################
#
# NAME
#	get_max_tasks - determines the MaxTasksPerNode
#
# SYNOPSIS
#	get_max_tasks
#
# DESCRIPTION
#	Use scontrol to determine the MaxTasksPerNode
#
# RETURN VALUE
#	MaxTasksPerNode value
#
################################################################

proc get_max_tasks { } {
	global scontrol number

	log_user 0
	set max_tasks 1
	spawn $scontrol show config
	expect {
		-re "MaxTasksPerNode *= ($number)" {
			set max_tasks $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $max_tasks
}


#################################################
#
# NAME
#	scale_to_megs - scales the value by the factor T|G|M to megabytes
#
# SYNOPSIS
#	scale_to_megs value factor
#
# DESCRIPTION
#	scale the value by the factor T|G|M to megabytes
#
# RETURN VALUE
#	the scaled variable
#
#################################################

proc scale_to_megs { value factor } {

	if {[string compare $factor "T"] == 0} {
		set value [expr $value * 1024 * 1024]
	} elseif {[string compare $factor "G"] == 0} {
		set value [expr $value * 1024]
	} elseif {[string compare $factor "M"] == 0} {
		set value [expr $value * 1]
	} elseif {[string compare $factor "K"] == 0} {
		set value [expr $value / 1024]
		set value [expr {round($value)}]
	} else {
		set value [expr $value / (1024 * 1024)]
		set value [expr {round($value)}]
	}

	return $value
}


#################################################
#
# NAME
#	scale_to_ks - scales the value by the factor G|M|K to kilobytes
#
# SYNOPSIS
#	scale_to_ks value factor
#
# DESCRIPTION
#	scale the value by the factor G|M|K to kilobytes
#
# RETURN VALUE
#	the scaled variable
#
#################################################

proc scale_to_ks { value factor } {

	if {[string compare $factor "G"] == 0} {
		set value [expr $value * 1024 * 1024]
	} elseif {[string compare $factor "M"] == 0} {
		set value [expr $value * 1024]
	} elseif {[string compare $factor "K"] == 0} {
		set value [expr $value * 1]
	} else {
		set value [expr $value / 1024]
		set value [expr {round($value)}]
	}

	return $value
}


############################################################
#
# NAME
#	check_node_mem - checks that the nodes have memory configured
#
# SYNOPSIS
#	check_node_mem
#
# RETURN VALUE
#	1 if the nodes have memory, 0 otherwise
#
############################################################

proc check_node_mem { } {
	global scontrol number

	log_user 0
	set mem_size 0

	spawn $scontrol show node
	expect {
		-re "RealMemory=($number)" {
			set mem_size $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}

	if {$mem_size == 1} {
		return 0
	} else {
		return 1
	}
	log_user 1

}


################################################################
#
# NAME
#	get_fs_damping_factor - gets FairShareDampeningFactor configuration parameter
#
# SYNOPSIS
#	get_fs_damping_factor
#
# DESCRIPTION
#	get FairShareDampeningFactor configuration parameter
#
# RETURN VALUE
#	Returns FairShareDampeningFactor
#
################################################################

proc get_fs_damping_factor { } {
	global scontrol number exit_code

	log_user 0
	set damp 1
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "FairShareDampeningFactor *= ($number)" {
			set damp $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	return $damp
}


################################################################
#
# NAME
#	slurmctld_plug_stack_nonstop - verifies that the SlurmctldPlugstack is set to nonstop
#
# SYNOPSIS
#	slurmctld_plug_stack_nonstop
#
# DESCRIPTION
#	Use scontrol to determine that the SlurmctldPlugstack is set to nonstop.
#
# RETURN VALUE
#	1 if the value is set to nonstop.
#
################################################################

proc slurmctld_plug_stack_nonstop { } {
	global scontrol re_word_str exit_code

	log_user 0
	set nonstop_enforce 0
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "SlurmctldPlugstack *= ($re_word_str)" {
			if {[string first $expect_out(1,string) "nonstop"] != -1} {
				set nonstop_enforce 1
			}
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1
	return $nonstop_enforce
}


################################################################
#
# NAME
#	job_submit_all_partitions - determines if the JobSubmitPlugins includes "all_partitions"
#
# SYNOPSIS
#	job_submit_all_partitions
#
# DESCRIPTION
#	Use scontrol to determine if the JobSubmitPlugins
#	includes "all_partitions".
#
# RETURN VALUE
#	1 if the value is set to nonstop.
#
################################################################

proc job_submit_all_partitions { } {
	global scontrol re_word_str exit_code

	log_user 0
	set all_partitions 0
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "JobSubmitPlugins *= ($re_word_str)" {
			if {[string first $expect_out(1,string) "all_partitions"] != -1} {
				set all_partitions 1
			}
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1
	return $all_partitions
}


################################################################
#
# NAME
#	wait_for_node - waits for nodes in a partition to reach a certain state
#
# SYNOPSIS
#	wait_for_node state num_nodes partition
#
# DESCRIPTION
#	Wait for a certain number of nodes in a partition to
#	reach a certain state. Polls every $testsuite_poll_interval
#	seconds. If the partition argument is empty, the default
#	partition will be used.
#
# RETURN VALUE
#	Returns: 1 on failure.
#
################################################################

proc wait_for_node {state num_nodes {partition ""} } {
	global sinfo number testsuite_poll_interval

	set wait_time 0
	set done      0
	set cnt       0
	set rt        0

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	while {$done != 1 && $wait_time < 3} {

		log_user 0
		spawn $sinfo --noheader --partition $partition --state $state --format %D
		expect {
			-re "($number)" {
				set cnt $expect_out(1,string)
				exp_continue
			}
			timeout {
				log_error "sinfo is not responding"
				set rt 1
			}
			eof {
				wait
			}
		}
		log_user 1

		if {$num_nodes <= $cnt} {
			set done 1
		} else {
			log_debug "Partition $partition has $cnt nodes idle and we want $num_nodes"
			sleep $testsuite_poll_interval
			incr wait_time 1
		}
	}
	if {$done != 1} {
		set rt 1
	}

	return $rt
}


#####################################################################
#
# NAME
#	test_preempttype_part - determines if preempt mode partition_prio is configured
#
# SYNOPSIS
#	test_preempttype_part
#
# RETURN VALUE
#	0 if none
#
#####################################################################

proc test_preempttype_part { } {

	global scontrol
	log_user 0
	set part_prio 0
	spawn $scontrol show config
	expect {
		-re "PreemptType *= preempt/partition_prio" {
			set part_prio 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $part_prio
}


#####################################################################
#
# NAME
#	test_preempttype_qos - determines if preempt mode qos is configured
#
# SYNOPSIS
#	test_preempttype_qos
#
# RETURN VALUE
#	0 if none
#
#####################################################################

proc test_preempttype_qos { } {

	global scontrol
	log_user 0
	set qos 0
	spawn $scontrol show config
	expect {
		-re "PreemptType *= preempt/qos" {
			set qos 1
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $qos
}


#####################################################################
#
# NAME
#	test_proctrack - determines the ProctrackType
#
# SYNOPSIS
#	test_proctrack
#
# RETURN VALUE
#	the proctrack type
#
#####################################################################

proc test_proctrack { } {

	global scontrol re_word_str
	log_user 0
	set proctype ""
	spawn $scontrol show config
	expect {
		-re "ProctrackType *=* proctrack/($re_word_str)" {
			set proctype $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $proctype
}


#####################################################################
#
# NAME
#	get_srun_ports - determines the SrunPortRange
#
# SYNOPSIS
#	get_srun_ports
#
# RETURN VALUE
#	the SrunPortRange
#
#####################################################################

proc get_srun_ports { } {

	global scontrol re_word_str bin_grep bin_bash number
	log_user 0
	set ports 0
	spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SrunPortRange"
	expect {
		-re "SrunPortRange *=* ($re_word_str)" {
			set ports $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $ports
}


#####################################################################
#
# NAME
#	available_nodes_hostnames - gets all available nodes in the system
#
# SYNOPSIS
#	available_nodes_hostnames partition ?also_power_save?
#
# ARGUMENTS
#
#	partition
#		to only return nodes of an specific partition
#	also_power_save
#		to include POWERING_DOWN and POWER_DOWN nodes
#
# RETURN VALUE
#	idle nodes, and also_power_save nodes if specified
#
#####################################################################

proc available_nodes_hostnames { partition {also_power_save false} } {

	global sinfo re_word_str exit_code

	log_user 0
	set idle_nodelist ""
	set sep ""
	if {$also_power_save} {
		set avail_states "idle,power_down,powering_down"
	} else {
		set avail_states "idle"
	}
	if {[string compare $partition ""] == 0} {
		spawn $sinfo -t$avail_states -h -o%N
	} else {
		spawn $sinfo -t$avail_states -h -o%N -p$partition
	}
	expect {
		-re "($re_word_str)" {
			append idle_nodelist $sep
			append idle_nodelist $expect_out(1,string)
			set sep ","
			exp_continue
		}
		timeout {
			log_error "sinfo is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1
	return $idle_nodelist
}


#####################################################################
#
# NAME
#	test_accting_steps - determines if nostep or nojobs is set for AccoutingStorageEnforce
#
# SYNOPSIS
#	test_accting_steps
#
# DESCRIPTION
#	Determine if nostep or nojobs is set for AccoutingStorageEnforce
#
# RETURN VALUE
#	1 if set else 0
#
#####################################################################

proc test_accting_steps { } {

	global scontrol re_word_str
	log_user 0
	set enforce_limits 1
	spawn $scontrol show config
	expect {
		-re "AccountingStorageEnforce *= ($re_word_str)" {
			if {[string first "nosteps" $expect_out(1,string)] != -1 } {
				set enforce_limits 0
			}
			if {[string first "nojobs" $expect_out(1,string)] != -1 } {
				set enforce_limits 0
			}
			exp_continue
		}
		eof {
			wait
		}

	}
	log_user 1

	return $enforce_limits
}


# Print the current line number in the script.  Calling like this
#  [get_curr_line_num [info frame]]
# will return the current line number
proc get_curr_line_num {frame_info} {
	# Getting value of the key 'line' from the dictionary
	# returned by 'info frame'
	set result [dict get [info frame $frame_info] line]
}


#####################################################################
#
# NAME
#	get_partition_nodes - gets the list of node names in a given partition/states
#
# SYNOPSIS
#	get_partition_nodes partition states
#
# DESCRIPTION
#	Get the list of node names in a given partition/states
#
# ARGUMENTS
#	partition
#		partition to get nodes off
#	states
#		states to filter on nodes
#
# RETURN VALUE
#	node names list, -1 on sinfo error
#
#####################################################################

proc get_partition_nodes {partition states} {

	global sinfo re_word_str
	log_user 0
	set node_list ""

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	if {[string length $states] == 0} {
		set sinfo_pid [spawn -noecho $sinfo -h -N -p $partition -o %N -e]
	} else {
		set sinfo_pid [spawn -noecho $sinfo -h -N -p $partition -o %N -t $states -e]
	}

	expect {
		-re "($re_word_str)" {
			lappend node_list $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $sinfo_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	log_user 1
	return $node_list
}


#####################################################################
#
# NAME
#	set_partition_maximum_time_limit - sets the maximum time limit in a given partition
#
# SYNOPSIS
#	set_partition_maximum_time_limit partition limit
#
# RETURN VALUE
#	0 if succeeded, -1 if error
#
#####################################################################

proc set_partition_maximum_time_limit {partition limit} {
	global scontrol exit_code

	if {[string length $partition] == 0} {
		set partition [default_partition]
		if { $partition == "" } {
			return 1
		}
	}

	if { $limit < -1 } {
		log_error "Trying to set invalid partition time limit of $limit"
		return 1
	}
	if { $limit == -1 } {
		set expected_lim "UNLIMITED"
	} else {
		set expected_lim limit
	}

	spawn $scontrol update partitionname=$partition MaxTime=-1
	expect {
		timeout {
			log_error "scontrol not responding"
			return 1
		}
		eof {
			wait
		}
	}

	set maxtime [get_partition_maximum_time_limit $partition]
	if { $maxtime != $limit } {
		log_error "Unable to update partition MaxTime, got $maxtime, wanted $limit"
		return 1
	}
	if { $exit_code != 0 } {
		log_error "set_partition_maximum_time_limit: Unexpected error."
		return 1
	}

	return 0
}


#####################################################################
#
# NAME
#	get_partition_maximum_time_limit - gets the maximum time limit in a given partition
#
# SYNOPSIS
#	get_partition_maximum_time_limit partition
#
# DESCRIPTION
#	Get the maximum time limit in a given partition
#
# RETURN VALUE
#	time limit in seconds, -1 if undefined or error
#
#####################################################################

proc get_partition_maximum_time_limit {partition} {

	global sinfo number exit_code

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	set secs 0
	log_user 0
	set sinfo_pid [spawn -noecho $sinfo -h -p $partition -O time -e]
	expect {
		-re "infinite" {
			set secs -1
			exp_continue
		}
		-re "n/a" {
			set secs -1
			exp_continue
		}
		-re "($number)-($number):($number):($number)" {
			set days  [expr $expect_out(1,string) * 24 * 60 * 60]
			set hours [expr $expect_out(2,string) * 60 * 60]
			set mins  [expr $expect_out(3,string) * 60]
			set secs  [expr $days + $hours + $mins + $expect_out(4,string)]
			exp_continue
		}
		-re "($number):($number):($number)" {
			set hours [expr $expect_out(1,string) * 60 * 60]
			set mins  [expr $expect_out(2,string) * 60]
			set secs  [expr $hours + $mins + $expect_out(3,string)]
			exp_continue
		}
		-re "($number):($number)" {
			set mins  [expr $expect_out(1,string) * 60]
			set secs  [expr $mins + $expect_out(2,string)]
			exp_continue
		}
		-re "($number)" {
			set secs  [expr $expect_out(1,string) * 60]
			exp_continue
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $sinfo_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	log_user 1
	return $secs
}


################################################################
#
# NAME
#	get_partition_default_time_limit - gets the default time limit in a given partition
#
# SYNOPSIS
#	get_partition_default_time_limit ?partition?
#
# DESCRIPTION
#	Get the default time limit in a given partition.
#	If the partition is not specified, the default partition will be used.
#
# RETURN VALUE
#	Returns: time limit in seconds, -1 if undefined or error.
#
################################################################

proc get_partition_default_time_limit { {partition ""} } {
	global sinfo number exit_code

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	set secs 0
	log_user 0
	set sinfo_pid [spawn -noecho $sinfo -h -p $partition -O defaulttime -e]
	expect {
		-re "infinite" {
			set secs -1
			exp_continue
		}
		-re "n/a" {
			set secs -1
			exp_continue
		}
		-re "($number)-($number):($number):($number)" {
			set days  [expr $expect_out(1,string) * 24 * 60 * 60]
			set hours [expr $expect_out(2,string) * 60 * 60]
			set mins  [expr $expect_out(3,string) * 60]
			set secs  [expr $days + $hours + $mins + $expect_out(4,string)]
			exp_continue
		}
		-re "($number):($number):($number)" {
			set hours [expr $expect_out(1,string) * 60 * 60]
			set mins  [expr $expect_out(2,string) * 60]
			set secs  [expr $hours + $mins + $expect_out(3,string)]
			exp_continue
		}
		-re "($number):($number)" {
			set mins  [expr $expect_out(1,string) * 60]
			set secs  [expr $mins + $expect_out(2,string)]
			exp_continue
		}
		-re "($number)" {
			set secs  [expr $expect_out(1,string) * 60]
			exp_continue
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $sinfo_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	log_user 1
	return $secs
}


#####################################################################
#
# NAME
#	get_node_cores - given a node, returns its total number of cores
#
# SYNOPSIS
#	get_node_cores node
#
# DESCRIPTION
#	Given a node, return its total number of cores
#	(not the CoresPerSocket, but the total cores)
#
# RETURN VALUE
#	node cores if retrieved, -1 otherwise
#
#####################################################################

proc get_node_cores {node} {

	global sinfo number
	set cores -1
	set sockets_per_node 0
	set cores_per_socket 0

	if {[string length $node] == 0} {
		return $cores
	}

	log_user 0
	set sinfo_pid [spawn -noecho $sinfo -o "%X %Y" -h -n $node]

	expect {
		-re "($number)" {
			if {$sockets_per_node == 0} {
				set sockets_per_node $expect_out(1,string)
			} else {
				set cores_per_socket $expect_out(1,string)
			}
			exp_continue
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $sinfo_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	log_user 1

	set cores [expr $sockets_per_node * $cores_per_socket]

	return $cores
}


#####################################################################
#
# NAME
#	get_node_cpus - given a node, returns its total number of threads we account for
#
# SYNOPSIS
#	get_node_cpus node
#
# DESCRIPTION
#	Given a node, return its total number of threads we account for.
#	(not always ThreadsPerCore, but how many threads are in use.
#	i.e. CPUs=6 CoresPerSocket=6 ThreadsPerCore=2 Socket=1 would
#	result in only 1 thread we care about instead of the 2 listed.)
#
# RETURN VALUE
#	list of node [ tot_cpus threads ] if retrieved, [ -1 -1 ] otherwise
#
#####################################################################

proc get_node_cpus {node} {

	global scontrol number
	set nthreads -1
	set nsockets 0
	set ncores 0
	set totcpus -1

	if {[string length $node] == 0} {
		return [list $totcpus $nthreads]
	}

	# Get the number of CPUs on a node
	set my_pid [spawn $scontrol show node $node]
	expect {
		-re "CoresPerSocket=($number)" {
			set ncores $expect_out(1,string)
			exp_continue
		}
		-re "CPUTot=($number)" {
			set totcpus $expect_out(1,string)
			exp_continue
		}
		-re "Sockets=($number)" {
			set nsockets $expect_out(1,string)
			exp_continue
		}
		-re "ThreadsPerCore=($number)" {
			set nthreads $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			slow_kill $scontrol_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	set core_cnt [expr $nsockets * $ncores]
	set thread_cnt [expr $ncores * $nthreads]
	if {$totcpus != $nthreads && $totcpus == $ncores} {
		log_info "Cores rather than threads are being allocated"
		set nthreads 1
	}

	return [list $totcpus $nthreads]
}


#####################################################################
#
# NAME
#	get_part_total_cores - given a partition and/or states, return its total cores
#
# SYNOPSIS
#	get_part_total_cores partition states
#
# DESCRIPTION
#	Given a partition and/or states, return its total cores
#
# ARGUMENTS
#	partition
#		partition to check cores
#	states
#		states to filter on partition cores
#
# RETURN VALUE
#	partition cores
#
#####################################################################

proc get_part_total_cores {part states} {
	global sinfo number
	log_user 0
	set cores 0
	set tmp 0
	set i 0

	if {[string length $part] == 0} {
		set part [default_partition]
	}

	if {[string length $states] == 0} {
		set sinfo_pid [spawn -noecho $sinfo -h -N -p $part -o "%X %Y"]
	} else {
		set sinfo_pid [spawn -noecho $sinfo -h -N -p $part -t $states -o "%X %Y"]
	}

	expect {
		-re "($number)" {
			set is_even [expr {($i % 2) == 0}]
			if {$is_even == 1} {
				set tmp $expect_out(1,string)
			} else {
				set tmp [expr $tmp * $expect_out(1,string)]
				set cores [expr $cores + $tmp]
			}
			incr i
			exp_continue
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $sinfo_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	log_user 1
	return $cores
}


#####################################################################
#
# NAME
#	check_hosts_contiguous - verify if all hosts belong to the partition and are contiguous
#
# SYNOPSIS
#	check_hosts_contiguous check_hosts_list partition
#
# DESCRIPTION
#	Given a partition and a list of hosts, verify if all
#	hosts belong to the partition and are contiguous.
#	If the partition argument is empty, the default partition
#	will be used.
#
# RETURN VALUE
#	Returns: 0 on SUCCESS, 1 otherwise.
#
#####################################################################

proc check_hosts_contiguous { check_hosts_list {partition ""} } {

	global sinfo re_word_str

	if {[string length $partition] == 0} {
		set partition [default_partition]
	}

	set part_hosts_list {}

	log_user 0
	set sinfo_pid [spawn $sinfo --noheader -p $partition -N -o %N]
	expect {
		-re "($re_word_str)" {
			lappend part_hosts_list $expect_out(1,string)
			exp_continue
		}
		-re "Unable to contact" {
			log_user 1
			log_error "Slurm appears to be down"
			return 1
		}
		timeout {
			log_user 1
			log_error "sinfo not responding"
			slow_kill $sinfo_pid
			return 1
		}
		eof {
			wait
		}
	}
	log_user 1

	foreach host $check_hosts_list {
		set idx_cur [lsearch $part_hosts_list $host]
		if {$idx_cur == -1} {
			log_error "$host not found in list of hosts from partition $partition"
			return 1
		}
		if {[info exists idx_old]} {
			if {$idx_cur != [expr $idx_old + 1]} {
				log_error "Node sequence number not contiguous"
				return 1
			}
		}
		set idx_old $idx_cur
	}

	return 0
}


#####################################################################
#
# NAME
#	stop_root_user - detect, warn, and stop root user
#
# SYNOPSIS
#	stop_root_user
#
# DESCRIPTION
#	Detect, warn, and stop root user
#
# RETURN VALUE
#	uid if not root user, exits otherwise
#
#####################################################################

proc stop_root_user {} {

	global bin_id number

	set uid [get_my_uid]
	if {$uid == -1} {
		fail "Can't get my uid"
	} elseif {$uid == 0} {
		skip "Can't run this test as user root"
	}

	return $uid
}


################################################################
#
# NAME
#	get_requeue_exit - gets RequeueExit configuration parameter
#
# SYNOPSIS
#	get_requeue_exit
#
# RETURN VALUE
#	Returns RequeueExit number
#
################################################################

proc get_requeue_exit { } {
	global scontrol number exit_code

	log_user 0
	set re 0
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "RequeueExit *= ($number)" {
			set re $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	return $re
}


################################################################
#
# NAME
#	get_requeue_exit_hold - gets RequeueExitHold configuration parameter
#
# SYNOPSIS
#	get_requeue_exit_hold
#
# RETURN VALUE
#	Returns RequeueExitHold number
#
################################################################

proc get_requeue_exit_hold { } {
	global scontrol number exit_code

	log_user 0
	set re 0
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "RequeueExitHold *= ($number)" {
			set re $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	return $re
}


################################################################
#
# NAME
#	get_prolog - gets Prolog configuration parameter
#
# SYNOPSIS
#	get_prolog
#
# RETURN VALUE
#	Returns Prolog parameter
#
################################################################

proc get_prolog { } {
	global scontrol re_word_str exit_code

	log_user 0
	set prolog 0
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "^Prolog *= ($re_word_str)" {
			set re $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $scon_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	return $prolog
}


proc expect_extern_step { } {
	global scontrol
	# If PrologFlags=contain is in slurm.conf, then an "extern" step will be
	# launched on each node, so we need to check for 3 steps per
	# job instead of 2.

	log_user 0
	set extern_step 0
	set scon_pid [spawn -noecho $scontrol show config]
	expect {
		-re "PrologFlags\\s*=\\s*\[A-z/,\]*Contain" {
			set extern_step 1
		}
		timeout {
			fail "scontrol show config not responding"
		}
		eof {
			wait
		}
	}
	log_user 1

	return $extern_step
}


################################################################
#
# NAME
#	get_het_job_ids - gets list of component job ids for a het job
#
# SYNOPSIS
#	get_het_job_ids job_id ?use_offset?
#
# DESCRIPTION
#	Gets list of component job ids for a het job.
#
# ARGUMENTS
#	job_id
#		Slurm job id
#	use_offset
#		If zero, returns list of integer job ids, else returns ids in
#		the form of X+Y where X is het job master id and Y is the
#		offset.
#
# RETURN VALUE
#	A list of ids for a hetjob or an empty list if jobid
#	is not a het one.
#
################################################################

proc get_het_job_ids { jobid {use_offset 0}} {
	global scontrol number

	set id_list ""
	set log_user_save [log_user -info]
	log_user 0
	spawn $scontrol show job $jobid
	expect {
		-re "JobId=($number) HetJobId=($number) HetJobOffset=($number)" {
			if { $use_offset } {
				lappend id_list "$expect_out(2,string)+$expect_out(3,string)"
			} else {
				lappend id_list $expect_out(1,string)
			}
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			set id_list ""
		}
		eof {
			wait
		}
	}
	log_user $log_user_save
	return $id_list
}


################################################################
#
# NAME
#	test_hetjob_step - tests if steps can span multiple heterogeneous job allocations
#
# SYNOPSIS
#	test_hetjob_step
#
# Returns 1 if steps can span multiple heterogeneous job components,
#	  0 otherwise
#
################################################################

proc test_hetjob_step { } {
	global scontrol number exit_code

	log_user 0
	set hetjob_step 0
	set major 0
	set minor 0
	spawn $scontrol -V
	expect {
		-re " ($number)\.($number)\.($number)" {
			set version_major $expect_out(1,string)
			set version_minor $expect_out(2,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$version_major >= 19} {
		set hetjob_step 1
	} elseif {$version_major >= 18} {
		set hetjob_step 1
		spawn $scontrol show config
		expect {
			-re "select/cray" {
				set hetjob_step 0
				exp_continue
			}
			timeout {
				log_error "scontrol not responding"
				set exit_code 1
			}
			eof {
				wait
			}
		}
	} elseif {$version_major == 17 && $version_minor == 11} {
		spawn $scontrol show config
		expect {
			-re "select/cray" {
				set hetjob_step 0
				exp_continue
			}
			-re "disable_hetjob_steps" {
				set hetjob_step 0
				exp_continue
			}
			-re "enable_hetjob_steps" {
				set hetjob_step 1
				exp_continue
			}
			timeout {
				log_error "scontrol not responding"
				set exit_code 1
			}
			eof {
				wait
			}
		}
	}
	log_user 1
	return $hetjob_step
}


################################################################
#
# NAME
#	reconfigure - calls scontrol reconfigure
#
# SYNOPSIS
#	reconfigure ?cluster?
#
# DESCRIPTION
#	Calls scontrol reconfigure.
#
# ARGUMENTS
#	cluster
#		The cluster to reconfigure
#
# RETURN VALUE
#	void
#
# ENVIRONMENT
#	Sets exit_code to 1 on failure.
#
################################################################

proc reconfigure { {cluster ""} } {
	global exit_code scontrol timeout

	#
	# Increase timeout just in case we're running under valgrind
	#
	set save_timeout $timeout
	set timeout 20
	if { $cluster == "" } {
		spawn $scontrol reconfigure
	} else {
		spawn $scontrol -M$cluster reconfigure
	}
	expect {
		-re "slurm_reconfigure error: Invalid user id" {
			log_error "Invalid user id"
			set exit_code 1
			exp_continue
		}
		-re "Error|error" {
			log_error "scontrol reconfigure error"
			set exit_code 1
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	#
	# Wait for reconfigure to complete, then reset timeout and return.
	#
	sleep 5
	set timeout $save_timeout
}


#####################################################################
#
# NAME
#	log_fatal - prints a fatal message
#
# SYNOPSIS
#	log_fatal message
#
# SEE ALSO
#	_log_format for options governing the message format and colorization
#
#####################################################################

proc log_fatal {message} {
	global testsuite_log_level LOG_LEVEL_FATAL

	if {$testsuite_log_level >= $LOG_LEVEL_FATAL} {
		_log_format "fatal" "$message"
	}
}


#####################################################################
#
# NAME
#	log_error - prints an error message
#
# SYNOPSIS
#	log_error message
#
# SEE ALSO
#	_log_format for options governing the message format and colorization
#
#####################################################################

proc log_error {message} {
	global testsuite_log_level LOG_LEVEL_ERROR

	if {$testsuite_log_level >= $LOG_LEVEL_ERROR} {
		_log_format "error" "$message"
	}
}


#####################################################################
#
# NAME
#	log_warn - prints a warning message
#
# SYNOPSIS
#	log_warn message
#
# SEE ALSO
#	_log_format for options governing the message format and colorization
#
#####################################################################

proc log_warn {message} {
	global testsuite_log_level LOG_LEVEL_WARNING

	if {$testsuite_log_level >= $LOG_LEVEL_WARNING} {
		_log_format "warning" "$message"
	}
}


#####################################################################
#
# NAME
#	log_info - prints an information message
#
# SYNOPSIS
#	log_info message
#
# SEE ALSO
#	_log_format for options governing the message format and colorization
#
#####################################################################

proc log_info {message} {
	global testsuite_log_level LOG_LEVEL_INFO

	if {$testsuite_log_level >= $LOG_LEVEL_INFO} {
		_log_format "info" "$message"
	}
}


#####################################################################
#
# NAME
#	log_debug - prints a debug level message
#
# SYNOPSIS
#	log_debug message
#
# SEE ALSO
#	_log_format for options governing the message format and colorization
#
#####################################################################

proc log_debug {message} {
	global testsuite_log_level LOG_LEVEL_DEBUG

	if {$testsuite_log_level >= $LOG_LEVEL_DEBUG} {
		_log_format "debug" "$message"
	}
}


#####################################################################
#
# NAME
#	log_trace - prints a trace level message
#
# SYNOPSIS
#	log_trace message
#
# SEE ALSO
#	_log_format for options governing the message format and colorization
#
#####################################################################

proc log_trace {message} {
	global testsuite_log_level LOG_LEVEL_TRACE

	if {$testsuite_log_level >= $LOG_LEVEL_TRACE} {
		_log_format "trace" "$message"
	}
}


################################################################
#
# NAME
#	in_fed - checks whether this cluster is in a federation
#
# SYNOPSIS
#	in_fed
#
# RETURN VALUE
#	Returns 1 if this cluster is in a federation, 0 otherwise
#
################################################################

proc in_fed {} {
	global scontrol

	spawn $scontrol show fed
	expect {
		-re "Federation" {
			return 1
		}
		timeout {
			log_error "scontrol not responding"
		}
		eof {
			wait
		}
	}
	return 0
}


################################################################
#
# NAME
#	check_job_state - checks if the state of a job is the expected one
#
# SYNOPSIS
#	check_job_state job state ?het_job?
#
# DESCRIPTION
#	Checks if the state of a job is the expected one.
#
# ARGUMENTS
#	job
#		Job ID to check
#	state
#		Desired state of the job to match
#	het_job
#		If set, checks state of each component job if the
#		job is a hetjob.
#
# RETURN VALUE
#	1 if job was on the desired state, or the number of job components
#	on that state if it's a hetjob and het_job option enabled. 0 otherwise.
#
# ENVIRONMENT
#	Also sets exit_code to 1 if job (or component) if there are some
#	error in the called commands.
#
################################################################

proc check_job_state { job state {het_job 0}} {
	global scontrol exit_code

	set rc 0
	set jid_list ""
	if { $het_job } {
		set jid_list [get_het_job_ids $job 1]
	}

	if { $jid_list == "" } {
		# non-het job
		set jid_list $job
	}

	foreach jid $jid_list {
		set state_match 0
		spawn $scontrol show job $jid
		expect {
			-re "JobState=($state)" {
				incr state_match
			}
			timeout {
				log_error "scontrol not responding"
				set exit_code 1
			}
			eof {
				wait
			}
		}

		incr rc $state_match
		if {$state_match != 1} {
			log_error "job $jid should be in $state state, but is not"
			set exit_code 1
			break
		}
	}

	return $rc
}


################################################################
#
# NAME
#	get_gres_count - returns a dict of nodes and GRES counts
#
# SYNOPSIS
#	get_gres_count gres_name ?node_list?
#
# DESCRIPTION
#	Returns a dict of node names and the count of a specifed
#	GRES aggregating all its types on each node.
#
# RETURN VALUE
#	If the node_list is not specified node name is specified,
#	this function will return a dict with the GRES count for all
#	the nodes of the default partition.
#	If specified, a dict only with the nodes of the node_list.
#
################################################################

proc get_gres_count { gres_name {node_list ""} } {
	global gres_regex

	set nodes_dict [get_nodes $node_list]
	set gres_dict [dict create]
	dict for {node_name node_dict} $nodes_dict {
		set gres_count 0
		if [dict exists $node_dict "Gres"] {
			set gres_expression [dict get $node_dict "Gres"]
			foreach gres [split $gres_expression ","] {
				if {[regexp $gres_regex $gres {} name type count] == 1} {
					if {$count eq ""} { set count $type }
					if {$name eq $gres_name} {
						incr gres_count $count
					}
				}
			}
		}
		dict set gres_dict $node_name $gres_count
	}

	return $gres_dict
}


################################################################
#
# NAME
#	get_highest_gres_count - returns highest number of GRES per node on node_count nodes
#
# SYNOPSIS
#	get_highest_gres_count node_count gres_name
#
# DESCRIPTION
#	For a given number of nodes, returns the highest GRES count per
#	node available on at least that number of nodes.
#
# EXAMPLE
#	For example: node1 has 1 GPU, node2 has 2 GPUs and node3 has 3 GPUs
#	[get_highest_gres_count 1 "gpu"] returns 3 (i.e. 1 node 3 GPUs)
#	[get_highest_gres_count 2 "gpu"] returns 2 (i.e. 2 nodes have at least 2 GPUs each)
#	[get_highest_gres_count 3 "gpu"] returns 1 (i.e. 3 nodes have at least 1 GPU each)
#
################################################################

proc get_highest_gres_count { node_count gres_name } {

	set available_nodes [available_nodes_hostnames [default_partition]]
	set gres_dict [get_gres_count $gres_name $available_nodes]
	set gres_count [list]

	dict for {node gres} $gres_dict {
		lappend gres_count $gres
	}

	set count [lindex [lsort -decreasing -integer $gres_count] [expr $node_count - 1]]
	return $count
}


################################################################
#
# NAME
#	_set_gpu_socket_inx - adds a socket index to the gpu_sock_list if not already on it
#
# SYNOPSIS
#	_set_gpu_socket_inx sock_inx
#
# DESCRIPTION
#	Add a socket index to the array gpu_sock_list if not already
#	on the list. Subroutine used by get_gpu_socket_count
#
################################################################

proc _set_gpu_socket_inx { sock_inx } {
	global gpu_sock_list

	if {$sock_inx == -1} {
		set gpu_sock_list [lreplace $gpu_sock_list 0 99]
		return
	}

	set sock_cnt [llength $gpu_sock_list]
	for {set i 0} {$i < $sock_cnt} {incr i} {
		if {[lindex $gpu_sock_list $i] == $sock_inx} {
			return
		}
	}
	lappend gpu_sock_list $sock_inx
}


################################################################
# Subroutine used by get_gpu_socket_count
# Add a socket index to the array gpu_sock_list if not already
# on the list.
################################################################

proc _set_gpu_socket_range { sock_first_inx sock_last_inx } {
	global gpu_sock_list

	set sock_cnt [llength $gpu_sock_list]
	for {set s $sock_first_inx} {$s <= $sock_last_inx} {incr s} {
		set found 0
		for {set i 0} {$i < $sock_cnt} {incr i} {
			if {[lindex $gpu_sock_list $i] == $s} {
				set found 1
				break
			}
		}
		if {$found == 0} {
			lappend gpu_sock_list $s
		}
	}
}


################################################################
#
# NAME
#	get_gpu_socket_count - returns the number of sockets with GPUS on a node with the given per-node GPU count
#
# SYNOPSIS
#	get_gpu_socket_count gpu_cnt sockets_per_node
#
# DESCRIPTION
#	Given a per-node GPU count, return the number of sockets with
#	GPUs on a node with the given per-node GPU count.
#	If the sockets_per_node has a value of 1 then just return 1
#	rather than determine the count (for performance reasons).
#
################################################################

proc get_gpu_socket_count { gpu_cnt sockets_per_node } {
	global re_word_str bin_rm number scontrol srun
	global gpu_sock_list
	set sockets_with_gpus 1
	set file_in "test_get_gpu_socket_count.input"

	if {$sockets_per_node == 1} {
		return 1
	}

	log_user 0
	_set_gpu_socket_inx -1
	make_bash_script $file_in "$scontrol show node \$SLURMD_NODENAME"
	spawn $srun -N1 --gres=gpu:$gpu_cnt $file_in
	expect {
		-re "gpu:${number}.S:($number)-($number)" {
			_set_gpu_socket_range $expect_out(1,string) $expect_out(2,string)
			exp_continue
		}
		-re "gpu:${re_word_str}:${number}.S:($number),($number),($number),($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			_set_gpu_socket_inx $expect_out(2,string)
			_set_gpu_socket_inx $expect_out(3,string)
			_set_gpu_socket_inx $expect_out(4,string)
			exp_continue
		}
		-re "gpu:${re_word_str}:${number}.S:($number),($number),($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			_set_gpu_socket_inx $expect_out(2,string)
			_set_gpu_socket_inx $expect_out(3,string)
			exp_continue
		}
		-re "gpu:${re_word_str}:${number}.S:($number),($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			_set_gpu_socket_inx $expect_out(2,string)
			exp_continue
		}
		-re "gpu:${re_word_str}:${number}.S:($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			exp_continue
		}
		-re "gpu:${number}.S:($number),($number),($number),($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			_set_gpu_socket_inx $expect_out(2,string)
			_set_gpu_socket_inx $expect_out(3,string)
			_set_gpu_socket_inx $expect_out(4,string)
			exp_continue
		}
		-re "gpu:${number}.S:($number),($number),($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			_set_gpu_socket_inx $expect_out(2,string)
			_set_gpu_socket_inx $expect_out(3,string)
			exp_continue
		}
		-re "gpu:${number}.S:($number),($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			_set_gpu_socket_inx $expect_out(2,string)
			exp_continue
		}
		-re "gpu:${number}.S:($number)" {
			_set_gpu_socket_inx $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	exec $bin_rm -f $file_in

	set sock_cnt [llength $gpu_sock_list]
	if {$sock_cnt > 1} {
		set sockets_with_gpus $sock_cnt
	}

	return $sockets_with_gpus
}


################################################################
#
# NAME
#	get_highest_mps_count - get_highest_gres_count nodes mps, but for "mps per GPU"
#
# SYNOPSIS
#	get_highest_mps_count node_count
#
# DESCRIPTION
#	For a given number of nodes, returns the higest number of MPS per GPU
#	available at least on those number of nodes.
#
################################################################

proc get_highest_mps_count { node_count } {
	# We cannot use get_highest_gres_count because we need "per gpu",
	# so we get all the mps per node and all gpus per node, to create
	# a mps_per_gpu list to sort and get the count.
	set available_nodes [available_nodes_hostnames [default_partition]]
	set mps_dict [get_gres_count "mps" $available_nodes]
	set gpu_dict [get_gres_count "gpu" $available_nodes]
	set mps_per_gpu [list]

	dict for {node mps} $mps_dict {
		if { $mps > 0 } {
			if [dict exists $gpu_dict $node] {
				set gpu [dict get $gpu_dict $node]
				if { $gpu > 0 } {
					lappend mps_per_gpu [expr $mps / $gpu]
				} else {
					fail "All nodes with MPS should have a GPU"
				}
			} else {
				fail "All nodes with MPS should have a GPU"
			}
		}
	}

	set count [lindex [lsort -decreasing -integer $mps_per_gpu] [expr $node_count - 1]]
	return $count
}


################################################################
#
# NAME
#	get_mps_node_count - gets the number of nodes with a positive number of GRES MPS
#
# SYNOPSIS
#	get_mps_node_count
#
# RETURN VALUE
#	Return the count of nodes with a non-zero count of GRES MPS
#
################################################################

proc get_mps_node_count { } {
	global number sinfo re_word_str
	set fini 0
	set node_inx 0
	set def_part [default_partition]

	log_user 0
	spawn $sinfo -N -p$def_part -oGRES=%G -h
	expect {
		-re "GRES=($re_word_str)" {
			set mps_count 0
			set parts [split $expect_out(1,string) ",/"]
			while 1 {
				set mps_found [lsearch $parts "mps*"]
				if { $mps_found == -1 } break

				set parts2 [split [lindex $parts $mps_found] ":(/"]
				set col [lsearch -regexp $parts2 ^$number$]
				if { $col == -1 } {
					incr mps_count
				} else {
					set mps_count [expr $mps_count + [lindex $parts2 $col]]
				}
				set parts [lreplace $parts $mps_found $mps_found]
			}

			if {$mps_count > 0} {
				incr node_inx
			}
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1

	return $node_inx
}


################################################################
#
# NAME
#	get_over_time_limit - returns the value of OverTimeLimit
#
# SYNOPSIS
#	get_over_time_limit
#
# RETURN VALUE
#	Returns the value of OverTimeLimit in slurm.conf
#
################################################################

proc get_over_time_limit {} {
	global number scontrol exit_code
	set ret 0
	log_user 0
	spawn $scontrol show config
	expect {
		-re "OverTimeLimit *= *($number) min" {
			set ret $expect_out(1,string)
		}
		timeout {
			log_error "scontrol not responding"
			incr exit_code
		}
		eof {
			wait
		}
	}
	log_user 1
	return $ret
}


################################################################
#
# NAME
#	cuda_count - determines the count of allocated GPUs
#
# SYNOPSIS
#	cuda_count cuda_string
#
# ARGUMENTS
#	cuda_string
#		Contents of a CUDA_VISIBLE_DEVICES environment variable
#
# RETURN VALUE
#	Return the number of GPUs or -1 on error
#
################################################################

proc cuda_count { cuda_string } {
	set cuda_count 0
	set has_number 0
	set len [string length $cuda_string]
	for {set char_inx 0} {$char_inx < $len} {incr char_inx} {
		set cuda_char [string index $cuda_string $char_inx]
		if {[string match , $cuda_char]} {
			if {$has_number > 0} {
				incr cuda_count
				set has_number 0
			} else {
				log_error "cuda_count: Invalid input ($cuda_string)"
				return -1
			}
		} elseif {[string is digit $cuda_char]} {
			set has_number 1
		}
	}
	if {$has_number > 0} {
		incr cuda_count
	} else {
		log_error "cuda_count: Invalid input ($cuda_string)"
		return -1
	}
	return $cuda_count
}


################################################################
#
# NAME
#	get_acct_store_tres - gets the configured value of AccountingStorageTRES
#
# SYNOPSIS
#	get_acct_store_tres
#
# DESCRIPTION
#	Get the configured value of AccountingStorageTRES
#
# RETURN VALUE
#	Returns the configured value of AccountingStorageTRES
#
################################################################

proc get_acct_store_tres { } {
	global scontrol re_word_str

	log_user 0
	set store_tres ""
	spawn $scontrol show config
	expect {
		-re "AccountingStorageTRES *= ($re_word_str)" {
			set store_tres $expect_out(1,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	log_user 1
	return $store_tres
}


################################################################
#
# NAME
#	get_conf_path - gets the path to the slurm.conf file
#
# SYNOPSIS
#	get_conf_path
#
# RETURN VALUE
#	Returns the path to the slurm.conf file
#
################################################################

proc get_conf_path { } {
	global scontrol re_word_str eol exit_code

	set config_path ""
	set got_config 0
	log_user 0
	spawn $scontrol show config
	expect {
		-re "SLURM_CONF.*= (/.*)/($re_word_str)${eol}SLURM_VERSION" {
			set config_path $expect_out(1,string)
			set got_config 1
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	if {$got_config != 1} {
		log_error "Did not get slurm.conf path"
		set exit_code 1
	}
	return $config_path
}


################################################################
#
# NAME
#	copy_conf - backs up the slurm.conf file
#
# SYNOPSIS
#	copy_conf config_path cwd
#
# DESCRIPTION
#	Copy the slurm.conf file to the a new file called
#	slurm.conf.orig in the current working directory
#
# ARGUMENTS
#	config_path
#		The path to slurm.conf
#	cwd
#		The full path of the current working directory
#
################################################################

proc copy_conf { config_path cwd } {
	global bin_cp bin_rm exit_code

	exec $bin_rm -fr $cwd/slurm.conf.orig
	spawn $bin_cp -v $config_path/slurm.conf $cwd/slurm.conf.orig
	expect {
		timeout {
			log_error "slurm.conf was not copied"
			set exit_code 1
		}
		eof {
			wait
		}
	}
}


################################################################
#
# NAME
#	save_conf - saves a copy of the specfied configuration file
#
# SYNOPSIS
#	save_conf file_name
#
# DESCRIPTION
#	Rename a file as <file_name>.<test_id> and make a copy of it
#	to its original name to be able to modify it.
#	It registers the saved files to allow to restore them properly
#	with restore_conf.
#	It will log_warn if the file dosn't exist, but will register it
#	to allow restore_conf to "restore" the original state: no file.
#
#	It throws a TCL error (FATAL: ) if unsafe scenarios are detected,
#	like a backup file already existing, or any error with mv or cp.
#	It does this to encourage the user executing the test to manually
#	check the files to avoid automatically removing any original file.
#
# SEE ALSO
#	restore_conf
#
################################################################

proc save_conf { file_name } {
	global test_id
	global bin_mv bin_cp bin_rm
	global _global_bkp_register _global_bkp_noexist

	set log_prev [log_user -info]
	set rc       0

	log_info "Saving backup of $file_name"

	#
	# Check for existing backups
	# If it exists, throw a tcl exception/error to stop.
	# If it exists, manual intervation should be encouraged to avoid
	# original config files to be overwriten by a test.
	#
	set backup_files [glob -nocomplain $file_name*\[0-9\].\[0-9\]*]
	if {$backup_files != ""} {
		error [format "\nFATAL: Backup file already exists: %s" \
		       $backup_files]
	}

	#
	# Check if file to backup exists.
	# If is doesn't warn the user, register the file as nonexistent and
	# allow the test to continue.
	# The restore_conf should remove the file.
	#
	if {![file exists $file_name]} {
		log_warn [format "Backup of a nonexistent file registered: %s" \
			  $file_name]
		lappend _global_bkp_register $file_name
		lappend _global_bkp_noexist  $file_name
		return
	}

	log_user 0
	spawn $bin_mv $file_name $file_name.$test_id
	expect {
		timeout {
			error [format "\nFATAL: Timeout moving original %s" \
			       $file_name]
		}
		eof {
			lassign [wait] pid spawnid os_error_flag rc
			if {$rc != 0} {
				error [format "\nFATAL: Error moving original %s" \
				       $file_name]
			}
		}
	}
	spawn $bin_cp $file_name.$test_id $file_name
	expect {
		timeout {
			error [format "\nFATAL: Timeout copying original %s" \
			       $file_name]
		}
		eof {
			lassign [wait] pid spawnid os_error_flag rc
			if {$rc != 0} {
				error [format "\nFATAL: Error copying original %s" \
				       $file_name]
			}
		}
	}
	# If everything is ok, register the backup file
	lappend _global_bkp_register $file_name
	log_user $log_prev
}


################################################################
#
# NAME
#	restore_conf - restores the original confiration file
#
# SYNOPSIS
#	restore_conf file_name
#
# DESCRIPTION
#	Restore the original file saved previously with save_config.
#
#	It throws a TCL error (FATAL: ) if unsafe scenarios are detected,
#	like a file_name not previously saved or, or any error with the
#	mv command.
#	It does this to encourage the user executing the test to manually
#	check the files to avoid automatically removing any original file.
#
# SEE ALSO
#	save_conf
#
################################################################

proc restore_conf { file_name } {
	global test_id
	global bin_mv bin_rm
	global _global_bkp_register _global_bkp_noexist

	set log_prev [log_user -info]
	set rc       0

	#
	# We don't want to restore any file not previously saved.
	# This avoids possible overwrtites or removals.
	# We want to throw an error because this may indicate data corruption.
	#
	if { [lsearch $_global_bkp_register $file_name] == -1 } {
		error [format "\nFATAL: Cannot restore a not previously saved file:\n%s\nSaved files are: %s\n" \
		       $file_name [join $_global_bkp_register "\n"]]
	}

	log_user 0
	if {[file exists $file_name.$test_id]} {
		#
		# If backup file exist but it was nonexistent when saved it
		# means that it has been created in the middle of the test.
		# This shouldn't happen and manual intervention is prefeared.
		#
		if { [lsearch $_global_bkp_noexist $file_name] != -1 } {
			error [format "\nFATAL: Backup file created by someone, not by this test!" \
			       $file_name]
		}

		log_info "Restoring backup of $file_name"
		spawn $bin_mv $file_name.$test_id $file_name
		expect {
			timeout {
				error [format "\nFATAL: Timeout restoring original: %s" \
				       $file_name.$test_id]
			}
			eof {
				lassign [wait] pid spawnid os_error_flag rc
				if {$rc != 0} {
					error [format "\nFATAL: Error restoring original %s" \
					       $file_name]
				}
			}
		}
	} else {
		#
		# If backup file doesn't exist but it did when when we saved
		# it means that it has been removed while the test was running.
		# This should never happen, manual intervention is prefered.
		#
		if { [lsearch $_global_bkp_noexist $file_name] == -1 } {
			error [format "\nFATAL: Backup file was removed since saved!" \
				$file_name]
		}

		log_info "Removing file used for the test: $file_name"
		spawn $bin_rm -f $file_name
		expect {
			timeout {
				error [format "\nFATAL: Timeout removing vestigial file: %s" \
					$file_name]
			}
			eof {
				lassign [wait] pid spawnid os_error_flag rc
				if {$rc != 0} {
					error [format "\nFATAL: Error removing vestigial file: %s" \
					       $file_name]
				}
			}
		}
	}

	log_user $log_prev
}


################################################################
#
# NAME
#	have_nvml - checks if HAVE_NVML is set in config.h
#
# SYNOPSIS
#	have_nvml
#
# RETURN VALUE
#	Returns 1 if HAVE_NVML is set in config.h. Else, returns 0.
#
################################################################

proc have_nvml { } {
	global bin_grep number exit_code config_h

	set grep_fail 0
	set have_nvml 0
	log_user 0
	spawn $bin_grep "HAVE_NVML" $config_h
	expect {
		-re "HAVE_NVML ($number)" {
			set have_nvml $expect_out(1,string)
			exp_continue
		}
		timeout {
			set grep_fail 1
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	if {$grep_fail == 1} {
		log_warn "Could not grep $config_h for HAVE_NVML"
		set have_nvml 0
	}

	return $have_nvml
}


################################################################
#
# NAME
#	delete_part - deletes partition on system
#
# SYNOPSIS
#	delete_part partition
#
################################################################

proc delete_part { part_name } {
	global scontrol

	# Remove part
	spawn $scontrol delete partition=$part_name
	expect {
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
}


################################################################
#
# NAME
#	have_lua - checks if HAVE_LUA is set in config.h
#
# SYNOPSIS
#	have_lua
#
# RETURN VALUE
#	Returns 1 if HAVE_LUA is set in config.h. Else, returns 0.
#
################################################################

proc have_lua { } {
	global bin_grep number exit_code config_h

	set grep_fail 0
	set have_lua 0
	log_user 0
	spawn $bin_grep "HAVE_LUA" $config_h
	expect {
		-re "HAVE_LUA ($number)" {
			set have_lua $expect_out(1,string)
			exp_continue
		}
		timeout {
			set grep_fail 1
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1

	if {$grep_fail == 1} {
		log_error "Could not grep $config_h for HAVE_LUA"
		set have_lua 0
	}

	return $have_lua
}


################################################################
#
# NAME
#	get_reservations - returns a dictionary of dictionaries of reservation parameters
#
# SYNOPSIS
#	get_reservations ?resv_name?
#
# RETURN VALUE
#	Uses `scontrol show reservation` to return a dictionary of dictionaries
#	of job parameters. Specifying an invalid resv_name result in a failure.
#
################################################################

proc get_reservations { {resv_name ""} } {
	global scontrol

	set command "$scontrol show reservation -o"
	if {$resv_name ne ""} {
		append command " $resv_name"
	}
	set output [run_command_output -fail "$command"]

	# Iterate over each reservation's parameter list
	foreach line [split $output "\n"] {
		if {$line eq ""} { continue }

		# Peel off the resv parameters one at a time
		# The first quantifier sets the greediness for the whole RE
		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
			# Remove the consumed parameter from the line
			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
			# Add it to the temporary job dictionary
			dict set resv_dict $param_name $param_value
		}

		set resv_name_dict [dict get $resv_dict "ReservationName"]

		# Add the resv dictionary to resvs dictionary
		dict set resvs_dict $resv_name_dict $resv_dict

		# Clear the resv dictionary for the next resv
		set resv_dict {}
	}

	return $resvs_dict
}


################################################################
#
# NAME
#	get_resvation_param - returns a specific parameter value for a specific reservation
#
# SYNOPSIS
#	get_resvation_param resv_name parameter_name
#
# DESCRIPTION
#	Returns a specific parameter value for a specified resvation if the
#	parameter exists for the resvation, or MISSING if it does not exist.
#	Specifying an invalid resvation name will result in a failure.
#
################################################################

proc get_resvation_param { resv_name parameter_name } {

	set resvs_dict [get_reservations $resv_name]

	if [dict exists $resvs_dict $resv_name $parameter_name] {
		return [dict get $resvs_dict $resv_name $parameter_name]
	} else {
		return "MISSING"
	}
}


################################################################
#
# NAME
#	create_res - create new reservation in system
#
# SYNOPSIS
#	create_res ?res_name? ?res_params?
#
# RETURN VALUE
#	the exit code of the scontrol command run
#
################################################################

proc create_res { res_name res_params } {
	global scontrol

	set result   [run_command "$scontrol create res ReservationName=$res_name $res_params"]
	set output   [dict get $result output]
	set ret_code [dict get $result exit_code]

	if { $ret_code } {
		log_warn "[lindex [info level 0] 0]: error from scontrol: $output"
	} else {
		log_debug "[lindex [info level 0] 0]: success from scontrol: $output"
	}

	return $ret_code
}


################################################################
#
# NAME
#	update_res - update exisiting reservation in system
#
# SYNOPSIS
#	update_res ?res_name? ?res_params?
#
# RETURN VALUE
#	the exit code of the scontrol command run
#
################################################################

proc update_res { res_name res_params } {
	global scontrol

	set result   [run_command "$scontrol update ReservationName=$res_name $res_params"]
	set output   [dict get $result output]
	set ret_code [dict get $result exit_code]

	if { $ret_code } {
		log_warn "[lindex [info level 0] 0]: error from scontrol: $output"
	} else {
		log_debug "[lindex [info level 0] 0]: success from scontrol: $output"
	}

	return $ret_code
}


################################################################
#
# NAME
#	delete_res - delete reservation from system
#
# SYNOPSIS
#	delete_res ?res_name?
#
# RETURN VALUE
#	the exit code of the scontrol command run
#
################################################################

proc delete_res { res_name } {
	global scontrol

	set result   [run_command "$scontrol delete ReservationName=$res_name"]
	set output   [dict get $result output]
	set ret_code [dict get $result exit_code]

	if { $ret_code } {
		log_warn "[lindex [info level 0] 0]: error from scontrol: $output"
	} else {
		log_debug "[lindex [info level 0] 0]: success from scontrol: $output"
	}

	return $ret_code
}


################################################################
#
# NAME
#	create_part - creates a partition
#
# SYNOPSIS
#	create_part partition num_nodes
#
# ARGUMENTS
#	partition
#		Name of partition to create
#	num_nodes
#		Number of nodes of partition to create
#
################################################################

proc create_part { part_name num_nodes_in } {
	global scontrol srun bin_printenv number re_word_str

	set nodes ""
	set num_nodes_out 0

	set found 0
	spawn $scontrol show partitionname=$part_name
	expect {
		-re "PartitionName=$part_name" {
			set found 1
			exp_continue
		}
		timeout {
			log_err "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$found == 1} {
		log_error "There is already a partition $part_name"
		return 1
	}

	if {[string length [default_partition]] == 0} {
		log_warn "create_part does not work without a default partition"
		return 1
	}

	if { $num_nodes_in } {
		set num_nodes $num_nodes_in
	} else {
		set num_nodes [available_nodes idle]
	}

	log_user 0
	# Get a list of nodes
	spawn $srun -t1 -N1-$num_nodes $bin_printenv
	expect {
		-re "SLURM_JOB_NUM_NODES=($number)" {
			set num_nodes_out $expect_out(1,string)
			exp_continue
		}
		-re "SLURM_NODELIST=($re_word_str)" {
			set nodes $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "srun is not responding getting number of nodes creating part"
			return 1
		}
		eof {
			wait
		}
	}

	if {[string length $nodes] == 0} {
		log_error "Did not get a valid node list"
		return 1
	} elseif { $num_nodes_out != $num_nodes_in } {
		log_error "Did not get enough nodes ($num_nodes_out != $num_nodes_in) to run test"
		return 1
	}

	spawn $scontrol create partitionname=$part_name nodes=$nodes
	expect {
		timeout {
			log_error "scontrol is not responding creating partition"
			return 1
		}
		eof {
			wait
		}
	}

	set found 0
	spawn $scontrol show partitionname=$part_name
	expect {
		-re "PartitionName=$part_name" {
			set found 1
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if { $found == 0 } {
		log_error "scontrol did not create partition $part_name"
		return 1
	}
	log_user 1

	log_info "Created partition $part_name with $num_nodes_in nodes"
	return 0
}


################################################################
#
# NAME
#	get_nodes - returns a dictionary of dictionaries of node parameters
#
# SYNOPSIS
#	get_nodes ?hostlist_expression?
#
# DESCRIPTION
#	Uses `scontrol show node` to query node parameters, returning a
#	dictionary of dictionaries with the node names as keys of the first
#	level dictionary and with the parameters as keys of the second level
#	dictionary. Specifying an invalid node name will result in a failure.
#
# RETURN VALUE
#	If the optional node expression argument is specified, the result will
#	be constrained by the specified hostlist expression. Otherwise, the
#	results for all nodes will be returned.
#
################################################################

proc get_nodes { {hostlist_expression ""} } {
	global scontrol

	set command "$scontrol show node -o"
	if {$hostlist_expression ne ""} {
		append command " $hostlist_expression"
	}
	set output [run_command_output -fail -nolog "$command"]

	# Iterate over each node parameter line
	foreach line [split $output "\n"] {
		if {$line eq ""} { continue }

		# Peel off the node parameters one at a time
		# The first quantifier sets the greediness for the whole RE
		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
			# Remove the consumed parameter from the line
			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
			# Add it to the temporary node dictionary
			dict set node_dict $param_name $param_value
		}

		set node_name [dict get $node_dict "NodeName"]

		# Add the node dictionary to nodes dictionary
		dict set nodes_dict $node_name $node_dict

		# Clear the node dictionary for use by the next node
		set node_dict {}
	}

	return $nodes_dict
}


################################################################
#
# NAME
#	get_node_param - returns a specific parameter value for a specific node
#
# SYNOPSIS
#	get_node_param node_name parameter_name
#
# DESCRIPTION
#	Returns a specific parameter value for a specified node if the
#	parameter exists for the node, or MISSING if it does not exist.
#	Specifying an invalid node name will result in a failure.
#
################################################################

proc get_node_param { node_name parameter_name } {

	set nodes_dict [get_nodes $node_name]

	if [dict exists $nodes_dict $node_name $parameter_name] {
		return [dict get $nodes_dict $node_name $parameter_name]
	} else {
		log_warn "Parameter $parameter_name not found on node $node_name"
		return "MISSING"
	}
}


################################################################
#
# NAME
#	get_nodes_by_request - get a list of nodes satisfying requested resources
#
# SYNOPSIS
#	get_nodes_by_request ?options? ?request_args?
#
# DESCRIPTION
#	Using srun (optionally with the specified arguments), returns a list
#	of nodes having the requested resources.
#	If an error occurs, the invoking test will fail.
#
# OPTIONS
#	-fail
#		fail the test if the execution of srun results in an error or timeout
#
# ARGUMENTS
#	request_args
#		Desired resources of a node in form of srun arguments,
#		e.g. "--gres=gpu:1 -n1 -t1"
#
# RETURN VALUE
#	A list of nodes with at least the requested resources, or an empty
#	list otherwise.
#
################################################################

proc get_nodes_by_request args {
	global srun

	set options [list]
	while {[llength $args]} {
		switch -glob -- [lindex $args 0] {
			-fail {
				lappend options [lrange $args 0 0]
				set args [lrange $args 1 end]
			}
			default  break
		}
	}

	if {[llength $args] == 1} {
		lassign $args request_args
	} elseif {[llength $args] == 0} {
		set request_args "-n1 -t1"
	} else {
		fail "[lindex [info level 0] 0]: Invalid number of arguments ([llength $args]): $args"
	}

	set command "$srun -Q $request_args printenv SLURMD_NODENAME"
	set result [run_command {*}$options $command]

	if [dict get $result exit_code] {
		return {}
	}

	set output [dict get $result output]
	foreach line [split $output "\n"] {
		if {$line eq ""} { continue }
		dict incr allocated_nodes $line
	}

	return [lsort [dict keys $allocated_nodes]]
}


################################################################
#
# NAME
#	get_jobs - returns a dictionary of dictionaries of job parameters
#
# SYNOPSIS
#	get_jobs ?job_id?
#
# DESCRIPTION
#	Uses `scontrol show job` to return a dictionary of dictionaries of job
#	parameters. Specifying an invalid job id will result in a failure.
#
################################################################

proc get_jobs { {job_id_in ""} } {
	global scontrol

	set command "$scontrol show job -o"
	if {$job_id_in ne ""} {
		append command " $job_id_in"
	}
	set output [run_command_output -fail "$command"]

	# Iterate over each job's parameter list
	foreach line [split $output "\n"] {
		if {$line eq ""} { continue }

		# Peel off the job parameters one at a time
		# The first quantifier sets the greediness for the whole RE
		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
			# Remove the consumed parameter from the line
			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
			# Add it to the temporary job dictionary
			dict set job_dict $param_name $param_value
		}

		set job_id [dict get $job_dict "JobId"]

		# Add the job dictionary to jobs dictionary
		dict set jobs_dict $job_id $job_dict

		# Clear the job dictionary for the next job
		set job_dict {}
	}

	return $jobs_dict
}


################################################################
#
# NAME
#	get_job_param - returns a specific parameter value for a specific job
#
# SYNOPSIS
#	get_job_param job_id parameter_name
#
# DESCRIPTION
#	Returns a specific parameter value for a specified job if the
#	parameter exists for the job, or MISSING if it does not exist.
#	Specifying an invalid job id will result in a failure.
#
################################################################

proc get_job_param { job_id parameter_name } {

	set jobs_dict [get_jobs $job_id]

	if [dict exists $jobs_dict $job_id $parameter_name] {
		return [dict get $jobs_dict $job_id $parameter_name]
	} else {
		return "MISSING"
	}
}

proc get_job_gpu_cnt { job_id } {
	global scontrol gres_regex
	set job_gpu_cnt 0
	array set gpu_type_found {}
	spawn $scontrol show job -d $job_id
	expect {
		-re "(JOB_GRES=|,)$gres_regex" {
			set count 0
			set name $expect_out(2,string)
			# Assume typed GRES of format gpu:1080:5 to start out
			set type $expect_out(3,string)
			set count $expect_out(4,string)

			# Skip all GRES of different names
			if {$name != "gpu"} {
				exp_continue
			}
			if {$count == ""} {
				# Now assume GRES format gpu:5
				set count $type
				set type "notype"
			}
			# Skip if type already found (in another GRES line)
			if {[info exists gpu_type_found($type)]} {
				exp_continue
			}
			if { $count } {
				incr job_gpu_cnt $count
				set gpu_type_found($type) true
			}

			exp_continue
		}
		timeout {
			log_error "scontrol not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	return $job_gpu_cnt
}

proc check_reason { job_id reason } {
	global squeue

	set found 0
	spawn $squeue -j $job_id --noheader -o "%r"
	expect {
		-re "$reason" {
			set found 1
			exp_continue
		}
		timeout {
			log_error "squeue not responding"
		}
		eof {
			wait
		}
	}

	if {$found == 0} {
		log_error "Job $job_id should have a wait reason of $reason"
	}
	return $found
}


################################################################################
#
# NAME
#	compile_against_libslurm - compiles a test program against either libslurm.so or libslurmfull.so
#
# SYNOPSIS
#	compile_against_libslurm test_prog ?use_full? ?build_args?
#
# DESCRIPTION
#	Compile a test program against either libslurm.so or libslurmfull.so.
#
# ARGUMENTS
#	test_prog
#		The name of the test program (and .c file)
#	use_full
#		If 1, use libslurmfull.so instead of libslurm.so. Defaults to 0.
#	build_args
#		Additional string to be appended to the build command.
#		E.g. "-DUSING_VALGRIND -lm ${build_dir}/src/slurmctld/locks.o"
#		(initial space will be added automatically).
#
################################################################################

proc compile_against_libslurm { test_prog {use_full 0} {build_args ""} } {
	global slurm_dir bin_cc src_dir build_dir exit_code bin_chmod

	if {$use_full} {
		set libfile "libslurmfull.so"
	} else {
		set libfile "libslurm.so"
	}

	if [file exists $slurm_dir/lib64/slurm/$libfile] {
		set libdir "lib64"
	} else {
		set libdir "lib"
	}

	if {$use_full} {
		set libline "$slurm_dir/$libdir/slurm"
		set libfile "slurmfull"
	} else {
		set libline "$slurm_dir/$libdir"
		set libfile "slurm"
	}

	set build_cmd "$bin_cc ${test_prog}.c -g -pthread -o $test_prog -I$src_dir -I$build_dir -I$slurm_dir/include -Wl,-rpath=$libline -L$libline -l$libfile -lresolv"

	# Add additional arguments to the build command
	if {$build_args != ""} {
		set build_cmd "$build_cmd $build_args"
	}

	log_info "Build command: $build_cmd"
	catch {exec {*}$build_cmd} out_str out_dict
	if {[dict get $out_dict -code]} {
		log_error $out_str
		set exit_code 1
		return
	}

	exec $bin_chmod 700 $test_prog
}


################################################################
#
# NAME
#	assert - tests a boolean expression
#
# SYNOPSIS
#	assert condition message ?action?
#
# DESCRIPTION
#	Tests a boolean expression. If the condition is false, the
#	function will take the designated action (error, fail, skip,
#	or warn) using the specified message.
#
# ARGUMENTS
#	condition
#		The boolean expression to test
#	message
#		The message to print with the action
#	action
#		Specifies the action to take when false. The default is "error"
#		The action will be one of the following:
#
#		error - Prints an error if the boolean_expression is false
#		fail  - Fails the test if the boolean_expression is false
#		skip  - Skips the test if the boolean_expression is false
#		warn  - Prints a warning if the boolean_expression is false
#
################################################################

proc assert { condition message { action "error" } } {

	# We must skip two levels if being called by one of the assert_* helpers
	set uplevel 1
	if {[info level] > 1 && [string match "assert_*" [lindex [info level -1] 0] ]} {
		incr uplevel
	}

	if {![uplevel $uplevel expr [format "{%s}" $condition]]} {
		if {$action eq "fail"} {
			fail "$message"
		} elseif {$action eq "error"} {
			log_error "$message"
		} elseif {$action eq "warn"} {
			log_warn "$message"
		} elseif {$action eq "skip"} {
			skip "$message"
		} else {
			fail "Invalid action ($action) with assert"
		}
	}
}


################################################################
#
# NAME
#	assert_or_fail - tests a boolean condition, failing if false
#
# SYNOPSIS
#	assert_or_fail condition message
#
# DESCRIPTION
#	Tests a boolean condition. If the condition is false, the
#	function will call fail with the specified message.
#
# ARGUMENTS
#	condition
#		The boolean expression to test
#	message
#		The message to print with the action
#
################################################################

proc assert_or_fail { condition message } {
	assert "$condition" "$message" "fail"
}


################################################################
#
# NAME
#	assert_or_error - tests a boolean expression, logging an error if false
#
# SYNOPSIS
#	assert_or_error condition message
#
# DESCRIPTION
#	Tests a boolean condition. If the condition is false, the
#	function will log_error with the specified message.
#
# ARGUMENTS
#	condition
#		The boolean expression to test
#	message
#		The message to print with the action
#
################################################################

proc assert_or_error { condition message } {
	assert "$condition" "$message" "error"
}


################################################################
#
# NAME
#	assert_or_warn - tests a boolean expression, logging a warning if false
#
# SYNOPSIS
#	assert_or_warn condition message
#
# DESCRIPTION
#	Tests a boolean condition. If the condition is false, the
#	function will log_warn with the specified message.
#
# ARGUMENTS
#	condition
#		The boolean expression to test
#	message
#		The message to print with the action
#
################################################################

proc assert_or_warn { condition message } {
	assert "$condition" "$message" "warn"
}


################################################################
#
# NAME
#	assert_or_skip - tests a boolean expression, skipping the test if false
#
# SYNOPSIS
#	assert_or_skip condition message
#
# DESCRIPTION
#	Tests a boolean condition. If the condition is false, the
#	function will call skip with the specified message.
#
# ARGUMENTS
#	condition
#		The boolean expression to test
#	message
#		The message to print with the action
#
################################################################

proc assert_or_skip { condition message } {
	assert "$condition" "$message" "skip"
}


################################################################
#
# NAME
#	_log_format - prints a log message with colorization and formatting
#
# SYNOPSIS
#	_log_format log_level message
#
# DESCRIPTION
#	This procedure is called by the log_<level> procedures and
#	derives the relevant log level from the caller's procedure name.
#
# ARGUMENTS
#	log_level
#		The logging threshold that triggered the log statement
#	message
#		The message to print with colorization and formatting
#
# ENVIRONMENT
#	testsuite_log_format
#		Used as the template for the fields to be output.
#		Fields must be expressed in the form:
#			%{<field_name>}<format_conversion_specifier>
#		Supported fields include the following:
#			message
#				The log message
#			filename
#				The file name where the log_<log_level>
#				procedure was called from
#			lineno
#				The line number where the log_<log_level>
#				procedure was called from
#			timestamp
#				The date and time when the log_<log_level>
#				procedure was called at
#			msecs
#				The milliseconds when the log_<log_level>
#				procedure was called at
#			loglevel
#				The log level that triggers the log_<log_level>
#				procedure to be called
#	testsuite_time_format
#		Used as a template for the timestamp. See the format groups
#		for the tcl clock format command.
#	testsuite_colorize
#		Boolean that turns colorization on or off
#	testsuite_color_<log_level>
#		Can be set to define the color used for each log level
#
################################################################

proc _log_format { log_level message } {
	global testsuite_colorize testsuite_log_format testsuite_time_format
	global COLOR_NONE
	global testsuite_color_fatal testsuite_color_error testsuite_color_warn
	global testsuite_color_info testsuite_color_debug testsuite_color_trace

	set format_string $testsuite_log_format
	set milliseconds_since_epoch [clock milliseconds]
	set date_time [clock format [expr {$milliseconds_since_epoch / 1000}] -format "$testsuite_time_format"]
	set milliseconds [expr {$milliseconds_since_epoch % 1000}]

	set frame_level -2
	while { [dict get [info frame $frame_level] type] != "source" } {
		incr frame_level -1
	}

	set format_args {}
	while {[regexp "%{\[a-z]+}" $format_string format_field]} {
		if {$format_field eq "%{message}"} {
			lappend format_args $message
		} elseif {$format_field eq "%{filename}"} {
			lappend format_args [file tail [dict get [info frame $frame_level] file]]
		} elseif {$format_field eq "%{lineno}"} {
			lappend format_args [dict get [info frame $frame_level] line]
		} elseif {$format_field eq "%{timestamp}"} {
			lappend format_args $date_time
		} elseif {$format_field eq "%{msecs}" || $format_field eq "%{milliseconds}"} {
			lappend format_args $milliseconds
		} elseif {$format_field eq "%{loglevel}" || $format_field eq "%{levelname}"} {
			lappend format_args [string totitle $log_level]
		} else {
			fail "Invalid field ($format_field) specified in testsuite_log_format"
		}
		regsub $format_field $format_string "%" format_string
	}

	if ($testsuite_colorize) {
		switch $log_level {
			fatal   { append output $testsuite_color_fatal }
			error   { append output $testsuite_color_error }
			warning { append output $testsuite_color_warn }
			info    { append output $testsuite_color_info }
			debug   { append output $testsuite_color_debug }
			trace   { append output $testsuite_color_trace }
		}
	}
	append output [format $format_string {*}$format_args]
	if ($testsuite_colorize) {
		append output $COLOR_NONE
	}
	puts $output
}


################################################################
#
# NAME
#	_print_header - prints a test header
#
# SYNOPSIS
#	_print_header
#
# ENVIRONMENT
#	testsuite_colorize
#		Boolean that turns colorization on or off
#	testsuite_color_header
#		Can be set to define the color used for the header
#
################################################################

proc _print_header { } {
	global test_name testsuite_color_header testsuite_colorize COLOR_NONE

	if ($testsuite_colorize) {
		append output $testsuite_color_header
	}
	append output [string repeat = 78]\n
	append output [format "%-9s" "TEST:"]${test_name}\n
	append output [string repeat = 78]
	if ($testsuite_colorize) {
		append output $COLOR_NONE
	}
	puts $output
}


################################################################
#
# NAME
#	_print_failure - prints FAILURE status with test name
#
# SYNOPSIS
#	_print_failure
#
# ENVIRONMENT
#	testsuite_colorize
#		Boolean that turns colorization on or off
#	testsuite_color_<test_status>
#		Can be set to define the color used for each test status
#
################################################################

proc _print_failure { } {
	global test_name testsuite_color_failure testsuite_colorize COLOR_NONE

	if ($testsuite_colorize) {
		append output $testsuite_color_failure
	}
	append output [string repeat = 78]\n
	append output [format "%-9s" "FAILURE:"]${test_name}\n
	append output [string repeat = 78]
	if ($testsuite_colorize) {
		append output $COLOR_NONE
	}
	puts $output
}


################################################################
#
# NAME
#	_print_skipped - prints SKIPPED status with test name
#
# SYNOPSIS
#	_print_skipped
#
# ENVIRONMENT
#	testsuite_colorize
#		Boolean that turns colorization on or off
#	testsuite_color_<test_status>
#		Can be set to define the color used for each test status
#
################################################################

proc _print_skipped { } {
	global test_name testsuite_color_skipped testsuite_colorize COLOR_NONE


	if ($testsuite_colorize) {
		append output $testsuite_color_skipped
	}
	append output [string repeat = 78]\n
	append output [format "%-9s" "SKIPPED:"]${test_name}\n
	append output [string repeat = 78]
	if ($testsuite_colorize) {
		append output $COLOR_NONE
	}
	puts $output
}


################################################################
#
# NAME
#	_print_success - prints SUCCESS status with test name
#
# SYNOPSIS
#	_print_success
#
# ENVIRONMENT
#	testsuite_colorize
#		Boolean that turns colorization on or off
#	testsuite_color_<test_status>
#		Can be set to define the color used for each test status
#
################################################################

proc _print_success { } {
	global test_name testsuite_color_success testsuite_colorize COLOR_NONE


	if ($testsuite_colorize) {
		append output $testsuite_color_success
	}
	append output [string repeat = 78]\n
	append output [format "%-9s" "SUCCESS:"]${test_name}\n
	append output [string repeat = 78]
	if ($testsuite_colorize) {
		append output $COLOR_NONE
	}
	puts $output
}


################################################################
#
# NAME
#	_get_test_name - gets the name of the invoking source script
#
# SYNOPSIS
#	_get_test_name
#
# RETURN VALUE
#	The name of the originally called script
#
################################################################

proc _get_test_name { } {

	set test_name unknown
	set frame_level 1
	while { $frame_level <= [info frame] } {
		if { [dict get [info frame $frame_level] type] == "source" } {
			set test_name [file tail [dict get [info frame $frame_level] file]]
			break
		}
		incr frame_level
	}

	return $test_name
}


################################################################
#
# NAME
#	_test_cleanup - performs the test cleanup
#
# SYNOPSIS
#	_test_cleanup
#
# DESCRIPTION
#	This procedure removes the temporary test_dir and calls the
#	test-defined cleanup procedure.
#
# NOTES
#	This function should be called only from _test_init and _exit.
#
################################################################

proc _test_cleanup {} {
	global log_warn test_dir

	set rc 0

	# Call global cleanup procedure if it is defined by the test
	if {[info procs cleanup] eq "cleanup"} {
		if {[catch {cleanup} cleanup_error ]} {
			log_error "Cleanup had errors: $cleanup_error"
			set rc 1
		}
	 }

	# Remove the temporary test directory
	exec rm -rf $test_dir

	return $rc
}


################################################################
#
# NAME
#	_test_init - performs test initialization
#
# SYNOPSIS
#	_test_init
#
# DESCRIPTION
#	This procedure is called automatically at the beginning of each test.
#	It prints the header, creates the temporary test dir, etc.
#
################################################################

proc _test_init {} {
	global test_dir test_id test_name testsuite_shared_dir

	# Set test name to name of originally invoked test script, e.g. test1.1
	set test_name [_get_test_name]

	# Set test id to suffix of the test script, e.g. 1.1
	set test_id [string map {test ""} $test_name]

	# Temporary test directory used to stash saved configs, output files...
	set test_dir  "$testsuite_shared_dir/${test_name}dir"

	# Print test header
	_print_header

	# Cleanup in case test was not cleaned up on last execution
	if { [_test_cleanup] } {
		fail "Error in the initial cleanup"
	}

	# Create temporary shared test directory
	exec mkdir -p $test_dir
}


################################################################
#
# NAME
#	_test_fini - performs test finalization
#
# SYNOPSIS
#	_test_fini ?status?
#
# DESCRIPTION
#	This procedure is called automatically from the ending functions
#	pass, skip and failure.
#	It cleans up based on the status and the testsuite_cleanup_on_failure
#	variable, prints the final test status/summary, and exits the test.
#
#	When status is zero, we print SUCCESS.
#	When status is negative, we print SKIPPED.
#	When status is positive, we print FAILURE.
#
#	We will always cleanup for SUCCESS or SKIPPED tests.
#	Whether or not the cleanup procedure is called for FAILURE tests
#	depends on the setting of the $testsuite_cleanup_on_failure variable
#	which can be set in the globals.local file or overridden with the
#	SLURM_TESTSUITE_CLEANUP_ON_FAILURE environment variable.
#
################################################################

proc _test_fini { status } {
	global testsuite_cleanup_on_failure

	if {$status == 0} {
		# Successful test
		_test_cleanup
		_print_success
	} elseif {$status < 0} {
		# Skipped test
		_test_cleanup
		_print_skipped
	} else {
		# Failed test
		# Only cleanup the test if configured to do so
		if {$testsuite_cleanup_on_failure} {
			_test_cleanup
		}
		_print_failure
	}

	__exit $status
}


################################################################
#
# Overload the exit routine to ensure that no one is explicitly
# calling it, and to enforce _test_fini if exit is called when
# the test reach its EOF.
#
# All tests should exit using pass, skip or fail.
#
################################################################

rename exit __exit

proc exit { {status 0} } {
	global test_name

	#
	# Minor sanity check to detect if exit was explicitly called (not
	# allowed) or automatically executed when the test ends
	#
	if {[info level] > 1} {
		# exit was called from a function, and it shouldn't
		log_error "Exit should not be directly called, use pass, skip or fail instead"
	} else {
		set frame_level 1
		while { $frame_level <= [info frame] } {
			if { [dict get [info frame $frame_level] type] == "source" } {
				if { [file tail [dict get [info frame $frame_level] file]] eq $test_name } {
					# exit was called explicitly from the
					# test, and it shouldn't
					log_error "Exit should not be directly called, use pass, skip or fail instead"
				}
				break
			}
			incr frame_level
		}
	}

	# The exit was called implicitly when the test ends, allowed but
	# _test_fini call enforced
	_test_fini $status
}


# Call _test_init at the beginning of each test
_test_init