#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that squeue -O (--Format) option displays the
#          correct user specified values.
############################################################################
# Copyright (C) 2014 SchedMD LLC
# Written by Nathan Yee
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
# Overview of the script below:
#   1. Run a short exclusive job to discover an idle node and read its
#      thread/core/socket layout.
#   2. Create a temporary accounting account and add the current user to it.
#   3. Submit a batch job on that node with a known value for every option
#      listed in sq_format, then verify that "squeue -O<field>" prints a
#      string matching each expected value.
#   4. Repeat the comparison for the job's step fields (sq_step_format).
#   5. Cancel the job, remove the account, and delete the scratch files if
#      nothing failed.
# Non-fatal problems are logged and recorded in exit_code so that cleanup
# still runs; the test is failed at the very end if exit_code is non-zero.
############################################################################
source ./globals
source ./globals_accounting

set cwd         "[$bin_pwd]"
set exit_code   0
set test_node   ""
set job_id      0
set step_id     0
set file_err    "${cwd}/test${test_id}_err"
set file_in     "test${test_id}_script"
set file_out    "${cwd}/test${test_id}_out"
set test_acct   "test${test_id}_acct"

############################Job Format Test############################

#######################
# Format value
#######################
# Each array key is a squeue -O field name; each value is used further
# down as a regular expression matched against that field's output.
# "DUMMY" and 0 entries are placeholders that are overwritten before use.

# stderr
set sq_format(stderr)       $file_err
# stdout
set sq_format(stdout)       $file_out
# stdin
set sq_format(stdin)        /dev/null
# numcpus (replaced by the NumCPUs value reported by scontrol)
set sq_format(numcpus)      1
# numtasks (replaced by the Tasks value reported by scontrol)
set sq_format(numtasks)     "DUMMY"
# numnodes
set sq_format(numnodes)     1
# timelimit
set sq_format(timelimit)    2
# job
set sq_format(name)         $file_in
# account
set sq_format(account)      $test_acct
# cpus-per-task (replaced by the second value returned by get_node_cpus)
set sq_format(cpuspertask)  2
# network
set sq_format(network)      "ip"
# requeue
set sq_format(requeue)      1
# profile
set sq_format(profile)      Energy
# ntasks-per-socket (replaced by the node's CoresPerSocket)
set sq_format(ntpersocket)  2
# ntasks-per-node (capped below at the node's core count)
set sq_format(ntpernode)    2
# state compact (a regex alternation: any one of these codes is accepted)
set sq_format(statecompact) PD|R|CA|CR|CG|CD|F|TO|NF|SE
# jobid (replaced by the submitted job's id)
set sq_format(jobid)        0
# user (replaced by the current user name)
set sq_format(username)     "DUMMY"
# switches
set sq_format(reqswitch)    1
# partition
set sq_format(partition)    [default_partition]
# comment
set sq_format(comment)      1234567890

#
# Preconditions: accounting via slurmdbd, administrator rights, and at
# least two idle nodes.
#
if { [test_using_slurmdbd] == 0 } {
	skip "This test can't be run without AccountStorageType=slurmdbd"
}
if {[string compare [check_accounting_admin_level] "Administrator"]} {
	skip "This test can't be run without being an Accounting administrator"
}

set available [available_nodes idle]
if {$available < 2} {
	skip "Not enough nodes currently available ($available avail, 2 needed)"
}

# Start from a clean state in case a previous run left the account behind
remove_acct [get_cluster_name] $test_acct

# Run a job to get a usable node to test
set tmp_sc "tmp_sc"
set tmp_job 0
make_bash_script $tmp_sc "sleep 2"

spawn $sbatch -o/dev/null -N1 --exclusive $tmp_sc
expect {
	-re "Submitted batch job ($number)" {
		set tmp_job $expect_out(1,string)
		exp_continue
	}
	timeout {
		log_error "sbatch is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$tmp_job == 0} {
	fail "Job was not submitted"
}

if {[wait_for_job $tmp_job "RUNNING"] != 0} {
	log_error "Error waiting for job $tmp_job to start"
	cancel_job $tmp_job
	set exit_code 1
}

# Read back the node the temporary job was placed on
set got_node 0
spawn $scontrol show job $tmp_job
expect {
	-re "NodeList=($re_word_str)" {
		set test_node $expect_out(1,string)
		set got_node 1
		exp_continue
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$got_node != 1} {
	fail "Was not able to get usable node"
}

# The second value returned by get_node_cpus becomes the -c value for the job
lassign [get_node_cpus $test_node] cpu_tot sq_format(cpuspertask)

set socket_cnt 1
spawn $scontrol show node $test_node
expect {
	-re "CoresPerSocket=($number)" {
		set sq_format(ntpersocket) $expect_out(1,string)
		exp_continue
	}
	-re "Sockets=($number)" {
		set socket_cnt $expect_out(1,string)
		exp_continue
	}
	timeout {
		# Record the failure and stop waiting; calling exp_continue here
		# would re-arm the timer and loop forever if scontrol is hung.
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Never request more tasks per node than the node has cores
set core_cnt [expr {$socket_cnt * $sq_format(ntpersocket)}]
if {$sq_format(ntpernode) > $core_cnt} {
	set sq_format(ntpernode) $core_cnt
}

if {$sq_format(ntpersocket) == 0 || $sq_format(cpuspertask) == 0} {
	fail "failed to get number of threads or cores ThreadsPerCore=$sq_format(cpuspertask) & CoresPerSocket=$sq_format(ntpersocket)"
}

# Remove the tmp script and cancel tmp job
# (-f so that an already-missing file does not raise a Tcl error from exec)
exec $bin_rm -f $tmp_sc
cancel_job $tmp_job

set sq_format(username) [get_my_user_name]

#
# Create the test account and associate the current user with it
#
set match 0
spawn $sacctmgr add -i account $test_acct
expect {
	-re "Associations" {
		set match 1
		exp_continue
	}
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1} {
	log_error "Account was not added"
	set exit_code 1
}

set match 0
spawn $sacctmgr add -i user $sq_format(username) account=$sq_format(account)
expect {
	-re "Association" {
		set match 1
		exp_continue
	}
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1} {
	log_error "User was not added"
	set exit_code 1
}

#
# Make a bash script
# Three srun commands, one per line, so the job has steps to inspect.
#
make_bash_script $file_in "$srun $bin_sleep 10000
$srun $bin_sleep 200
$srun $bin_sleep 100"

# Submit the job with every option whose squeue field is checked below
spawn $sbatch -A$sq_format(account) -N$sq_format(numnodes) \
	-n$sq_format(numcpus) -t$sq_format(timelimit) -c$sq_format(cpuspertask) \
	--switch=$sq_format(reqswitch) --network=$sq_format(network) --requeue \
	--profile=$sq_format(profile) --ntasks-per-socket=$sq_format(ntpersocket) \
	--ntasks-per-node=$sq_format(ntpernode) -o$sq_format(stdout) \
	--comment=$sq_format(comment) -e$sq_format(stderr) --exclusive \
	-w$test_node $sq_format(name)
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		set sq_format(jobid) $expect_out(1,string)
		exp_continue
	}
	timeout {
		log_error "sbatch is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

if {[wait_for_job $job_id "RUNNING"] != 0} {
	log_error "Error waiting for job $job_id to start"
	cancel_job $job_id
	set exit_code 1
}

# Wait for steps to start too
sleep 5

# The number of allocated CPUs can vary depending upon the allocation unit
set match 0
spawn $scontrol show job $job_id
expect {
	-re "NumCPUs=($number)" {
		set sq_format(numcpus) $expect_out(1,string)
		incr match 1
		exp_continue
	}
	-re "Tasks=($number)" {
		set sq_format(numtasks) $expect_out(1,string)
		incr match 1
		exp_continue
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 2} {
	log_error "scontrol did not provide correct job values ($match != 2)"
	set exit_code 1
}

# Query each field on its own (width 99 so values are not truncated) and
# compare the output with the expected value.
set match 0
set cnt 0
foreach option [array names sq_format] {
	incr cnt 1
	set this_match 0
	spawn $squeue --job=$job_id --noheader -O$option:99
	expect {
		-re "$sq_format($option)" {
			incr match 1
			incr this_match 1
			exp_continue
		}
		timeout {
			log_error "squeue is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$this_match == 0} {
		log_error "Failed to match $option with $sq_format($option)"
		set exit_code 1
	}
}
if {$match != $cnt} {
	log_error "Not all squeue outputs match ($match != $cnt)"
	set exit_code 1
}

# Field width control: with a width of 4 only "1234" of the comment may
# appear; seeing "12345" means the width was ignored.
set match 0
spawn $squeue --job=$job_id --noheader -Ocomment:4
expect {
	-re "12345" {
		log_error "Field width control failure"
		set exit_code 1
		exp_continue
	}
	-re "1234" {
		incr match 1
		exp_continue
	}
	timeout {
		log_error "squeue is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1} {
	log_error "Not all squeue outputs match ($match != 1)"
	set exit_code 1
}

############################Step Format Test############################

#####################
# Format value
#####################
# "DUMMY" entries are replaced by the values scontrol reports for the step.

# jobid
set sq_step_format(jobid)     $job_id
# stepid
set sq_step_format(stepid)    $job_id.$step_id
# stepname
set sq_step_format(stepname)  "sleep"
# state
set sq_step_format(stepstate) "DUMMY"
# network
set sq_step_format(network)   "DUMMY"
# numcpus
set sq_step_format(numcpus)   $sq_format(numcpus)
# numtasks
set sq_step_format(numtasks)  "DUMMY"
# username
set sq_step_format(username)  $sq_format(username)

# All four step attributes must be reported by scontrol
set match 0
spawn $scontrol show step $sq_step_format(stepid)
expect {
	-re "State=($re_word_str)" {
		set sq_step_format(stepstate) $expect_out(1,string)
		incr match 1
		exp_continue
	}
	-re "CPUs=($number)" {
		set sq_step_format(numcpus) $expect_out(1,string)
		incr match 1
		exp_continue
	}
	-re "Tasks=($number)" {
		set sq_step_format(numtasks) $expect_out(1,string)
		incr match 1
		exp_continue
	}
	-re "Network=($re_word_str)" {
		set sq_step_format(network) $expect_out(1,string)
		incr match 1
		exp_continue
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 4} {
	log_error "scontrol did not provide correct step values ($match != 4)"
	set exit_code 1
}

# Same per-field comparison as for the job, this time against the step
set match 0
set cnt 0
foreach option [array names sq_step_format] {
	incr cnt 1
	set this_match 0
	spawn $squeue --step=$sq_step_format(stepid) --noheader -O$option
	expect {
		-re "$sq_step_format($option)" {
			incr match 1
			incr this_match 1
			exp_continue
		}
		timeout {
			log_error "squeue is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$this_match == 0} {
		log_error "Failed to match $option with $sq_step_format($option)"
		set exit_code 1
	}
}
if {$match != $cnt} {
	log_error "Not all squeue outputs match ($match != $cnt)"
	set exit_code 1
}

#
# Cleanup: stop the job, drop the test account, and remove scratch files
# only when the test passed (they are kept for inspection on failure).
#
cancel_job $job_id
if {[wait_for_job $job_id "DONE"] != 0} {
	log_error "Error waiting for job $job_id to complete"
	set exit_code 1
}

remove_acct "" $test_acct

if {$exit_code == 0} {
	exec $bin_rm -f $file_err $file_out $file_in
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}