#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test of srun --spread-job option.
############################################################################
# Copyright (C) 2016 SchedMD LLC.
# Written by Alejandro Sanchez
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
# Shared test-suite variables and helpers are expected to come from here
# (srun, scontrol, sinfo, number, max_job_delay, bin_printenv, fail, skip,
# log_error, slow_kill, wait_for_job, cancel_job, ...).
source ./globals

set node_cnt  0
set exit_code 0

#
# run_spread_job -- launch one srun with --spread-job and verify node count
#
# Spawns "srun -n task_cnt --spread-job" printing SLURM_JOB_ID, waits for
# the job to reach the DONE state, then reads NumNodes from
# "scontrol show job" and compares it with task_cnt.
#
# Arguments:
#   task_cnt  number of tasks requested; also the node count expected
#             in the job's NumNodes field.
#
# Results:
#   No return value. Sets the global exit_code to 1 on any error, and
#   calls fail if no job ID could be read from the srun output.
#
proc run_spread_job { task_cnt } {
	global max_job_delay srun scontrol bin_printenv number exit_code

	set timeout $max_job_delay
	set job_id  0

	# spawn returns the process ID of srun; keep it so the process can
	# be killed if it stops responding.
	set srun_pid [spawn $srun -n $task_cnt --spread-job $bin_printenv SLURM_JOB_ID]
	expect {
		-re "($number)" {
			# Every numeric match overwrites job_id, so the last
			# number printed by srun wins.
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "srun not responding"
			# Fixed: this used to reference the undefined variable
			# sun_pid, which raised a Tcl error instead of killing
			# the hung srun.
			slow_kill $srun_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		fail "Srun job ID not found"
	}

	if {[wait_for_job $job_id "DONE"] != 0} {
		log_error "Error waiting for job $job_id to complete"
		cancel_job $job_id
		set exit_code 1
	}

	# The job is finished; a short timeout is enough for scontrol.
	set timeout   10
	set num_nodes 0
	spawn $scontrol show job $job_id
	expect {
		-re "NumNodes=($number)" {
			set num_nodes $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	# With --spread-job each task is expected on its own node, so the
	# allocated node count must equal the task count.
	if {$num_nodes != $task_cnt} {
		log_error "Invalid node count for job $job_id ($num_nodes != $task_cnt)"
		set exit_code 1
	}
}

if {![test_cons_res] && ![test_cons_tres]} {
	skip "Test is only compatible with a config of SelectType=select/cons_res or select/cons_tres"
}

# Count the idle nodes in the default partition.
# NOTE(review): log_user is set to 1 both before and after this query; the
# first call may have been intended to be "log_user 0" - confirm.
log_user 1
spawn $sinfo -h -p [default_partition] -t idle -o "NODES=%D CPUS=%c"
expect {
	-re "NODES=($number)" {
		set node_cnt $expect_out(1,string)
		exp_continue
	}
	timeout {
		log_error "sinfo not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
log_user 1

if {$node_cnt < 2} {
	skip "Insufficient nodes for test in default partition ($node_cnt < 2)"
}

# Cap the number of tasks tried at 8, whatever the partition size.
if {$node_cnt > 8} {
	set max_task_cnt 8
} else {
	set max_task_cnt $node_cnt
}

# Try even task counts starting at 2, stopping at the first failure.
# NOTE(review): the bound is exclusive, so with exactly 2 idle nodes the
# loop body never runs and the test passes without launching any job.
# Confirm whether "<=" was intended or a spare node is kept on purpose.
for {set inx 2} {$inx < $max_task_cnt} {incr inx 2} {
	run_spread_job $inx
	if {$exit_code != 0} {
		break
	}
	sleep 1
}

if {$exit_code != 0} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}