#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate sbcast for a hetjob allocation.
############################################################################
# Copyright (C) 2015-2017 SchedMD LLC
# Written by Nathan Yee
# and Isaac Hartung
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

set script1     "test$test_id.in1"
set exit_code   0
set job_id      0
# het_job_id is read by cleanup, so it must exist before any code path that
# could end the test early (for example one of the skip checks below).
set het_job_id  0
set hostlist    ""
set node1       ""
set node2       ""
set prompt      "PROMPT: "

# Cancel the heterogeneous job allocation, if one was obtained.
proc cleanup { } {
	global het_job_id scancel

	if {$het_job_id > 0} {
		exec $scancel $het_job_id
	}
}

#
# Check that the configuration can run this test at all
#
if {![test_accting_steps]} {
	skip "This test can not be run with nosteps or nojobs (AccountingStorageEnforce)"
}
if {[test_front_end] != 0} {
	skip "This test is incompatible with front-end systems"
} elseif {[slurmd_user_root] == 0} {
	skip "This test requires that the SlurmdUser be root"
} elseif {[get_node_cnt] < 2} {
	skip "This test requires that the configuration has at least 2 nodes"
}

#
# Pick the first two idle nodes of the default partition. Their names are
# used to build per-component directory names under /tmp.
#
set def_part [default_partition]
spawn $bin_bash -c "exec $scontrol show hostnames \$($sinfo -tidle --partition=$def_part -h -o%N) | head -n2 |tr \'\n\' ' ' "
expect {
	-re "($re_word_str) ($re_word_str)" {
		set node1 $expect_out(1,string)
		set node2 $expect_out(2,string)
		exp_continue
	}
	timeout {
		log_error "sinfo is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {![string compare $node1 ""] || ![string compare $node2 ""]} {
	skip "Did not find at least 2 idle nodes"
}

#
# Request a two-component heterogeneous allocation (one node each) running an
# interactive shell, then set a known prompt so later output can be delimited.
#
set matches 0
set index   0
set timeout $max_job_delay
spawn $salloc -t2 -N1 : -N1 $bin_bash
expect {
	-re "job ($number) has been allocated resources" {
		set het_job_id $expect_out(1,string)
		send "export PS1=\"$prompt\"\r"
		exp_continue
	}
	-re "\"$prompt" {
		# This is only the echo of the PS1 assignment, not a real prompt
		exp_continue
	}
	-re "$prompt" {
		#log_debug "Job initiated"
	}
	timeout {
		fail "salloc: allocation not granted in $timeout seconds"
	}
	eof {
		wait
	}
}
if {$het_job_id == 0} {
	fail "salloc failure"
}

#
# Build the script run inside the allocation. It removes leftovers, creates
# the target directories, broadcasts the srun binary to the whole job and to
# each component separately, lists the files that should (and should not)
# exist, and finally removes everything again.
#
make_bash_script $script1 "
$srun --het-group=0 $bin_rm -f /tmp/test_${test_id}/$node1/test$test_id\_file\_comp0
$srun --het-group=1 $bin_rm -f /tmp/test_${test_id}/$node2/test$test_id\_file\_comp1
$srun --het-group=0 $bin_rm -f /tmp/test_${test_id}/test$test_id\_file
$srun --het-group=0 $bin_rm -fr /tmp/test_${test_id}/$node1
$srun --het-group=1 $bin_rm -f /tmp/test_${test_id}/test$test_id\_file
$srun --het-group=1 $bin_rm -fr /tmp/test_${test_id}/$node2
$srun --het-group=1 $bin_rm -fr /tmp/test_${test_id}

$srun -N1 -n1 --het-group=0 mkdir /tmp/test_${test_id}
$srun -N1 -n1 --het-group=1 mkdir /tmp/test_${test_id}
$srun -N1 -n1 --het-group=0 mkdir /tmp/test_${test_id}/$node1
$srun -N1 -n1 --het-group=1 mkdir /tmp/test_${test_id}/$node2

$sbcast -f -j$het_job_id $srun /tmp/test_${test_id}/test$test_id\_file
$sbcast -f -j${het_job_id}+0 $srun /tmp/test_${test_id}/$node1/test$test_id\_file\_comp0
$sbcast -f -j${het_job_id}+1 $srun /tmp/test_${test_id}/$node2/test$test_id\_file\_comp1

echo -n \"\nChecking node 1: \"
$srun -Q -N1 -n1 --het-group=1 ls /tmp/test_${test_id}/test$test_id\_file
echo -n \"\nChecking node 0: \"
$srun -Q -N1 -n1 --het-group=0 ls /tmp/test_${test_id}/test$test_id\_file
echo -n \"\nChecking node 1 again: \"
$srun -Q -N1 -n1 --het-group=1 ls /tmp/test_${test_id}/$node2/test$test_id\_file\_comp1
echo -n \"\nChecking node 0 again: \"
$srun -Q -N1 -n1 --het-group=0 ls /tmp/test_${test_id}/$node1/test$test_id\_file\_comp0
echo -n \"\nChecking node 1 for lack: \"
$srun -Q -N1 -n1 --het-group=1 ls /tmp/test_${test_id}/$node2/test$test_id\_file\_comp0
echo -n \"\nChecking node 0 for lack: \"
$srun -Q -N1 -n1 --het-group=0 ls /tmp/test_${test_id}/$node1/test$test_id\_file\_comp1

$srun --het-group=0 $bin_rm -f /tmp/test_${test_id}/$node1/test$test_id\_file\_comp0
$srun --het-group=1 $bin_rm -f /tmp/test_${test_id}/$node2/test$test_id\_file\_comp1
$srun --het-group=0 $bin_rm -f /tmp/test_${test_id}/test$test_id\_file
$srun --het-group=0 $bin_rm -fr /tmp/test_${test_id}/$node1
$srun --het-group=1 $bin_rm -f /tmp/test_${test_id}/test$test_id\_file
$srun --het-group=1 $bin_rm -fr /tmp/test_${test_id}/$node2
$srun --het-group=1 $bin_rm -fr /tmp/test_${test_id}
$srun --het-group=0 $bin_rm -fr /tmp/test_${test_id}
"

#
# Run the script in the salloc shell and count the four listings that must
# succeed. The two "for lack" patterns name exactly the paths that the script
# lists, so they match only if ls prints the path, i.e. the file exists where
# it must not.
#
set timeout $max_job_delay
set matches 0
send "./$script1 \r"
expect {
	-re "Permission denied" {
		fail "Unable to delete/create file, check permissions"
	}
	-re "cannot create directory" {
		log_debug "This error is expected when nodes share the same tmp directory"
		exp_continue
	}
	-re "Checking node 1: */tmp/test_${test_id}/test$test_id\_file" {
		incr matches
		exp_continue
	}
	-re "Checking node 0: */tmp/test_${test_id}/test$test_id\_file" {
		incr matches
		exp_continue
	}
	-re "Checking node 1 again: */tmp/test_${test_id}/$node2/test$test_id\_file\_comp1" {
		incr matches
		exp_continue
	}
	-re "Checking node 0 again: */tmp/test_${test_id}/$node1/test$test_id\_file\_comp0" {
		incr matches
		exp_continue
	}
	-re "Checking node 1 for lack: */tmp/test_${test_id}/$node2/test$test_id\_file\_comp0" {
		log_error "sbcast copied file to wrong node"
		set exit_code 1
		exp_continue
	}
	-re "Checking node 0 for lack: */tmp/test_${test_id}/$node1/test$test_id\_file\_comp1" {
		log_error "sbcast copied file to wrong node"
		set exit_code 1
		exp_continue
	}
	-re "$prompt" {
		#log_debug "Job initiated"
	}
	timeout {
		log_error "salloc is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$matches != 4} {
	fail "sbcast did not copy the file(s) to the correct nodes ($matches != 4)"
}

# The generated script is removed only when the test passed.
if {$exit_code == 0} {
	exec $bin_rm -rf $script1
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}