#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test of srun's --kill-on-bad-exit option.
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
############################################################################
# Shared test-suite definitions.  This script relies on it for test_id,
# the bin_* tool paths, srun, max_job_delay and the log_debug, log_error,
# slow_kill and fail helpers.
source ./globals

# exit_code: set to 1 when srun stops responding
# file_in:   name of the compiled test program; its C source is "$file_in.c"
# matches:   number of "exit code 2" reports seen in srun's output
set exit_code   0
set file_in     "test$test_id.prog"
set matches     0

#
# Delete left-over test program
# Build the C source for the test program
#
# We have the last of the tasks exit.  proctrack/linuxproc will not
# signal processes named "slurmstepd" and later user tasks could still
# be named "slurmstepd" when the termination signal comes in.
#
exec $bin_rm -f $file_in
set fd [open "$file_in.c" w]
# The task whose SLURM_PROCID is 9 exits immediately with code 2.  Every
# other task sleeps and then prints a marker that must never reach the
# output if srun kills the remaining tasks as requested.
puts $fd {
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main()
{
	char *id = getenv("SLURM_PROCID");
	if (atoi(id) == 9) {
		exit(2);
	}
	sleep(15);
	printf("SHOULD_NOT_BE_HERE\n");
}
}
close $fd

# Add delay due to sporadic error "Clock skew detected"
exec $bin_sleep 1
exec $bin_cc -O -o $file_in ${file_in}.c
exec $bin_chmod 700 $file_in

#
# Run ten tasks of the test program on one node (-O overcommits the
# allocated CPUs, -t1 limits the step to one minute).  Task 9 exits with
# code 2 and --kill-on-bad-exit should make srun terminate the other
# tasks before they print anything.
#
set timeout $max_job_delay
set srun_pid [spawn $srun -n10 -N1 -O --kill-on-bad-exit -t1 ./$file_in]
expect {
	-re "exit code 2" {
		log_debug "This error is expected, no worries"
		incr matches
		exp_continue
	}
	-re "SHOULD_NOT_BE_HERE" {
		# A surviving task reached its printf; push the counter far
		# negative so the check below is expected to fail.
		set matches -10
		exp_continue
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

# Exactly one report of the bad exit code is expected.
if {$matches != 1} {
	fail "Problem with --kill-on-bad-exit option"
}

# The generated files are removed only on success.
# NOTE(review): fail is defined in ./globals; presumably it ends the test,
# in which case the files are also left behind after the check above fails.
if {$exit_code == 0} {
	exec $bin_rm -f $file_in $file_in.c
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}