#!/usr/bin/env expect
############################################################################
# Purpose: Verify the correct setting of a job's ExitCode.
############################################################################
# Copyright (C) 2010 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Don Lipari
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
############################################################################
source ./globals

# exit_code is only read by the final cleanup step of this script.
set exit_code   0

# Work files, all named after the test id provided by ./globals:
#   file_in     - generated batch script
#   file_prog1  - helper binary built from ${file_prog1}.c
#   file_prog2  - helper binary built from ${file_prog2}.c
set file_in     "test$test_id.input"
set file_prog1  "test$test_id.prog1"
set file_prog2  "test$test_id.prog2"

# The checks below rely on per-step accounting records, so skip when the
# configuration suppresses them.
if {![test_accting_steps]} {
	skip "This test can not be run with nosteps or nojobs (AccountingStorageEnforce)"
}

#
# Delete left-over programs and rebuild them.
#
exec $bin_rm -f $file_in
exec $bin_rm -f $file_prog1 $file_prog2
exec $bin_cc -O -o $file_prog1 ${file_prog1}.c
exec $bin_cc -O -o $file_prog2 ${file_prog2}.c

#
# Submit a script that returns a successful exit code and confirm that
# the job record's ExitCode reflects this value.
# $file_prog1 returns a successful exit code (0) and $file_prog2 returns
# an unsuccessful exit code (123).
#
# The failed job step should have no influence on the job's ExitCode
# value.  However the DerivedExitCode value should be set to the
# highest value of all the job steps, in this case, 123.
#
make_bash_script $file_in "
  $bin_echo 'testing successful job return code'
  $srun ./$file_prog1
  $srun ./$file_prog2
  exit 0
"

set job_id 0
spawn $sbatch --output=/dev/null -t1 ./$file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		# Report a hang explicitly instead of letting it surface
		# as a generic submit failure below.
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	fail "Batch submit failure"
}

#
# Wait for job to complete
#
if {[wait_for_job $job_id "DONE"] != 0} {
	fail "Waiting for job to complete"
}

#
# Confirm correct ExitCode setting in job record
#
set matches 0
spawn $scontrol --detail show job $job_id
expect {
	-re " ExitCode=0:0" {
		# The leading space keeps this arm from also matching
		# the "DerivedExitCode=" field.
		incr matches
		exp_continue
	}
	-re "DerivedExitCode=123:0" {
		incr matches
		exp_continue
	}
	timeout {
		fail "scontrol not responding"
	}
	eof {
		wait
	}
}
if {$matches != 2} {
	fail "Job ExitCode incorrect ($matches != 2)"
}

#
# Confirm the job record in the db has the same exit codes
#
if {[test_account_storage] == 1} {
	# NOTE(review): presumably gives the accounting database time to
	# receive the completed job's records -- confirm.
	sleep 2
	set matches 0
	spawn $sacct -n -P -j $job_id -o JobID,ExitCode,DerivedExitCode,Comment
	expect {
		-re "$job_id\\|0:0\\|123:0\\|" {
			# Job record
			incr matches
			exp_continue
		}
		-re "$job_id\\.batch\\|0:0\\|\\|" {
			# Batch script
			incr matches
			exp_continue
		}
		-re "$job_id\\.extern\\|0:0\\|\\|" {
			# Container, optional (deliberately not counted)
			exp_continue
		}
		-re "$job_id\\.($number)\\|0:0\\|\\|" {
			# Step 0 record
			incr matches
			exp_continue
		}
		-re "$job_id\\.($number)\\|123:0\\|\\|" {
			# Step 1 record
			incr matches
			exp_continue
		}
		timeout {
			fail "sacct not responding"
		}
		eof {
			wait
		}
	}
	# Expected: job record + batch script + two numbered steps.
	if {$matches != 4} {
		fail "sacct of $job_id failed ($matches != 4)"
	}
}

#
# Now submit a script that returns a non-zero exit code and confirm
# that the job record's ExitCode reflects this value.
# The two successful job steps ($file_prog1 from above) should have no
# influence on the job's unsuccessful ExitCode.
#
# Here, the DerivedExitCode value should be set to the highest value
# of the two successful job steps (i.e., 0).
#
exec $bin_rm -f $file_in
make_bash_script $file_in "
  $bin_echo 'testing unsuccessful job return code'
  $srun ./$file_prog1
  $srun ./$file_prog1
  exit 33
"

set job_id 0
spawn $sbatch --output=/dev/null -t1 ./$file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		# Report a hang explicitly instead of letting it surface
		# as a generic submit failure below.
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	fail "Batch submit failure"
}

#
# Wait for job to complete
#
if {[wait_for_job $job_id "DONE"] != 0} {
	fail "Waiting for job to complete"
}

#
# Confirm correct ExitCode setting in job record
#
set matches 0
spawn $scontrol --detail show job $job_id
expect {
	-re " ExitCode=33:0" {
		# The leading space keeps this arm from also matching
		# the "DerivedExitCode=" field.
		incr matches
		exp_continue
	}
	-re "DerivedExitCode=0:0" {
		incr matches
		exp_continue
	}
	timeout {
		fail "scontrol not responding"
	}
	eof {
		wait
	}
}
if {$matches != 2} {
	fail "Job ExitCode incorrect ($matches != 2)"
}

#
# Confirm the job record in the db has the same exit codes
#
if {[test_account_storage] == 1} {
	# NOTE(review): presumably gives the accounting database time to
	# receive the completed job's records -- confirm.
	sleep 2
	set matches 0
	spawn $sacct -n -P -j $job_id -o JobID,ExitCode,DerivedExitCode,Comment
	expect {
		-re "$job_id\\|33:0\\|0:0\\|" {
			# Job record
			incr matches
			exp_continue
		}
		-re "$job_id\\.batch\\|33:0\\|\\|" {
			# Batch script
			incr matches
			exp_continue
		}
		-re "$job_id\\.extern\\|0:0\\|\\|" {
			# Container, optional (deliberately not counted)
			exp_continue
		}
		-re "$job_id\\.($number)\\|0:0\\|\\|" {
			# Step 0 and step 1 records: both steps exit 0, so
			# this single arm fires once per step.
			incr matches
			exp_continue
		}
		timeout {
			fail "sacct not responding"
		}
		eof {
			wait
		}
	}
	# Expected: job record + batch script + two numbered steps.
	if {$matches != 4} {
		fail "sacct of $job_id failed ($matches != 4)"
	}
}

# Remove the generated script and helper binaries only on success.
if {$exit_code == 0} {
	exec $bin_rm -f $file_in $file_prog1 $file_prog2
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}