#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test that no files are open in spawned tasks (except stdin,
#          stdout, and stderr) to ensure successful checkpoint/restart.
############################################################################
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
# NOTE(review): test_id, the bin_* and srun/sbatch command paths, the number
# regex and the helper procs used below are not defined in this file; they
# are presumably provided by ./globals -- confirm there.
source ./globals

set exit_code   0
set file_in     "test$test_id.input"
set file_out    "test$test_id.output"
set file_prog   "test$test_id.prog"
set iterations  50

if {![test_accting_steps]} {
	skip "This test can not be run with nosteps or nojobs (AccountingStorageEnforce)"
}

#
# Test is incompatible with certain plugins.
#
# Each of them leaves files open, although we could clear the proctrack
# related files by just closing all files after fd=3.
#
if {[string compare [test_proctrack] "cray_aries"] == 0} {
	skip "Test incompatible with proctrack/cray_aries"
}

#
# Delete left-over programs and rebuild them.
# The helper is compiled from the C source that sits next to this test
# (same name as the program plus a .c suffix). This script only looks for
# the word FAILED in the helper's output; presumably the helper prints it
# when it finds an unexpected open file -- see that source to confirm.
#
exec $bin_rm -f $file_prog $file_in $file_out
exec $bin_cc -O -o $file_prog ${file_prog}.c

#
# Batch script: run the helper once directly inside the batch step, then
# once per iteration as a task spawned by srun. The trailing 'fini' line
# lets the output check below tell a completed run from a truncated one.
#
make_bash_script $file_in "
  $bin_echo 'testing within script'
  ./$file_prog
  $bin_echo ' '
  $bin_echo 'testing $iterations sets of spawned tasks'
  for ((i=0; i<$iterations; i++)) ; do
    $srun --mpi=none ./$file_prog
  done
  $bin_echo 'fini'
"

#
# Submit the batch job and capture its job id
#
set job_id 0
spawn $sbatch --output=$file_out -t1 ./$file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	fail "Batch submit failure"
}

#
# Wait for job to complete
#
if {[wait_for_job $job_id "DONE"] != 0} {
	fail "Waiting for job to complete"
}

#
# Inspect the job's output file: count FAILED reports and make sure the
# closing 'fini' marker was written.
#
if {[wait_for_file $file_out] != 0} {
	fail "Job $job_id's output file ($file_out) did not get created"
}
set fini    0
set matches 0
spawn $bin_cat $file_out
expect {
	-re "FAILED" {
		incr matches
		exp_continue
	}
	-re "fini" {
		set fini 1
		exp_continue
	}
	timeout {
		log_error "/bin/cat not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$fini != 1} {
	log_error "Script never completed"
	set exit_code 1
}

#
# The helper runs once in the batch script plus once per iteration, hence
# iterations + 1 runs in total. A single failure is only reported as a
# warning; more than one fails the test.
#
if {$matches != 0} {
	set tot [expr {$iterations + 1}]
	if {$matches <= 1} {
		log_warn "$matches of $tot tests failed"
	} else {
		log_error "$matches of $tot tests failed"
		set exit_code 1
	}
	log_debug " This should happen infrequently, typically when"
	log_debug " JobAcctFrequency is set to a small value and is"
	log_debug " indicative of a non-checkpointable job. For details,"
	log_debug " see src/plugins/jobacct/linux/jobacct_linux.c"
	log_debug " To diagnose, use lsof to examine spawned job"
}

#
# Remove the generated files only on success so that they remain available
# for diagnosis after a failure.
#
if {$exit_code == 0} {
	exec $bin_rm -f $file_in $file_prog $file_out
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}