#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate Slurm debugger infrastructure (--debugger-test option).
############################################################################
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

# exit_code - set to 1 when an error is logged; checked at the end
# task_cnt  - number of tasks requested from srun (-n)
# resp_cnt  - number of " host:" responses seen in the srun output
# timed_out - set to 1 once "timelimit" is seen in the srun output
set exit_code   0
set task_cnt    4
set resp_cnt    0
set timed_out   0

#
# Cannot run the test if OverTimeLimit is set, since we test time limits.
#
set overtimelim [get_over_time_limit]
if {$overtimelim != 0} {
	skip "Cannot run this test when OverTimeLimit is set. Exiting now."
}

#
# Submit a slurm job that will execute 'id' on node_cnt nodes and over
# task_cnt tasks
#
# Timeout is max_job_delay (to spawn task) +
#		60 (job time limit) +
#		60 (slurmctld time limit check poll interval) +
#		KillWait (120 secs on BlueGene per IBM recommendation)
#
# The expression is braced so that Tcl substitutes the variable exactly
# once and can byte-compile the arithmetic.
#
set timeout [expr {$max_job_delay + 60 + 60 + 120}]
set node_cnt 1-2

set srun_pid [spawn $srun -N$node_cnt -n$task_cnt --overcommit --debugger-test -t1 $bin_id]
expect {
	-re "uid=" {
		# Output from 'id' means a task actually ran, which this test
		# treats as a failure.
		log_error "Task not stopped"
		set exit_code 1
		exp_continue
	}
	-re " host:" {
		# Count the " host:" responses; the total is compared with
		# task_cnt once srun is done.
		incr resp_cnt
		exp_continue
	}
	-re "timelimit" {
		# The job is submitted with a one minute limit (-t1), so hitting
		# the limit is tolerated and only noted here.
		set timed_out 1
		exp_continue
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		# Reap the spawned srun process.
		wait
	}
}

if {$timed_out == 1} {
	log_debug "Early termination is expected, no worries"
}

if {$task_cnt != $resp_cnt} {
	fail "Did not get proper number of tasks: $task_cnt, $resp_cnt"
}

if {$exit_code != 0} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}