#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test of node selection from within a job step on existing allocation
#
# NOTE:    This assumes node names are of the form <prefix>#, where
#          the value of # indicates the node's relative location.
#          Change the node name parsing logic as needed for other formats.
############################################################################
# Copyright (C) 2013 SchedMD LLC
# Written by David Bigagli, SchedMD
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

set prompt "PROMPT: "

#
# Spawn an allocation to run on 2 hosts with 4 tasks.
#
set job_id 0
set salloc_id [spawn $salloc --gres=craynetwork:0 -N 2 -n 4 -t 1 $bin_bash]
expect {
    -re "Node count specification invalid|More processors requested|not available" {
        skip "Can't test srun task distribution"
    }
    -re "Unable to contact" {
        fail "Slurm appears to be down"
    }
    -re "Granted job allocation ($number)" {
        set job_id $expect_out(1,string)
        log_debug "Allocation started"
        send "export PS1=\"$prompt\"\r"
        exp_continue
    }
    -re "\"$prompt" {
        # Skip this, it is just the echo of setting the prompt
        exp_continue
    }
    -re "$prompt" {
    }
    timeout {
        slow_kill $salloc_id
        fail "salloc not responding"
    }
}
if {$job_id == 0} {
    fail "salloc failed to allocate a job"
}

#
# Use srun to print the nodelist allocated by Slurm.
#
set host_num 0
set host_list ""
send "$srun -l -n 1 -N 1 $bin_printenv SLURM_NODELIST\r"
expect {
    -re "($number): *($re_word_str)" {
        set host_num $expect_out(1,string)
        set host_list $expect_out(2,string)
        exp_continue
    }
    timeout {
        fail "srun not responding"
    }
    -re $prompt {
    }
}

#
# Convert the nodelist into a list of hostnames.
#
set count 0
set host1 ""
set host2 ""
send "$srun -l -n1 -N1 $scontrol show hostnames $host_list\r"
expect {
    -re "($number): *($re_word_str)" {
        if {$count == 0} {
            set host1 $expect_out(2,string)
            incr count
            exp_continue
        }
        if {$count == 1} {
            set host2 $expect_out(2,string)
            incr count
            exp_continue
        }
    }
    timeout {
        fail "scontrol not responding"
    }
    -re $prompt {
    }
}
log_debug "Got exec hosts $host1 $host2"

#
# Reset the spawn process to the initial allocation,
# then srun a first task.
#
set hostname ""
send "$srun -l -n 1 -N 1 $bin_printenv SLURM_STEP_NODELIST\r"
expect {
    -re "($number): *($re_word_str)" {
        set hostname $expect_out(2,string)
        exp_continue
    }
    -re $prompt {
    }
    timeout {
        fail "srun not responding"
    }
    eof {
        wait
    }
}

#
# Force the second task onto the second node and, at the end,
# make sure the task ran on the specified node.
#
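# "-w" (--nodelist) pins the step to the named host(s); with -N 1 -n 1 the
# single task should therefore land on $host2, and SLURM_STEP_NODELIST
# should report exactly that node.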
set hostname ""
send "$srun -l -n 1 -N 1 -w $host2 $bin_printenv SLURM_STEP_NODELIST\r"
expect {
    -re "($number): *($re_word_str)" {
        set hostname $expect_out(2,string)
        exp_continue
    }
    -re $prompt {
        send "exit\r"
    }
    timeout {
        fail "srun not responding"
    }
    eof {
        wait
    }
}

if {[string compare $host2 $hostname] != 0} {
    fail "Task ran on host $hostname rather than the requested host $host2"
}
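
#
# For reference, the same placement check can be reproduced by hand from a
# shell inside an allocation (a sketch; "node2" is a placeholder hostname):
#
#   $ salloc -N 2 -n 4 -t 1
#   $ srun -n 1 -N 1 -w node2 printenv SLURM_STEP_NODELIST
#   node2
#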