#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test of cpus-per-task option on a single node (--cpus-per-task
#          option).
#
# NOTE:    This assumes node names are of the form <prefix>#, where
#          the value of # indicates the node's relative location.
#          Change the node name parsing logic as needed for other formats.
############################################################################
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

set exit_code   0
set file_in     "test$test_id.bash"

if { [test_front_end] } {
	skip "This test is incompatible with FRONT_END systems"
}

# Various configurations allocate nodes, sockets, cores, cpus or threads;
# not all of which are compatible with this test
if {[test_linear]} {
	skip "This test is incompatible with select/linear"
}

#
# Submit a 1 node job to determine the node's CPU count
#
set core_spec_cnt    0
set cpu_cnt          0
set cpus_per_node    0
set host             ""
set timeout          $max_job_delay
set task_cnt         0
set threads_per_core 1
set units            ""

# Find the nodes in the default partition
log_user 0
set def_hostlist ""
set part_exclusive 0
set part_oversubscribe_force 0
spawn $scontrol show part [default_partition]
expect {
	-re " Nodes=($re_word_str)" {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " BasePartitions=($re_word_str)" {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " OverSubscribe=EXCLUSIVE" {
		set part_exclusive 1
		exp_continue
	}
	-re " OverSubscribe=FORCE" {
		set part_oversubscribe_force 1
		exp_continue
	}
	timeout {
		log_error "scontrol not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$part_oversubscribe_force != 0} {
	skip "This test is incompatible with forced sharing of nodes"
}

# Add sleep for any epilog clean up of previous jobs
sleep 2

set fd [open "|$scontrol --oneliner show node $def_hostlist"]
exp_internal 1
while {[gets $fd line] != -1} {
	if {[regexp {NodeName=([^\s]+).*CPUTot=(\d+)(K?).*CoreSpecCount=(\d+).*State=IDLE ThreadsPerCore=(\d+)} $line frag host cpu_cnt units core_spec_cnt threads_per_core] == 1} {
		break
	}
	if {[regexp {NodeName=([^\s]+).*CPUTot=(\d+)(K?).*State=IDLE ThreadsPerCore=(\d+)} $line frag host cpu_cnt units threads_per_core] == 1} {
		break
	}
}
exp_internal 0
if {[string compare $units ""]} {
	set cpu_cnt [expr $cpu_cnt * 1024]
}
catch {close $fd}
log_user 1

if {!$cpu_cnt} {
	fail "Couldn't find an idle node in the default partition"
}
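
# NOTE (illustrative, not part of the original test): the regexps above assume
# "scontrol --oneliner show node" output roughly of the form
#   NodeName=tux01 ... CPUTot=16 ... CoreSpecCount=0 ... State=IDLE ThreadsPerCore=2 ...
# (hypothetical node name and values), with the fields in that relative order.
# Adjust the parsing above if your scontrol output format differs.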

#
# Check CPU count
#
if {[string compare $host ""] == 0} {
	fail "Did not get hostname from idle node"
}
log_debug "Found idle node $host with $cpu_cnt processors"

set srun_pid [spawn $srun -N1 -w$host --cpus-per-task=1 --exclusive -l -t1 $bin_printenv SLURM_PROCID]
expect {
	-re "($number):" {
		incr task_cnt
		exp_continue
	}
	-re "Task count specification invalid" {
		skip "Nodes have too many CPUs for test"
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

# Convert CPU count to core count if necessary
if {$core_spec_cnt != 0} {
	set cpu_cnt [expr $cpu_cnt - ($core_spec_cnt * $threads_per_core)]
}
if {$cpu_cnt != $task_cnt} {
	set core_cnt $cpu_cnt
	if {$threads_per_core != 0} {
		set cpu_cnt [expr $cpu_cnt * $threads_per_core]
	}
	if {$cpu_cnt == $task_cnt} {
		log_debug "Allocated $core_cnt cores and $threads_per_core CPUs per core"
	}
}
if {$cpu_cnt != $task_cnt} {
	log_error "Should have run $cpu_cnt tasks (one per core) instead of $task_cnt tasks. This could be due to memory limit per allocated CPU, MaxCPUsPerNode, CR_ONE_TASK_PER_CORE, or OverSubscribe=FORCE"
	set exit_code 1
}
if {$cpu_cnt < 2} {
	skip "The node only has one core"
}

#
# Now verify the --cpus-per-task option
#
set task_cnt 0
set srun_pid [spawn $srun -N1 --cpus-per-task=2 --exclusive -l -t1 --nodelist=$host $bin_printenv SLURM_PROCID]
expect {
	-re "Invalid node name specified" {
		skip "Appears you are using multiple slurmd testing. This test won't work in that fashion"
	}
	-re "$number: *($re_word_str)" {
		incr task_cnt
		exp_continue
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

#
# Verify that half the number of tasks were spawned
#
if {$task_cnt != [expr $cpu_cnt / 2]} {
	log_error "Improper task count for given cpus-per-task. This could be due to memory limit per allocated CPU or OverSubscribe=FORCE"
	set exit_code 1
}

if {$exit_code != 0} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}