#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test of sinfo cpu total and allocated
############################################################################
# Copyright (C) 2009 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Joseph Donaghy
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set exit_code 0

# Expand a count that sinfo/scontrol abbreviated with a trailing "K".
#
# value  - the numeric part that was matched
# suffix - the text matched by "(K?)": either "K" or the empty string
#
# Returns value * 1024 when a suffix is present, otherwise value unchanged.
proc _scale_k_count { value suffix } {
	if {$suffix ne ""} {
		return [expr {$value * 1024}]
	}
	return $value
}

# Run "scontrol show node" inside the already-spawned shell and verify that
# every reported CPUAlloc value equals proc_cnt.
#
# node     - name of the node to query
# proc_cnt - number of CPUs expected to be allocated on that node
#
# Uses the caller's spawn_id (and mypid, when the caller has one) through
# upvar. Returns 0 on success, 1 on mismatch, missing output or timeout.
proc scontrol_test { node proc_cnt } {
	global scontrol number prompt
	upvar spawn_id spawn_id
	# mypid lives in the calling proc; link it so the timeout branch can
	# kill the spawned process instead of failing on an unknown variable.
	upvar mypid mypid

	set found 0
	set rc 0

	send "$scontrol show node $node\r"
	expect {
		-re "CPUAlloc=($number)" {
			set num_alloc $expect_out(1,string)
			set found 1
			if {$proc_cnt != $num_alloc} {
				log_error "Requested $proc_cnt but got $num_alloc instead"
				set rc 1
			}
			exp_continue
		}
		-re $prompt {
		}
		timeout {
			log_error "scontrol not responding"
			if {[info exists mypid]} {
				slow_kill $mypid
			}
			return 1
		}
		eof {
			wait
		}
	}

	if {!$found} {
		log_error "Didn't get expected output from scontrol"
		set rc 1
	}

	return $rc
}

# Run sinfo with the "%C %A %N" format inside the already-spawned shell and
# verify the allocated, idle and total CPU counts reported for the node.
#
# node        - name of the node to query
# proc_cnt    - expected number of allocated CPUs
# total_procs - expected total number of CPUs
# idle_cpus   - expected number of idle CPUs
#
# Uses the caller's spawn_id (and mypid, when the caller has one) through
# upvar. Returns 0 on success, 1 on mismatch, missing output or timeout.
proc sinfo_test_1 { node proc_cnt total_procs idle_cpus } {
	global float number prompt sinfo slash
	upvar spawn_id spawn_id
	upvar mypid mypid

	set found 0
	set rc 0

	send "$sinfo -o \"%C %A %N\" -h -n $node\r"
	# make sure we get by the sinfo command so we don't
	# think the %'s are a prompt
	expect {
		"%C %A %N" {
		}
	}
	expect {
		-re "($float)(K?)($slash)($float)(K?)($slash)($float)(K?)($slash)($float)(K?) ($number)($slash)($number) $node" {
			set found 1
			# Capture groups: 1/2 allocated, 4/5 idle, 10/11 total
			# (value followed by its optional K suffix).
			set num_alloc [_scale_k_count $expect_out(1,string) $expect_out(2,string)]
			set num_idle  [_scale_k_count $expect_out(4,string) $expect_out(5,string)]
			set num_total [_scale_k_count $expect_out(10,string) $expect_out(11,string)]

			if {$num_alloc != $proc_cnt} {
				log_error "sinfo 1 allocated cpus wrong, got $num_alloc but needed $proc_cnt"
				set rc 1
			} elseif {$num_idle != $idle_cpus} {
				log_error "sinfo 1 idle cpus wrong, got $num_idle but needed $idle_cpus"
				set rc 1
			} elseif {$num_total != $total_procs} {
				log_error "sinfo 1 total cpus wrong, got $num_total but needed $total_procs"
				set rc 1
			}
			exp_continue
		}
		-re $prompt {
		}
		timeout {
			log_error "sinfo not responding"
			if {[info exists mypid]} {
				slow_kill $mypid
			}
			return 1
		}
		eof {
			wait
		}
	}

	if {!$found} {
		log_error "Didn't get expected output from sinfo"
		set rc 1
	}

	return $rc
}

# Run sinfo with the "%t %D %N" format inside the already-spawned shell and
# verify that exactly one node is reported as allocated (state "alloc" or
# "mix") and none as idle.
#
# node        - name of the node to query
# proc_cnt    - unused, kept for interface compatibility
# total_procs - unused, kept for interface compatibility
#
# Uses the caller's spawn_id (and mypid, when the caller has one) through
# upvar. Returns 0 on success, 1 on mismatch or timeout.
proc sinfo_test_2 { node proc_cnt total_procs } {
	global sinfo number prompt
	upvar spawn_id spawn_id
	upvar mypid mypid

	set rc 0
	set num_alloc 0
	set num_idle 0
	set alloc_nodes 1
	set total_nodes 1
	set idle_nodes [expr {$total_nodes - $alloc_nodes}]

	send "$sinfo -o \"%t %D %N\" -h -n $node\r"
	# make sure we get by the sinfo command so we don't
	# think the %'s are a prompt
	expect {
		"%t %D %N" {
		}
	}
	expect {
		-re "(alloc|mix) ($number)(K?) $node" {
			# Scale the count that was actually matched; the original
			# multiplied an out-of-scope variable here.
			set num_alloc [_scale_k_count $expect_out(2,string) $expect_out(3,string)]
			exp_continue
		}
		-re "idle ($number)(K?) $node" {
			set num_idle [_scale_k_count $expect_out(1,string) $expect_out(2,string)]
			exp_continue
		}
		-re $prompt {
		}
		timeout {
			log_error "sinfo not responding"
			if {[info exists mypid]} {
				slow_kill $mypid
			}
			return 1
		}
		eof {
			wait
		}
	}

	if {$num_alloc != $alloc_nodes} {
		log_error "sinfo 2 allocated nodes wrong, got $num_alloc but needed $alloc_nodes"
		set rc 1
	} elseif {$num_idle != $idle_nodes} {
		log_error "sinfo 2 idle nodes wrong, got $num_idle but needed $idle_nodes"
		set rc 1
	}

	return $rc
}

# Allocate proc_cnt CPUs on one node with salloc, run the scontrol/sinfo
# checks from the shell inside the allocation, then leave the shell.
#
# node        - node to allocate on
# proc_cnt    - number of tasks requested from salloc (-n)
# total_procs - total number of CPUs on the node
#
# Returns 0 when all checks ran and passed, 1 otherwise. The generated
# shell script is removed on every return path.
proc allocate_and_quit { node proc_cnt total_procs } {
	global bin_bash salloc number prompt test_id bin_rm

	set file_in "test$test_id.input"
	set job_id 0
	set checked 0
	set rc 0
	# Local "timeout" is what expect consults while inside this proc.
	set timeout 60
	set idle_cpus [expr {$total_procs - $proc_cnt}]

	make_bash_script $file_in "
	  export PS1=\"$prompt\"
	  $bin_bash --norc
	"

	set mypid [spawn $salloc -w $node -N 1 -t 5 -n $proc_cnt ./$file_in]
	expect {
		-re "Granted job allocation ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		-re $prompt {
			if {!$checked} {
				set checked 1
				# Check scontrol's cpu count, then sinfo's cpu
				# count, then sinfo's node count; stop at the
				# first failure.
				if {[scontrol_test $node $proc_cnt] ||
				    [sinfo_test_1 $node $proc_cnt $total_procs $idle_cpus] ||
				    [sinfo_test_2 $node $proc_cnt $total_procs]} {
					set rc 1
				}
				send "exit\r"
			}
			# Keep reading until eof so the process is reaped.
			exp_continue
		}
		-re "Unable to contact" {
			log_error "Slurm appears to be down"
			set rc 1
			exp_continue
		}
		timeout {
			log_error "salloc not responding"
			if {$job_id != 0} {
				cancel_job $job_id
			}
			slow_kill $mypid
			set rc 1
		}
		eof {
			wait
		}
	}

	# If salloc ended without ever giving us a prompt, none of the checks
	# ran; reporting success would be a false pass.
	if {!$checked && !$rc} {
		log_error "salloc exited before a shell prompt appeared, nothing was verified"
		set rc 1
	}

	exec $bin_rm -f $file_in

	return $rc
}

############################################################################
# test starts here
############################################################################
if {[test_select_type_params "CR_ONE_TASK_PER_CORE"]} {
	skip "This test is incompatible SelectTypeParameters=CR_ONE_TASK_PER_CORE"
}

# find the default partition
set def_part [default_partition]

# find the nodes in the default partition
set def_node_name ""
spawn $sinfo -h -o "=%N=" -p $def_part
expect {
	-re "=($re_word_str).($number)-($number).=" {
		set def_node_name $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
}
if {$def_node_name eq ""} {
	skip "Node name format not supported for this test"
}

log_user 0
set def_hostlist ""
set part_exclusive 0
spawn $scontrol show part $def_part
expect {
	-re " Nodes=($re_word_str)" {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " BasePartitions=($re_word_str)" {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " OverSubscribe=EXCLUSIVE" {
		set part_exclusive 1
		exp_continue
	}
	timeout {
		log_error "scontrol not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

set host_cnt 0
spawn $scontrol show hostnames $def_hostlist
expect {
	-re "($re_word_str)" {
		set host_name($host_cnt) $expect_out(1,string)
		incr host_cnt
		exp_continue
	}
	timeout {
		log_error "scontrol not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
log_user 1
if {$host_cnt == 0} {
	fail "Could not find any nodes in default partition"
}

# find me an idle node in default partition
log_user 0
set inode_name ""
set inode_cores_per_socket 0
set inode_procs 0
set units ""
set inode_sockets 0
set inode_threads_per_core 0

# Take the first node whose State is IDLE or IDLE<any char>CLOUD.
set fd [open "|$scontrol --oneliner show node $def_hostlist"]
while {[gets $fd line] != -1} {
	if {[regexp {NodeName=(\w+).*CoresPerSocket=(\d+).*CPUTot=(\d+)(K?).*Sockets=(\d+).*State=IDLE(?:.CLOUD)? ThreadsPerCore=(\d+)} $line frag inode_name inode_cores_per_socket inode_procs units inode_sockets inode_threads_per_core] == 1} {
		break
	}
}
set inode_procs [_scale_k_count $inode_procs $units]
catch {close $fd}

log_user 1

if {!$inode_procs} {
	fail "Couldn't find an idle node in the default partition"
}

log_debug "Found idle node $inode_name with $inode_procs processors"

# figure out the select plugin we are using
if {[test_linear] || $part_exclusive == 1} {
	set smallest $inode_procs
} elseif {[test_select_type_params "CR_CPU"] ||
	  [test_select_type_params "NONE"] ||
	  [test_select_type_params "CR_CORE"]} {
	set smallest $inode_threads_per_core
} elseif {[test_select_type_params "CR_SOCKET"]} {
	set smallest [expr {$inode_cores_per_socket * $inode_threads_per_core}]
} else {
	# The original message interpolated a variable that was never set,
	# which made this fallback raise an error instead of warning.
	log_warn "Failed to parse SelectTypeParameters"
	set smallest $inode_procs
}

# Do not overwrite an error recorded by the scontrol queries above.
if {[allocate_and_quit $inode_name $smallest $inode_procs]} {
	set exit_code 1
}
if {!$exit_code && $smallest != $inode_procs} {
	# just to make sure we get a clean state we will sleep a bit
	sleep 1
	if {[allocate_and_quit $inode_name $inode_procs $inode_procs]} {
		set exit_code 1
	}
}

if {$exit_code != 0} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}