#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test accounting for GPU resources with various allocation options
############################################################################
# Copyright (C) 2018 SchedMD LLC
# Written by Morris Jette
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
############################################################################
source ./globals

set exit_code   0
set file_in1    "test$test_id.input1"
set file_in2    "test$test_id.input2"
set file_out    "test$test_id.output"

#
# Validate that sacct reports the expected GPU counts for a job, its batch
# step and (optionally) its step 0.
#
# job_id         - job to check; nothing is done when it is 0
# job_gpus       - GPU count expected for the job
# step_gpus      - GPU count expected for step 0; no step check if -1
# have_gpu_types - non-zero if typed "gres/gpu:<type>=N" records should be
#                  counted and validated
#
# Sets the global exit_code to 1 when a reported count is wrong.
#
# NOTE: AllocGRES and ReqGRES values for all steps (including batch step)
#	are reported based upon the job specification
#
proc test_acct { job_id job_gpus step_gpus have_gpu_types } {
	global re_word_str number
	global bin_cat bin_rm bin_grep exit_code file_out sacct

	if {$job_id == 0} {
		return
	}

	# Give accounting a moment before querying sacct
	sleep 2
	log_debug "Job $job_id Expected job GPUs:$job_gpus Step GPUs:$step_gpus"
	exec $bin_rm -f $file_out
	exec >$file_out $sacct --job=$job_id --parsable2 --start=today -o JobID,AllocGRES,ReqGRES,AllocTRES

	# Show the raw sacct output in the test log before parsing it quietly
	spawn $bin_cat $file_out
	expect {
		eof {
			wait
		}
	}

	log_user 0
	if {$step_gpus != -1} {
		set match 0
		spawn $bin_grep $job_id.0 $file_out
		expect {
			-re "gpu:($number)" {
				incr match
				if {$expect_out(1,string) != $job_gpus} {
					log_error "Bad step GPU count reported by sacct ($expect_out(1,string) != $job_gpus)"
					set exit_code 1
				}
				exp_continue
			}
			-re "gres/gpu=($number)" {
				incr match
				if {$expect_out(1,string) != $step_gpus} {
					log_error "Bad step GPU count reported by sacct ($expect_out(1,string) != $step_gpus)"
					set exit_code 1
				}
				exp_continue
			}
			-re "gres/gpu:($re_word_str)=($number)" {
				if {$have_gpu_types != 0} {
					incr match
					if {$expect_out(2,string) != $step_gpus} {
						log_error "Bad step GPU count reported by sacct ($expect_out(2,string) != $step_gpus)"
						set exit_code 1
					}
				}
				exp_continue
			}
			eof {
				wait
			}
		}
		if {$match != 3} {
			# NOTE(review): this is logged but does not set exit_code,
			# so a missing record does not fail the test - confirm intent
			log_error "Missing step GPU count report from sacct ($match != 3)"
		}
	}

	set match 0
	spawn $bin_grep $job_id.batch $file_out
	expect {
		-re "gpu:($number)" {
			incr match
			if {$expect_out(1,string) != $job_gpus} {
				log_error "Bad batch GPU count reported by sacct ($expect_out(1,string) != $job_gpus)"
				set exit_code 1
			}
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$match != 2} {
		# NOTE(review): logged only, exit_code is not set - confirm intent
		log_error "Missing batch GPU count report from sacct ($match != 2)"
	}

	set match 0
	spawn $bin_grep $job_id| $file_out
	expect {
		-re "gpu:($number)" {
			incr match
			if {$expect_out(1,string) != $job_gpus} {
				log_error "Bad job GPU count reported by sacct ($expect_out(1,string) != $job_gpus)"
				set exit_code 1
			}
			exp_continue
		}
		-re "gres/gpu=($number)" {
			incr match
			if {$expect_out(1,string) != $job_gpus} {
				log_error "Bad job GPU count reported by sacct ($expect_out(1,string) != $job_gpus)"
				set exit_code 1
			}
			exp_continue
		}
		-re "gres/gpu:($re_word_str)=($number)" {
			if {$have_gpu_types != 0} {
				incr match
				if {$expect_out(2,string) != $job_gpus} {
					log_error "Bad job GPU count reported by sacct ($expect_out(2,string) != $job_gpus)"
					set exit_code 1
				}
			}
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$match != 3} {
		# NOTE(review): logged only, exit_code is not set - confirm intent
		log_error "Missing job GPU count report from sacct ($match != 3)"
	}
	exec $bin_rm -f $file_out
	log_user 1
}

#
# Validate that the GPU count found in the "TRES=" field of a job output
# file matches the expected count.
#
# file_out - output file to read; fail is called if it never appears
# target   - expected GPU count
#
# The last matching "gres/gpu=N" or "gres/gpu:<type>=N" value in the file is
# the one compared. Sets the global exit_code to 1 on a mismatch.
#
proc test_out_file { file_out target } {
	global re_word_str number bin_cat exit_code

	if {[wait_for_file $file_out] != 0} {
		fail "No output file"
	}

	set match 0
	spawn $bin_cat $file_out
	expect {
		-re "TRES=.*,gres/gpu=($number)" {
			set match $expect_out(1,string)
			exp_continue
		}
		-re "TRES=.*,gres/gpu:($re_word_str)=($number)" {
			set match $expect_out(2,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$match != $target} {
		log_error "Failed to account for proper GPU count ($match != $target)"
		set exit_code 1
	}
}

#
# Submit ./$file_in1 with sbatch and wait for the job to reach DONE.
#
# sbatch_opts - list of sbatch options placed between the common
#               "--gres=craynetwork:0" prefix and the common
#               "-t1 -o <file_out> -J <name> <script>" suffix
#
# Returns the job ID. Calls fail if the job is not submitted or does not
# complete (the job is cancelled first in the latter case). Sets the global
# exit_code to 1 if sbatch does not respond before the timeout.
#
proc run_batch_job { sbatch_opts } {
	global sbatch number exit_code file_in1 file_out test_id

	set job_id 0
	# Build the command as a list so that eval runs it without re-parsing
	# the individual arguments
	set cmd [concat \
		[list spawn $sbatch --gres=craynetwork:0] \
		$sbatch_opts \
		[list -t1 -o $file_out -J "test$test_id" ./$file_in1]]
	eval $cmd
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "sbatch not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		fail "Job not submitted"
	}

	if {[wait_for_job $job_id "DONE"] != 0} {
		cancel_job $job_id
		fail "Job $job_id did not complete"
	}
	return $job_id
}

#
# Check that the configuration can support this test
#
if { [test_account_storage] == 0 } {
	skip "This test can't be run without a usable AccountStorageType"
}

set store_tres [string tolower [get_acct_store_tres]]
set store_gpu [string first "gres/gpu" $store_tres]
if {$store_gpu == -1} {
	skip "This test requires accounting for GPUs"
} elseif {[test_front_end]} {
	skip "This test is incompatible with front-end systems"
}

if {[test_cons_tres]} {
	log_debug "Valid configuration, using select/cons_tres"
} else {
	skip "This test is only compatible with select/cons_tres"
}

# Use at most two nodes
set nb_nodes [get_node_cnt_in_part]
log_debug "Default partition node count is $nb_nodes"
if {$nb_nodes > 2} {
	set nb_nodes 2
}
set gpu_cnt [get_highest_gres_count $nb_nodes "gpu"]
if {$gpu_cnt < 0} {
	fail "Error getting GPU count"
}
if {$gpu_cnt < 2} {
	skip "This test requires 2 or more GPUs in the default partition"
}

set node_name [get_nodes_by_request "--gres=gpu:1 -n1 -t1"]
if { [llength $node_name] != 1 } {
	skip "This test need to be able to submit jobs with at least --gres=gpu:1"
}
set cpus_per_node     [get_node_param $node_name "CPUTot"]
set sockets_per_node  [get_node_param $node_name "Sockets"]
set cpus_per_socket   [expr {$cpus_per_node / $sockets_per_node}]
set sockets_with_gpus [get_gpu_socket_count $gpu_cnt $sockets_per_node]

log_debug "GPUs per node is $gpu_cnt"
log_debug "Sockets with GPUs $sockets_with_gpus"
log_debug "Sockets per node is $sockets_per_node"
log_debug "CPUs per socket is $cpus_per_socket"
log_debug "CPUs per node is $cpus_per_node"

if {$cpus_per_node < 3} {
	skip "This test requires 3 or more CPUs per node in the default partition"
}

#
# Test --gpus-per-node option by job
#
make_bash_script $file_in1 "
	$scontrol -dd show job \${SLURM_JOBID} | grep gpu
	exit 0"

log_info "TEST 1: --gpus-per-node option by job"

set target [expr {$nb_nodes * 2}]
exec $bin_rm -f $file_out
set timeout $max_job_delay
set job_id [run_batch_job [list --gpus-per-node=2 -N$nb_nodes]]

if {[wait_for_file $file_out] != 0} {
	fail "No output file"
}

# Same parsing as test_out_file, but this first job is also used to detect
# whether typed GPU records are reported: have_gpu_types is set only when a
# typed record is seen before any untyped count has been recorded
set have_gpu_types 0
set match 0
spawn $bin_cat $file_out
expect {
	-re "TRES=.*,gres/gpu=($number)" {
		set match $expect_out(1,string)
		exp_continue
	}
	-re "TRES=.*,gres/gpu:($re_word_str)=($number)" {
		if {$match == 0} {
			set have_gpu_types 1
			set match $expect_out(2,string)
		}
		exp_continue
	}
	eof {
		wait
	}
}
if {$match != $target} {
	log_error "Failed to account for proper GPU count ($match != $target)"
	set exit_code 1
}
test_acct $job_id $target -1 $have_gpu_types

#
# Test --gpus option by job
#
log_info "TEST 2: --gpus option by job"

exec $bin_rm -f $file_out
# Three GPUs are available with two nodes (each has at least two GPUs),
# or with a single node that has at least three GPUs
if {$nb_nodes >= 2 || $gpu_cnt >= 3} {
	set target 3
} else {
	set target 2
}
set job_id [run_batch_job [list --gpus=$target -N$nb_nodes]]
test_out_file $file_out $target
test_acct $job_id $target -1 $have_gpu_types

#
# Test --gpus-per-task option by job
#
log_info "TEST 3: --gpus-per-task option by job"

exec $bin_rm -f $file_out
if {$cpus_per_node >= 2 && $nb_nodes >= 2} {
	set nb_tasks 3
} elseif {$cpus_per_node >= 2 || $nb_nodes >= 2} {
	set nb_tasks 2
} else {
	set nb_tasks 1
}
set job_id [run_batch_job [list --gpus-per-task=1 -N$nb_nodes -n$nb_tasks]]
test_out_file $file_out $nb_tasks
test_acct $job_id $nb_tasks -1 $have_gpu_types

#
# Test --gpus-per-socket option by job
#
log_info "TEST 4: --gpus-per-socket option by job"

exec $bin_rm -f $file_out
if {$sockets_with_gpus >= 2} {
	set nb_sockets 2
	set cpus_per_task $cpus_per_socket
} else {
	set nb_sockets 1
	set cpus_per_task 1
}
set job_id [run_batch_job [list --gpus-per-socket=1 -N$nb_nodes \
	--ntasks=$nb_nodes --sockets-per-node=$nb_sockets \
	--cpus-per-task=$cpus_per_task]]
set target [expr {$nb_nodes * $nb_sockets}]
test_out_file $file_out $target
test_acct $job_id $target -1 $have_gpu_types

#
# Test --gpus-per-node option by step
#
# From here on the batch script launches a job step with srun, and task 0
# of that step prints the step record
#
make_bash_script $file_in1 "
	$srun ./$file_in2
	exit 0"

make_bash_script $file_in2 "
	if \[ \$SLURM_PROCID -eq 0 \]; then
		$scontrol show step \${SLURM_JOBID}.\${SLURM_STEPID}
	fi
	exit 0"

log_info "TEST 5: --gpus-per-node option by step"

set target [expr {$nb_nodes * 2}]
exec $bin_rm -f $file_out
set timeout $max_job_delay
set job_id [run_batch_job [list --gpus-per-node=2 -N$nb_nodes]]
test_out_file $file_out $target
test_acct $job_id $target $target $have_gpu_types

#
# Test --gpus option by step
#
log_info "TEST 6: --gpus option by step"

exec $bin_rm -f $file_out
# Same GPU count selection as for TEST 2
if {$nb_nodes >= 2 || $gpu_cnt >= 3} {
	set target 3
} else {
	set target 2
}
set job_id [run_batch_job [list --gpus=$target -N$nb_nodes]]
test_out_file $file_out $target
test_acct $job_id $target $target $have_gpu_types

#
# Test --gpus-per-task option by step
#
log_info "TEST 7: --gpus-per-task option by step"

exec $bin_rm -f $file_out
if {$cpus_per_node >= 2 && $nb_nodes >= 2} {
	set nb_tasks 3
} elseif {$cpus_per_node >= 2 || $nb_nodes >= 2} {
	set nb_tasks 2
} else {
	set nb_tasks 1
}
set job_id [run_batch_job [list --gpus-per-task=1 -N$nb_nodes -n$nb_tasks]]
test_out_file $file_out $nb_tasks
test_acct $job_id $nb_tasks $nb_tasks $have_gpu_types

#
# Test --gpus-per-socket option by step
#
log_info "TEST 8: --gpus-per-socket option by step"

exec $bin_rm -f $file_out
if {$sockets_with_gpus >= 2} {
	set nb_sockets 2
	set cpus_per_task $cpus_per_socket
} else {
	set nb_sockets 1
	set cpus_per_task 1
}
set job_id [run_batch_job [list --gpus-per-socket=1 -N$nb_nodes \
	--ntasks=$nb_nodes --sockets-per-node=$nb_sockets \
	--cpus-per-task=$cpus_per_task]]
set target [expr {$nb_nodes * $nb_sockets}]
test_out_file $file_out $target
test_acct $job_id $target $target $have_gpu_types

#
# Test --gpus-per-task option by step, with the step's node and task counts
# given explicitly to srun so that the step can use fewer tasks than the job
#
log_info "TEST 9: --gpus-per-task option by step"

exec $bin_rm -f $file_out
if {$cpus_per_node >= 2 && $nb_nodes >= 2} {
	set job_tasks 3
	set step_tasks 2
} elseif {$cpus_per_node >= 2 || $nb_nodes >= 2} {
	set job_tasks 2
	set step_tasks 2
} else {
	set job_tasks 1
	set step_tasks 1
}
if {$nb_nodes >= 2} {
	set step_nodes 2
} else {
	set step_nodes 1
}

make_bash_script $file_in1 "
	$srun -N$step_nodes -n$step_tasks ./$file_in2
	exit 0"

set job_id [run_batch_job [list --gpus-per-task=1 -N$nb_nodes -n$job_tasks]]
test_out_file $file_out $step_tasks
test_acct $job_id $job_tasks $step_tasks $have_gpu_types

if {$exit_code == 0} {
	exec $bin_rm -f $file_in1 $file_in2 $file_out
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}