#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Verify node configuration specification (--constraint option).
############################################################################
# Portions Copyright (C) 2018 SchedMD LLC.
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
############################################################################
source ./globals

set exit_code   0
set file_in     "test$test_id.input"
set test_part   "test$test_id\_part"
set feat0       "test$test_id\_zero"
set feat1       "test$test_id\_one"
set feat2       "test$test_id\_two"
set feat3       "test$test_id\_three"
set feat4       "test$test_id\_four"

# nodes(0..3) holds the four node names used by the test; the def_* arrays
# hold each node's original features (keyed by node name) so that clean_up
# can restore them.
array set nodes {}
array set def_active_node_feat {}
array set def_avail_node_feat {}

#
# Private helper: return the NodeList reported by "scontrol show job" for
# job_id, or an empty string if no NodeList was seen.
# A scontrol timeout is logged and recorded in the global exit_code.
# The caller controls log_user; this proc does not change it.
#
proc _get_job_nodelist {job_id} {
	global exit_code scontrol re_word_str

	set nodelist ""
	spawn $scontrol show job $job_id
	expect {
		-re " NodeList=($re_word_str)" {
			set nodelist $expect_out(1,string)
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	return $nodelist
}

#
# Private helper: submit $file_in with sbatch to the test partition using
# the given constraint expression and node count.
#
# constraint    - value passed to sbatch -C
# nnode         - value passed to sbatch -N
# expect_reject - 1 if the submission is expected to be rejected,
#                 0 if it is expected to be accepted
# batch_feature - optional; when non-empty it is passed as --batch=<value>
#
# Returns the job id of an accepted job that the caller must check and
# cancel, or 0 otherwise. When expect_reject is 1 an accepted job is
# reported as an error and cancelled here, and 0 is returned. When
# expect_reject is 0 a missing job id is reported as an error.
# All errors are recorded in the global exit_code.
#
proc _submit_constraint_job {constraint nnode expect_reject {batch_feature ""}} {
	global exit_code sbatch test_part file_in number

	set job_id   0
	set rejected 0

	if {$batch_feature eq ""} {
		spawn $sbatch -C $constraint -p$test_part -N$nnode -t1 -o /dev/null $file_in
	} else {
		spawn $sbatch -C $constraint --batch=$batch_feature -p$test_part -N$nnode -t1 -o /dev/null $file_in
	}
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		-re "Requested node configuration is not available" {
			set rejected 1
			if {!$expect_reject} {
				log_error "Unexpected job submit failure"
				set exit_code 1
			}
			exp_continue
		}
		timeout {
			log_error "sbatch is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$expect_reject} {
		if {$rejected} {
			log_debug "This error is expected, no worries"
		}
		if {$job_id != 0} {
			log_error "Job should have been rejected, but was not"
			cancel_job $job_id
			set exit_code 1
		}
		return 0
	}

	if {$job_id == 0} {
		log_error "Job submit failure"
		set exit_code 1
	}
	return $job_id
}

#
# Verify whether a node is part of a job's allocation.
#
# test_job  - job id to inspect (the proc first waits for it to be RUNNING)
# node_name - node name, used as a regular expression against the expanded
#             host list of the job
# find_node - 1 if the node must be in the allocation, 0 if it must not be
#
# A mismatch or an unresponsive scontrol sets the global exit_code to 1.
# The job is left running unless waiting for it failed.
#
proc check_job_nodes {test_job node_name find_node} {
	global exit_code scontrol bin_bash

	if {[wait_for_job $test_job "RUNNING"] != 0} {
		log_error "Error waiting for job $test_job to start"
		cancel_job $test_job
		set exit_code 1
	}

	# Look up the nodes the job was given (quietly)
	log_user 0
	set nodelist [_get_job_nodelist $test_job]
	log_user 1

	set match 0
	spawn $bin_bash -c "$scontrol show hostnames $nodelist"
	expect {
		-re "$node_name" {
			set match 1
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$match != $find_node} {
		log_error "Invalid node found"
		set exit_code 1
	}
}

#
# Verify the number of allocated nodes that carry one of the test features,
# then cancel the job.
#
# nnode    - expected count of allocated nodes whose AvailableFeatures
#            value starts with feat0 or feat1
# test_job - job id to inspect (the proc first waits for it to be RUNNING)
#
# A wrong count or an unresponsive scontrol sets the global exit_code to 1.
#
proc check_job {nnode test_job} {
	global exit_code scontrol bin_bash bin_grep feat0 feat1

	if {[wait_for_job $test_job "RUNNING"] != 0} {
		log_error "Error waiting for job $test_job to start"
		cancel_job $test_job
		set exit_code 1
	}

	# Look up the nodes the job was given
	set nodelist [_get_job_nodelist $test_job]

	set node_cnt 0
	spawn $bin_bash -c "$scontrol show node $nodelist | $bin_grep AvailableFeatures"
	expect {
		-re "AvailableFeatures=($feat0|$feat1)" {
			incr node_cnt
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$node_cnt != $nnode} {
		log_error "sbatch bad node count ($node_cnt != $nnode)"
		set exit_code 1
	}

	cancel_job $test_job
}

#
# Test the AND operator.
# fail_test=1: feat1&feat3 must be rejected.
# fail_test=0: feat2&feat3 must be accepted and nodes(3) must be allocated.
#
proc sub_job_and {nnode fail_test} {
	global nodes feat1 feat2 feat3

	if {$fail_test} {
		_submit_constraint_job "$feat1&$feat3" $nnode 1
		return
	}

	set test_job [_submit_constraint_job "$feat2&$feat3" $nnode 0]
	if {$test_job != 0} {
		# Make sure nodes(3) included in allocation
		check_job_nodes $test_job $nodes(3) 1
		check_job $nnode $test_job
	}
}

#
# Test the --batch option: submit with constraint feat0|feat3 and
# --batch=feat3, then verify that BatchHost of the running job is nodes(3).
#
proc sub_batch_host {nnode} {
	global exit_code scontrol nodes feat0 feat3

	set test_job [_submit_constraint_job "$feat0|$feat3" $nnode 0 $feat3]
	if {$test_job == 0} {
		return
	}

	if {[wait_for_job $test_job "RUNNING"] != 0} {
		log_error "Waiting for job to start"
		set exit_code 1
	} else {
		set match 0
		spawn $scontrol show job $test_job
		expect {
			-re "BatchHost=$nodes(3)" {
				set match 1
				exp_continue
			}
			timeout {
				log_error "scontrol is not responding"
				set exit_code 1
			}
			eof {
				wait
			}
		}
		if {$match != 1} {
			log_error "Invalid BatchHost value"
			set exit_code 1
		}
	}
	cancel_job $test_job
}

#
# Test the feature count operator.
# fail_test=1: feat2*nnode must be rejected.
# fail_test=0: feat2*2 must be accepted and nodes(2) and nodes(3) must be
#              allocated.
#
proc sub_job_count {nnode fail_test} {
	global nodes feat2

	if {$fail_test} {
		_submit_constraint_job "$feat2*$nnode" $nnode 1
		return
	}

	set test_job [_submit_constraint_job "$feat2*2" $nnode 0]
	if {$test_job != 0} {
		# Make sure nodes(2 and 3) included in allocation
		check_job_nodes $test_job $nodes(2) 1
		check_job_nodes $test_job $nodes(3) 1
		check_job $nnode $test_job
	}
}

#
# Test the OR operator with feat0|feat1.
# fail_test=1: the submission must be rejected.
# fail_test=0: the submission must be accepted with nnode matching nodes.
#
proc sub_job_or {nnode fail_test} {
	global feat0 feat1

	if {$fail_test} {
		_submit_constraint_job "$feat0|$feat1" $nnode 1
		return
	}

	set test_job [_submit_constraint_job "$feat0|$feat1" $nnode 0]
	if {$test_job != 0} {
		check_job $nnode $test_job
	}
}

#
# Test OR expressions inside parentheses. Four jobs are submitted in turn,
# all of which are expected to be accepted:
#   (feat0|feat1)        with nnode nodes
#   (feat0|feat2)        with 3 nodes: nodes 0, 2, 3 expected
#   (feat1|feat3)        with 2 nodes: nodes 1, 3 expected
#   feat0&(feat2|feat3)  with 2 nodes: nodes 2, 3 expected
#
# fail_test is accepted for signature compatibility with the other sub_job_*
# procs but is not used.
#
proc sub_job_or_paren {nnode fail_test} {
	global nodes feat0 feat1 feat2 feat3

	# (feat0|feat1)
	set test_job [_submit_constraint_job "($feat0|$feat1)" $nnode 0]
	if {$test_job != 0} {
		check_job $nnode $test_job
	}

	# (feat0|feat2)
	set nnode 3
	set test_job [_submit_constraint_job "($feat0|$feat2)" $nnode 0]
	if {$test_job != 0} {
		foreach idx {0 1 2 3} expected {1 0 1 1} {
			check_job_nodes $test_job $nodes($idx) $expected
		}
		check_job $nnode $test_job
	}

	# (feat1|feat3)
	set nnode 2
	set test_job [_submit_constraint_job "($feat1|$feat3)" $nnode 0]
	if {$test_job != 0} {
		foreach idx {0 1 2 3} expected {0 1 0 1} {
			check_job_nodes $test_job $nodes($idx) $expected
		}
		check_job $nnode $test_job
	}

	# feat0&(feat2|feat3)
	set nnode 2
	set test_job [_submit_constraint_job "$feat0&($feat2|$feat3)" $nnode 0]
	if {$test_job != 0} {
		foreach idx {0 1 2 3} expected {0 0 1 1} {
			check_job_nodes $test_job $nodes($idx) $expected
		}
		check_job $nnode $test_job
	}
}

#
# Test the eXclusive AND (XAND) operator.
# fail_test=1: [(feat0&feat3)*2&feat1*2] must be rejected.
# fail_test=0: [(feat0&feat2)*2&feat1*1] must be accepted and nodes(1),
#              nodes(2) and nodes(3) must be allocated.
#
proc sub_job_xand {nnode fail_test} {
	global nodes feat0 feat1 feat2 feat3

	if {$fail_test} {
		_submit_constraint_job "\[($feat0&$feat3)*2&$feat1*2\]" $nnode 1
		return
	}

	set test_job [_submit_constraint_job "\[($feat0&$feat2)*2&$feat1*1\]" $nnode 0]
	if {$test_job != 0} {
		# Make sure nodes(1, 2 and 3) included in allocation
		check_job_nodes $test_job $nodes(1) 1
		check_job_nodes $test_job $nodes(2) 1
		check_job_nodes $test_job $nodes(3) 1
		check_job $nnode $test_job
	}
}

#
# Test the eXclusive OR (XOR) operator with [feat0|feat1].
# fail_test=1: the submission must be rejected.
# fail_test=0: the submission must be accepted and nodes(1) must not be
#              allocated.
#
proc sub_job_xor {nnode fail_test} {
	global nodes feat0 feat1

	if {$fail_test} {
		_submit_constraint_job "\[$feat0|$feat1\]" $nnode 1
		return
	}

	set test_job [_submit_constraint_job "\[$feat0|$feat1\]" $nnode 0]
	if {$test_job != 0} {
		# Make sure nodes(1) not included in allocation
		check_job_nodes $test_job $nodes(1) 0
		check_job $nnode $test_job
	}
}

#
# Test a mix of AND and XOR with feat4&[feat1|feat0].
# fail_test=1: the submission must be rejected.
# fail_test=0: the submission must be accepted and nodes(1) must not be
#              allocated.
#
proc sub_job_and_xor {nnode fail_test} {
	global nodes feat0 feat1 feat4

	if {$fail_test} {
		_submit_constraint_job "$feat4&\[$feat1|$feat0\]" $nnode 1
		return
	}

	set test_job [_submit_constraint_job "$feat4&\[$feat1|$feat0\]" $nnode 0]
	if {$test_job != 0} {
		# Make sure nodes(1) not included in allocation
		check_job_nodes $test_job $nodes(1) 0
		check_job $nnode $test_job
	}
}

#
# Set both AvailableFeatures and ActiveFeatures of node_name to new_feature
# (a comma separated feature list) using scontrol.
#
proc set_node_feature {node_name new_feature} {
	global scontrol exit_code

	spawn $scontrol update node=$node_name AvailableFeatures=$new_feature ActiveFeatures=$new_feature
	expect {
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
}

#
# Delete the test partition and restore the saved available/active features
# of every node recorded in the nodes array.
#
proc clean_up {} {
	global test_part scontrol def_active_node_feat def_avail_node_feat nodes exit_code

	spawn $scontrol delete partition=$test_part
	expect {
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	# Reset nodes back to normal
	foreach option [array names nodes] {
		set node $nodes($option)
		spawn $scontrol update node=$node AvailableFeatures=$def_avail_node_feat($node) ActiveFeatures=$def_active_node_feat($node)
		expect {
			timeout {
				log_error "scontrol is not responding"
				set exit_code 1
			}
			eof {
				wait
			}
		}
	}
}

#
# Delete left-over input script
# Build input script file
#
exec $bin_rm -f $file_in
make_bash_script $file_in "
  $bin_sleep 5
"

#
# Submit a job with invalid constraint requirement
#
log_info "==========Test invalid constraints=========="
set err_msg 0
set timeout $max_job_delay
spawn $sbatch -N1 --constraint=invalid,constraint -t1 -o /dev/null $file_in
expect {
	-re "error:.*Invalid feature specification" {
		log_debug "This error is expected, no worries"
		set err_msg 1
		exp_continue
	}
	-re "Submitted batch job ($number)" {
		log_error "sbatch job submitted with invalid constraint"
		cancel_job $expect_out(1,string)
		set exit_code 1
		exp_continue
	}
	timeout {
		# Do not exp_continue here: a hung sbatch would loop forever
		log_error "sbatch not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$err_msg != 1} {
	log_error "Job failed to report required error"
	set exit_code 1
}

# Must be root and no NodeFeaturesPlugin to proceed
if {[test_super_user] == 0 || [string compare [test_node_features_plugin] ""]} {
	exec $bin_rm -f $file_in
	skip "Configuration not compatible with additional tests"
}

#
# Run job to get available nodes
#
log_user 0
set job_id 0
set srun_pid [spawn $srun -N1-4 -t1 -l printenv SLURM_JOB_ID]
expect {
	-re "\[0-3\]: ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		log_error "srun is not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	log_user 1
	fail "Job submission failed"
}

set nodelist [_get_job_nodelist $job_id]

# Expand the node list into nodes(0), nodes(1), ...
set i 0
spawn $scontrol show hostnames $nodelist
expect {
	-re "($re_word_str)" {
		set nodes($i) $expect_out(1,string)
		incr i
		exp_continue
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
log_user 1

if {$i != 4} {
	exec $bin_rm -f $file_in
	skip "There are not enough nodes to run the remaining tests ($i != 4)"
}

set node_save 0
foreach option [array names nodes] {
	# Save the original features that are on the node.
	log_user 0
	set node $nodes($option)
	set def_active_node_feat($node) ""
	set def_avail_node_feat($node) ""
	spawn $bin_bash -c "$scontrol show node $node | $bin_grep Features"
	expect {
		-re "AvailableFeatures=($re_word_str)" {
			set def_avail_node_feat($node) $expect_out(1,string)
			incr node_save
			exp_continue
		}
		-re "ActiveFeatures=($re_word_str)" {
			set def_active_node_feat($node) $expect_out(1,string)
			incr node_save
			exp_continue
		}
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	log_user 1
}

# Set node features for testing
log_info "==========Set node features to test-specific values for testing=========="
set_node_feature $nodes(0) $feat0
set_node_feature $nodes(1) $feat1
set_node_feature $nodes(2) $feat0,$feat2,$feat4
set_node_feature $nodes(3) $feat0,$feat2,$feat3,$feat4

# Create a test partition
spawn $scontrol create partition=$test_part \
	nodes=$nodes(0),$nodes(1),$nodes(2),$nodes(3)
expect {
	-re "Error" {
		log_error "scontrol returned an error when creating a partition"
		set exit_code 1
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Test feature count logic with 2 nodes
log_info "==========Test feature count 2 nodes=========="
sub_job_count 2 0

# Test feature count logic with 3 nodes
log_info "==========Test feature count 3 nodes=========="
sub_job_count 3 0

# Test feature count logic with 3 nodes (This is expected to fail)
log_info "==========Test feature count 3 nodes with failure=========="
sub_job_count 3 1

# Test AND with 1 node
log_info "==========Test AND 1 node=========="
sub_job_and 1 0

# Test AND with 1 node (This is expected to fail)
log_info "==========Test AND 1 node with failure=========="
sub_job_and 1 1

# Test eXclusive AND (XAND) with 3 nodes
log_info "==========Test XAND 3 nodes=========="
sub_job_xand 3 0

# Test eXclusive AND (XAND) with 3 nodes (This is expected to fail)
log_info "==========Test XAND 3 nodes with failure=========="
sub_job_xand 3 1

# Test eXclusive OR (XOR) with 2 nodes
log_info "==========Test XOR 2 nodes=========="
sub_job_xor 2 0

# Test eXclusive OR (XOR) with 3 nodes
log_info "==========Test XOR 3 nodes=========="
sub_job_xor 3 0

# Test eXclusive OR (XOR) with 4 nodes (This is expected to fail)
log_info "==========Test XOR 4 nodes with failure=========="
sub_job_xor 4 1

# Test OR with 4 nodes
log_info "==========Test OR 4 nodes=========="
sub_job_or 4 0

# Test OR within parenthesis
log_info "==========Test OR within parenthesis=========="
sub_job_or_paren 4 0

# Test mixed AND and XOR with 2 nodes
log_info "==========Test mix of AND and XOR=========="
sub_job_and_xor 2 0

# Test mixed AND and XOR with 3 nodes (This is expected to fail)
log_info "==========Test mix of AND and XOR=========="
sub_job_and_xor 3 1

# Test batch host
if {[test_front_end] == 0} {
	log_info "==========Test batch host=========="
	sub_batch_host 3
}

# Reset node features and remove test part
log_debug "==========Clean Up=========="
clean_up

if {$exit_code == 0} {
	exec $bin_rm -f $file_in
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}