#!/usr/bin/env expect
############################################################################
# Purpose: Test of node feature changes with reconfiguration.
############################################################################
# Copyright (C) 2018 SchedMD LLC
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set exit_code   0
set feature     "test$test_id"
set file_in     "test$test_id.input"

if { ![test_super_user] } {
	skip "This test can't be run without being a super user of the cluster"
}

#
# Update the available and active feature lists of a node by running
# "scontrol update node=...".
#
# node_name          - name of the node to update
# new_avail_feature  - value passed as AvailableFeatures= (may be empty)
# new_active_feature - value passed as ActiveFeatures= (may be empty)
#
# Returns nothing. If scontrol does not finish before the expect timeout,
# an error is logged and the global exit_code is set to 1.
#
proc set_node_feature {node_name new_avail_feature new_active_feature} {
	global scontrol exit_code

	spawn $scontrol update node=$node_name AvailableFeatures=$new_avail_feature ActiveFeatures=$new_active_feature
	expect {
		timeout {
			log_error "scontrol is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
}

#
# Identify a node to use: run a one-node exclusive job that prints its own
# NodeList and capture the node name from the output.
#
set match 0
exec $bin_rm -f $file_in
make_bash_script $file_in "
$scontrol show job \$SLURM_JOB_ID | grep ' NodeList='
"
set timeout $max_job_delay
set srun_pid [spawn $srun -t1 --exclusive -N1 $file_in]
expect {
	-re "NodeList=($re_word_str)" {
		set node_name $expect_out(1,string)
		set match 1
		exp_continue
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match == 0} {
	fail "Could not run job in exclusive node"
}

#
# Save the node's current features.
# A captured value of "(null)" is treated as "no features". The check is
# made against the captured text itself; each branch only writes its own
# variable (available vs. active).
#
set match 0
spawn $scontrol show node $node_name
expect {
	-re "AvailableFeatures=($re_word_str)" {
		set orig_avail_node_feat $expect_out(1,string)
		if {[string equal $orig_avail_node_feat "(null)"]} {
			set orig_avail_node_feat ""
		}
		incr match
		exp_continue
	}
	-re "AvailableFeatures=" {
		set orig_avail_node_feat ""
		incr match
		exp_continue
	}
	-re "ActiveFeatures=($re_word_str)" {
		set orig_active_node_feat $expect_out(1,string)
		if {[string equal $orig_active_node_feat "(null)"]} {
			set orig_active_node_feat ""
		}
		incr match
		exp_continue
	}
	-re "ActiveFeatures=" {
		set orig_active_node_feat ""
		incr match
		exp_continue
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 2} {
	fail "Could not determine current features for node $node_name"
}

#
# Set new feature names to use: the original list (if any) with the test
# feature appended. The active and available lists are each derived from
# their own original value.
#
if {[string length $orig_active_node_feat] == 0} {
	set new_active_node_feat $feature
} else {
	set new_active_node_feat $orig_active_node_feat
	append new_active_node_feat "," $feature
}
if {[string length $orig_avail_node_feat] == 0} {
	set new_avail_node_feat $feature
} else {
	set new_avail_node_feat $orig_avail_node_feat
	append new_avail_node_feat "," $feature
}
set_node_feature $node_name $new_avail_node_feat $new_active_node_feat

#
# Now run a job with that constraint and make sure it runs on the same node
#
set match 0
set srun_pid [spawn $srun -t1 -C $feature -N1 $file_in]
expect {
	-re "NodeList=$node_name" {
		set match 1
		exp_continue
	}
	-re "Unable to contact" {
		log_error "Slurm appears to be down"
		set exit_code 1
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match == 0} {
	log_error "Could not run job with constraint"
	set exit_code 1
}

#
# Run "scontrol reconfig" to restore proper features on the node
#
reconfigure

#
# Now run a job with that constraint and make sure it fails
#
set match 0
set srun_pid [spawn $srun -t1 -C $feature -N1 $file_in]
expect {
	-re "Invalid feature" {
		log_debug "This error was expected, no worries"
		set match 1
		exp_continue
	}
	-re "Unable to contact" {
		log_error "Slurm appears to be down"
		set exit_code 1
	}
	timeout {
		log_error "srun not responding"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		wait
	}
}

#
# Explicitly reset features, just to be safe. This is done before the
# verdict below so the node is restored even on the failure path
# (NOTE(review): assumes fail may end the test immediately - confirm
# against ./globals).
#
set_node_feature $node_name $orig_avail_node_feat $orig_active_node_feat

if {$match != 1} {
	fail "Ran job with what should be invalid constraint"
}

if {$exit_code == 0} {
	exec $bin_rm -f $file_in
} else {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}