#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Confirms that a job array that requested a feature with
#          the --constraint option is correctly purged from the controller
#          (see bug 5702).
############################################################################
# Copyright (C) 2018 SchedMD LLC
# Written by Marshall Garey
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set exit_code   0
set file_script "test$test_id.sh"
set config_path ""
set cwd         "[$bin_pwd]"

#
# cleanup - undo everything this test changed on the system.
#
# Deletes the generated batch script and, if slurm.conf was saved, restores
# it and reconfigures so the restored settings take effect.
#
proc cleanup { } {
    global file_script config_file

    file delete $file_script

    # config_file is only assigned after the super-user check and
    # get_conf_path succeed. Guard the restore so that a cleanup triggered
    # before that point does not die on an unset variable.
    # NOTE(review): presumably skip/fail in ./globals invoke cleanup - confirm.
    if {[info exists config_file]} {
        restore_conf $config_file
        reconfigure
    }
}

#
# get_single_node_feature - return one feature available on some node.
#
# Scans `scontrol show node` output for the first AvailableFeatures= value
# that is not "(null)" and returns the first feature of that
# comma-separated list.
#
# Returns: the feature name, or "" if no node advertises any feature.
# Side effects: sets the global exit_code to 1 if scontrol times out.
#
proc get_single_node_feature { } {
    global scontrol exit_code re_word_str

    set feature ""
    log_user 1
    set scon_pid [spawn $scontrol show node]
    expect {
        -re "AvailableFeatures=($re_word_str)" {
            set available $expect_out(1,string)
            if {$available eq "(null)"} {
                # This node has no features; keep looking at later nodes.
                exp_continue
            }
            # Keep only the first feature. Splitting also handles the case
            # where there is no comma at all (the whole string is returned).
            set feature [lindex [split $available ","] 0]
            # Don't exp_continue since we only care about one.
        }
        timeout {
            log_error "scontrol not responding"
            slow_kill $scon_pid
            set exit_code 1
        }
        eof {
            wait
        }
    }

    return $feature
}

if {[test_super_user] != 1} {
    skip "This test must be run as SlurmUser or root"
}

set config_path [get_conf_path]
fail_on_error "Not able to get config_path"
set config_file $config_path/slurm.conf

save_conf $config_file

# Change the slurm.conf MinJobAge=10 so we don't have to wait very long.
# First drop any existing MinJobAge line, then append the new value.
exec $bin_sed -i /^\[\t\s\]*MinJobAge\[\t\s\]*=/Id $config_file
exec $bin_echo -e "\nMinJobAge=10" >> $config_file
reconfigure
fail_on_error "Not able to reconfigure"

# Verify that MinJobAge was set
set min_job_age [get_min_job_age]
assert_or_fail {$min_job_age == 10} "Failed to set MinJobAge"

set node_feature [get_single_node_feature]
if { $node_feature == "" } {
    skip "Cannot run this test without any node features."
}

# It doesn't matter how long the job sleeps; it will be cancelled anyway.
make_bash_script $file_script "sleep 5"

# Submit a two-task job array that requests the feature via --constraint.
# Note: no quotes around /dev/null. spawn does not go through a shell and
# Tcl treats a quote in the middle of a word as a literal character, so
# quoting here would make the quotes part of the output file name.
set job_id 0
spawn $sbatch --constraint=$node_feature --array=1-2 --output=/dev/null \
    $file_script
expect {
    -re "Submitted batch job ($number)" {
        set job_id $expect_out(1,string)
        log_debug "Job $job_id was submitted"
        exp_continue
    }
    -re "Batch job submission failed" {
        set exit_code 1
        log_error "Batch job submission failed"
        exp_continue
    }
    timeout {
        log_error "sbatch timed out"
        set exit_code 1
    }
    eof {
        wait
    }
}
fail_on_error "Error submitting job"

# Wait enough time for the jobs to be in the controller
set count 0
set count_exp 2
log_info "Verifying the jobs correctly requested the feature $node_feature"
wait_for { $count == $count_exp } {
    # Recount from scratch on every poll so stale results never accumulate.
    set count 0
    dict for {jobId job_dict} [get_jobs $job_id] {
        if {[dict get $job_dict "Features"] eq "$node_feature"} {
            incr count
        }
    }
}
assert_or_fail { $count == $count_exp } [format "Expected each of the %s jobs to have features=%s, but got %s" \
    $count_exp $node_feature $count]

# Cancel the jobs and wait for them to be purged from the controller
set exit_code [cancel_job $job_id]
fail_on_error "Problem cancelling job $job_id"

# Assume failure until scontrol confirms the job record is gone.
set exit_code 1
set sleep_time $min_job_age
incr sleep_time 10
log_info "Waiting for $sleep_time seconds for the jobs to be purged from the controller"
$bin_sleep $sleep_time

spawn $scontrol show job $job_id
expect {
    -re "Invalid job id specified" {
        # Good, the job isn't in the controller anymore
        set exit_code 0
        exp_continue
    }
    timeout {
        log_error "scontrol timed out"
        set exit_code 1
    }
    eof {
        wait
    }
}

# Act on the purge result explicitly. Without this check nothing visible
# here reads exit_code before the test is declared passed.
# NOTE(review): presumably fail_on_error tests exit_code, as its use after
# cancel_job above suggests - confirm against ./globals.
fail_on_error "Job $job_id was not purged from the controller"

pass