#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that DenyOnLimit is enforced on QoS and
#          Association limits.
############################################################################
# Copyright (C) 2015 SchedMD LLC
# Written by Nathan Yee
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals
source ./globals_accounting

set test_acct            "test$test_id\_acct"
set test_qos             "test$test_id\_qos"
set file_in              "test$test_id.sc"
set max_wall_val         2
set job_id               0
set exit_code            0
set save_billing_weights ""
set tres_cpu_mult        2

# TRES limits applied by check_limit, keyed by TRES name.
# The billing value is scaled by tres_cpu_mult before it is applied.
array set max_tres_type {
	billing 2
	cpu     2
	node    1
}

# Limit option names, indexed by the main loop counter i
array set lim_type_type {
	0 MaxTres
	1 MaxTresPerUser
	2 MaxTresMin
}

# Values used to reset the QoS limits (-1 clears a limit)
array set reset_qos_val {
	MaxCpus                   -1
	MaxNode                   -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxCpusPerUser            -1
	MaxNodesPerUser           -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerJob=billing     -1
	MaxTRESPerUser=billing    -1
}

# Values used to reset the Association limits (-1 clears a limit)
array set reset_assoc_val {
	MaxCpus                   -1
	MaxNode                   -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerJob=billing     -1
}

#
# check_limit - set one TRES limit and verify an oversized job is rejected
#
# Arguments:
#   type - sacctmgr entity to modify ("qos" or "account" in this test)
#   name - name of that entity
#
# Reads the globals i and option (set by the main loop below) to select
# the limit name from lim_type_type and the TRES from max_tres_type.
# Submits a batch job asking for one more than the base limit value and
# expects sbatch to refuse it. The QoS and association limits are reset
# before returning.
#
proc check_limit { type name } {
	global sacctmgr sbatch lim_type_type i option max_tres_type
	global job_id test_qos test_acct reset_qos_val reset_assoc_val
	global file_in number exit_code tres_cpu_mult

	# The billing limit is expressed in billing units, so scale it by
	# the multiplier used for the partition's TRESBillingWeights
	set type_value $max_tres_type($option)
	if {$option eq "billing"} {
		set type_value [expr {$type_value * $tres_cpu_mult}]
	}

	set modified 0
	spawn $sacctmgr -i mod $type $name set $lim_type_type($i)=$option=$type_value
	expect {
		-re "Modified" {
			set modified 1
			exp_continue
		}
		timeout {
			log_error "sacctmgr is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$modified != 1} {
		# Report the value that was actually sent to sacctmgr
		log_error "sacctmgr did not modify $lim_type_type($i)=$option=$type_value"
		set exit_code 1
	}

	# Ask for one more than the base limit: as a node count for the
	# node TRES, as a task count otherwise
	set over_limit [expr {$max_tres_type($option) + 1}]
	if {$option eq "node"} {
		set flag "-N$over_limit"
	} else {
		set flag "-n$over_limit"
	}

	set match 0
	spawn $sbatch $flag -t1 -o/dev/null --account=$test_acct $file_in
	expect {
		-re "Job violates accounting/QOS policy" {
			set match 1
			exp_continue
		}
		-re "Node count specification invalid" {
			log_warn "Test exceeds available node count"
			set match 1
			exp_continue
		}
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			set exit_code 1
		}
		timeout {
			log_error "sbatch is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$match != 1 || $job_id} {
		cancel_job $job_id
		fail "Job should have been killed upon submission"
	}

	# Reset limits
	mod_qos $test_qos [array get reset_qos_val]
	mod_acct $test_acct "" "" [array get reset_assoc_val]
}

#
# cleanup - remove everything this test created
#
# Deletes the test account and QoS, restores the partition's
# TRESBillingWeights if a previous value was saved, and removes the
# batch script.
#
proc cleanup { } {
	global sacctmgr file_in bin_rm test_acct test_qos exit_code
	global partition save_billing_weights scontrol

	# Delete test account and qos
	spawn $sacctmgr -i delete account $test_acct
	expect {
		timeout {
			log_error "sacctmgr is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	spawn $sacctmgr -i delete qos $test_qos
	expect {
		timeout {
			log_error "sacctmgr is not responding"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	# partition is only read here, after the weights have been saved,
	# so the early cleanup call below is safe before partition is set
	if {$save_billing_weights ne ""} {
		spawn $scontrol update partitionname=$partition TRESBillingWeights=$save_billing_weights
		expect {
			-re "error" {
				log_error "Failed to reset TRESBillingWeights"
				set exit_code 1
			}
			timeout {
				log_error "scontrol is not responding"
				set exit_code 1
			}
			eof {
				wait
			}
		}
	}

	# Remove test script
	exec $bin_rm -f $file_in
}

if { [test_account_storage] == 0 } {
	skip "This test can't be run without a usable AccountStorageType"
} elseif { [test_enforce_limits] == 0 } {
	skip "This test can't be run without a usable AccountingStorageEnforce"
}
if { [test_limits_enforced] == 0 } {
	skip "This test can't be run without enforcing limits"
}
if {[test_super_user] == 0} {
	skip "Test can only be ran as SlurmUser"
}

# Clean up any vestigial data
cleanup

set partition [default_partition]

make_bash_script $file_in "
sleep 2"

# Add qos
# Start at 0 so the check below can detect a QoS that was not created
set added 0
spawn $sacctmgr -i create qos $test_qos
expect {
	-re "Adding QOS" {
		set added 1
		exp_continue
	}
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$added != 1} {
	fail "Test QoS $test_qos was not created"
}

# Add account using the test qos
set match 0
spawn $sacctmgr -i create account $test_acct qos=$test_qos
expect {
	-re "Associations" {
		set match 1
		exp_continue
	}
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1} {
	fail "Test account $test_acct was not created"
}

# Add the current user to the test account
spawn $sacctmgr -i create user name=[get_my_user_name] account=$test_acct
expect {
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Set DenyOnLimit qos flag
set modified 0
spawn $sacctmgr -i mod qos $test_qos set flag=DenyOnLimit
expect {
	-re "Modified qos" {
		set modified 1
		exp_continue
	}
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$modified != 1} {
	# Without the flag the remaining checks would be meaningless
	log_error "sacctmgr did not set flag=DenyOnLimit on qos $test_qos"
	set exit_code 1
}

# Save the partition's current billing weights so cleanup can restore them
spawn $scontrol show part $partition
expect {
	-re "TRESBillingWeights=(\\S+)" {
		set save_billing_weights $expect_out(1,string)
		exp_continue
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

spawn $scontrol update partitionname=$partition tresbillingweights=cpu=$tres_cpu_mult
expect {
	-re "error" {
		log_error "Failed to set TRESBillingWeights"
		set exit_code 1
	}
	timeout {
		log_error "scontrol is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

if {$exit_code != 0} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}

# check_limit reads i and option as globals.
# Every limit type is tested on the QoS; the association is only tested
# with the first limit type (MaxTres).
for {set i 0} {$i < 3} {incr i} {
	foreach option [array names max_tres_type] {
		# Check DenyOnLimit for QoS
		log_info "==== Testing QoS ===="
		check_limit "qos" $test_qos

		if {$i == 0} {
			# Check DenyOnLimit for Association
			log_info "==== Testing Association ===="
			check_limit "account" $test_acct
		}
	}
}

#
# Test Max Wall
#
set modified 0
spawn $sacctmgr -i mod qos $test_qos set maxwall=$max_wall_val
expect {
	-re "Modified qos" {
		set modified 1
		exp_continue
	}
	timeout {
		log_error "sacctmgr is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$modified != 1} {
	log_error "sacctmgr did not modify maxwall=$max_wall_val"
	set exit_code 1
}

# Request one minute more than the wall limit; the job must be rejected
set match 0
spawn $sbatch -N1 -t[expr {$max_wall_val + 1}] -o/dev/null --account=$test_acct $file_in
expect {
	-re "Job violates accounting/QOS policy" {
		set match 1
		exp_continue
	}
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		set exit_code 1
	}
	timeout {
		log_error "sbatch is not responding"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Cancel the submitted job (if any) before reporting the failure, as
# check_limit does, in case fail does not return
cancel_job $job_id
if {$match != 1 || $job_id} {
	fail "Job should have been killed upon submission"
}

if {$exit_code != 0} {
	fail "Test failed due to previous errors"
}