#!/usr/bin/env expect
############################################################################
# Purpose: Run "srun cat" and read srun's stdout SLOWLY, creating
#          stdout back pressure in srun.  (srun used to lose data on stdout
#          in this situation.)
############################################################################
# Copyright (C) 2002-2007 The Regents of the University of California.
# Copyright (C) 2008 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Chris Morrone
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
############################################################################
source ./globals

# Test tunables.
#   cycle_count  - number of back-to-back srun jobs to run
#   target_lines - upper bound on the number of lines in the generated input
#   task_cnt     - multiplier applied to the input line count to get the
#                  expected output line count
set cycle_count  3
set target_lines 1000
set task_cnt     1

# Overall test status; set to 1 as soon as a cycle produces bad output.
set exit_code    0

# Scratch files used by the test.
# NOTE(review): test_id is not set in this file; presumably it is provided
# by ./globals -- confirm.
set file_in      "test${test_id}.input"
set file_out     "test${test_id}.output"
set file_err     "test${test_id}.error"

# Execute an srun job to cat input_file.  Read srun's stdout SLOWLY (1k/sec)
# to create back pressure which used to reveal a stdout data-loss bug.
# Write the srun's stdout to a file so that the caller can compare the sizes
# of input_file and output_file.
#
# run_cat_backpressure - copy a file through "srun cat" with a throttled reader
#
# Arguments:
#	input_file  - file that the srun job cats to its stdout
#	output_file - file that receives everything read from srun's stdout
#
# The srun command is opened as a read pipeline and drained one character
# at a time, pausing for one second after every 1000 reads.  The slow
# reader is what creates the stdout back pressure in srun.  A "." is
# printed at each pause as a progress indicator.  Any previous copies of
# the stderr file and output_file are removed before the job is started.
#
proc run_cat_backpressure { input_file output_file } {
	global bin_cat bin_sleep bin_rm srun file_err

	log_debug "Running run_cat_backpressure (This is slow)"
	exec $bin_rm -f $file_err $output_file

	set iter 0
	set output [open $output_file w]
	set srun_output [open "| $srun -e $file_err -N1 -t1 $bin_cat $input_file" r]
	while {![eof $srun_output]} {
		puts -nonewline $output [read $srun_output 1]
		incr iter
		if {$iter % 1000 == 0} {
			# Flush so the progress dot is visible while we sleep.
			puts -nonewline "."
			flush stdout
			exec $bin_sleep 1
		}
	}
	puts ""
	close $output

	# Close the pipeline so the channel is released and the srun child is
	# reaped.  Closing a command pipeline raises an error when the child
	# exits non-zero or writes to its stderr; the caller judges success by
	# comparing line counts, so the error is only logged here.
	if {[catch {close $srun_output} close_msg]} {
		log_debug "Closing the srun pipeline reported: $close_msg"
	}
	return
}

#
# Create a sizable text file
#
exec $bin_rm -f $file_in $file_out
for {set inx 0} {$inx < 10} {incr inx} {
	exec $bin_cat /etc/hosts  >>$file_out
	exec $bin_cat /etc/passwd >>$file_out
}
# Cap the input at target_lines lines; file_out is only a staging file here.
exec head -n $target_lines <$file_out >$file_in
exec $bin_rm -f $file_out
set stdin_lines   [get_line_cnt $file_in]
set stdout_target [expr {$stdin_lines * $task_cnt}]

#
# Run cycle_count jobs to copy job input to job output and compare sizes
#
set success_cnt 0
set timeout $max_job_delay
for {set inx 0} {$inx < $cycle_count} {incr inx} {
	run_cat_backpressure $file_in $file_out

	set stdout_lines [get_line_cnt $file_out]
	if {$stdout_lines != $stdout_target} {
		# Wait a second and count again before declaring a failure.
		exec $bin_sleep 1
		set stdout_lines [get_line_cnt $file_out]
	}

	if {$stdout_lines != $stdout_target} {
		if {$stdout_lines == 0} {
			log_error "Stdout is empty, is current working directory writable from compute nodes?"
		} else {
			log_error "Stdout is incomplete ($stdout_lines != $stdout_target)"
		}
		set exit_code 1
		break
	} else {
		incr success_cnt
	}
}

# Scratch files are removed whether or not the test passed.
exec $bin_rm -f $file_in $file_out $file_err
if {$exit_code != 0} {
	fail "Test failed due to previous errors with exit code $exit_code"
}