#!/bin/bash
set -x

# ******************************************************************************
# EOS - the CERN Disk Storage System
# Copyright (C) 2022 CERN/Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# ******************************************************************************

#------------------------------------------------------------------------------
# Description: Script testing the balancer mechanism of EOS. It assumes that
#              there are at least 7 FSTs available in the instance.
#
# Usage:
# eos-balancer-test <eos_mgm_hostname>
#------------------------------------------------------------------------------

SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
# Provides secs_to_human, used for the progress messages below
source "${SCRIPTPATH}/eos-test-utils"

#------------------------------------------------------------------------------
# Helper cleanup method: removes the EOS test directories and their contents,
# deletes the local dummy files and switches the balancer of the default
# space off again.
# Globals: EOS_BALANCE_DIR, TEST_FN1, TEST_FN2 (read) - must be set before
#          this function is called.
#------------------------------------------------------------------------------
cleanup() {
  eos rm -rF "${EOS_BALANCE_DIR}/replica/*"
  eos rm -rF "${EOS_BALANCE_DIR}/raiddp/*"
  eos rm -rF "${EOS_BALANCE_DIR}/rain/*"
  eos rmdir "${EOS_BALANCE_DIR}/replica/"
  eos rmdir "${EOS_BALANCE_DIR}/raiddp/"
  eos rmdir "${EOS_BALANCE_DIR}/rain/"
  eos rmdir "${EOS_BALANCE_DIR}/"
  rm -rf -- "${TEST_FN1}" "${TEST_FN2}"
  eos space config default space.balancer=off
}

#------------------------------------------------------------------------------
# Print the value part (text after '=') of the 20th whitespace-separated field
# of the "default.0" line(s) in the monitoring output of "eos group ls".
# NOTE(review): field 20 is presumably the group's fill deviation - confirm
# against the "eos group ls -m" output of the deployed EOS version.
# Outputs: the extracted value on stdout (empty if nothing matched)
#------------------------------------------------------------------------------
balancer_group_deviation() {
  sudo eos group ls -m | grep "default.0" | awk '{print $20;}' | cut -d '=' -f2
}

#------------------------------------------------------------------------------
# Check whether a deviation value is strictly below the balancer threshold.
# Globals:   EOS_BALANCER_THRESHOLD (read)
# Arguments: $1 - deviation value as printed by balancer_group_deviation
# Returns:   0 if $1 < EOS_BALANCER_THRESHOLD, non-zero otherwise. An empty
#            value counts as "not below" so that a failed lookup is never
#            mistaken for a balanced group (and bc is not fed a broken
#            expression).
#------------------------------------------------------------------------------
balancer_dev_below_threshold() {
  local dev=$1

  if [[ -z "${dev}" ]]; then
    return 1
  fi

  [[ $(echo "${dev} < ${EOS_BALANCER_THRESHOLD}" | bc) -eq 1 ]]
}

if [[ $# -eq 0 || $# -gt 2 ]]; then
  echo "Usage: $0 <eos_mgm_hostname>"
  exit 1
fi

EOS_MGM_HOSTNAME=$1
EOS_BALANCER_THRESHOLD=5

# Check preconditions and make sure central balancer is enabled
FST_ONLINE=$(eos fs ls | grep -c "online")

if [[ ${FST_ONLINE} -lt 7 ]]; then
  echo "error: not enough FSTs configured"
  exit 1
fi

# Create dummy test files
TEST_FN1=/var/tmp/200MB.dat
TEST_FN2=/var/tmp/400MB.dat
dd if=/dev/urandom of="${TEST_FN1}" bs=1M count=200 &> /dev/null
dd if=/dev/urandom of="${TEST_FN2}" bs=1M count=400 &> /dev/null

# Create eos directory for tests
EOS_BALANCE_DIR=/eos/dockertest/balance_test
eos mkdir -p "${EOS_BALANCE_DIR}/replica/"
eos mkdir -p "${EOS_BALANCE_DIR}/raiddp/"
eos mkdir -p "${EOS_BALANCE_DIR}/rain/"
eos chmod -r 2777 "${EOS_BALANCE_DIR}/"
eos attr set default=replica "${EOS_BALANCE_DIR}/replica/"
eos attr set default=raiddp "${EOS_BALANCE_DIR}/raiddp/"
eos attr set default=raid6 "${EOS_BALANCE_DIR}/rain/"

# Put one file system in RO mode while we copy data in so that we end up
# with an unbalanced group
eos fs config 1 configstatus=ro

# Same upload order as before: for every index first the 200MB file into
# each layout directory, then the 400MB file into each layout directory.
for i in {1..3}; do
  for layout in replica raiddp rain; do
    xrdcp -f --nopbar "${TEST_FN1}" "root://${EOS_MGM_HOSTNAME}/${EOS_BALANCE_DIR}/${layout}/200MB_file${i}.dat"
  done

  for layout in replica raiddp rain; do
    xrdcp -f --nopbar "${TEST_FN2}" "root://${EOS_MGM_HOSTNAME}/${EOS_BALANCE_DIR}/${layout}/400MB_file${i}.dat"
  done
done

# Allow for the publishing thread on the FSTs to report updated values
sleep 10
eos fs config 1 configstatus=rw
eos node ls
eos fs ls
eos fs ls --io
eos group ls

for i in {1..8}; do
  eos fs status "${i}"
done

# Enable the balancing at the space level
eos space config default space.balancer.threshold=${EOS_BALANCER_THRESHOLD}
eos space config default space.balancer.update.interval=5
eos space config default space.balancer=on

# Get current dev of the group
START_DEV=$(balancer_group_deviation)

if balancer_dev_below_threshold "${START_DEV}"; then
  echo "error: start deviation is already less then threshold"
  cleanup
  exit 1
fi

MAX_DELAY=300 # seconds
START_TIME=$(date +%s)

# Poll the group deviation until it drops below the threshold or MAX_DELAY
# is exceeded. The loop is left only through the explicit break/exit below,
# so a failing diagnostic command can no longer end it and fake a success.
while true; do
  CURRENT_TIME=$(date +%s)
  NEW_DEV=$(balancer_group_deviation)

  if balancer_dev_below_threshold "${NEW_DEV}"; then
    echo "info: new deviation value is ${NEW_DEV}"
    break
  fi

  ELAPSED=$(( CURRENT_TIME - START_TIME ))

  if (( ELAPSED >= MAX_DELAY )); then
    echo "error: balancing was not done within ${MAX_DELAY} seconds"
    cleanup
    exit 1
  fi

  echo "info: new dev is ${NEW_DEV}, sleep for 5 seconds, $(secs_to_human ${ELAPSED}) passed"
  sleep 5
  eos group ls
done

cleanup
exit 0