#!/bin/bash
set -x
# ******************************************************************************
# EOS - the CERN Disk Storage System
# Copyright (C) 2022 CERN/Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ******************************************************************************
#------------------------------------------------------------------------------
# Description: Script testing the balancer mechanism of EOS. It assumes that
# there are at least 7 FSTs available in the instance.
#
# Usage:
# eos-balancer-test <eos_mgm_hostname>
#------------------------------------------------------------------------------
# Resolve the absolute, symlink-free directory that holds this script. "&&"
# (instead of ";") leaves SCRIPTPATH empty when the directory cannot be
# entered, rather than silently falling back to the caller's working directory.
SCRIPTPATH="$(cd -- "$(dirname -- "$0")" >/dev/null 2>&1 && pwd -P)"
# Load the helper file located next to this script. The path is quoted so that
# an installation directory containing whitespace is still passed as one word.
# shellcheck source=/dev/null
source "${SCRIPTPATH}/eos-test-utils"
# Helper cleanup method
#
# Removes what the test created: the per-layout EOS directories below
# EOS_BALANCE_DIR, the directory itself and the local dummy files. Finally
# switches the balancer of the "default" space off.
#
# Globals:  EOS_BALANCE_DIR, TEST_FN1, TEST_FN2 (read)
# Returns:  exit status of the final "eos space config" call
cleanup() {
  local layout

  # Only touch the EOS namespace when the test directory is known: with an
  # empty EOS_BALANCE_DIR the paths below would collapse to "/replica/*" etc.
  if [[ -n "${EOS_BALANCE_DIR:-}" ]]; then
    # The wildcard is quoted on purpose so the shell hands it to the eos
    # command unexpanded.
    for layout in replica raiddp rain; do
      eos rm -rF "${EOS_BALANCE_DIR}/${layout}/*"
    done
    for layout in replica raiddp rain; do
      eos rmdir "${EOS_BALANCE_DIR}/${layout}/"
    done
    eos rmdir "${EOS_BALANCE_DIR}/"
  fi

  # Quote the local paths (and stop option parsing with "--") so a name with
  # whitespace or a leading dash is removed as exactly one file.
  if [[ -n "${TEST_FN1:-}" ]]; then
    rm -rf -- "${TEST_FN1}"
  fi
  if [[ -n "${TEST_FN2:-}" ]]; then
    rm -rf -- "${TEST_FN2}"
  fi

  eos space config default space.balancer=off
}
# Validate the command line. The first argument is the MGM hostname used as
# the target of the xrdcp transfers.
# NOTE(review): a second argument is accepted but never read - confirm whether
# any caller still passes one.
if [[ $# -eq 0 || $# -gt 2 ]]; then
  echo "Usage: $0 <eos_mgm_hostname>"
  exit 1
fi

EOS_MGM_HOSTNAME=$1
# Deviation limit used by this test: configured as space.balancer.threshold
# and compared against the group deviation to decide when balancing is done.
EOS_BALANCER_THRESHOLD=5

# Check preconditions: count the lines of "eos fs ls" that contain "online"
# and require at least 7 of them.
FST_ONLINE=$(eos fs ls | grep -c "online")
if [[ ${FST_ONLINE} -lt 7 ]]; then
  echo "error: not enough FSTs configured"
  exit 1
fi
# Create dummy test files
# Two local files filled with random data (200 MiB and 400 MiB) serve as the
# payload that is later uploaded with xrdcp.
TEST_FN1=/var/tmp/200MB.dat
TEST_FN2=/var/tmp/400MB.dat

# dd output is discarded, so its exit status is the only failure signal.
# Stop here instead of continuing with missing or truncated payload files.
if ! dd if=/dev/urandom of="${TEST_FN1}" bs=1M count=200 &> /dev/null ||
   ! dd if=/dev/urandom of="${TEST_FN2}" bs=1M count=400 &> /dev/null; then
  echo "error: failed to create the local test files"
  rm -f -- "${TEST_FN1}" "${TEST_FN2}"
  exit 1
fi
# Create eos directory for tests
EOS_BALANCE_DIR=/eos/dockertest/balance_test

# One sub-directory per layout under test
for subdir in replica raiddp rain; do
  eos mkdir -p "${EOS_BALANCE_DIR}/${subdir}/"
done

eos chmod -r 2777 "${EOS_BALANCE_DIR}/"

# Set the "default" attribute on every sub-directory. Each entry reads
# "<sub-directory>:<attribute value>"; note that "rain" is given raid6.
for entry in replica:replica raiddp:raiddp rain:raid6; do
  eos attr set "default=${entry#*:}" "${EOS_BALANCE_DIR}/${entry%%:*}/"
done
# File system 1 is switched to read-only while the data is copied in, so that
# the group ends up unbalanced
eos fs config 1 configstatus=ro

upload_base="root://${EOS_MGM_HOSTNAME}/${EOS_BALANCE_DIR}"
for i in {1..3}; do
  # Each payload entry is "<local file>|<size label used in the remote name>"
  for payload in "${TEST_FN1}|200MB" "${TEST_FN2}|400MB"; do
    for layout in replica raiddp rain; do
      xrdcp -f --nopbar "${payload%|*}" \
        "${upload_base}/${layout}/${payload##*|}_file${i}.dat"
    done
  done
done

# Give the publishing thread on the FSTs time to report updated values
sleep 10
eos fs config 1 configstatus=rw

# Dump the state of the instance into the test output
eos node ls
eos fs ls
eos fs ls --io
eos group ls
for (( fsid = 1; fsid <= 8; fsid++ )); do
  eos fs status "${fsid}"
done
# Enable the balancing at the space level: threshold and update interval are
# configured first, then the balancer itself is switched on
for setting in \
  "space.balancer.threshold=${EOS_BALANCER_THRESHOLD}" \
  "space.balancer.update.interval=5" \
  "space.balancer=on"; do
  eos space config default "${setting}"
done
# Print the deviation value of group default.0 as found in the monitoring
# output of "eos group ls"; prints nothing when no matching line exists.
# NOTE(review): relies on the deviation being the 20th whitespace-separated
# field (key=value) of that output - confirm against the eos version in use.
balancer_group_dev() {
  sudo eos group ls -m | grep "default.0" | awk '{print $20;}' | cut -d '=' -f2
}

# Succeed when $1 is a plain non-negative decimal number, i.e. something that
# can safely be handed to bc.
balancer_is_number() {
  local -r number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  [[ "$1" =~ ${number_re} ]]
}

# Succeed when $1 is numerically smaller than $2. bc is used because the
# values may carry a fractional part.
balancer_dev_below() {
  [[ "$(echo "$1 < $2" | bc)" -eq 1 ]]
}

# Get current dev of the group. An unreadable value is treated as an error:
# feeding an empty string to bc would make the comparison below silently
# evaluate to "not below threshold".
START_DEV=$(balancer_group_dev)
if ! balancer_is_number "${START_DEV}"; then
  echo "error: could not read the deviation of group default.0"
  cleanup
  exit 1
fi

if balancer_dev_below "${START_DEV}" "${EOS_BALANCER_THRESHOLD}"; then
  echo "error: start deviation is already less then threshold"
  cleanup
  exit 1
fi

MAX_DELAY=300 # seconds
START_TIME=$(date +%s)

# Poll until the deviation drops below the threshold or MAX_DELAY expires.
# The loop is left only through "break" or "exit", so a failing status of the
# diagnostic "eos group ls" at the end of an iteration cannot end the wait.
while true; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$(( CURRENT_TIME - START_TIME ))
  NEW_DEV=$(balancer_group_dev)

  # A value that cannot be read counts as "not balanced yet" and is retried
  # until the timeout below triggers.
  if balancer_is_number "${NEW_DEV}" &&
     balancer_dev_below "${NEW_DEV}" "${EOS_BALANCER_THRESHOLD}"; then
    echo "info: new deviation value is ${NEW_DEV}"
    break
  fi

  if (( ELAPSED >= MAX_DELAY )); then
    echo "error: balancing was not done within ${MAX_DELAY} seconds"
    cleanup
    exit 1
  fi

  echo "info: new dev is ${NEW_DEV}, sleep for 5 seconds, $(secs_to_human "${ELAPSED}") passed"
  sleep 5
  eos group ls
done

cleanup
exit 0