#!/bin/bash
# ----------------------------------------------------------------------
# File: eos-groupdrain-test
# Author: Abhishek Lekshmanan - CERN
# ----------------------------------------------------------------------
# ******************************************************************************
# EOS - the CERN Disk Storage System
# Copyright (C) 2022 CERN/Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ******************************************************************************
#
#------------------------------------------------------------------------------
# Description: Script testing the draining mechanism of EOS. It assumes that
# there are at least 14 FSTs available in the instance.
#
# Usage:
# eos-groupdrain-test <eos_mgm_hostname>
#------------------------------------------------------------------------------
# Trace every executed command so that the log shows what the test did.
set -x

# One or two positional arguments are accepted (unchanged); only the first
# one is used. It is the MGM hostname that the xrdcp upload URLs are built
# from.
if [[ $# -eq 0 || $# -gt 2 ]]; then
  # The usage text names the expected argument and goes to stderr.
  echo "Usage: $0 <eos_mgm_hostname>" >&2
  exit 1
fi

EOS_MGM_HOSTNAME=$1
# Print the file system table once so it is available in the log.
eos fs ls

# Precondition: at least 14 lines of "eos fs ls" must contain "online".
FST_ONLINE=$(eos fs ls | grep -c "online")

if (( FST_ONLINE < 14 )); then
  echo "error: not enough FSTs configured"
  exit 1
fi
# Create the local payload files that are uploaded with xrdcp later on: an
# empty file, a 32 KiB file and a 5 MiB file filled with random data.
#
# The files are placed in a private directory created by mktemp rather than
# at fixed, predictable paths directly under /tmp, and that directory is
# removed again whenever the script exits (success or error).
TEST_TMPDIR=$(mktemp -d /tmp/eos-groupdrain-test.XXXXXX)
if [[ -z "${TEST_TMPDIR}" || ! -d "${TEST_TMPDIR}" ]]; then
  echo "error: failed to create a temporary directory for the test files"
  exit 1
fi
trap 'rm -rf -- "${TEST_TMPDIR}"' EXIT

TEST_FN0=${TEST_TMPDIR}/zero.dat
TEST_FN1=${TEST_TMPDIR}/32kb.dat
TEST_FN2=${TEST_TMPDIR}/5mb.dat

# The dd output is still discarded, but a failure is no longer ignored:
# without the payload files none of the later uploads can work.
if ! touch "${TEST_FN0}" ||
   ! dd if=/dev/urandom of="${TEST_FN1}" bs=1k count=32 &> /dev/null ||
   ! dd if=/dev/urandom of="${TEST_FN2}" bs=1M count=5 &> /dev/null; then
  echo "error: failed to create the local test files in ${TEST_TMPDIR}"
  exit 1
fi
# Settings applied to the "default" space, in this order: the converter and
# the group drainer are switched on, then the group drainer parameters
# (threshold, group refresh interval, retry interval, retry count) are set.
groupdrain_settings=(
  "space.converter=on"
  "space.groupdrainer=on"
  "space.groupdrainer.threshold=0"
  "space.groupdrainer.group_refresh_interval=30"
  "space.groupdrainer.retry_interval=10"
  "space.groupdrainer.retry_count=3"
)

for setting in "${groupdrain_settings[@]}"; do
  eos space config default "${setting}"
done
# Prepare one EOS directory per tested layout below the drain_test tree.
drain_test_root=/eos/dockertest/drain_test
layout_dirs=(replica raiddp rain)
# Value of the "default" attribute for the directory at the same index.
layout_defaults=(replica raiddp raid6)

for layout_dir in "${layout_dirs[@]}"; do
  eos mkdir -p "${drain_test_root}/${layout_dir}/"
done

# Mode 2777 on the parent first, then on every layout directory.
eos chmod 2777 /eos/dockertest/
for layout_dir in "${layout_dirs[@]}"; do
  eos chmod 2777 "${drain_test_root}/${layout_dir}/"
done

for idx in 0 1 2; do
  eos attr set "default=${layout_defaults[idx]}" "${drain_test_root}/${layout_dirs[idx]}/"
done
# Count the "eos fs ls" lines containing "online" again and set a headroom
# of 100M on the file systems with ids 1..count.
# NOTE(review): this treats the counter as the file system id, i.e. it
# relies on the ids being exactly 1..count - confirm for the target instance.
FST_ONLINE=$(eos fs ls | grep -c "online")

i=1
while (( i <= FST_ONLINE )); do
  eos fs config "${i}" headroom=100M
  i=$((i + 1))
done
# The group that is drained later on has to show up in the group listing.
# -w with an escaped dot matches "default.1" only as a whole word, so a group
# such as "default.10" does not satisfy the check by accident.
if ! eos group ls | grep -w "default\.1"; then
  echo "error: group default.1 is not online ..."
  eos fs ls
  exit 1
fi

# disable default.0 for writes right now
# For every distinct value in the sixth column of the online default.0 file
# systems (presumably the geotag - confirm against the "eos fs ls" layout),
# add a "plct" entry for default.0 to the geosched disabled list.
echo "disabling group default.0 for writes"
eos fs ls default.0 | awk '/online/ {print $6}' | sort -u \
  | xargs -I {} eos geosched disabled add {} plct default.0
eos geosched disabled show \* \* \*

# default.1 must NOT be in the disabled list at this point. The previous
# check used "grep -v default.1" and tested for a non-zero status; grep -v
# succeeds as soon as any line does not mention default.1 (the default.0
# entries added just above), so a disabled default.1 was never detected.
# Match default.1 positively instead and fail when it is found.
if eos geosched disabled show \* \* \* | grep -w "default\.1"; then
  echo "error: group default.1 is already disabled ..."
  eos fs ls
  exit 1
fi
# Upload four numbered copies of each payload (0kb, 32kb, 5mb) into each of
# the three layout directories. Order per round: payload by payload, and for
# each payload replica, raiddp, rain.
upload_sources=("${TEST_FN0}" "${TEST_FN1}" "${TEST_FN2}")
upload_prefixes=(0kb 32kb 5mb)
upload_url_base="root://${EOS_MGM_HOSTNAME}//eos/dockertest/drain_test"

for i in {1..4}; do
  for idx in 0 1 2; do
    for layout_dir in replica raiddp rain; do
      xrdcp -f --nopbar "${upload_sources[idx]}" \
        "${upload_url_base}/${layout_dir}/${upload_prefixes[idx]}_file${i}.dat"
    done
  done
done
# Log the file info of the first 5mb file of each layout directory.
echo "File info:"
for layout_dir in replica raiddp rain; do
  eos file info "/eos/dockertest/drain_test/${layout_dir}/5mb_file1.dat"
done
# Remove the "plct" entries for default.0 from the geosched disabled list
# again, using the same sixth-column values of its online file systems.
echo "re-enabling group default.0 for writes"
eos fs ls default.0 | awk '/online/ {print $6}' | sort -u \
  | xargs -I {} eos geosched disabled rm {} plct default.0
eos geosched disabled show '*' '*' '*'

# Put default.1 into drain and log the resulting group table.
eos group set default.1 drain
eos group ls

# After that, default.1 has to appear in the geosched disabled list.
if ! eos geosched disabled show '*' '*' '*' | grep "default.1"; then
  echo "error: group default.1 is not disabled ..."
  exit 1
fi
# Poll every 10 seconds until at least 7 lines of "eos fs ls" contain
# "drained" or "failed"; the group drainer status is printed on each round.
# The wait has no upper bound.
count=$(eos fs ls | grep -c "drained\|failed")
until (( count >= 7 )); do
  sleep 10
  count=$(eos fs ls | grep -c "drained\|failed")
  eos space groupdrainer status default --detail
done
# Collect the ids of the file systems in default.1 from the JSON listing.
# The jq filter is quoted so the shell never treats the brackets as a glob.
drained_fsids=$(eos -j fs ls default.1 | jq '.result[].id')

# Without this guard an empty id list (eos or jq failing, or an unexpected
# JSON layout) would skip the loop below and the verification would pass
# without having checked a single file system.
if [[ -z "${drained_fsids}" ]]; then
  echo "error: could not determine the file systems of group default.1"
  exit 1
fi

# One snapshot of the table is enough for all ids.
fs_table=$(eos fs ls)

# Word splitting of the id list is intended: jq prints one id per line.
for fsid in ${drained_fsids}; do
  # The row of this id must contain both "empty" and "drained".
  if ! grep " ${fsid} " <<< "${fs_table}" | grep "empty" | grep "drained"; then
    echo "error: file system $fsid is not drained"
    exit 1
  fi
  echo "File system $fsid successfully drained"
done
# Log the state after the drain: file systems, group drainer, groups,
# converter, and the file info of one 5mb file per layout directory.
echo "Drain Complete, FS Status:"
eos fs ls

echo "Drain Status: "
eos space groupdrainer status default --detail

echo "Group Status"
eos group ls

echo "Converter Status"
eos convert status

echo "File info:"
for layout_dir in replica raiddp rain; do
  eos file info "/eos/dockertest/drain_test/${layout_dir}/5mb_file1.dat"
done
# The group listing has to contain "drained" (case-insensitive match).
if ! eos group ls | grep -i drained; then
  echo "Error: group drain not complete"
  exit 1
fi

# Switch default.1 back on and fail if the command reports an error.
echo "Setting Group Back to RW"
if ! eos group set default.1 on; then
  echo "error: failed to put group in rw mode"
  exit 1
fi
# Clean up: delete the contents of every layout directory, then the layout
# directories themselves, then the drain_test directory.
for layout_dir in replica raiddp rain; do
  eos rm -rF "/eos/dockertest/drain_test/${layout_dir}/*"
done

for layout_dir in replica raiddp rain; do
  eos rmdir "/eos/dockertest/drain_test/${layout_dir}/"
done

eos rmdir "/eos/dockertest/drain_test/"

exit 0