// ----------------------------------------------------------------------
// File: ShouldStall.cc
// Author: Andreas-Joachim Peters - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2011 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program.  If not, see <http://www.gnu.org/licenses/>.  *
************************************************************************/
#include "mgm/XrdMgmOfs.hh"
#include <chrono>
#include <cstdlib>
#include <string>
#include <thread>
// -----------------------------------------------------------------------
// This file is included source code in XrdMgmOfs.cc to make the code more
// transparent without slowing down the compilation time.
// -----------------------------------------------------------------------
//------------------------------------------------------------------------------
//! Decide whether the current request must be stalled, delayed or allowed.
//!
//! @param function       name of the calling MGM function (e.g. "stat")
//! @param __AccessMode__ access-mode flags evaluated by the IS_ACCESSMODE_*
//!                       macros (read / read-from-master / write)
//! @param vid            virtual identity of the client
//! @param stalltime      output: seconds the client shall be stalled (0 = none)
//! @param stallmsg       output: message returned to the stalled client
//!
//! @return true if the request has to be stalled, false if it may proceed.
//!         A rate-limit hit below the saturation threshold is served as an
//!         in-place delay (sleep) and returns false.
//------------------------------------------------------------------------------
bool
XrdMgmOfs::ShouldStall(const char* function,
                       int __AccessMode__,
                       eos::common::VirtualIdentity& vid,
                       int& stalltime, XrdOucString& stallmsg)
{
  // Check for user, group or host banning
  std::string smsg = "";
  stalltime = 0;
  bool stall = true;
  std::string functionname = function;
  bool saturated = false;
  double limit = 0;

  // After booting don't stall FST nodes (sss-authenticated daemon identity):
  // stalling them would stall replication/draining traffic
  if (gOFS->IsNsBooted() && (vid.prot == "sss") &&
      vid.hasUid(DAEMONUID)) {
    eos_static_debug("info=\"avoid stalling of the FST node\" host=%s",
                     vid.host.c_str());
    stall = false;
  }

  // Avoid stalling HTTP requests as these translate into errors on the client
  if (vid.prot == "https") {
    stall = false;
  }

  eos::common::RWMutexReadLock lock(Access::gAccessMutex);
  std::string stallid = "Stall";
  size_t uid_threads = 1;

  if (stall) {
    // System users (uid <= 3), stat calls and restic-over-fuse are exempt
    if ((vid.uid > 3) && (functionname != "stat") && (vid.app != "fuse::restic")) {
      if ((stalltime = gOFS->mTracker.ShouldStall(vid.uid, saturated, uid_threads))) {
        // Per-user thread-pool limit exceeded
        smsg = "operate - you are exceeding your thread pool limit";
        stallid += "::threads::";
        stallid += std::to_string(vid.uid);
      } else if (Access::gBannedUsers.count(vid.uid)) {
        smsg = "operate - you are banned in this instance - contact an administrator";

        // fuse clients don't get stalled by a booted namespace, they get EACCES
        if (vid.app.substr(0, 4) == "fuse") {
          stallmsg = smsg.c_str();
          return true;
        }

        // BANNED USER
        stalltime = 300;
      } else if (Access::gBannedGroups.count(vid.gid)) {
        smsg = "operate - your group is banned in this instance - contact an administrator";

        // fuse clients don't get stalled by a booted namespace, they get EACCES
        if (vid.app.substr(0, 4) == "fuse") {
          stallmsg = smsg.c_str();
          return true;
        }

        // BANNED GROUP
        stalltime = 300;
      } else if (Access::gBannedHosts.count(vid.host)) {
        smsg = "operate - your client host is banned in this instance - contact an administrator";
        // BANNED HOST
        stalltime = 300;
      } else if (Access::gBannedDomains.count(vid.domain)) {
        smsg = "operate - your client domain is banned in this instance - contact an administrator";
        // BANNED DOMAINS
        stalltime = 300;
      } else if (vid.token && Access::gBannedTokens.count(vid.token->Voucher())) {
        smsg = "operate - your token is banned in this instance - contact an administrator";
        // BANNED TOKEN
        stalltime = 300;
      } else if (Access::gStallRules.size() && (Access::gStallGlobal)) {
        // GLOBAL STALL
        // NOTE(review): operator[] on gStallRules/gStallComment can insert an
        // empty entry while only a READ lock on gAccessMutex is held - confirm
        // the rule maps are pre-populated whenever the gStall* flags are set
        stalltime = atoi(Access::gStallRules[std::string("*")].c_str());
        smsg = Access::gStallComment[std::string("*")];
      } else if ((IS_ACCESSMODE_R && (Access::gStallRead)) ||
                 (IS_ACCESSMODE_R_MASTER && (Access::gStallRead))) {
        // READ STALL
        stalltime = atoi(Access::gStallRules[std::string("r:*")].c_str());
        smsg = Access::gStallComment[std::string("r:*")];
      } else if (IS_ACCESSMODE_W && (Access::gStallWrite)) {
        // WRITE STALL
        stalltime = atoi(Access::gStallRules[std::string("w:*")].c_str());
        smsg = Access::gStallComment[std::string("w:*")];
      } else if (Access::gStallUserGroup) {
        // Per-user / per-group rate limiting rules
        std::string usermatch = "rate:user:";
        usermatch += vid.uid_string;
        std::string groupmatch = "rate:group:";
        groupmatch += vid.gid_string;
        std::string userwildcardmatch = "rate:user:*";
        std::string groupwildcardmatch = "rate:group:*";
        std::map<std::string, std::string>::const_iterator it;

        if ((functionname != "stat") && // never stall stats
            (vid.app != "fuse::restic")) {
          for (it = Access::gStallRules.begin();
               it != Access::gStallRules.end();
               it++) {
            stallid = "Stall";
            // Extract the command name the rule applies to: either the
            // trailing "Eosxd..." tag or the text after the last ':'
            auto eosxd_pos = it->first.rfind("Eosxd");
            auto pos = it->first.rfind(":");
            std::string cmd = (eosxd_pos != std::string::npos) ?
                              it->first.substr(eosxd_pos) : it->first.substr(pos + 1);
            stallid += "::";
            stallid += cmd;

            if (EOS_LOGS_DEBUG) {
              eos_static_debug("rule=%s function=%s", cmd.c_str(), function);
            }

            // only Eosxd rates can be fine-grained by function
            if (cmd.substr(0, 5) == "Eosxd") {
              if (cmd != function) {
                continue;
              }
            }

            // allow 33% overshoot above the configured rate before acting
            double cutoff = strtod(it->second.c_str(), nullptr) * 1.33;

            if ((it->first.find(usermatch) == 0)) {
              // check user rule
              XrdSysMutexHelper statLock(gOFS->MgmStats.mMutex);

              if ((cutoff == 0) ||
                  (gOFS->MgmStats.StatAvgUid.count(cmd) &&
                   gOFS->MgmStats.StatAvgUid[cmd].count(vid.uid) &&
                   (gOFS->MgmStats.StatAvgUid[cmd][vid.uid].GetAvg5() > cutoff)
                  )) {
                // rate exceeded
                if (!stalltime) {
                  stalltime = 5;
                }

                limit = cutoff;
                smsg = Access::gStallComment[it->first];
                break;
              }
            } else if ((it->first.find(groupmatch) == 0)) {
              // check group rule
              XrdSysMutexHelper statLock(gOFS->MgmStats.mMutex);

              if ((cutoff == 0) ||
                  (gOFS->MgmStats.StatAvgGid.count(cmd) &&
                   gOFS->MgmStats.StatAvgGid[cmd].count(vid.gid) &&
                   (gOFS->MgmStats.StatAvgGid[cmd][vid.gid].GetAvg5() > cutoff)
                  )) {
                // rate exceeded
                if (!stalltime) {
                  stalltime = 5;
                }

                limit = cutoff;
                smsg = Access::gStallComment[it->first];
                break;
              }
            }

            if ((it->first.find(userwildcardmatch) == 0)) {
              // catch all rule = global user rate cut
              XrdSysMutexHelper statLock(gOFS->MgmStats.mMutex);

              if ((cutoff == 0) ||
                  (gOFS->MgmStats.StatAvgUid.count(cmd) &&
                   gOFS->MgmStats.StatAvgUid[cmd].count(vid.uid) &&
                   (gOFS->MgmStats.StatAvgUid[cmd][vid.uid].GetAvg5() > cutoff)
                  )) {
                if (!stalltime) {
                  stalltime = 5;
                }

                limit = cutoff;
                smsg = Access::gStallComment[it->first];
                break;
              }
            } else if ((it->first.find(groupwildcardmatch) == 0)) {
              // catch all rule = global group rate cut
              XrdSysMutexHelper statLock(gOFS->MgmStats.mMutex);

              if ((cutoff == 0) ||
                  (gOFS->MgmStats.StatAvgGid.count(cmd) &&
                   gOFS->MgmStats.StatAvgGid[cmd].count(vid.gid) &&
                   (gOFS->MgmStats.StatAvgGid[cmd][vid.gid].GetAvg5() > cutoff)
                  )) {
                if (!stalltime) {
                  stalltime = 5;
                }

                limit = cutoff;
                smsg = Access::gStallComment[it->first];
                break;
              }
            }
          }
        }
      }

      if (stalltime && (saturated || ! limit)) {
        // Hard stall: tell the client to come back later.
        // Add a random offset between 0 and 5 to de-synchronize retries.
        // NOTE(review): rand() is not thread-safe/uniform - acceptable here
        // since only a scattering offset is needed
        int random_stall = rand() % 6;
        stalltime += random_stall;
        stallmsg = "Attention: you are currently held in this instance and each"
                   " request is stalled for ";
        stallmsg += (int) stalltime;
        stallmsg += " seconds ... ";
        stallmsg += smsg.c_str();
        eos_static_info("info=\"stalling access to\" uid=%u gid=%u host=%s stall=%d",
                        vid.uid, vid.gid, vid.host.c_str(), stalltime);
        gOFS->MgmStats.Add(stallid.c_str(), vid.uid, vid.gid, 1);
        return true;
      } else {
        if (limit) {
          // Soft limit: serve the request, but delay it in-place proportionally
          // to the configured rate limit
          stallid = "Delay";
          stallid += "::threads::";
          stallid += std::to_string(vid.uid);
          std::string delayid = stallid;
          delayid += "::ms";
          size_t ms_to_delay = 1000.0 / limit;

          if (uid_threads) {
            // renormalize with the current user thread pool size
            ms_to_delay *= uid_threads;

            if (ms_to_delay > 40000) {
              // we should not hang longer than 40s not to trigger timeouts,
              // which are 60s by default for FUSE clients and 5min for XRootD clients
              ms_to_delay = 40000;
            }
          }

          // don't hold the access mutex while sleeping
          lock.Release();
          std::this_thread::sleep_for(std::chrono::milliseconds(ms_to_delay));
          gOFS->MgmStats.Add(stallid.c_str(), vid.uid, vid.gid, 1);
          gOFS->MgmStats.Add(delayid.c_str(), vid.uid, vid.gid, ms_to_delay);
          return false;
        }
      }
    } else {
      // Privileged identities (uid <= 3) / stat / restic: only a global
      // stall rule can affect them
      if (Access::gStallRules.size() &&
          Access::gStallRules.count(std::string("*"))) {
        if ((vid.host != "localhost.localdomain") &&
            (vid.host != "localhost")) {
          // admin/root is only stalled for global stalls not,
          // for write-only or read-only stalls
          stalltime = atoi(Access::gStallRules[std::string("*")].c_str());
          stallmsg = "Attention: you are currently held in this instance and each"
                     " request is stalled for ";
          stallmsg += (int) stalltime;
          stallmsg += " seconds ...";
          eos_static_info("info=\"stalling access to\" uid=%u gid=%u host=%s",
                          vid.uid, vid.gid, vid.host.c_str());
          gOFS->MgmStats.Add("Stall", vid.uid, vid.gid, 1);
          return true;
        } else {
          // localhost does not get stalled but receives an error during boot
          // when trying to write
          if (IS_ACCESSMODE_W) {
            stalltime = 0;
            stallmsg = "do modifications - writing is currently stalled on the instance";
            return true;
          }
        }
      }
    }
  }

  eos_static_debug("info=\"allowing access to\" uid=%u gid=%u host=%s",
                   vid.uid, vid.gid, vid.host.c_str());
  return false;
}