// ----------------------------------------------------------------------
// File: Scheduler.cc
// Author: Andreas-Joachim Peters - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2011 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
************************************************************************/
#include "mgm/Scheduler.hh"
#include "mgm/Quota.hh"
#include "GeoTreeEngine.hh"
#include "mgm/XrdMgmOfs.hh"
EOSMGMNAMESPACE_BEGIN
XrdSysMutex Scheduler::pMapMutex;
std::map<std::string, FsGroup*> Scheduler::schedulingGroup;
//------------------------------------------------------------------------------
// Constructor
//------------------------------------------------------------------------------
Scheduler::Scheduler() { }
//------------------------------------------------------------------------------
// Destructor
//------------------------------------------------------------------------------
Scheduler::~Scheduler() { }
//------------------------------------------------------------------------------
// Write placement routine - the caller routine has to lock via =>
// eos::common::RWMutexReadLock(FsView::gFsView.ViewMutex)
//------------------------------------------------------------------------------
int
Scheduler::FilePlacement(PlacementArguments* args)
{
eos_static_debug("requesting file placement from geolocation %s",
args->vid->geolocation.c_str());
// The caller routine has to lock via =>
// eos::common::RWMutexReadLock(FsView::gFsView.ViewMutex)
  std::map<eos::common::FileSystem::fsid_t, float> availablefs;
  std::map<eos::common::FileSystem::fsid_t, std::string> availablefsgeolocation;
  std::list<eos::common::FileSystem::fsid_t> availablevector;
  // number of filesystems to place, derived from the layout stripe count
unsigned int nfilesystems = eos::common::LayoutId::GetStripeNumber(
args->lid) + 1;
unsigned int ncollocatedfs = 0;
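  // derive the number of collocated filesystems from the placement policy:
  // scattered -> at most one (only if the client has a geotag), hybrid -> all
  // stripes except the redundancy stripes, gathered -> all stripes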
switch (args->plctpolicy) {
case kScattered:
if (!(args->vid->geolocation.empty())) {
ncollocatedfs = 1;
} else {
ncollocatedfs = 0;
}
break;
case kHybrid:
switch (eos::common::LayoutId::GetLayoutType(args->lid)) {
case eos::common::LayoutId::kPlain:
ncollocatedfs = 1;
break;
case eos::common::LayoutId::kReplica:
ncollocatedfs = nfilesystems - 1;
break;
default:
ncollocatedfs = nfilesystems - eos::common::LayoutId::GetRedundancyStripeNumber(
args->lid);
break;
}
break;
// we only do geolocations for replica layouts
case kGathered:
ncollocatedfs = nfilesystems;
}
eos_static_debug("checking placement policy : policy is %d, nfilesystems is"
" %d and ncollocated is %d", (int)args->plctpolicy, (int)nfilesystems,
(int)ncollocatedfs);
uid_t uid = args->vid->uid;
gid_t gid = args->vid->gid;
XrdOucString lindextag = "";
if (args->grouptag) {
lindextag = args->grouptag;
} else {
lindextag += (int) uid;
lindextag += ":";
lindextag += (int) gid;
}
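  // the index tag (group tag if given, otherwise uid:gid) keys the round-robin
  // scheduling group state kept in the schedulingGroup map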
std::string indextag = lindextag.c_str();
  std::set<FsGroup*>::const_iterator git;
  std::vector<std::string> fsidsgeotags;
  std::vector<FsGroup*> groupsToTry;
  // retrieve the geotags and scheduling groups of the filesystems that already
  // hold a replica (the avoid list)
if (!args->alreadyused_filesystems->empty()) {
if (!gOFS->mGeoTreeEngine->getInfosFromFsIds(*args->alreadyused_filesystems,
&fsidsgeotags,
0, &groupsToTry)) {
eos_static_debug("could not retrieve scheduling group for all avoid fsids");
} else {
eos_static_debug("succesfully retrieved scheduling groups for all avoid fsids");
}
}
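  // if a scheduling group index is forced, locate that group and fail with
  // ENOSPC if it does not exist in this space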
if (args->forced_scheduling_group_index >= 0) {
eos_static_debug("searching for forced scheduling group=%i",
args->forced_scheduling_group_index);
for (git = FsView::gFsView.mSpaceGroupView[*args->spacename].begin();
git != FsView::gFsView.mSpaceGroupView[*args->spacename].end(); ++git) {
if ((*git)->GetIndex() == (unsigned int) args->forced_scheduling_group_index) {
break;
}
}
if ((git != FsView::gFsView.mSpaceGroupView[*args->spacename].end()) &&
((*git)->GetIndex() != (unsigned int) args->forced_scheduling_group_index)) {
args->selected_filesystems->clear();
return ENOSPC;
}
if (git == FsView::gFsView.mSpaceGroupView[*args->spacename].end()) {
args->selected_filesystems->clear();
return ENOSPC;
}
eos_static_debug("forced scheduling group index %d",
args->forced_scheduling_group_index);
} else {
XrdSysMutexHelper scope_lock(pMapMutex);
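    // resume the round-robin from the last scheduling group used for this
    // index tag, starting from the first group if the tag is seen for the
    // first time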
    if (schedulingGroup.count(indextag)) {
      git = FsView::gFsView.mSpaceGroupView[*args->spacename].find(
              schedulingGroup[indextag]);
    } else {
      git = FsView::gFsView.mSpaceGroupView[*args->spacename].begin();
    }
    // if the remembered group no longer exists, restart from the first group
    if (git == FsView::gFsView.mSpaceGroupView[*args->spacename].end()) {
      git = FsView::gFsView.mSpaceGroupView[*args->spacename].begin();
    }
    schedulingGroup[indextag] = *git;
}
  // Rotate the scheduling view pointer, updating the schedulingGroup map.
  // If groupsToTry is not empty, first try the scheduling groups of the
  // filesystems that already hold a replica.
for (unsigned int groupindex = 0;
groupindex < FsView::gFsView.mSpaceGroupView[*args->spacename].size() +
groupsToTry.size(); groupindex++) {
FsGroup* group = nullptr;
// Try first the forced scheduling group and fail if we cannot schedule there
if (args->forced_scheduling_group_index >= 0) {
group = *git;
} else {
// Rotate scheduling view ptr - we select a random one
group = (groupindex < groupsToTry.size() ? groupsToTry[groupindex] :
*git);
}
eos_static_debug("Trying GeoTree Placement on group: %s, total groups: %d, groupsToTry: %d ",
group->mName.c_str(), FsView::gFsView.mSpaceGroupView[*args->spacename].size(),
groupsToTry.size());
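    // ask the GeoTreeEngine to place nfilesystems replicas in this group,
    // avoiding the filesystems that already hold a replica and honouring the
    // requested target geotag and collocation count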
bool placeRes = gOFS->mGeoTreeEngine->placeNewReplicasOneGroup(
group, nfilesystems,
args->selected_filesystems,
args->inode,
args->dataproxys,
args->firewallentpts,
GeoTreeEngine::regularRW,
// file systems to avoid are assumed to already host a replica
args->alreadyused_filesystems,
&fsidsgeotags,
args->bookingsize,
args->plctTrgGeotag ? *args->plctTrgGeotag : "",
args->vid->geolocation,
ncollocatedfs,
args->exclude_filesystems,
NULL);
eos::common::Logging& g_logging = eos::common::Logging::GetInstance();
if (g_logging.gLogMask & LOG_MASK(LOG_DEBUG)) {
char buffer[1024];
buffer[0] = 0;
char* buf = buffer;
for (auto it = args->selected_filesystems->begin();
it != args->selected_filesystems->end(); ++it) {
buf += sprintf(buf, "%lu ", (unsigned long)(*it));
}
eos_static_debug("GeoTree Placement returned %d with fs id's -> %s",
(int)placeRes, buffer);
}
if (placeRes) {
eos_static_debug("placing replicas for %s in subgroup %s", args->path,
group->mName.c_str());
} else {
if (args->forced_scheduling_group_index >= 0) {
eos_static_debug("msg=\"could not place all replica(s) for %s in the "
"forced subgroup %s\"", args->path, group->mName.c_str());
args->selected_filesystems->clear();
return ENOSPC;
} else {
eos_static_debug("msg=\"could not place all replica(s) for %s in subgroup %s, "
"checking next group\"", args->path, group->mName.c_str());
}
}
if (groupindex >= groupsToTry.size()) {
if ((git == FsView::gFsView.mSpaceGroupView[*args->spacename].end()) ||
(++git == FsView::gFsView.mSpaceGroupView[*args->spacename].end())) {
git = FsView::gFsView.mSpaceGroupView[*args->spacename].begin();
}
// remember the last group for that indextag
pMapMutex.Lock();
schedulingGroup[indextag] = *git;
pMapMutex.UnLock();
}
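    // success: the selected filesystems have been filled; otherwise move on to
    // the next scheduling group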
if (placeRes) {
return 0;
} else {
continue;
}
}
  // no scheduling group could place all requested replicas
args->selected_filesystems->clear();
return ENOSPC;
}
//------------------------------------------------------------------------------
// File access method
//------------------------------------------------------------------------------
int
Scheduler::FileAccess(AccessArguments* args)
{
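  // a write needs all online stripes of the layout, a read only the minimal
  // set of online replicas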
size_t nReqStripes = (args->isRW ?
eos::common::LayoutId::GetOnlineStripeNumber(args->lid) :
eos::common::LayoutId::GetMinOnlineReplica(args->lid));
eos_static_debug("requesting file access from geolocation %s",
args->vid->geolocation.c_str());
GeoTreeEngine::SchedType st = GeoTreeEngine::regularRO;
  // map the requested scheduling type onto the GeoTreeEngine scheduling type;
  // the engine gives draining filesystems a low weight when more than one
  // replica is available
if (args->schedtype == regular) {
if (args->isRW) {
st = GeoTreeEngine::regularRW;
} else {
st = GeoTreeEngine::regularRO;
}
}
if (args->schedtype == draining) {
st = GeoTreeEngine::draining;
}
  // mark filesystems whose host already appears in the tried CGI as unavailable
if (!args->tried_cgi->empty()) {
    std::vector<std::string> hosts;
if (!gOFS->mGeoTreeEngine->getInfosFromFsIds(*args->locationsfs, 0,
&hosts, 0)) {
eos_static_debug("could not retrieve host for all the avoided fsids");
}
size_t idx = 0;
// we store unavailable filesystems in the unavail vector
for (auto it = hosts.begin(); it != hosts.end(); it++) {
if ((!it->empty()) && args->tried_cgi->find((*it) + ",") != std::string::npos) {
        // this matters for RAID layouts because we have to remove their URLs
        // to let the RAID driver use only online stripes
args->unavailfs->push_back((*args->locationsfs)[idx]);
}
idx++;
}
}
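  // delegate the actual replica selection to the GeoTreeEngine, which picks
  // the head replica index among the available locations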
return gOFS->mGeoTreeEngine->accessHeadReplicaMultipleGroup(nReqStripes,
*args->fsindex,
args->locationsfs,
args->inode,
args->dataproxys,
args->firewallentpts,
st,
args->vid->geolocation,
args->forcedfsid, args->unavailfs);
}
EOSMGMNAMESPACE_END