// ----------------------------------------------------------------------
// File: Supervisor.cc
// Author: Andreas-Joachim Peters - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2011 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>.*
************************************************************************/
#include "fst/storage/Storage.hh"
#include "fst/XrdFstOfs.hh"
#include "fst/storage/FileSystem.hh"
EOSFSTNAMESPACE_BEGIN
//------------------------------------------------------------------------------
// Supervisor thread
//------------------------------------------------------------------------------
void
Storage::Supervisor()
{
// this thread does an automatic self-restart if this storage node has
// filesystems configured but they don't boot - this can happen by a
//timing issue during the autoboot phase
eos_static_info("Supervisor activated ...");
while (true) {
size_t ndown = 0;
size_t nfs = 0;
{
eos::common::RWMutexReadLock fs_rd_lock(mFsMutex);
for (const auto& elem : mFsMap) {
auto fs = elem.second;
if (!fs) {
eos_warning("msg=\"skip file system id without object in map\" "
"fsid=%lu", elem.first);
continue;
}
eos::common::BootStatus bootstatus = fs->GetStatus();
eos::common::ConfigStatus configstatus = fs->GetConfigStatus();
if ((bootstatus == eos::common::BootStatus::kDown) &&
(configstatus > eos::common::ConfigStatus::kDrain)) {
++ndown;
}
}
}
if (ndown) {
// We give one more minute to get things going
std::this_thread::sleep_for(std::chrono::seconds(60));
ndown = 0;
{
eos::common::RWMutexReadLock fs_rd_lock(mFsMutex);
nfs = mFsMap.size();
for (const auto& elem : mFsMap) {
auto fs = elem.second;
if (!fs) {
eos_warning("msg=\"skip file system id without object in map\" "
"fsid=%lu", elem.first);
continue;
}
eos::common::BootStatus bootstatus = fs->GetStatus();
eos::common::ConfigStatus configstatus = fs->GetConfigStatus();
if ((bootstatus == eos::common::BootStatus::kDown) &&
(configstatus > eos::common::ConfigStatus::kDrain)) {
++ndown;
}
}
}
if (ndown == nfs) {
// shutdown this daemon
eos_static_alert("found %d/%d filesystems in status - committing suicide !",
ndown, nfs);
std::this_thread::sleep_for(std::chrono::seconds(10));
kill(getpid(), SIGQUIT);
}
}
std::this_thread::sleep_for(std::chrono::seconds(60));
}
}
EOSFSTNAMESPACE_END