// ----------------------------------------------------------------------
// File: MultiSpaceTapeGc.cc
// Author: Steven Murray - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2011 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>.*
************************************************************************/
#include "common/Logging.hh"
#include "mgm/tgc/MaxLenExceeded.hh"
#include "mgm/tgc/MultiSpaceTapeGc.hh"
#include "mgm/CtaUtils.hh"
#include
#include
#include
#include
/*----------------------------------------------------------------------------*/
/**
* @file MultiSpaceTapeGc.cc
*
* @brief Class implementing a tape aware garbage collector that can work over
* multiple EOS spaces
*
*/
/*----------------------------------------------------------------------------*/
EOSTGCNAMESPACE_BEGIN
//------------------------------------------------------------------------------
// Constructor
//
// Tape support and garbage collection both start out switched off; the
// reference to the MGM interface is kept and also handed to the map of
// per-space garbage collectors.
//------------------------------------------------------------------------------
MultiSpaceTapeGc::MultiSpaceTapeGc(ITapeGcMgm& mgm)
  : m_tapeEnabled(false),
    m_gcIsActive(false),
    m_mgm(mgm),
    m_gcs(mgm)
{
  // Nothing to do here: the worker thread is only created by start()
}
//------------------------------------------------------------------------------
// Destructor
//
// Stops garbage collection.  Any exception raised whilst stopping is logged
// and swallowed so that nothing escapes from the destructor.
//------------------------------------------------------------------------------
MultiSpaceTapeGc::~MultiSpaceTapeGc()
{
  const char* const logFormat = "msg=\"%s\"";

  try {
    stop();
  } catch (const std::exception& ex) {
    eos_static_err(logFormat, ex.what());
  } catch (...) {
    eos_static_err(logFormat, "Caught an unknown exception");
  }
}
//------------------------------------------------------------------------------
// Notify GC the specified file has been opened for write
//
// The event is dropped unless support for tape is enabled AND the garbage
// collectors have been populated.  This method never throws: failures are
// logged instead.
//
// @param space Name of the EOS space the file belongs to
// @param fid   Identifier of the file
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::fileOpenedForWrite(const std::string& space,
                                     const eos::IFileMD::id_t fid)
{
  const bool gcReady = m_tapeEnabled && m_gcsPopulatedUsingQdb;

  if (gcReady) {
    const char* const failureFormat = "%s failed: %s";

    try {
      dispatchFileAccessedToGc("file opened for write", space, fid);
    } catch (const SpaceToTapeGcMap::UnknownEOSSpace&) {
      // This EOS space has no tape-aware GC, so there is nobody to notify
    } catch (const std::exception& ex) {
      eos_static_err(failureFormat, __FUNCTION__, ex.what());
    } catch (...) {
      eos_static_err(failureFormat, __FUNCTION__,
                     "Caught an unknown exception");
    }
  }
}
//------------------------------------------------------------------------------
// Notify GC the specified file has been opened for read
//
// The event is ignored unless support for tape is enabled AND the garbage
// collectors have been populated, which is the same rule that
// fileOpenedForWrite() applies.  This method never throws: failures are
// logged instead.
//
// @param space Name of the EOS space the file belongs to
// @param fid   Identifier of the file
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::fileOpenedForRead(const std::string& space,
                                    const eos::IFileMD::id_t fid)
{
  // Either condition alone is enough to drop the event.  Testing them with
  // "&&" would let events through whilst the garbage collectors are still
  // being populated or after stop() has destroyed them.
  if (!m_tapeEnabled || !m_gcsPopulatedUsingQdb) {
    return;
  }

  try {
    dispatchFileAccessedToGc("file opened for read", space, fid);
  } catch (const std::exception& ex) {
    eos_static_err("%s failed: %s", __FUNCTION__, ex.what());
  } catch (...) {
    eos_static_err("%s failed: Caught an unknown exception", __FUNCTION__);
  }
}
//------------------------------------------------------------------------------
// Notify GC the specified file has been converted
//
// The event is ignored unless support for tape is enabled AND the garbage
// collectors have been populated, which is the same rule that
// fileOpenedForWrite() applies.  This method never throws: failures are
// logged instead.
//
// @param space Name of the EOS space the file belongs to
// @param fid   Identifier of the file
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::fileConverted(const std::string& space,
                                const eos::IFileMD::id_t fid)
{
  // Either condition alone is enough to drop the event.  Testing them with
  // "&&" would let events through whilst the garbage collectors are still
  // being populated or after stop() has destroyed them.
  if (!m_tapeEnabled || !m_gcsPopulatedUsingQdb) {
    return;
  }

  try {
    dispatchFileAccessedToGc("file converted", space, fid);
  } catch (const std::exception& ex) {
    eos_static_err("%s failed: %s", __FUNCTION__, ex.what());
  } catch (...) {
    eos_static_err("%s failed: Caught an unknown exception", __FUNCTION__);
  }
}
//------------------------------------------------------------------------------
// Dispatch file accessed event to the space specific tape garbage collector
//
// Looks up the garbage collector of the given EOS space and tells it the file
// has been accessed.  Events for spaces without a tape-aware GC are silently
// ignored; every other failure is logged.  No exception is propagated from
// the lookup or the notification.
//
// @param event  Human readable description of the event, used for logging
// @param space  Name of the EOS space the file belongs to
// @param fileId Identifier of the file
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::dispatchFileAccessedToGc(const std::string& event,
    const std::string& space,
    const IFileMD::id_t fileId)
{
  const char* const msgFormat =
    "event=\"%s\" space=\"%s\" fxid=%08llx msg=\"%s failed: %s\"";
  // %llx requires an unsigned long long argument, so convert explicitly rather
  // than rely on the file identifier type happening to have the same width
  const auto fxid = static_cast<unsigned long long>(fileId);

  try {
    auto& gc = m_gcs.getGc(space);
    gc.fileAccessed(fileId);
  } catch (const SpaceToTapeGcMap::UnknownEOSSpace&) {
    // Ignore events for EOS spaces that do not have a tape-aware GC
  } catch (const std::exception& ex) {
    eos_static_err(msgFormat, event.c_str(), space.c_str(), fxid, __FUNCTION__,
                   ex.what());
  } catch (...) {
    eos_static_err(msgFormat, event.c_str(), space.c_str(), fxid, __FUNCTION__,
                   "Caught an unknown exception");
  }
}
//------------------------------------------------------------------------------
// Return map from EOS space name to tape-aware GC statistics
//
// An empty map is returned when support for tape is not enabled, and also when
// gathering the statistics throws: the exception is logged, not propagated.
//
// NOTE(review): the template arguments of std::map are missing from the return
// type and from both empty-map returns below - presumably lost when this text
// was extracted.  Confirm the key/value types against the class declaration.
//------------------------------------------------------------------------------
std::map
MultiSpaceTapeGc::getStats() const
{
// Shared by both catch handlers; %s receives the reason for the failure
const char* const msgFormat =
"msg=\"Unable to get statistics about tape-aware garbage collectors: %s\"";
try {
// No statistics exist when support for tape is disabled
if (!m_tapeEnabled) {
return std::map();
}
return m_gcs.getStats();
} catch (std::exception& ex) {
eos_static_err(msgFormat, ex.what());
} catch (...) {
eos_static_err(msgFormat, "Caught an unknown exception");
}
// Only reached after an exception has been logged above
return std::map();
}
//----------------------------------------------------------------------------
// Handles a cmd=SFS_FSCTL_PLUGIO arg1=tgc request
//
// Replies with the JSON representation of the tape-aware garbage collectors.
// The request is refused unless it comes from localhost (or
// localhost.localdomain) and support for tape is enabled.
//
// @param error  Receives the reply buffer on success or the error code and
//               message on failure
// @param vid    Identity of the requester; only its host is inspected
// @param client Not used by this method
// @return SFS_DATA if the reply was attached to error, otherwise SFS_ERROR
//----------------------------------------------------------------------------
int
MultiSpaceTapeGc::handleFSCTL_PLUGIO_tgc(XrdOucErrInfo& error,
    eos::common::VirtualIdentity& vid,
    const XrdSecEntity* client)
{
  try {
    if (vid.host != "localhost" && vid.host != "localhost.localdomain") {
      std::ostringstream replyMsg;
      replyMsg << __FUNCTION__ <<
               ": System access restricted - unauthorized identity used";
      // Messages built at runtime are always logged through "%s" so that they
      // are never interpreted as a printf format string
      eos_static_err("msg=\"%s\"", replyMsg.str().c_str());
      error.setErrInfo(EACCES, replyMsg.str().c_str());
      return SFS_ERROR;
    }

    if (!m_tapeEnabled) {
      std::ostringstream replyMsg;
      replyMsg << __FUNCTION__ << ": Support for tape is not enabled";
      eos_static_err("msg=\"%s\"", replyMsg.str().c_str());
      error.setErrInfo(ENOTSUP, replyMsg.str().c_str());
      return SFS_ERROR;
    }

    const uint64_t replySize = 1048576; // 1 MiB
    // Generate the JSON before allocating the reply buffer so that a failure
    // to serialise cannot leak the buffer
    std::ostringstream json;

    try {
      m_gcs.toJson(json, replySize - 1);
    } catch (const MaxLenExceeded& ml) {
      eos_static_err("msg=\"%s\"", ml.what());
      error.setErrInfo(ERANGE, ml.what());
      return SFS_ERROR;
    }

    char* const reply = static_cast<char*>(malloc(replySize));

    if (!reply) {
      std::ostringstream replyMsg;
      replyMsg << __FUNCTION__ << ": Failed to allocate memory for reply: replySize="
               << replySize;
      eos_static_err("msg=\"%s\"", replyMsg.str().c_str());
      error.setErrInfo(ENOMEM, replyMsg.str().c_str());
      return SFS_ERROR;
    }

    std::strncpy(reply, json.str().c_str(), replySize - 1);
    reply[replySize - 1] = '\0';
    // Ownership of reply is taken by the xrd_buff object.
    // Error then takes ownership of the xrd_buff object
    XrdOucBuffer* xrd_buff = nullptr;

    try {
      xrd_buff = new XrdOucBuffer(reply, replySize);
    } catch (...) {
      // Nobody took ownership of reply, release it before reporting the error
      free(reply);
      throw;
    }

    // Length of the JSON text plus its terminating null character
    xrd_buff->SetLen(strlen(reply) + 1);
    error.setErrInfo(xrd_buff->BuffSize(), xrd_buff);
    return SFS_DATA;
  } catch (const std::exception& ex) {
    eos_static_err("msg=\"handleFSCTL_PLUGIO_tgc failed: %s\"", ex.what());
  } catch (...) {
    eos_static_err("msg=\"handleFSCTL_PLUGIO_tgc failed: Caught an unknown exception\"");
  }

  error.setErrInfo(ECANCELED, "handleFSCTL_PLUGIO_tgc failed");
  return SFS_ERROR;
}
//------------------------------------------------------------------------------
// Enable garbage collection
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::setTapeEnabled(const std::set& spaces)
{
std::lock_guard workerLock(m_gcStartupMutex);
m_tapeEnabled = true;
m_spaces.insert(spaces.begin(), spaces.end());
}
//------------------------------------------------------------------------------
// Start garbage collection for the specified EOS spaces
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::start()
{
std::lock_guard workerLock(m_gcStartupMutex);
// Starting garbage collecton requires it to have been enabled
if (!m_tapeEnabled) {
std::ostringstream msg;
msg << __FUNCTION__ <<
" failed: Trying to start garbage collection without enabling first";
throw GcIsNotEnabled(msg.str());
}
if (m_gcIsActive) {
std::ostringstream msg;
msg << __FUNCTION__ << " failed: Garbage collection has already been started";
throw GcAlreadyStarted(msg.str());
}
for (const auto& space : m_spaces) {
m_gcs.createGc(space);
}
std::function entryPoint = std::bind(
&MultiSpaceTapeGc::workerThreadEntryPoint, this);
m_worker = std::make_unique(entryPoint);
m_gcIsActive = true;
}
//------------------------------------------------------------------------------
// Stop garbage collection for all previously specified EOS spaces
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::stop()
{
std::lock_guard workerLock(m_gcStartupMutex);
try {
if (m_worker) {
m_stop = true;
m_worker->join();
m_worker.reset();
}
} catch (std::exception& ex) {
eos_static_err("msg=\"%s\"", ex.what());
} catch (...) {
eos_static_err("msg=\"Caught an unknown exception\"");
}
m_gcs.destroyAllGc();
m_gcsPopulatedUsingQdb = false;
m_gcIsActive = false;
}
//----------------------------------------------------------------------------
// Check if garbage collection is active
//
// @return true if start() has completed successfully and stop() has not been
// called since, otherwise false
//----------------------------------------------------------------------------
bool
MultiSpaceTapeGc::isGcActive()
{
  const bool active = m_gcIsActive;
  return active;
}
//------------------------------------------------------------------------------
// Entry point for the worker thread of this object
//
// Populates the garbage collectors, flags them as populated and then starts
// their worker threads.  Being a thread entry point this method must not let
// an exception escape, therefore every failure is logged as critical.
//------------------------------------------------------------------------------
void
MultiSpaceTapeGc::workerThreadEntryPoint() noexcept
{
  const char* const failureFormat =
    "msg=\"Worker thread of the multi-space tape-aware garbage collector failed: %s\"";

  try {
    populateGcsUsingQdb();
    m_gcsPopulatedUsingQdb = true;
    m_gcs.startGcWorkerThreads();
  } catch (const std::exception& ex) {
    eos_static_crit(failureFormat, ex.what());
  } catch (...) {
    eos_static_crit(failureFormat, "Caught an unknown exception");
  }
}
//----------------------------------------------------------------------------
// Populate the in-memory LRUs of the tape garbage collectors using Quark DB
//
// Asks the MGM for the disk replicas of every EOS space that has a garbage
// collector and reports each returned file to the collector of its space.
// Returns early, leaving the population incomplete, as soon as a stop has
// been requested.  Exceptions are not caught here.
//----------------------------------------------------------------------------
void
MultiSpaceTapeGc::populateGcsUsingQdb()
{
  eos_static_info("msg=\"Starting to populate the meta-data of the tape-aware garbage collectors\"");
  const auto startTgcPopulation = time(nullptr);
  const auto gcSpaces = m_gcs.getSpaces();
  uint64_t nbFilesScanned = 0;
  auto gcSpaceToFiles = m_mgm.getSpaceToDiskReplicasMap(gcSpaces, m_stop,
                        nbFilesScanned);

  // Build up space GC LRU structures whilst reducing space file lists
  for (auto& spaceAndFiles : gcSpaceToFiles) {
    const auto& space = spaceAndFiles.first;
    auto& files = spaceAndFiles.second;
    auto& gc = m_gcs.getGc(space);
    {
      std::ostringstream msg;
      msg << "msg=\"About to populate the tape-aware GC meta-data for an EOS space\" space=\""
          << space << "\" nbFiles="
          << files.size();
      // The message embeds the space name, so it is logged through "%s"
      // rather than being used as the printf format string itself
      eos_static_info("%s", msg.str().c_str());
    }

    // Each file is erased from the list as soon as it has been handed to the
    // garbage collector
    for (auto fileItor = files.begin(); fileItor != files.end();) {
      if (m_stop) {
        eos_static_info("msg=\"Requested to stop populating the meta-data of the tape-aware garbage collectors\"");
        return;
      }

      gc.fileAccessed(fileItor->id);
      fileItor = files.erase(fileItor);
    }
  }

  {
    const auto populationDurationSecs = time(nullptr) - startTgcPopulation;
    std::ostringstream msg;
    msg << "msg=\"Finished populating the meta-data of the tape-aware garbage collectors\" nbFilesScanned="
        << nbFilesScanned << " durationSecs=" <<
        populationDurationSecs;
    eos_static_info("%s", msg.str().c_str());
  }
}
EOSTGCNAMESPACE_END