// ----------------------------------------------------------------------
// File: TapeGc.cc
// Author: Steven Murray - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2011 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>.*
************************************************************************/
#include "mgm/tgc/Constants.hh"
#include "mgm/tgc/MaxLenExceeded.hh"
#include "mgm/tgc/TapeGc.hh"
#include "mgm/tgc/SpaceNotFound.hh"
#include "mgm/CtaUtils.hh"
#include <chrono>
#include <cstdint>
#include <ctime>
#include <exception>
#include <functional>
#include <ios>
#include <memory>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
EOSTGCNAMESPACE_BEGIN
//------------------------------------------------------------------------------
// Constructor
//------------------------------------------------------------------------------
TapeGc::TapeGc(ITapeGcMgm& mgm, const std::string& spaceName,
const std::time_t maxConfigCacheAgeSecs):
m_mgm(mgm),
m_spaceName(spaceName),
m_config(std::bind(&ITapeGcMgm::getTapeGcSpaceConfig, &mgm, spaceName),
maxConfigCacheAgeSecs),
m_spaceStats(spaceName, mgm, m_config), m_nbEvicts(0)
{
}
//------------------------------------------------------------------------------
// Destructor
//
// If a worker thread object exists, raises the stop flag and joins the thread
// while holding m_workerMutex.  Any exception is logged and swallowed so that
// nothing escapes the destructor.
//------------------------------------------------------------------------------
TapeGc::~TapeGc()
{
  try {
    std::lock_guard workerLock(m_workerMutex);

    // Nothing to shut down if the worker thread was never created
    if (!m_worker) {
      return;
    }

    m_stop.setToTrue();
    m_worker->join();
  } catch (const std::exception& ex) {
    eos_static_err("msg=\"%s\"", ex.what());
  } catch (...) {
    eos_static_err("msg=\"Caught an unknown exception\"");
  }
}
//------------------------------------------------------------------------------
// Idempotent method to start the worker thread of the tape-aware GC
//------------------------------------------------------------------------------
void
TapeGc::startWorkerThread()
{
try {
// Do nothing if calling thread is not the first to call startWorkerThread()
if (m_startWorkerThreadMethodCalled.test_and_set()) {
return;
}
std::function entryPoint = std::bind(&TapeGc::workerThreadEntryPoint,
this);
{
std::lock_guard workerLock(m_workerMutex);
m_worker = std::make_unique(entryPoint);
}
} catch (std::exception& ex) {
std::ostringstream msg;
msg << __FUNCTION__ << " failed: " << ex.what();
throw std::runtime_error(msg.str());
} catch (...) {
std::ostringstream msg;
msg << __FUNCTION__ << " failed: Caught an unknown exception";
throw std::runtime_error(msg.str());
}
}
//------------------------------------------------------------------------------
// Entry point for the GC worker thread
//
// Repeats two phases until the stop flag is raised:
//   1. keep calling tryToGarbageCollectASingleFile() for as long as it reports
//      success and no stop has been requested;
//   2. wait up to one second for the stop flag before starting over.
//------------------------------------------------------------------------------
void
TapeGc::workerThreadEntryPoint() noexcept
{
  for (;;) {
    // Phase 1: the stop flag is checked before every collection attempt
    bool keepCollecting = !m_stop;

    while (keepCollecting) {
      keepCollecting = tryToGarbageCollectASingleFile() && !m_stop;
    }

    // Phase 2: leave as soon as the stop flag is seen, otherwise go round again
    if (m_stop.waitForTrue(std::chrono::seconds(1))) {
      break;
    }
  }
}
//------------------------------------------------------------------------------
// Notify GC the specified file has been accessed
//
// Forwards the access to the LRU queue while holding m_lruQueueMutex.  A
// warning is logged only on the call that makes the queue go from "max size
// not exceeded" to "max size exceeded".  Exceptions are logged and swallowed;
// this method never throws.
//------------------------------------------------------------------------------
void
TapeGc::fileAccessed(const IFileMD::id_t fid) noexcept
{
  try {
    std::lock_guard lruQueueLock(m_lruQueueMutex);
    const bool exceededBefore = m_lruQueue.maxQueueSizeExceeded();
    m_lruQueue.fileAccessed(fid);

    // Only log crossing the max queue size threshold - don't log each access
    if (!exceededBefore && m_lruQueue.maxQueueSizeExceeded()) {
      std::ostringstream msg;
      msg << "space=\"" << m_spaceName << "\" fxid=" << std::hex << fid <<
          " msg=\"Max queue size of tape-aware GC has been passed - new files will be ignored\"";
      // The message contains the space name, so it must be passed as an
      // argument and never as the printf-style format string itself
      eos_static_warning("%s", msg.str().c_str());
    }
  } catch (const std::exception& ex) {
    eos_static_err("msg=\"%s\"", ex.what());
  } catch (...) {
    eos_static_err("msg=\"Caught an unknown exception\"");
  }
}
//------------------------------------------------------------------------------
// Try to garage collect a single file if necessary and possible
//------------------------------------------------------------------------------
bool
TapeGc::tryToGarbageCollectASingleFile() noexcept
{
try {
const auto config = m_config.get();
try {
const auto spaceStats = m_spaceStats.get().stats;
// Return no file was garbage collected if there is still enough available
// space or if the total amount of space is not enough (not all disk
// systems are on-line)
if (spaceStats.availBytes >= config.availBytes ||
spaceStats.totalBytes < config.totalBytes) {
return false;
}
} catch (SpaceNotFound&) {
// Return no file was garbage collected if the space was not found
return false;
}
IFileMD::id_t fid = 0;
{
std::lock_guard lruQueueLock(m_lruQueueMutex);
if (m_lruQueue.empty()) {
return false; // No file was garbage collected
}
fid = m_lruQueue.getAndPopFidOfLeastUsedFile();
}
std::uint64_t diskReplicaToBeDeletedSizeBytes = 0;
try {
diskReplicaToBeDeletedSizeBytes = m_mgm.getFileSizeBytes(fid);
} catch (std::exception& ex) {
std::ostringstream msg;
msg << "fxid=" << std::hex << fid <<
" msg=\"Unable to garbage collect disk replica: "
<< ex.what() << "\"";
eos_static_info(msg.str().c_str());
// Please note that a file is considered successfully garbage collected
// if its size cannot be determined
return true;
} catch (...) {
std::ostringstream msg;
msg << "fxid=" << std::hex << fid <<
" msg=\"Unable to garbage collect disk replica: Unknown exception";
eos_static_info(msg.str().c_str());
// Please note that a file is considered successfully garbage collected
// if its size cannot be determined
return true;
}
// The garbage collector should explicitly ignore zero length files by
// returning success
if (0 == diskReplicaToBeDeletedSizeBytes) {
std::ostringstream msg;
msg << "fxid=" << std::hex << fid <<
" msg=\"Garbage collector ignoring zero length file\"";
eos_static_info(msg.str().c_str());
return true;
}
try {
m_mgm.evictAsRoot(fid);
} catch (std::exception& ex) {
std::ostringstream msg;
msg << "fxid=" << std::hex << fid <<
" msg=\"Putting file back in GC queue after failing to garbage collect its disk replica: "
<< ex.what();
eos_static_info(msg.str().c_str());
std::lock_guard lruQueueLock(m_lruQueueMutex);
m_lruQueue.fileAccessed(fid);
return false; // No disk replica was garbage collected
} catch (...) {
std::ostringstream msg;
msg << "fxid=" << std::hex << fid <<
" msg=\"Putting file back in GC queue after failing to garbage collect its disk replica: Unknown exception";
eos_static_info(msg.str().c_str());
std::lock_guard lruQueueLock(m_lruQueueMutex);
m_lruQueue.fileAccessed(fid);
return false; // No disk replica was garbage collected
}
m_nbEvicts++;
diskReplicaQueuedForDeletion(diskReplicaToBeDeletedSizeBytes);
std::ostringstream msg;
msg << "fxid=" << std::hex << fid <<
" msg=\"Garbage collected disk replica using evict\"";
eos_static_info(msg.str().c_str());
return true; // A disk replica was garbage collected
} catch (std::exception& ex) {
eos_static_err("msg=\"%s\"", ex.what());
} catch (...) {
eos_static_err("msg=\"Caught an unknown exception\"");
}
return false; // No disk replica was garbage collected
}
//----------------------------------------------------------------------------
// Return statistics
//
// Gathers the eviction counter, the LRU queue size, the space statistics and
// the timestamp reported by m_spaceStats into one TapeGcStats value.  If
// anything throws while gathering them, a value-initialised TapeGcStats is
// returned instead; this method never throws.
//----------------------------------------------------------------------------
TapeGcStats
TapeGc::getStats() noexcept
{
  try {
    TapeGcStats snapshot;
    snapshot.nbEvicts = m_nbEvicts;
    snapshot.lruQueueSize = getLruQueueSize();
    // The space statistics are fetched before the query timestamp is read
    snapshot.spaceStats = m_spaceStats.get().stats;
    snapshot.queryTimestamp = m_spaceStats.getQueryTimestamp();
    return snapshot;
  } catch (...) {
    // Fall through to the empty result below
  }

  return TapeGcStats();
}
//----------------------------------------------------------------------------
// Return the size of the LRU queue
//
// The size is read while holding m_lruQueueMutex.  If an exception is caught
// it is logged together with the space name and 0 is returned; this method
// never throws.
//----------------------------------------------------------------------------
Lru::FidQueue::size_type
TapeGc::getLruQueueSize() const noexcept
{
  const char* const errFormat =
    "TapeGc::getLruQueueSize() failed space=%s: %s";
  // Stays at zero unless the queue could be read successfully
  Lru::FidQueue::size_type queueSize = 0;

  try {
    std::lock_guard lruQueueLock(m_lruQueueMutex);
    queueSize = m_lruQueue.size();
  } catch (const std::exception& ex) {
    eos_static_err(errFormat, m_spaceName.c_str(), ex.what());
  } catch (...) {
    eos_static_err(errFormat, m_spaceName.c_str(), "Caught an unknown exception");
  }

  return queueSize;
}
//----------------------------------------------------------------------------
// Write a JSON representation of the GC to the specified stream
//
// The output is an object with the members "spaceName" and "lruQueue"; the
// latter is produced by the LRU queue itself while m_lruQueueMutex is held.
//
// os      Stream the JSON is appended to.
// maxLen  Maximum permitted stream position after writing, where 0 means no
//         limit.  The value is also passed on to the LRU queue.
//
// Throws MaxLenExceeded if the stream position ends up beyond a non-zero
// maxLen, and std::runtime_error if the stream reports a negative position.
//----------------------------------------------------------------------------
void
TapeGc::toJson(std::ostringstream& os, const std::uint64_t maxLen) const
{
  {
    std::lock_guard lruQueueLock(m_lruQueueMutex);
    os << "{\"spaceName\":\"" << m_spaceName << "\",\"lruQueue\":";
    m_lruQueue.toJson(os, maxLen);
    os << "}";
  }

  const auto writePos = os.tellp();

  if (writePos < 0) {
    throw std::runtime_error(std::string(__FUNCTION__) +
                             ": os.tellp() returned a negative number");
  }

  // A maxLen of zero switches the length check off
  const bool lengthIsLimited = 0 != maxLen;

  if (lengthIsLimited &&
      static_cast<std::string::size_type>(writePos) > maxLen) {
    std::ostringstream msg;
    msg << __FUNCTION__ << ": maxLen exceeded: maxLen=" << maxLen;
    throw MaxLenExceeded(msg.str());
  }
}
//------------------------------------------------------------------------------
// Notify this object that a disk replica has been queued for deletion
//
// Forwards the size of the replica, in bytes, to m_spaceStats.  Within this
// file it is called by tryToGarbageCollectASingleFile() after a successful
// eviction.
//------------------------------------------------------------------------------
void
TapeGc::diskReplicaQueuedForDeletion(const size_t fileSizeBytes)
{
// Pure delegation: no state of TapeGc itself is touched here
m_spaceStats.diskReplicaQueuedForDeletion(fileSizeBytes);
}
EOSTGCNAMESPACE_END