// ---------------------------------------------------------------------- // File: MultiSpaceTapeGc.cc // Author: Steven Murray - CERN // ---------------------------------------------------------------------- /************************************************************************ * EOS - the CERN Disk Storage System * * Copyright (C) 2011 CERN/Switzerland * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see .* ************************************************************************/ #include "common/Logging.hh" #include "mgm/tgc/MaxLenExceeded.hh" #include "mgm/tgc/MultiSpaceTapeGc.hh" #include "mgm/CtaUtils.hh" #include #include #include #include /*----------------------------------------------------------------------------*/ /** * @file MultiSpaceTapeGc.cc * * @brief Class implementing a tape aware garbage collector that can work over * multiple EOS spaces * */ /*----------------------------------------------------------------------------*/ EOSTGCNAMESPACE_BEGIN //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ MultiSpaceTapeGc::MultiSpaceTapeGc(ITapeGcMgm& mgm): m_tapeEnabled(false), m_gcIsActive(false), m_mgm(mgm), m_gcs(mgm) { } //------------------------------------------------------------------------------ // Destructor //------------------------------------------------------------------------------ MultiSpaceTapeGc::~MultiSpaceTapeGc() { try { stop(); } catch (std::exception& ex) { eos_static_err("msg=\"%s\"", ex.what()); } catch (...) { eos_static_err("msg=\"Caught an unknown exception\""); } } //------------------------------------------------------------------------------ // Notify GC the specified file has been opened for write //------------------------------------------------------------------------------ void MultiSpaceTapeGc::fileOpenedForWrite(const std::string& space, const eos::IFileMD::id_t fid) { if (!m_tapeEnabled || !m_gcsPopulatedUsingQdb) { return; } try { dispatchFileAccessedToGc("file opened for write", space, fid); } catch (SpaceToTapeGcMap::UnknownEOSSpace&) { // Ignore events for EOS spaces that do not have a tape-aware GC } catch (std::exception& ex) { eos_static_err("%s failed: %s", __FUNCTION__, ex.what()); } catch (...) { eos_static_err("%s failed: Caught an unknown exception", __FUNCTION__); } } //------------------------------------------------------------------------------ // Notify GC the specified file has been opened for read //------------------------------------------------------------------------------ void MultiSpaceTapeGc::fileOpenedForRead(const std::string& space, const eos::IFileMD::id_t fid) { if (!m_tapeEnabled && !m_gcsPopulatedUsingQdb) { return; } try { dispatchFileAccessedToGc("file opened for read", space, fid); } catch (std::exception& ex) { eos_static_err("%s failed: %s", __FUNCTION__, ex.what()); } catch (...) { eos_static_err("%s failed: Caught an unknown exception", __FUNCTION__); } } //------------------------------------------------------------------------------ // Notify GC the specified file has been converted //------------------------------------------------------------------------------ void MultiSpaceTapeGc::fileConverted(const std::string& space, const eos::IFileMD::id_t fid) { if (!m_tapeEnabled && !m_gcsPopulatedUsingQdb) { return; } try { dispatchFileAccessedToGc("file converted", space, fid); } catch (std::exception& ex) { eos_static_err("%s failed: %s", __FUNCTION__, ex.what()); } catch (...) { eos_static_err("%s failed: Caught an unknown exception", __FUNCTION__); } } //------------------------------------------------------------------------------ // Dispatch file accessed event to the space specific tape garbage collector //------------------------------------------------------------------------------ void MultiSpaceTapeGc::dispatchFileAccessedToGc(const std::string& event, const std::string& space, const IFileMD::id_t fileId) { const char* const msgFormat = "event=\"%s\" space=\"%s\" fxid=%08llx msg=\"%s failed: %s\""; try { auto& gc = m_gcs.getGc(space); gc.fileAccessed(fileId); } catch (SpaceToTapeGcMap::UnknownEOSSpace&) { // Ignore events for EOS spaces that do not have a tape-aware GC } catch (std::exception& ex) { eos_static_err(msgFormat, event.c_str(), space.c_str(), fileId, __FUNCTION__, ex.what()); } catch (...) { eos_static_err(msgFormat, event.c_str(), space.c_str(), fileId, __FUNCTION__, "Caught an unknown exception"); } } //------------------------------------------------------------------------------ // Return map from EOS space name to tape-aware GC statistics //------------------------------------------------------------------------------ std::map MultiSpaceTapeGc::getStats() const { const char* const msgFormat = "msg=\"Unable to get statistics about tape-aware garbage collectors: %s\""; try { if (!m_tapeEnabled) { return std::map(); } return m_gcs.getStats(); } catch (std::exception& ex) { eos_static_err(msgFormat, ex.what()); } catch (...) { eos_static_err(msgFormat, "Caught an unknown exception"); } return std::map(); } //---------------------------------------------------------------------------- // Handles a cmd=SFS_FSCTL_PLUGIO arg1=tgc request //---------------------------------------------------------------------------- int MultiSpaceTapeGc::handleFSCTL_PLUGIO_tgc(XrdOucErrInfo& error, eos::common::VirtualIdentity& vid, const XrdSecEntity* client) { try { if (vid.host != "localhost" && vid.host != "localhost.localdomain") { std::ostringstream replyMsg, logMsg; replyMsg << __FUNCTION__ << ": System access restricted - unauthorized identity used"; logMsg << "msg=\"" << replyMsg.str() << "\""; eos_static_err(logMsg.str().c_str()); error.setErrInfo(EACCES, replyMsg.str().c_str()); return SFS_ERROR; } if (!m_tapeEnabled) { std::ostringstream replyMsg, logMsg; replyMsg << __FUNCTION__ << ": Support for tape is not enabled"; logMsg << "msg=\"" << replyMsg.str() << "\""; eos_static_err(logMsg.str().c_str()); error.setErrInfo(ENOTSUP, replyMsg.str().c_str()); return SFS_ERROR; } const uint64_t replySize = 1048576; // 1 MiB char* const reply = static_cast(malloc(replySize)); if (!reply) { std::ostringstream replyMsg, logMsg; replyMsg << __FUNCTION__ << ": Failed to allocate memory for reply: replySize=" << replySize; logMsg << "msg=\"" << replyMsg.str() << "\""; eos_static_err(logMsg.str().c_str()); error.setErrInfo(ENOMEM, replyMsg.str().c_str()); return SFS_ERROR; } std::ostringstream json; try { m_gcs.toJson(json, replySize - 1); } catch (MaxLenExceeded& ml) { std::ostringstream msg; msg << "msg=\"" << ml.what() << "\""; eos_static_err(msg.str().c_str()); error.setErrInfo(ERANGE, ml.what()); return SFS_ERROR; } std::strncpy(reply, json.str().c_str(), replySize - 1); reply[replySize - 1] = '\0'; // Ownership of reply is taken by the xrd_buff object. // Error then takes ownership of the xrd_buff object XrdOucBuffer* const xrd_buff = new XrdOucBuffer(reply, replySize); xrd_buff->SetLen(strlen(reply + 1)); error.setErrInfo(xrd_buff->BuffSize(), xrd_buff); return SFS_DATA; } catch (std::exception& ex) { eos_static_err("msg=\"handleFSCTL_PLUGIO_tgc failed: %s\"", ex.what()); } catch (...) { eos_static_err("msg=\"handleFSCTL_PLUGIO_tgc failed: Caught an unknown exception\""); } error.setErrInfo(ECANCELED, "handleFSCTL_PLUGIO_tgc failed"); return SFS_ERROR; } //------------------------------------------------------------------------------ // Enable garbage collection //------------------------------------------------------------------------------ void MultiSpaceTapeGc::setTapeEnabled(const std::set& spaces) { std::lock_guard workerLock(m_gcStartupMutex); m_tapeEnabled = true; m_spaces.insert(spaces.begin(), spaces.end()); } //------------------------------------------------------------------------------ // Start garbage collection for the specified EOS spaces //------------------------------------------------------------------------------ void MultiSpaceTapeGc::start() { std::lock_guard workerLock(m_gcStartupMutex); // Starting garbage collecton requires it to have been enabled if (!m_tapeEnabled) { std::ostringstream msg; msg << __FUNCTION__ << " failed: Trying to start garbage collection without enabling first"; throw GcIsNotEnabled(msg.str()); } if (m_gcIsActive) { std::ostringstream msg; msg << __FUNCTION__ << " failed: Garbage collection has already been started"; throw GcAlreadyStarted(msg.str()); } for (const auto& space : m_spaces) { m_gcs.createGc(space); } std::function entryPoint = std::bind( &MultiSpaceTapeGc::workerThreadEntryPoint, this); m_worker = std::make_unique(entryPoint); m_gcIsActive = true; } //------------------------------------------------------------------------------ // Stop garbage collection for all previously specified EOS spaces //------------------------------------------------------------------------------ void MultiSpaceTapeGc::stop() { std::lock_guard workerLock(m_gcStartupMutex); try { if (m_worker) { m_stop = true; m_worker->join(); m_worker.reset(); } } catch (std::exception& ex) { eos_static_err("msg=\"%s\"", ex.what()); } catch (...) { eos_static_err("msg=\"Caught an unknown exception\""); } m_gcs.destroyAllGc(); m_gcsPopulatedUsingQdb = false; m_gcIsActive = false; } //---------------------------------------------------------------------------- // Check if garbage collection is active //---------------------------------------------------------------------------- bool MultiSpaceTapeGc::isGcActive() { return m_gcIsActive; } //------------------------------------------------------------------------------ // Entry point for the worker thread of this object //------------------------------------------------------------------------------ void MultiSpaceTapeGc::workerThreadEntryPoint() noexcept { try { populateGcsUsingQdb(); m_gcsPopulatedUsingQdb = true; m_gcs.startGcWorkerThreads(); } catch (std::exception& ex) { eos_static_crit("msg=\"Worker thread of the multi-space tape-aware garbage collector failed: %s\"", ex.what()); } catch (...) { eos_static_crit("msg=\"Worker thread of the multi-space tape-aware garbage collector failed:" " Caught an unknown exception\""); } } //---------------------------------------------------------------------------- // Populate the in-memory LRUs of the tape garbage collectors using Quark DB //---------------------------------------------------------------------------- void MultiSpaceTapeGc::populateGcsUsingQdb() { eos_static_info("msg=\"Starting to populate the meta-data of the tape-aware garbage collectors\""); const auto startTgcPopulation = time(nullptr); const auto gcSpaces = m_gcs.getSpaces(); uint64_t nbFilesScanned = 0; auto gcSpaceToFiles = m_mgm.getSpaceToDiskReplicasMap(gcSpaces, m_stop, nbFilesScanned); // Build up space GC LRU structures whilst reducing space file lists for (auto& spaceAndFiles : gcSpaceToFiles) { const auto& space = spaceAndFiles.first; auto& files = spaceAndFiles.second; auto& gc = m_gcs.getGc(space); { std::ostringstream msg; msg << "msg=\"About to populate the tape-aware GC meta-data for an EOS space\" space=\"" << space << "\" nbFiles=" << files.size(); eos_static_info(msg.str().c_str()); } for (auto fileItor = files.begin(); fileItor != files.end();) { if (m_stop) { eos_static_info("msg=\"Requested to stop populating the meta-data of the tape-aware garbage collectors\""); return; } gc.fileAccessed(fileItor->id); fileItor = files.erase(fileItor); } } { const auto populationDurationSecs = time(nullptr) - startTgcPopulation; std::ostringstream msg; msg << "msg=\"Finished populating the meta-data of the tape-aware garbage collectors\" nbFilesScanned=" << nbFilesScanned << " durationSecs=" << populationDurationSecs; eos_static_info(msg.str().c_str()); } } EOSTGCNAMESPACE_END