// ----------------------------------------------------------------------
// File: Devices.cc
// Author: Andreas-Joachim Peters - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2023 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see .*
************************************************************************/
#include "common/Constants.hh"
#include "common/Logging.hh"
#include "common/RWMutex.hh"
#include "common/Path.hh"
#include "common/utils/BackOffInvoker.hh"
#include "mgm/Devices.hh"
#include "mgm/XrdMgmOfs.hh"
#include "mgm/FsView.hh"
#include "mgm/Stat.hh"
#include "namespace/Prefetcher.hh"
#include "namespace/interface/IView.hh"
EOSMGMNAMESPACE_BEGIN
//------------------------------------------------------------------------------
// Run asynchronous devices thread
//------------------------------------------------------------------------------
bool
Devices::Start()
{
mThread.reset(&Devices::Recorder, this);
return true;
}
//------------------------------------------------------------------------------
// Cancel the asynchronous devices thread
//------------------------------------------------------------------------------
void
Devices::Stop()
{
mThread.join();
}
//------------------------------------------------------------------------------
// Eternal thread registering device information, which allows to detect
// devices which have been removed
//------------------------------------------------------------------------------
void
Devices::Recorder(ThreadAssistant& assistant) noexcept
{
time_t snoozetime = 900;
if (getenv("EOS_MGM_DEVICES_PUBLISHING_INTERVAL")) {
auto rtime = std::atoi(getenv("EOS_MGM_DEVICES_PUBLISHING_INTERVAL"));
if (rtime==0 || rtime > 86400) {
rtime = 900;
}
}
gOFS->WaitUntilNamespaceIsBooted(assistant);
if (assistant.terminationRequested()) {
return;
}
assistant.wait_for(std::chrono::seconds(15));
eos::common::BackOffInvoker backoff_logger;
while (!assistant.terminationRequested()) {
// Every now and then we wake up
backoff_logger.invoke([&snoozetime]() {
eos_static_info("msg=\"devices thread\" snooze-time=%llu",
snoozetime);
});
if (!gOFS->mMaster->IsMaster()) {
continue;
}
// get the latest info
Extract();
// store in the namespace
Store();
for (int i = 0; i < snoozetime / 1; i++) {
if (assistant.terminationRequested()) {
eos_static_info("%s", "msg=\"devices thread exiting\"");
return;
}
assistant.wait_for(std::chrono::seconds(1));
}
}
eos_static_info("%s", "msg=\"devices thread exiting\"");
}
// Function extracting device information either on request or by the background thread
void
Devices::Extract()
{
gOFS->MgmStats.Add("Devices::Extract", 0,0 , 1);
json_map_t jm = std::make_shared();
space_map_t sp = std::make_shared();
smart_map_t sm = std::make_shared();
std::set fsids;
{
// get all the filesystem which are currently visible quickly
eos::common::RWMutexReadLock fs_rd_lock(FsView::gFsView.ViewMutex);
for ( auto it = FsView::gFsView.mSpaceView.begin(); it != FsView::gFsView.mSpaceView.end(); ++it ) {
// loop over all filesystems
for (auto fsit = FsView::gFsView.mIdView.begin(); fsit != FsView::gFsView.mIdView.end(); ++fsit) {
FileSystem* fs = fsit->second;
if (fs->GetSpace() != it->first) {
// only look at the current space
continue;
}
fsids.insert(fs->GetId());
(*sp)[fs->GetId()]=fs->GetSpace();
}
}
}
// loop over the filesystems and take short locks to extract
for ( auto it= fsids.begin(); it != fsids.end(); ++it ) {
uint64_t id = *it;
{
eos::common::RWMutexReadLock fs_rd_lock(FsView::gFsView.ViewMutex);
FileSystem* fs = FsView::gFsView.mIdView.lookupByID(id);
if (!fs) {
// skip this disappeared
continue;
}
// store the compressed maps
(*jm)[id] = fs->GetString("stat.health.z64smart");
(*sm)[id] = fs->GetString("stat.health");
}
// avoid tight locking loops
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
// decompress without any lock
for ( auto it = (*jm).begin(); it != (*jm).end(); ++it) {
std::string compressedjson = it->second;
std::string ojson;
bool done = eos::common::SymKey::ZDeBase64(compressedjson, ojson);
if (!done) {
eos_static_err("msg=\"failed to decompress JSON smart info from fsid=%lu\"",
it->first);
it->second = "";
} else {
it->second = ojson;
}
}
lastExtraction = time(NULL);
// swap the new map with the current one
setJson(jm);
setSpaceMap(sp);
setSmartMap(sm);
}
void
Devices::Store()
{
gOFS->MgmStats.Add("Devices::Store", 0,0 , 1);
auto jinfo = getJson();
auto sminfo = getSmartMap();
for (auto it=jinfo->begin(); it != jinfo->end(); ++it) {
std::string storagepath = mDevicesPath;
storagepath += "/";
std::string smartstatus="unknown";
if (sminfo->count(it->first)) {
smartstatus = (*sminfo)[it->first];
}
std::string serial;
{
Json::Value root;
std::string errs;
Json::CharReaderBuilder jsonReaderBuilder;
std::unique_ptr const reader(jsonReaderBuilder.newCharReader());
const std::string& ojson = it->second;
if (reader->parse(ojson.c_str(), ojson.c_str() + ojson.size(), &root, &errs)) {
try {
serial = root.isMember("serial_number")?root["serial_number"].asString():"";
} catch (Json::Exception const&) {
}
}
}
if (serial.empty()) {
continue;
}
storagepath += serial; // serial number
storagepath += ".";
storagepath += std::to_string(it->first); // fsid
eos::Prefetcher::prefetchFileMDAndWait(gOFS->eosView, storagepath.c_str());
eos::IFileMD::IFileMDWriteLockerPtr fmdLock;
std::shared_ptr fmd;
try {
fmdLock = gOFS->eosView->getFileWriteLocked(storagepath.c_str());
fmd = fmdLock->getUnderlyingPtr();
errno = 0;
} catch (eos::MDException& e) {
errno = e.getErrno();
eos_static_debug("msg=\"exception\" ec=%d emsg=\"%s\"\n",
e.getErrno(), e.getMessage().str().c_str());
}
if (!fmd) {
// if it does not exist, create it
try {
fmd = gOFS->eosView->createFile(storagepath.c_str(), 0, 0);
fmdLock = std::make_unique(fmd);
fmd->setMTimeNow();
fmd->setCTimeNow();
eos::IFileMD::ctime_t mtime;
fmd->getMTime(mtime);
char btime[256];
snprintf(btime, sizeof(btime), "%lu.%lu", mtime.tv_sec, mtime.tv_nsec);
fmd->setAttribute("sys.eos.btime", btime);
errno = 0;
} catch (eos::MDException& e) {
errno = e.getErrno();
eos_static_debug("msg=\"exception\" ec=%d emsg=\"%s\"\n",
e.getErrno(), e.getMessage().str().c_str());
}
}
// if it exists now, store the latest json and update the mtime
if (fmd) {
fmd->setAttribute("sys.smart.json", it->second);
fmd->setAttribute("sys.smart.status", smartstatus);
fmd->setMTimeNow();
fmdLock.reset(nullptr);
gOFS->eosView->updateFileStore(fmd.get());
}
}
}
EOSMGMNAMESPACE_END