// ----------------------------------------------------------------------
// File: FileSystem.cc
// Author: Andreas-Joachim Peters - CERN
// ----------------------------------------------------------------------
/************************************************************************
* EOS - the CERN Disk Storage System *
* Copyright (C) 2011 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version.
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see .*
************************************************************************/
#include "fst/storage/FileSystem.hh"
#include "fst/XrdFstOfs.hh"
#include "fst/ScanDir.hh"
#include "fst/Config.hh"
#include "fst/utils/DiskMeasurements.hh"
#include "common/Constants.hh"
#include "qclient/shared/SharedHashSubscription.hh"
#ifdef __APPLE__
#define O_DIRECT 0
#endif
EOSFSTNAMESPACE_BEGIN
// Set of key updates to be tracked at the file system level
std::set FileSystem::sFsUpdateKeys {
"id", "uuid", "bootsenttime",
eos::common::SCAN_IO_RATE_NAME,
eos::common::SCAN_ENTRY_INTERVAL_NAME,
eos::common::SCAN_RAIN_ENTRY_INTERVAL_NAME,
eos::common::SCAN_DISK_INTERVAL_NAME,
eos::common::SCAN_NS_INTERVAL_NAME,
eos::common::SCAN_NS_RATE_NAME };
//------------------------------------------------------------------------------
// Constructor
//------------------------------------------------------------------------------
FileSystem::FileSystem(const common::FileSystemLocator& locator,
mq::MessagingRealm* realm) :
eos::common::FileSystem(locator, realm, true),
mLocalId(0ul), mLocalUuid(""), mScanDir(nullptr), mFileIO(nullptr)
{
last_blocks_free = 0;
last_status_broadcast = 0;
seqBandwidth = 0;
IOPS = 0;
mLocalBootStatus = eos::common::BootStatus::kDown;
mRecoverable = false;
mFileIO.reset(FileIoPlugin::GetIoObject(mLocator.getStoragePath()));
if (mRealm->haveQDB()) {
// Subscribe to the underlying SharedHash object to get updates
mSubscription = mq::SharedHashWrapper(mRealm, mHashLocator).subscribe();
if (mSubscription) {
using namespace std::placeholders;
mSubscription->attachCallback(std::bind(&FileSystem::ProcessUpdateCb,
this, _1));
}
}
}
//------------------------------------------------------------------------------
// Destructor
//------------------------------------------------------------------------------
FileSystem::~FileSystem()
{
if (mSubscription) {
mSubscription->detachCallback();
}
mScanDir.release();
mFileIO.release();
// Notify the MGM this file system is down
SetStatus(eos::common::BootStatus::kDown);
}
//------------------------------------------------------------------------------
// Process shared hash update
//------------------------------------------------------------------------------
void
FileSystem::ProcessUpdateCb(qclient::SharedHashUpdate&& upd)
{
if (sFsUpdateKeys.find(upd.key) != sFsUpdateKeys.end()) {
eos_static_info("msg=\"process update callback\" key=%s value=%s",
upd.key.c_str(), upd.value.c_str());
if (upd.key == "id") {
try {
mLocalId = std::stoul(upd.value);
} catch (...) {}
} else if (upd.key == "uuid") {
mLocalUuid = upd.value;
}
// @note handle here the updates but make sure not to access or set any
// shared hash values as this will trigger a deadlock. We are now called
// from the shared hash itself that digest the updates and also pushes them
// through a subscriber to us. Digesting these update is done in an
// exclusive lock region that protects the contents of the shared hash -
// therefore we risk ending up in a deadlock situation
gOFS.Storage->ProcessFsConfigChange(this, upd.key, upd.value);
}
}
//------------------------------------------------------------------------------
// Broadcast given error message
//------------------------------------------------------------------------------
void
FileSystem::BroadcastError(const char* msg)
{
if (!gOFS.sShutdown) {
SetStatus(eos::common::BootStatus::kOpsError);
SetError(errno ? errno : EIO, msg);
}
}
//------------------------------------------------------------------------------
// Broadcast given error code and message
//------------------------------------------------------------------------------
void
FileSystem::BroadcastError(int errc, const char* errmsg)
{
if (!gOFS.sShutdown) {
SetStatus(eos::common::BootStatus::kOpsError);
SetError(errno ? errno : EIO, errmsg);
}
}
//------------------------------------------------------------------------------
// Set given error code and message
//------------------------------------------------------------------------------
void
FileSystem::SetError(int errc, const char* errmsg)
{
if (errc) {
eos_static_err("setting errc=%d errmsg=%s", errc, errmsg ? errmsg : "");
}
if (!SetLongLong("stat.errc", errc)) {
eos_static_err("cannot set errcode for filesystem %s", GetQueuePath().c_str());
}
if (errmsg && strlen(errmsg) && !SetString("stat.errmsg", errmsg)) {
eos_static_err("cannot set errmsg for filesystem %s", GetQueuePath().c_str());
}
}
//------------------------------------------------------------------------------
// Get statfs info about mountpoint
//------------------------------------------------------------------------------
std::unique_ptr
FileSystem::GetStatfs()
{
if (!GetPath().length()) {
return nullptr;
}
std::unique_ptr statFs;
if (mFileIO) {
statFs = mFileIO->GetStatfs();
}
if ((!statFs) && GetPath().length()) {
eos_err("msg=\"cannot statfs\" path=\"%s\"", GetPath().c_str());
BroadcastError("cannot statfs");
return nullptr;
} else {
eos_static_debug("ec=%d error=%s recover=%d", GetStatus(),
GetString("stat.errmsg").c_str(), mRecoverable);
if ((GetStatus() == eos::common::BootStatus::kOpsError) && mRecoverable) {
if (GetString("stat.errmsg") == "cannot statfs") {
// reset the statfs error
SetStatus(eos::common::BootStatus::kBooted);
SetError(0, "");
}
}
}
return statFs;
}
//------------------------------------------------------------------------------
// Configure scanner thread - possibly start the scanner
//------------------------------------------------------------------------------
void
FileSystem::ConfigScanner(Load* fst_load, const std::string& key,
long long value)
{
// Don't scan filesystems which are 'remote'
if (GetPath()[0] != '/') {
return;
}
// If not running then create scanner thread with default parameters
if (mLocalId && !mLocalUuid.empty()) {
if (mScanDir == nullptr) {
mScanDir.reset(new ScanDir(GetPath().c_str(), mLocalId, fst_load, true));
eos_info("msg=\"started ScanDir thread with default parameters\" fsid=%d",
mLocalId);
}
mScanDir->SetConfig(key, value);
} else {
eos_static_notice("msg=\"skip scanner config for partial file system\" "
"queue=\"%s\"", GetQueuePath().c_str());
}
}
//------------------------------------------------------------------------------
// Set file system boot status
//------------------------------------------------------------------------------
void
FileSystem::SetStatus(eos::common::BootStatus status)
{
eos::common::FileSystem::SetStatus(status);
if (mLocalBootStatus == status) {
return;
}
eos_debug("before=%d after=%d", mLocalBootStatus.load(), status);
if ((mLocalBootStatus == eos::common::BootStatus::kBooted) &&
(status == eos::common::BootStatus::kOpsError)) {
mRecoverable = true;
} else {
mRecoverable = false;
}
mLocalBootStatus = status;
}
//------------------------------------------------------------------------------
// Get file system disk performance metrics eg. IOPS/seq bandwidth
//------------------------------------------------------------------------------
void
FileSystem::IoPing()
{
IOPS = 0;
seqBandwidth = 0;
// Exclude 'remote' disks
if (GetPath()[0] != '/') {
eos_static_notice("msg=\"skip disk measurements for \'remote\' disk\" "
"path=%s", GetPath().c_str());
return;
}
// Create temporary file (1GB) name on the mountpoint
uint64_t fn_size = 1 << 30; // 1 GB
const std::string fn_path = eos::fst::MakeTemporaryFile(GetPath());
if (fn_path.empty()) {
eos_static_err("msg=\"failed to create tmp file\" base_path=%s",
GetPath().c_str());
return;
}
// Open the file for direct access
int fd = open(fn_path.c_str(), O_RDWR | O_TRUNC | O_DIRECT | O_SYNC);
if (fd == -1) {
eos_static_err("msg=\"failed to open file\" path=%s", fn_path.c_str());
return;
}
// Unlink the file so that we don't leave any behind even in the case of
// a crash of the FST. The file descritor will still be valid for use.
(void) unlink(fn_path.c_str());
// Fill the file up to the given size with random data
if (!eos::fst::FillFileGivenSize(fd, fn_size)) {
eos_static_err("msg=\"failed to fill file\" path=%s", fn_path.c_str());
(void) close(fd);
return;
}
using namespace std::chrono;
auto start_iops = high_resolution_clock::now();
IOPS = eos::fst::ComputeIops(fd);
auto end_iops = high_resolution_clock::now();
uint64_t rd_buf_size = 4 * (1 << 20); // 4MB
auto start_bw = high_resolution_clock::now();
seqBandwidth = eos::fst::ComputeBandwidth(fd, rd_buf_size);
auto end_bw = high_resolution_clock::now();
(void) close(fd);
eos_info("bw=%lld iops=%d iops_time=%llums bw_time=%llums", seqBandwidth, IOPS,
duration_cast(end_iops - start_iops).count(),
duration_cast(end_bw - start_bw).count());
return;
}
//------------------------------------------------------------------------------
// Get IO statistics from the `sys.iostats` xattr
//------------------------------------------------------------------------------
bool
FileSystem::GetFileIOStats(std::map& map)
{
if (!mFileIO) {
return false;
}
// Avoid querying IO stats attributes for certain storage types
if (mFileIO->GetIoType() == "DavixIo" ||
mFileIO->GetIoType() == "XrdIo") {
return false;
}
std::string iostats;
mFileIO->attrGet("sys.iostats", iostats);
return eos::common::StringConversion::GetKeyValueMap(iostats.c_str(),
map, "=", ",");
}
//------------------------------------------------------------------------------
// Get health information from the `sys.health` xattr
//------------------------------------------------------------------------------
bool
FileSystem::GetHealthInfo(std::map& map)
{
if (!mFileIO) {
return false;
}
// Avoid querying Health attributes for certain storage types
if (mFileIO->GetIoType() == "DavixIo" ||
mFileIO->GetIoType() == "XrdIo") {
return false;
}
// Avoid querying Health attributes for certain storage types
if (mFileIO->GetIoType() == "DavixIo" ||
mFileIO->GetIoType() == "XrdIo") {
return false;
}
std::string health;
mFileIO->attrGet("sys.health", health);
return eos::common::StringConversion::GetKeyValueMap(health.c_str(),
map, "=", ",");
}
//----------------------------------------------------------------------------
// Decide if we should run the boot procedure for current file system
//----------------------------------------------------------------------------
bool
FileSystem::ShouldBoot(const std::string& trigger)
{
if ((trigger == "id") || (trigger == "uuid")) {
// Check if we are auto-booting
if (gConfig.autoBoot &&
(GetStatus() <= eos::common::BootStatus::kDown) &&
(GetConfigStatus() > eos::common::ConfigStatus::kOff)) {
return true;
}
}
if (trigger == "bootsenttime") {
uint64_t bootcheck_val = GetLongLong("bootcheck");
if (GetInternalBootStatus() == eos::common::BootStatus::kBooted) {
if (bootcheck_val) {
eos_static_info("msg=\"boot enforced\" queue=%s status=%d check=%lld",
GetQueuePath().c_str(), GetStatus(), bootcheck_val);
return true;
} else {
eos_static_info("msg=\"skip boot, already booted\" queue=%s "
"status=%d check=%lld", GetQueuePath().c_str(),
GetStatus(), bootcheck_val);
SetStatus(eos::common::BootStatus::kBooted);
return false;
}
} else {
eos_static_info("msg=\"do boot as we're not yet booted\" queue=%s "
"status=%d check=%lld", GetQueuePath().c_str(),
GetStatus(), bootcheck_val);
return true;
}
}
if (trigger.empty()) {
return true;
}
return false;
}
EOSFSTNAMESPACE_END