//------------------------------------------------------------------------------ //! file DrainFS.hh //------------------------------------------------------------------------------ /************************************************************************ * EOS - the CERN Disk Storage System * * Copyright (C) 2019 CERN/Switzerland * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see .* ************************************************************************/ #pragma once #include "mgm/Namespace.hh" #include "mgm/FileSystem.hh" #include "namespace/interface/IFsView.hh" #include "common/Logging.hh" #include #include #include //! Forward declarations namespace eos { namespace common { class ThreadPool; } } EOSMGMNAMESPACE_BEGIN class DrainTransferJob; class TableFormatterBase; //------------------------------------------------------------------------------ //! @brief Class implementing the draining of a filesystem //------------------------------------------------------------------------------ class DrainFs: public eos::common::LogId { public: //---------------------------------------------------------------------------- //! State of file system drain operation //---------------------------------------------------------------------------- enum class State {Done, Failed, Running, Rerun}; //---------------------------------------------------------------------------- //! Constructor //! //! @param thread_pool drain thread pool to use for jobs //! @param fs_view file system view //! @param src_fsid filesystem id to drain //! @param dst_fsid file system where to drain //---------------------------------------------------------------------------- DrainFs(eos::common::ThreadPool& thread_pool, eos::IFsView* fs_view, eos::common::FileSystem::fsid_t src_fsid, eos::common::FileSystem::fsid_t dst_fsid = 0); //---------------------------------------------------------------------------- //! Destructor //---------------------------------------------------------------------------- virtual ~DrainFs(); //---------------------------------------------------------------------------- //! Signal an ongoing drain to stop //--------------------------------------------------------------------------- inline void SignalStop() { mDrainStop = true; } //--------------------------------------------------------------------------- //! Get drain status //--------------------------------------------------------------------------- inline eos::common::DrainStatus GetDrainStatus() const { return mStatus; } //--------------------------------------------------------------------------- //! Get the file system id //--------------------------------------------------------------------------- inline const eos::common::FileSystem::fsid_t GetFsId() const { return mFsId; } //---------------------------------------------------------------------------- //! Method draining the file system //! //! @return status of the file system at the end //---------------------------------------------------------------------------- State DoIt(); //---------------------------------------------------------------------------- //! Set future holding the result of the drain //! //! @param future future object //---------------------------------------------------------------------------- inline void SetFuture(std::future&& future) { std::swap(mFuture, future); } //---------------------------------------------------------------------------- //! Check if drain fs is still running by inspecting the future object //! //! @return true if running, otherwise false //---------------------------------------------------------------------------- inline bool IsRunning() const { return (mFuture.valid() && (mFuture.wait_for(std::chrono::seconds(0)) != std::future_status::ready)); } //---------------------------------------------------------------------------- //! Populate table with drain jobs info corresponding to the current fs //! //! @param table table objec //! @param show_errors if true then display only failed transfers //! @param itags list of internal tags for info collection //! //! @note: Table header tags must match the order of the internal tags //---------------------------------------------------------------------------- void PrintJobsTable(TableFormatterBase& table, bool show_errors, const std::list& itags) const; private: //---------------------------------------------------------------------------- //! Reset drain counters and status //---------------------------------------------------------------------------- void ResetCounters(); //---------------------------------------------------------------------------- //! Get space defined drain variables i.e. number of retires, number of //! transfers per fs, etc. //! //! @param space space name //! @note method must be called with a lock on gFsView.ViewMutex //---------------------------------------------------------------------------- void GetSpaceConfiguration(const std::string& space); //--------------------------------------------------------------------------- //! Prepare the file system for drain i.e. delay the start by the configured //! amount of timem, set the status //! //! @return true if successful, otherwise false //--------------------------------------------------------------------------- bool PrepareFs(); //--------------------------------------------------------------------------- //! Update the file system state to draining //! //! @return true if successful, otherwise false //--------------------------------------------------------------------------- bool MarkFsDraining(); //--------------------------------------------------------------------------- //! Collect and prepare all the drain jobs //! //! @returns number of drain jobs prepared //--------------------------------------------------------------------------- uint64_t CollectDrainJobs(); //--------------------------------------------------------------------------- //! Update progress of the drain //! //! @return progress state of the drain job //--------------------------------------------------------------------------- State UpdateProgress(); //---------------------------------------------------------------------------- //! Handle running jobs //---------------------------------------------------------------------------- void HandleRunningJobs(); //---------------------------------------------------------------------------- //! Mark file system drain as failed //---------------------------------------------------------------------------- void FailedDrain(); //--------------------------------------------------------------------------- //! Mark file system drain as successful //--------------------------------------------------------------------------- void SuccessfulDrain(); //---------------------------------------------------------------------------- //! Stop ongoing drain jobs - must be called by the same thread supervising //! the draining. //---------------------------------------------------------------------------- void StopJobs(); //---------------------------------------------------------------------------- //! Wait until namespace is booted or drain stop is requested //---------------------------------------------------------------------------- void WaitUntilNamespaceIsBooted() const; //---------------------------------------------------------------------------- //! Get number of running jobs //---------------------------------------------------------------------------- inline uint64_t NumRunningJobs() const { eos::common::RWMutexReadLock rd_lock(mJobsMutex); return mJobsRunning.size(); } //---------------------------------------------------------------------------- //! Get number of failed jobs //---------------------------------------------------------------------------- inline uint64_t NumFailedJobs() const { eos::common::RWMutexReadLock rd_lock(mJobsMutex); return mJobsFailed.size(); } constexpr static std::chrono::seconds sRefreshTimeout {60}; constexpr static std::chrono::seconds sStallTimeout {600}; eos::IFsView* mNsFsView; ///< File system view eos::common::FileSystem::fsid_t mFsId; ///< Drain source fsid eos::common::FileSystem::fsid_t mTargetFsId; /// Drain target fsid eos::common::DrainStatus mStatus; bool mDidRerun; ///< Flag if a rerun was already tried std::atomic mDrainStop; ///< Flag to cancel an ongoing draining std::atomic mMaxJobs; ///< Max number of drain jobs std::chrono::seconds mDrainPeriod; ///< Allowed time for file system to drain std::chrono::time_point mDrainStart; std::chrono::time_point mDrainEnd; //! Collection of failed drain jobs std::set> mJobsFailed; //! Collection of running drain jobs std::list> mJobsRunning; mutable eos::common::RWMutex mJobsMutex; ///< RW mutex protecting job lists eos::common::ThreadPool& mThreadPool; std::future mFuture; uint64_t mTotalFiles; ///< Total number of files to drain uint64_t mPending; ///< Current num. of pending files to drain uint64_t mLastPending; ///< Previous num. of pending files to drain //! Last timestamp when drain progress was recorded std::chrono::time_point mLastProgressTime; //! Last timestamp when drain status was updated std::chrono::time_point mLastUpdateTime; }; EOSMGMNAMESPACE_END