//------------------------------------------------------------------------------ // File: FsckEntryTests.cc // Author: Elvin-Alin Sindrilaru - CERN //------------------------------------------------------------------------------ /************************************************************************ * EOS - the CERN Disk Storage System * * Copyright (C) 2019 CERN/Switzerland * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see .* ************************************************************************/ #include "gtest/gtest.h" #include "gmock/gmock.h" #include "common/LayoutId.hh" #define IN_TEST_HARNESS #include "mgm/fsck/FsckEntry.hh" #undef IN_TEST_HARNESS using ::testing::Return; using eos::common::LayoutId; using eos::common::FileSystem; static constexpr uint64_t kTimestampSec {1560331003}; static constexpr uint64_t kFileSize {256256}; static std::string kChecksum {"74d77c3a"}; //------------------------------------------------------------------------------ // MockRepairJob that doesn't trigger any TPC transfer //------------------------------------------------------------------------------ class MockRepairJob: public eos::mgm::FsckRepairJob { public: MockRepairJob(eos::common::FileId::fileid_t fid, FileSystem::fsid_t fsid_src, FileSystem::fsid_t fsid_trg = 0, std::set exclude_srcs = {}, std::set exclude_dsts = {}, bool drop_src = true, const std::string& app_tag = "fsck_mock"): eos::mgm::FsckRepairJob(fid, fsid_src, fsid_trg, exclude_srcs, exclude_dsts, drop_src, app_tag) {} MOCK_METHOD0(DoItNoExcept, void()); virtual void DoIt() noexcept { return DoItNoExcept(); } MOCK_CONST_METHOD0(GetStatus, eos::mgm::FsckRepairJob::Status()); }; //------------------------------------------------------------------------------ // Test fixture for the FsckEntry //------------------------------------------------------------------------------ class FsckEntryTest: public ::testing::Test { protected: //---------------------------------------------------------------------------- //! Set up //------------------------------------------------------------------------------ void SetUp() override { mRepairJob = nullptr; mFsckEntry = std::unique_ptr (new eos::mgm::FsckEntry(1234567, {3}, "none", nullptr)); PopulateMgmFmd(); for (auto fsid : mFsckEntry->mMgmFmd.locations()) { PopulateFstFmd(fsid); } // Redefine the repair factory to return a MockRepairJob mFsckEntry->mRepairFactory = [&](eos::common::FileId::fileid_t fid, FileSystem::fsid_t fsid_src, FileSystem::fsid_t fsid_trg , std::set exclude_srcs, std::set exclude_dsts, bool drop_src, const std::string & app_tag, bool repair_excluded) { if (mRepairJob) { return mRepairJob; } else { mRepairJob.reset(new MockRepairJob(fid, fsid_src, fsid_trg, exclude_srcs, exclude_dsts, drop_src, app_tag)); } return mRepairJob; }; } //---------------------------------------------------------------------------- //! Tear down - not needed as everything is already handled by destructor //---------------------------------------------------------------------------- // void TearDown() override; //---------------------------------------------------------------------------- //! Populate with dummy data the MGM fmd structure //---------------------------------------------------------------------------- void PopulateMgmFmd() { auto& fmd = mFsckEntry->mMgmFmd; // Populate FileMd.proto fmd.set_id(1234567); fmd.set_cont_id(199991); fmd.set_uid(1001); fmd.set_gid(2002); fmd.set_size(kFileSize); // Layout with two replicas, adler checkusm fmd.set_layout_id(std::stoul("0x0100112", nullptr, 16)); fmd.set_name("test_file.dat"); // Date: 06/12/2019 @ 9:16am struct timespec ts; ts.tv_sec = kTimestampSec; ts.tv_nsec = 0; fmd.set_ctime(&ts, sizeof(ts)); fmd.set_mtime(&ts, sizeof(ts)); size_t xs_sz; auto xs_buff = eos::common::StringConversion::Hex2BinDataChar(kChecksum, xs_sz); fmd.set_checksum(xs_buff.get(), xs_sz); fmd.add_locations(3); fmd.add_locations(5); } //---------------------------------------------------------------------------- //! Populate with dummy data the FST fmd structure //---------------------------------------------------------------------------- void PopulateFstFmd(FileSystem::fsid_t fsid) { std::unique_ptr finfo { new eos::mgm::FstFileInfoT("/data01/00000000/0012d687", eos::mgm::FstErr::None)}; finfo->mDiskSize = kFileSize; auto& proto_fmd = finfo->mFstFmd.mProtoFmd; // Populate FmdBase.proto proto_fmd.set_fid(123456); proto_fmd.set_cid(199991); proto_fmd.set_fsid(fsid); proto_fmd.set_ctime(kTimestampSec); proto_fmd.set_ctime_ns(0); proto_fmd.set_mtime(kTimestampSec); proto_fmd.set_mtime_ns(0); proto_fmd.set_atime(kTimestampSec); proto_fmd.set_atime_ns(0); // proto_fmd.set_checktime() unset proto_fmd.set_size(kFileSize); proto_fmd.set_disksize(kFileSize); proto_fmd.set_mgmsize(kFileSize); proto_fmd.set_checksum(kChecksum); proto_fmd.set_diskchecksum(kChecksum); proto_fmd.set_mgmchecksum(kChecksum); proto_fmd.set_lid(std::stoul("0x0100112", nullptr, 16)); proto_fmd.set_uid(1001); proto_fmd.set_gid(2002); proto_fmd.set_filecxerror(0); proto_fmd.set_blockcxerror(0); proto_fmd.set_layouterror(0); proto_fmd.set_locations("3,5,"); mFsckEntry->mFstFileInfo.insert(std::make_pair(fsid, std::move(finfo))); } std::unique_ptr mFsckEntry; std::shared_ptr mRepairJob; }; //------------------------------------------------------------------------------ // MGM checksum difference //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, MgmXsDiff) { using eos::common::StringConversion; mFsckEntry->mReportedErr = eos::common::FsckErr::MgmXsDiff; size_t xs_sz; auto xs_buff = eos::common::StringConversion::Hex2BinDataChar("aabbccdd", xs_sz); auto& mgm_fmd = mFsckEntry->mMgmFmd; mgm_fmd.set_checksum(xs_buff.get(), xs_sz); // The new MGM FMD chechsum should be different from the initial one ASSERT_STRNE(kChecksum.c_str(), StringConversion::BinData2HexString (mgm_fmd.checksum().c_str(), SHA256_DIGEST_LENGTH, LayoutId::GetChecksumLen(mgm_fmd.layout_id())).c_str()); ASSERT_TRUE(mFsckEntry->Repair()); // After a successful repair the checksum should match the original one ASSERT_STREQ(kChecksum.c_str(), StringConversion::BinData2HexString (mgm_fmd.checksum().c_str(), SHA256_DIGEST_LENGTH, LayoutId::GetChecksumLen(mgm_fmd.layout_id())).c_str()); } //------------------------------------------------------------------------------ // MGM checksum difference and one FST replica can not be contacted //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, MgmXsDiffFstNoContact) { using eos::common::StringConversion; mFsckEntry->mReportedErr = eos::common::FsckErr::MgmXsDiff; size_t xs_sz; auto xs_buff = eos::common::StringConversion::Hex2BinDataChar("aabbccdd", xs_sz); auto& mgm_fmd = mFsckEntry->mMgmFmd; mgm_fmd.set_checksum(xs_buff.get(), xs_sz); // The new MGM FMD chechsum should be different from the initial one ASSERT_STRNE(kChecksum.c_str(), StringConversion::BinData2HexString (mgm_fmd.checksum().c_str(), SHA256_DIGEST_LENGTH, LayoutId::GetChecksumLen(mgm_fmd.layout_id())).c_str()); // Mark one of the FST replicas as NoContact auto& finfo = mFsckEntry->mFstFileInfo.begin()->second; finfo->mFstErr = eos::mgm::FstErr::NoContact; ASSERT_FALSE(mFsckEntry->Repair()); } //------------------------------------------------------------------------------ // MGM size difference //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, MgmSzDiff) { mFsckEntry->mReportedErr = eos::common::FsckErr::MgmSzDiff; auto& mgm_fmd = mFsckEntry->mMgmFmd; mgm_fmd.set_size(123456789); // The new MGM FMD size should be different from the initial one ASSERT_NE(kFileSize, mgm_fmd.size()); ASSERT_TRUE(mFsckEntry->Repair()); // After a successful repair the size should match the original one ASSERT_EQ(kFileSize, mgm_fmd.size()); } //------------------------------------------------------------------------------ // FST size difference //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, FstSzDiff) { // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::FstSzDiff; // All FST sizes match, repair succeeds - no bad replicas ASSERT_TRUE(mFsckEntry->Repair()); // All FST fmd sizes are different, repair fails - no good replicas for (auto& pair : mFsckEntry->mFstFileInfo) { auto& finfo = pair.second; finfo->mFstFmd.mProtoFmd.set_disksize(1); } ASSERT_FALSE(mFsckEntry->Repair()); // Set the first FST fmd disksize to the correct one - repair successful std::shared_ptr repair_job = mFsckEntry->mRepairFactory(0, 0, 0, {}, {}, true, "none", false); MockRepairJob* mock_job = static_cast(repair_job.get()); EXPECT_CALL(*mock_job, DoItNoExcept); EXPECT_CALL(*mock_job, GetStatus). WillOnce(Return(eos::mgm::FsckRepairJob::Status::OK)); auto& finfo = mFsckEntry->mFstFileInfo.begin()->second; finfo->mFstFmd.mProtoFmd.set_disksize(finfo->mFstFmd.mProtoFmd.size()); ASSERT_TRUE(mFsckEntry->Repair()); } //------------------------------------------------------------------------------ // FST xs difference //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, FstXsDiff) { // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::FstXsDiff; // All FST xs match, repair succeeds - no bad replicas ASSERT_TRUE(mFsckEntry->Repair()); // All FST fmd xs are different, repair failes - no good replicas for (auto& pair : mFsckEntry->mFstFileInfo) { auto& finfo = pair.second; finfo->mFstFmd.mProtoFmd.set_diskchecksum("abcdefab"); } ASSERT_FALSE(mFsckEntry->Repair()); // Set the first FST fmd xs to the correct one - repair successful // @note the repair factory always returns the same repair job object so that // we can easily set expecteations on it std::shared_ptr repair_job = mFsckEntry->mRepairFactory(0, 0, 0, {}, {}, true, "none", false); MockRepairJob* mock_job = static_cast(repair_job.get()); EXPECT_CALL(*mock_job, DoItNoExcept); EXPECT_CALL(*mock_job, GetStatus). WillOnce(Return(eos::mgm::FsckRepairJob::Status::OK)); auto& finfo = mFsckEntry->mFstFileInfo.begin()->second; finfo->mFstFmd.mProtoFmd.set_diskchecksum(kChecksum); ASSERT_TRUE(mFsckEntry->Repair()); } //------------------------------------------------------------------------------ // Unregistered replica when file has enough replicas gets dropped // Begin: Final: // MGM: 3 5 MGM: 3 5 // FST: 3 5 101(u) FST: 3 5 //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, UnregReplicaDrop) { FileSystem::fsid_t unreg_fsid = 101; // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::UnregRepl; // Add one more FST replica which is unregistered PopulateFstFmd(unreg_fsid); ASSERT_TRUE(mFsckEntry->Repair()); // The replica on FS 101 should be dropped from the map ASSERT_TRUE(mFsckEntry->mFstFileInfo.find(unreg_fsid) == mFsckEntry->mFstFileInfo.end()); ASSERT_TRUE(mFsckEntry->mFstFileInfo.size() == LayoutId::GetStripeNumber(mFsckEntry->mMgmFmd.layout_id()) + 1); } //------------------------------------------------------------------------------ // Unregistered replica when file doesn't have enough replicas gets added // Begin: Final: // MGM: 5 MGM: 5 101 // FST: 5 101(u) FST: 5 101 //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, UnregReplicaAdd) { FileSystem::fsid_t unreg_fsid = 101; // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::UnregRepl; // Add one more FST replica which is unregistered PopulateFstFmd(unreg_fsid); // Drop the replica on fsid 3 FileSystem::fsid_t drop_fsid = 3; ASSERT_EQ(1, mFsckEntry->mFstFileInfo.erase(drop_fsid)); auto locations = mFsckEntry->mMgmFmd.mutable_locations(); for (auto it = locations->begin(); it != locations->end(); ++it) { if (*it == drop_fsid) { locations->erase(it); break; } } ASSERT_TRUE(mFsckEntry->Repair()); // The replica on FS 101 should be added to the map and MGM meta data info ASSERT_TRUE(mFsckEntry->mFstFileInfo.find(unreg_fsid) != mFsckEntry->mFstFileInfo.end()); ASSERT_TRUE(mFsckEntry->mFstFileInfo.size() == LayoutId::GetStripeNumber(mFsckEntry->mMgmFmd.layout_id()) + 1); } //------------------------------------------------------------------------------ // Over-replicated files should drop some of their replicas to reach the // nominal number of replicas of the layout // Begin: Final: // MGM: 3 5 6 7 MGM: 3 5 // FST: 3 5 6 7 FST: 3 5 //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, FileOverReplicated) { // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::DiffRepl; for (const auto& elem : { 6, 7 }) { PopulateFstFmd(elem); mFsckEntry->mMgmFmd.add_locations(elem); } // Over-replicated ASSERT_TRUE(mFsckEntry->mFstFileInfo.size() > LayoutId::GetStripeNumber(mFsckEntry->mMgmFmd.layout_id()) + 1); ASSERT_TRUE(mFsckEntry->Repair()); ASSERT_TRUE(mFsckEntry->mFstFileInfo.size() == LayoutId::GetStripeNumber(mFsckEntry->mMgmFmd.layout_id()) + 1); } //------------------------------------------------------------------------------ // Under-replicated files should trigger new FsckRepair jobs that create new // replicas up to the nominal number of replicas of the layout // Begin: Final: // MGM: 3 MGM: 3 x // FST: 3 FST: 3 x //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, FileUnderReplicated) { // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::DiffRepl; // Drop the replica on fsid 5 FileSystem::fsid_t drop_fsid = 5; ASSERT_EQ(1, mFsckEntry->mFstFileInfo.erase(drop_fsid)); auto locations = mFsckEntry->mMgmFmd.mutable_locations(); for (auto it = locations->begin(); it != locations->end(); ++it) { if (*it == drop_fsid) { locations->erase(it); break; } } // Under-replicated ASSERT_TRUE(mFsckEntry->mFstFileInfo.size() < LayoutId::GetStripeNumber(mFsckEntry->mMgmFmd.layout_id()) + 1); // Set the expectations // @note the repair factory always returns the same repair job object so that // we can easily set expecteations on it std::shared_ptr repair_job = mFsckEntry->mRepairFactory(0, 0, 0, {}, {}, false, "none", false); MockRepairJob* mock_job = static_cast(repair_job.get()); EXPECT_CALL(*mock_job, DoItNoExcept).Times(1); EXPECT_CALL(*mock_job, GetStatus). WillOnce(Return(eos::mgm::FsckRepairJob::Status::OK)); ASSERT_TRUE(mFsckEntry->Repair()); } //------------------------------------------------------------------------------ // Missgin replica should be dropped from the MGM file metadata and a repair // job shoudl bring the number of replicas back up to nominal number // Begin: Final: // MGM: 3 5 MGM: 3 y // FST: 3 FST: 3 y //------------------------------------------------------------------------------ TEST_F(FsckEntryTest, FileMissingReplica) { // Set the desired type of error mFsckEntry->mReportedErr = eos::common::FsckErr::MissRepl; // Mark replica on file system 5 as not on disk FileSystem::fsid_t miss_fsid = 5; auto it = mFsckEntry->mFstFileInfo.find(miss_fsid); it->second->mFstErr = eos::mgm::FstErr::NotOnDisk; // Set the expectations // @note the repair factory always returns the same repair job object so that // we can easily set expecteations on it std::shared_ptr repair_job = mFsckEntry->mRepairFactory(0, 0, 0, {}, {}, false, "none", false); MockRepairJob* mock_job = static_cast(repair_job.get()); EXPECT_CALL(*mock_job, DoItNoExcept).Times(1); EXPECT_CALL(*mock_job, GetStatus). WillOnce(Return(eos::mgm::FsckRepairJob::Status::OK)); ASSERT_TRUE(mFsckEntry->Repair()); // The missing replicas should no longer be registered with the MGM fmd bool found = false; for (const auto& fsid : mFsckEntry->mMgmFmd.locations()) { if (fsid == miss_fsid) { found = true; break; } } ASSERT_FALSE(found); }