From a72868bd26fd2296c2e2fa0a42985faa3b4855e5 Mon Sep 17 00:00:00 2001 From: Andrei Strelkovskii Date: Tue, 12 Nov 2024 12:28:16 +0100 Subject: [PATCH] merge to stable-23-3: blockstore-server: use-intermediate-write-buffer tag implementation; filestore: compaction tuning, cli tuning, shard autocreation bits (#2470) * moved unstable output from test canondata to test stderr (#2416) * issue-2421: added volume tag which forces user write buffer copying to our internal buffers to avoid writing different data to different replicas of the same volume in case of concurrent buffer modifications by the client (#2431) * issue-1932: automatic filesystem shard configuration upon filesystem creation (#2415) * issue-1932: autosharding params * issue-1932: automatic filesystem shard creation+configuration upon filesystem creation * issue-1932: properly limiting max shard count * issue-1932: fixed signed/unsigned comparison * issue-1932: fixed use-after-free in ut (#2441) * filestore-client: outputting progress in findgarbage; properly processing ForcedOperationStatus errors in forcedcompaction (#2442) * issue-2137: if all ranges in CompactionMap have the 'compacted' flag then GetTop{Compaction,Garbage}Score should return zero (in order not to trigger dud Compaction iterations all the time) (#2455) * issue-2421: safer code + extra ut for use-intermediate-write-buffer volume tag (#2452) * issue-2421: nonrepl disk ut with use-intermediate-write-buffer volume tag * issue-2421: nonrepl disk ut with use-intermediate-write-buffer volume tag - making the intentions a bit more clear * issue-2421: made local->remote request conversion in partition_nonrepl safer, added some other safety checks and comments * fixed build after incorrect merge https://github.com/ydb-platform/nbs/commit/5887fb6ab02139236aa9a6d4826028485b73e130 * fixed build 2 --- .../libs/diagnostics/critical_events.h | 1 + .../part_nonrepl_actor_writeblocks.cpp | 23 +- .../storage/volume/volume_actor_forward.cpp | 50 ++++ 
.../libs/storage/volume/volume_state.cpp | 3 + .../libs/storage/volume/volume_state.h | 6 + .../libs/storage/volume/volume_ut.cpp | 170 +++++++++++- .../libs/storage/volume/volume_ut.h | 2 +- .../apps/client/lib/find_garbage.cpp | 10 +- .../apps/client/lib/forced_compaction.cpp | 6 +- cloud/filestore/config/storage.proto | 13 +- cloud/filestore/libs/service/filestore.h | 1 + cloud/filestore/libs/storage/core/config.cpp | 3 + cloud/filestore/libs/storage/core/config.h | 3 + cloud/filestore/libs/storage/core/model.cpp | 80 +++++- cloud/filestore/libs/storage/core/model.h | 19 +- .../filestore/libs/storage/core/model_ut.cpp | 240 +++++++++------- .../service/service_actor_createfs.cpp | 239 +++++++++++++++- .../service/service_actor_destroyfs.cpp | 7 +- .../libs/storage/service/service_ut.cpp | 258 ++++++++++++++++++ .../storage/tablet/model/compaction_map.cpp | 6 +- .../tablet/model/compaction_map_ut.cpp | 16 ++ .../libs/storage/tablet/protos/tablet.proto | 2 + .../tablet/tablet_actor_createsession.cpp | 17 ++ .../tablet/tablet_actor_updateconfig.cpp | 20 +- .../libs/storage/tablet/tablet_state.cpp | 20 ++ .../libs/storage/tablet/tablet_state.h | 2 + cloud/filestore/public/api/protos/const.proto | 6 + .../test.test_forced_compaction/results.txt | 1 - 28 files changed, 1082 insertions(+), 142 deletions(-) diff --git a/cloud/blockstore/libs/diagnostics/critical_events.h b/cloud/blockstore/libs/diagnostics/critical_events.h index 6edd7f8a86b..7395c1239a3 100644 --- a/cloud/blockstore/libs/diagnostics/critical_events.h +++ b/cloud/blockstore/libs/diagnostics/critical_events.h @@ -92,6 +92,7 @@ namespace NCloud::NBlockStore { xxx(DiskRegistryInsertToPendingCleanupFailed) \ xxx(OverlappingRangesDuringMigrationDetected) \ xxx(StartExternalEndpointError) \ + xxx(EmptyRequestSgList) \ // BLOCKSTORE_IMPOSSIBLE_EVENTS //////////////////////////////////////////////////////////////////////////////// diff --git 
a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_writeblocks.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_writeblocks.cpp index 357a45bc539..287aa3a888e 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_writeblocks.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_writeblocks.cpp @@ -454,14 +454,35 @@ void TNonreplicatedPartitionActor::HandleWriteBlocksLocal( return; } + if (guard.Get().empty()) { + // can happen only if there is a bug in the code of the layers above + // this one + ReportEmptyRequestSgList(); + replyError( + ctx, + *requestInfo, + E_ARGUMENT, + "empty SgList in request"); + return; + } + // convert local request to remote + // copying request data into a new TIOVector and moving it to msg->Record + // afterwards since msg->Record.Blocks can be holding current request data + // or parts of it + NProto::TIOVector blocks; SgListCopy( guard.Get(), ResizeIOVector( - *msg->Record.MutableBlocks(), + blocks, msg->Record.BlocksCount, PartConfig->GetBlockSize())); + *msg->Record.MutableBlocks() = std::move(blocks); + + // explicitly clearing request data (SgList) just in case anyone adds some + // code to TDiskAgentWriteActor that tries to use it + msg->Record.Sglist.SetSgList({}); const bool assignVolumeRequestId = Config->GetAssignIdToWriteAndZeroRequestsEnabled() && diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_forward.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_forward.cpp index d6029ada659..cec719ed843 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_forward.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_forward.cpp @@ -59,6 +59,40 @@ void RejectVolumeRequest( NCloud::Send(ctx, caller, std::move(response), callerCookie); } +//////////////////////////////////////////////////////////////////////////////// + +void CopySgListIntoRequestBuffers( + TEvService::TEvWriteBlocksLocalRequest& request) +{ 
+ auto& record = request.Record; + auto g = record.Sglist.Acquire(); + if (!g) { + return; + } + + const auto& sgList = g.Get(); + STORAGE_VERIFY_C( + record.GetBlocks().BuffersSize() == 0, + TWellKnownEntityTypes::DISK, + record.GetDiskId(), + TStringBuilder() << "Buffers: " << record.GetBlocks().BuffersSize()); + TSgList newSgList; + newSgList.reserve(sgList.size()); + for (const auto& block: sgList) { + auto& buffer = *record.MutableBlocks()->AddBuffers(); + buffer.ReserveAndResize(block.Size()); + memcpy(buffer.begin(), block.Data(), block.Size()); + newSgList.emplace_back(buffer.data(), buffer.size()); + } + record.Sglist.SetSgList(std::move(newSgList)); +} + +template +void CopySgListIntoRequestBuffers(T& t) +{ + Y_UNUSED(t); +} + } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -714,6 +748,22 @@ void TVolumeActor::ForwardRequest( } } + /* + * Support for copying request data from the user-supplied buffer to some + * buffers which we own to protect the layer below Volume from bugged + * guests which modify buffer contents before receiving write response. + * + * Impacts performance (due to extra copying) and is thus not switched on + * by default. + * + * See https://github.com/ydb-platform/nbs/issues/2421 + */ + if constexpr (IsWriteMethod) { + if (State->GetUseIntermediateWriteBuffer()) { + CopySgListIntoRequestBuffers(*msg); + } + } + /* * Processing overlapping writes. Overlapping writes should not be sent * to the underlying (storage) layer. 
diff --git a/cloud/blockstore/libs/storage/volume/volume_state.cpp b/cloud/blockstore/libs/storage/volume/volume_state.cpp index c66eefaf44b..d264fdadb61 100644 --- a/cloud/blockstore/libs/storage/volume/volume_state.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_state.cpp @@ -239,6 +239,7 @@ void TVolumeState::Reset() UseFastPath = false; UseRdmaForThisVolume = false; AcceptInvalidDiskAllocationResponse = false; + UseIntermediateWriteBuffer = false; if (IsDiskRegistryMediaKind()) { if (Meta.GetDevices().size()) { @@ -297,6 +298,8 @@ void TVolumeState::Reset() TDuration::TryParse(value, MaxTimedOutDeviceStateDuration); } else if (tag == "use-fastpath") { UseFastPath = true; + } else if (tag == "use-intermediate-write-buffer") { + UseIntermediateWriteBuffer = true; } } diff --git a/cloud/blockstore/libs/storage/volume/volume_state.h b/cloud/blockstore/libs/storage/volume/volume_state.h index 6ab94dd831d..80f721553ad 100644 --- a/cloud/blockstore/libs/storage/volume/volume_state.h +++ b/cloud/blockstore/libs/storage/volume/volume_state.h @@ -226,6 +226,7 @@ class TVolumeState bool UseRdmaForThisVolume = false; bool RdmaUnavailable = false; TDuration MaxTimedOutDeviceStateDuration; + bool UseIntermediateWriteBuffer = false; bool UseMirrorResync = false; bool ForceMirrorResync = false; @@ -677,6 +678,11 @@ class TVolumeState return MaxTimedOutDeviceStateDuration; } + bool GetUseIntermediateWriteBuffer() const + { + return UseIntermediateWriteBuffer; + } + size_t GetUsedBlockCount() const { return UsedBlocks ? 
UsedBlocks->Count() : 0; diff --git a/cloud/blockstore/libs/storage/volume/volume_ut.cpp b/cloud/blockstore/libs/storage/volume/volume_ut.cpp index ed68c832ed9..312c7703e62 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut.cpp @@ -21,10 +21,10 @@ using namespace NTestVolume; using namespace NTestVolumeHelpers; -//////////////////////////////////////////////////////////////////////////////// - namespace NTestVolumeHelpers { +//////////////////////////////////////////////////////////////////////////////// + TBlockRange64 GetBlockRangeById(ui32 blockIndex) { return TBlockRange64::WithLength(1024 * blockIndex, 1024); @@ -9180,6 +9180,172 @@ Y_UNIT_TEST_SUITE(TVolumeTest) v.GetReplicas(1).GetDevices(0).GetTransportId()); } } + + TVector WriteToDiskWithInflightDataCorruptionAndReadResults( + NCloud::NProto::EStorageMediaKind mediaKind, + ui32 writeNumberToIntercept, + const TString& tags) + { + NProto::TStorageServiceConfig config; + config.SetAcquireNonReplicatedDevices(true); + auto state = MakeIntrusive(); + auto runtime = PrepareTestActorRuntime(config, state); + + TVolumeClient volume(*runtime); + + state->ReplicaCount = 2; + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + mediaKind, + 1024, + "vol0", + "cloud", + "folder", + 1, // partition count + 0, // blocksPerStripe + tags); + + volume.WaitReady(); + + auto clientInfo = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + volume.AddClient(clientInfo); + + // intercepting write request to one of the replicas + TAutoPtr writeToReplica; + ui32 writeNo = 0; + auto obs = [&] (TTestActorRuntimeBase&, TAutoPtr& event) { + if (event->GetTypeRewrite() + == TEvService::EvWriteBlocksLocalRequest) + { + ++writeNo; + if (writeNo == writeNumberToIntercept) { + writeToReplica = event.Release(); + return true; + } + } + + return false; + }; + + runtime->SetEventFilter(obs); + + const auto range = 
TBlockRange64::WithLength(0, 1); + const TString adata(4_KB, 'a'); + const TString bdata(4_KB, 'b'); + TString blockData; + // using explicit memcpy to avoid COW + blockData.ReserveAndResize(adata.size()); + memcpy(blockData.begin(), adata.c_str(), adata.size()); + + // sending write request to the volume - the request should hang + { + volume.SendWriteBlocksLocalRequest( + range, + clientInfo.GetClientId(), + blockData); + + runtime->DispatchEvents({}, TDuration::MilliSeconds(100)); + UNIT_ASSERT(writeToReplica); + TEST_NO_RESPONSE(runtime, WriteBlocksLocal); + + } + + // replacing block data + memcpy(blockData.begin(), bdata.c_str(), bdata.size()); + + // releasing the intercepted request + runtime->Send(writeToReplica.Release()); + runtime->DispatchEvents({}, TDuration::MilliSeconds(100)); + { + auto response = volume.RecvWriteBlocksLocalResponse(); + UNIT_ASSERT_VALUES_EQUAL_C( + S_OK, + response->GetStatus(), + response->GetErrorReason()); + } + + // the data in all replicas should be the same and should be equal to + // the version before the replacement + TVector results; + for (ui32 i = 0; i < 3; ++i) { + auto response = volume.ReadBlocks(range, clientInfo.GetClientId()); + const auto& bufs = response->Record.GetBlocks().GetBuffers(); + UNIT_ASSERT_VALUES_EQUAL(1, bufs.size()); + results.push_back(bufs[0]); + } + + volume.RemoveClient(clientInfo.GetClientId()); + + return results; + } + + Y_UNIT_TEST(ShouldCopyWriteRequestDataBeforeWritingToStorageIfTagIsSetM3) + { + auto results = WriteToDiskWithInflightDataCorruptionAndReadResults( + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + // 1 - to volume + // 1 - to mirror actor + // 3 - to 3 replicas + 1 + 1 + 3, + "use-intermediate-write-buffer"); + const TString adata(4_KB, 'a'); + UNIT_ASSERT_VALUES_EQUAL(adata, results[0]); + UNIT_ASSERT_VALUES_EQUAL(adata, results[1]); + UNIT_ASSERT_VALUES_EQUAL(adata, results[2]); + } + + Y_UNIT_TEST(ShouldHaveDifferentDataInReplicasUponInflightBufferCorruptionM3) + { + auto 
results = WriteToDiskWithInflightDataCorruptionAndReadResults( + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + // 1 - to volume + // 1 - to mirror actor + // 3 - to 3 replicas (nonrepl part actors) + 1 + 1 + 3, + ""); + const TString adata(4_KB, 'a'); + const TString bdata(4_KB, 'b'); + UNIT_ASSERT_VALUES_EQUAL(adata, results[0]); + UNIT_ASSERT_VALUES_EQUAL(adata, results[1]); + UNIT_ASSERT_VALUES_EQUAL(bdata, results[2]); + } + + Y_UNIT_TEST(ShouldCopyWriteRequestDataBeforeWritingToStorageIfTagIsSetNonrepl) + { + auto results = WriteToDiskWithInflightDataCorruptionAndReadResults( + NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED, + // 1 - to volume + // 1 - to nonrepl part actor + 1 + 1, + "use-intermediate-write-buffer"); + const TString adata(4_KB, 'a'); + UNIT_ASSERT_VALUES_EQUAL(adata, results[0]); + UNIT_ASSERT_VALUES_EQUAL(adata, results[1]); + UNIT_ASSERT_VALUES_EQUAL(adata, results[2]); + } + + Y_UNIT_TEST(ShouldHaveChangedDataInStorageUponInflightBufferCorruptionNonrepl) + { + auto results = WriteToDiskWithInflightDataCorruptionAndReadResults( + NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED, + // 1 - to volume + // 1 - to nonrepl part actor + 1 + 1, + ""); + const TString bdata(4_KB, 'b'); + UNIT_ASSERT_VALUES_EQUAL(bdata, results[0]); + UNIT_ASSERT_VALUES_EQUAL(bdata, results[1]); + UNIT_ASSERT_VALUES_EQUAL(bdata, results[2]); + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_ut.h b/cloud/blockstore/libs/storage/volume/volume_ut.h index 6f60491fdf2..f980bf25921 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut.h +++ b/cloud/blockstore/libs/storage/volume/volume_ut.h @@ -1,6 +1,6 @@ #pragma once -#include "testlib/test_env.h" +#include namespace NCloud::NBlockStore::NStorage::NTestVolumeHelpers { diff --git a/cloud/filestore/apps/client/lib/find_garbage.cpp b/cloud/filestore/apps/client/lib/find_garbage.cpp index 5b95bb4b662..d9927d8b0b3 100644 --- 
a/cloud/filestore/apps/client/lib/find_garbage.cpp +++ b/cloud/filestore/apps/client/lib/find_garbage.cpp @@ -145,11 +145,16 @@ class TFindGarbageCommand final auto shardSessionGuard = CreateCustomSession(shard, shard + "::" + ClientId); auto& shardSession = shardSessionGuard.AccessSession(); - FetchAll(shardSession, shard, RootNodeId, &shard2Nodes[shard]); + auto& shardNodes = shard2Nodes[shard]; + STORAGE_INFO("Fetching nodes for shard " << shard); + FetchAll(shardSession, shard, RootNodeId, &shardNodes); + STORAGE_INFO("Fetched " << shardNodes.size() << " nodes"); } + STORAGE_INFO("Fetching nodes for leader"); TVector leaderNodes; FetchAll(session, FileSystemId, RootNodeId, &leaderNodes); + STORAGE_INFO("Fetched " << leaderNodes.size() << " nodes"); THashSet shardNames; for (const auto& node: leaderNodes) { @@ -183,8 +188,11 @@ class TFindGarbageCommand final auto& shardSession = shardSessionGuard.AccessSession(); for (const auto& node: nodes) { if (!shardNames.contains(node.Name)) { + STORAGE_INFO("Node " << node.Name << " not found in shard" + ", calling stat"); auto stat = Stat(shardSession, shard, RootNodeId, node.Name); + STORAGE_INFO("Stat done"); if (stat) { results.push_back({shard, node.Name, stat->GetSize()}); diff --git a/cloud/filestore/apps/client/lib/forced_compaction.cpp b/cloud/filestore/apps/client/lib/forced_compaction.cpp index c87a5c68751..34612715cd8 100644 --- a/cloud/filestore/apps/client/lib/forced_compaction.cpp +++ b/cloud/filestore/apps/client/lib/forced_compaction.cpp @@ -103,6 +103,8 @@ class TForcedCompactionCommand final continue; } + CheckResponse(statusResponse); + const auto processed = statusResponse.GetProcessedRangeCount(); const auto total = statusResponse.GetRangeCount(); if (processed >= total) { @@ -111,9 +113,7 @@ class TForcedCompactionCommand final break; } - CheckResponse(statusResponse); - - Cout << "progress: " << statusResponse.GetProcessedRangeCount() + Cerr << "progress: " << 
statusResponse.GetProcessedRangeCount() << "/" << statusResponse.GetRangeCount() << ", last=" << statusResponse.GetLastProcessedRangeId() << Endl; diff --git a/cloud/filestore/config/storage.proto b/cloud/filestore/config/storage.proto index 1ec58aa4abc..d817de6474b 100644 --- a/cloud/filestore/config/storage.proto +++ b/cloud/filestore/config/storage.proto @@ -456,6 +456,17 @@ message TStorageConfig // Disables TwoStageRead for HDD filesystems. optional bool TwoStageReadDisabledForHDD = 404; + // Enables automatic shard creation for new filesystems upon create/resize + // operations. Shard count is calculated based on FS size. If this flag is + // enabled, new filesystems will have this feature enabled and will create + // extra shards upon resize if needed. Existing filesystems are not + // affected. Filesystems created with this flag enabled will maintain the + // autosharding feature even if this flag is disabled in the global storage + // config. + optional bool AutomaticShardCreationEnabled = 405; + // Affects shard count calculation if AutomaticShardCreationEnabled is on. 
+ optional uint64 MaxShardSize = 406; + // Enable Writeback cache on guest (fuse client) - optional bool GuestWritebackCacheEnabled = 405; + optional bool GuestWritebackCacheEnabled = 407; } diff --git a/cloud/filestore/libs/service/filestore.h b/cloud/filestore/libs/service/filestore.h index 2398132f212..676723520f0 100644 --- a/cloud/filestore/libs/service/filestore.h +++ b/cloud/filestore/libs/service/filestore.h @@ -37,6 +37,7 @@ constexpr ui32 MaxSymlink = NProto::E_FS_LIMITS_SYMLINK; constexpr ui64 MaxNodes = static_cast(NProto::E_FS_LIMITS_INODES); constexpr ui64 MaxXAttrName = NProto::E_FS_LIMITS_XATTR_NAME; constexpr ui64 MaxXAttrValue = NProto::E_FS_LIMITS_XATTR_VALUE; +constexpr ui32 MaxShardCount = NProto::E_FS_LIMITS_MAX_SHARDS; //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/filestore/libs/storage/core/config.cpp b/cloud/filestore/libs/storage/core/config.cpp index bc9f1ef97cf..7ed8a98710b 100644 --- a/cloud/filestore/libs/storage/core/config.cpp +++ b/cloud/filestore/libs/storage/core/config.cpp @@ -53,6 +53,9 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases; xxx(MaxBlocksPerTruncateTx, ui32, 0 /*TODO: 32GiB/4KiB*/ )\ xxx(MaxTruncateTxInflight, ui32, 10 )\ \ + xxx(AutomaticShardCreationEnabled, bool, false )\ + xxx(MaxShardSize, ui64, 4_TB )\ + \ xxx(MaxFileBlocks, ui32, 300_GB / 4_KB )\ xxx(LargeDeletionMarkersEnabled, bool, false )\ xxx(LargeDeletionMarkerBlocks, ui64, 1_GB / 4_KB )\ diff --git a/cloud/filestore/libs/storage/core/config.h b/cloud/filestore/libs/storage/core/config.h index d49d76811bf..f664fa08320 100644 --- a/cloud/filestore/libs/storage/core/config.h +++ b/cloud/filestore/libs/storage/core/config.h @@ -284,6 +284,9 @@ class TStorageConfig bool GetTwoStageReadDisabledForHDD() const; bool GetThreeStageWriteDisabledForHDD() const; + bool GetAutomaticShardCreationEnabled() const; + ui64 GetMaxShardSize() const; + bool GetGuestWritebackCacheEnabled() const; }; diff --git 
a/cloud/filestore/libs/storage/core/model.cpp b/cloud/filestore/libs/storage/core/model.cpp index da8ab869e1a..404df811eba 100644 --- a/cloud/filestore/libs/storage/core/model.cpp +++ b/cloud/filestore/libs/storage/core/model.cpp @@ -1,10 +1,11 @@ #include "model.h" +#include #include #include -#include +#include namespace NCloud::NFileStore::NStorage { @@ -277,6 +278,23 @@ TPoolKinds GetPoolKinds( //////////////////////////////////////////////////////////////////////////////// +ui32 ComputeShardCount( + const TStorageConfig& config, + const NKikimrFileStore::TConfig& fileStore) +{ + const double fileStoreSize = + fileStore.GetBlocksCount() * fileStore.GetBlockSize(); + + ui32 shardCount = std::ceil(fileStoreSize / config.GetMaxShardSize()); + Y_DEBUG_ABORT_UNLESS( + shardCount >= 1, + "size %f shard %lu", + fileStoreSize, + config.GetMaxShardSize()); + + return Min(shardCount, MaxShardCount); +} + ui32 ComputeAllocationUnitCount( const TStorageConfig& config, const NKikimrFileStore::TConfig& fileStore) @@ -285,7 +303,7 @@ ui32 ComputeAllocationUnitCount( return 1; } - double fileStoreSize = + const double fileStoreSize = fileStore.GetBlocksCount() * fileStore.GetBlockSize() / double(1_GB); const auto unit = GetAllocationUnit( @@ -437,10 +455,6 @@ ui32 NodesLimit( return Max(limit, static_cast(config.GetDefaultNodesLimit())); } -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - #define PERFORMANCE_PROFILE_PARAMETERS_SIMPLE(xxx, ...) 
\ xxx(ThrottlingEnabled, __VA_ARGS__) \ xxx(BoostTime, __VA_ARGS__) \ @@ -463,13 +477,11 @@ ui32 NodesLimit( void SetupFileStorePerformanceAndChannels( bool allocateMixed0Channel, + const ui32 allocationUnitCount, const TStorageConfig& config, NKikimrFileStore::TConfig& fileStore, const NProto::TFileStorePerformanceProfile& clientProfile) { - const auto allocationUnitCount = - ComputeAllocationUnitCount(config, fileStore); - OverrideStorageMediaKind(config, fileStore); #define SETUP_PARAMETER_SIMPLE(name, ...) \ @@ -501,4 +513,54 @@ void SetupFileStorePerformanceAndChannels( fileStore); } +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +void SetupFileStorePerformanceAndChannels( + bool allocateMixed0Channel, + const TStorageConfig& config, + NKikimrFileStore::TConfig& fileStore, + const NProto::TFileStorePerformanceProfile& clientProfile) +{ + SetupFileStorePerformanceAndChannels( + allocateMixed0Channel, + ComputeAllocationUnitCount(config, fileStore), + config, + fileStore, + clientProfile); +} + +TMultiShardFileStoreConfig SetupMultiShardFileStorePerformanceAndChannels( + const TStorageConfig& config, + const NKikimrFileStore::TConfig& fileStore, + const NProto::TFileStorePerformanceProfile& clientProfile) +{ + TMultiShardFileStoreConfig result; + result.MainFileSystemConfig = fileStore; + SetupFileStorePerformanceAndChannels( + false, // allocateMixed0Channel + 1, // allocationUnitCount + config, + result.MainFileSystemConfig, + clientProfile); + + const auto shardCount = ComputeShardCount(config, fileStore); + result.ShardConfigs.resize(shardCount); + for (ui32 i = 0; i < shardCount; ++i) { + result.ShardConfigs[i] = fileStore; + result.ShardConfigs[i].SetBlocksCount( + config.GetMaxShardSize() / fileStore.GetBlockSize()); + result.ShardConfigs[i].SetFileSystemId( + Sprintf("%s_s%u", fileStore.GetFileSystemId().c_str(), i + 1)); + SetupFileStorePerformanceAndChannels( + false, // allocateMixed0Channel + 
config, + result.ShardConfigs[i], + clientProfile); + } + + return result; +} + } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/core/model.h b/cloud/filestore/libs/storage/core/model.h index 7d4a27ef74c..5ef8b376295 100644 --- a/cloud/filestore/libs/storage/core/model.h +++ b/cloud/filestore/libs/storage/core/model.h @@ -2,18 +2,29 @@ #include -namespace NKikimrFileStore { - class TConfig; -} +#include + +#include namespace NCloud::NFileStore::NStorage { //////////////////////////////////////////////////////////////////////////////// +struct TMultiShardFileStoreConfig +{ + NKikimrFileStore::TConfig MainFileSystemConfig; + TVector ShardConfigs; +}; + +TMultiShardFileStoreConfig SetupMultiShardFileStorePerformanceAndChannels( + const TStorageConfig& config, + const NKikimrFileStore::TConfig& fileStore, + const NProto::TFileStorePerformanceProfile& clientProfile); + void SetupFileStorePerformanceAndChannels( bool allocateMixed0Channel, const TStorageConfig& config, NKikimrFileStore::TConfig& fileStore, - const NProto::TFileStorePerformanceProfile& clientPerformanceProfile); + const NProto::TFileStorePerformanceProfile& clientProfile); } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/core/model_ut.cpp b/cloud/filestore/libs/storage/core/model_ut.cpp index 94d38a1bd24..5cf6012ac89 100644 --- a/cloud/filestore/libs/storage/core/model_ut.cpp +++ b/cloud/filestore/libs/storage/core/model_ut.cpp @@ -425,7 +425,7 @@ Y_UNIT_TEST_SUITE(TModel) } \ // CHECK_CHANNEL - struct ChannelState final + struct TChannelState final { ui32 DataType; TString PoolType; @@ -443,7 +443,7 @@ Y_UNIT_TEST_SUITE(TModel) ui32 minChannelsCount, bool allocateMixed0, ui32 channelsCount, - TVector channels, + TVector channels, NKikimrFileStore::TConfig& kikimrConfig, NProto::TStorageConfig& storageConfig) { @@ -484,8 +484,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHDDMinGreater, TConfigs) { using 
namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "rot", .Size = 128_MB, @@ -494,7 +494,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "rot", .Size = 16_MB, @@ -503,7 +503,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "rot", .Size = 128_MB, @@ -515,7 +515,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 7; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -540,8 +540,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHDDMinLower, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "rot", .Size = 128_MB, @@ -550,7 +550,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "rot", .Size = 16_MB, @@ -559,7 +559,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "rot", .Size = 128_MB, @@ -571,7 +571,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 6; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -596,8 +596,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHDDEnormousSize, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector 
channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "rot", .Size = 128_MB, @@ -606,7 +606,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "rot", .Size = 16_MB, @@ -615,7 +615,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "rot", .Size = 128_MB, @@ -627,7 +627,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 19; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -655,8 +655,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsSSDMinGreater, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -665,7 +665,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -674,7 +674,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -686,7 +686,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 7; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "ssd", .Size = 2_GB, @@ -711,8 +711,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsSSDMinLower, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = 
static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -721,7 +721,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -730,7 +730,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -742,7 +742,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 6; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "ssd", .Size = 2_GB, @@ -767,8 +767,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsSSDEnormousSize, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -777,7 +777,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 31'000, .WriteBandwidth = 471'859'200, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -786,7 +786,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 31'000, .WriteBandwidth = 471'859'200, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -798,7 +798,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 34; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "ssd", .Size = 2_GB, @@ -826,8 +826,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHybridMinGreater, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", 
.Size = 128_MB, @@ -836,7 +836,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -845,7 +845,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -857,7 +857,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 7; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -882,8 +882,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHybridMinLower, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -892,7 +892,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -901,7 +901,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -913,7 +913,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 6; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -938,8 +938,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHybridEnormousSize, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -948,7 +948,7 @@ Y_UNIT_TEST_SUITE(TModel) 
.WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -957,7 +957,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -969,7 +969,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 19; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -997,8 +997,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHDDMinGreaterDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "rot", .Size = 128_MB, @@ -1007,7 +1007,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "rot", .Size = 16_MB, @@ -1016,7 +1016,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "rot", .Size = 128_MB, @@ -1025,7 +1025,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "rot", .Size = 4_GB, @@ -1037,7 +1037,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 8; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -1062,8 +1062,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHDDMinLowerDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + 
TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "rot", .Size = 128_MB, @@ -1072,7 +1072,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "rot", .Size = 16_MB, @@ -1081,7 +1081,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "rot", .Size = 128_MB, @@ -1090,7 +1090,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "rot", .Size = 4_GB, @@ -1102,7 +1102,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 7; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -1127,8 +1127,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHDDEnormousSizeDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "rot", .Size = 128_MB, @@ -1137,7 +1137,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "rot", .Size = 16_MB, @@ -1146,7 +1146,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "rot", .Size = 128_MB, @@ -1155,7 +1155,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "rot", .Size = 4_GB, @@ -1167,7 +1167,7 @@ 
Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 20; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -1195,8 +1195,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsSSDMinGreaterDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -1205,7 +1205,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -1214,7 +1214,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -1223,7 +1223,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "ssd", .Size = 2_GB, @@ -1235,7 +1235,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 8; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "ssd", .Size = 2_GB, @@ -1260,8 +1260,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsSSDMinLowerDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -1270,7 +1270,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -1279,7 +1279,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 
1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -1288,7 +1288,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 1'000, .WriteBandwidth = 15'728'640, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "ssd", .Size = 2_GB, @@ -1300,7 +1300,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 6; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "ssd", .Size = 2_GB, @@ -1325,8 +1325,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsSSDEnormousSizeDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -1335,7 +1335,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 31'000, .WriteBandwidth = 471'859'200, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -1344,7 +1344,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 31'000, .WriteBandwidth = 471'859'200, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -1353,7 +1353,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 31'000, .WriteBandwidth = 471'859'200, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "ssd", .Size = 2_GB, @@ -1365,7 +1365,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 3; i < 34; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "ssd", .Size = 2_GB, @@ -1393,8 +1393,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHybridMinGreaterDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - 
ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -1403,7 +1403,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -1412,7 +1412,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -1421,7 +1421,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "rot", .Size = 4_GB, @@ -1433,7 +1433,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 8; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -1458,8 +1458,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHybridMinLowerDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -1468,7 +1468,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -1477,7 +1477,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -1486,7 +1486,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 300, .WriteBandwidth = 31'457'280, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "rot", .Size = 4_GB, @@ -1498,7 
+1498,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 7; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -1523,8 +1523,8 @@ Y_UNIT_TEST_SUITE(TModel) Y_UNIT_TEST_F(ShouldCorrectlySetupChannelsHybridEnormousSizeDefault, TConfigs) { using namespace ::NCloud::NProto; - TVector channels = { - ChannelState{ + TVector channels = { + TChannelState{ .DataType = static_cast(EChannelDataKind::System), .PoolType = "ssd", .Size = 128_MB, @@ -1533,7 +1533,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Index), .PoolType = "ssd", .Size = 16_MB, @@ -1542,7 +1542,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Fresh), .PoolType = "ssd", .Size = 128_MB, @@ -1551,7 +1551,7 @@ Y_UNIT_TEST_SUITE(TModel) .WriteIops = 4'800, .WriteBandwidth = 251'658'240, }, - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed0), .PoolType = "rot", .Size = 4_GB, @@ -1563,7 +1563,7 @@ Y_UNIT_TEST_SUITE(TModel) }; for (size_t i = 4; i < 20; ++i) { channels.push_back( - ChannelState{ + TChannelState{ .DataType = static_cast(EChannelDataKind::Mixed), .PoolType = "rot", .Size = 4_GB, @@ -2290,6 +2290,44 @@ Y_UNIT_TEST_SUITE(TModel) } #undef DO_TEST + + Y_UNIT_TEST_F(ShouldCreateProperNumberOfShards, TConfigs) + { + using namespace ::NCloud::NProto; + KikimrConfig.SetBlockSize(4_KB); + KikimrConfig.SetBlocksCount(4_TB / 4_KB); + + // Disable media type override. 
+ StorageConfig.SetAutomaticShardCreationEnabled(true); + StorageConfig.SetMaxShardSize(4_TB); + + auto fs = SetupMultiShardFileStorePerformanceAndChannels( + StorageConfig, + KikimrConfig, + ClientPerformanceProfile); + UNIT_ASSERT_VALUES_EQUAL(1, fs.ShardConfigs.size()); + + KikimrConfig.SetBlocksCount(16_TB / 4_KB); + fs = SetupMultiShardFileStorePerformanceAndChannels( + StorageConfig, + KikimrConfig, + ClientPerformanceProfile); + UNIT_ASSERT_VALUES_EQUAL(4, fs.ShardConfigs.size()); + + KikimrConfig.SetBlocksCount(512_TB / 4_KB); + fs = SetupMultiShardFileStorePerformanceAndChannels( + StorageConfig, + KikimrConfig, + ClientPerformanceProfile); + UNIT_ASSERT_VALUES_EQUAL(128, fs.ShardConfigs.size()); + + KikimrConfig.SetBlocksCount(1_PB / 4_KB); + fs = SetupMultiShardFileStorePerformanceAndChannels( + StorageConfig, + KikimrConfig, + ClientPerformanceProfile); + UNIT_ASSERT_VALUES_EQUAL(254, fs.ShardConfigs.size()); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/service/service_actor_createfs.cpp b/cloud/filestore/libs/storage/service/service_actor_createfs.cpp index 5094d921006..fa2742e14d7 100644 --- a/cloud/filestore/libs/storage/service/service_actor_createfs.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_createfs.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include @@ -65,6 +67,12 @@ class TCreateFileStoreActor final const TStorageConfigPtr StorageConfig; const TRequestInfoPtr RequestInfo; const NProto::TCreateFileStoreRequest Request; + const TString& LogTag; + TMultiShardFileStoreConfig FileStoreConfig; + + bool MainFileSystemCreated = false; + ui32 ShardsToCreate = 0; + ui32 ShardsToConfigure = 0; public: TCreateFileStoreActor( @@ -77,11 +85,23 @@ class TCreateFileStoreActor final private: STFUNC(StateWork); - void CreateFileStore(const TActorContext& ctx); + void CreateMainFileStore(const TActorContext& ctx); + void CreateShards(const TActorContext& ctx); + void 
ConfigureShards(const TActorContext& ctx); + void ConfigureMainFileStore(const TActorContext& ctx); + void HandleCreateFileStoreResponse( const TEvSSProxy::TEvCreateFileStoreResponse::TPtr& ev, const TActorContext& ctx); + void HandleConfigureShardResponse( + const TEvIndexTablet::TEvConfigureAsShardResponse::TPtr& ev, + const TActorContext& ctx); + + void HandleConfigureMainFileStoreResponse( + const TEvIndexTablet::TEvConfigureShardsResponse::TPtr& ev, + const TActorContext& ctx); + void HandlePoisonPill( const TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx); @@ -104,15 +124,18 @@ TCreateFileStoreActor::TCreateFileStoreActor( : StorageConfig(std::move(storageConfig)) , RequestInfo(std::move(requestInfo)) , Request(std::move(request)) + , LogTag(Request.GetFileSystemId()) {} void TCreateFileStoreActor::Bootstrap(const TActorContext& ctx) { - CreateFileStore(ctx); + CreateMainFileStore(ctx); Become(&TThis::StateWork); } -void TCreateFileStoreActor::CreateFileStore(const TActorContext& ctx) +//////////////////////////////////////////////////////////////////////////////// + +void TCreateFileStoreActor::CreateMainFileStore(const TActorContext& ctx) { NKikimrFileStore::TConfig config; config.SetFileSystemId(Request.GetFileSystemId()); @@ -124,11 +147,28 @@ void TCreateFileStoreActor::CreateFileStore(const TActorContext& ctx) config.SetStorageMediaKind(Request.GetStorageMediaKind()); config.SetRangeIdHasherType(1); - SetupFileStorePerformanceAndChannels( - false, // do not allocate mixed0 channel - *StorageConfig, - config, - Request.GetPerformanceProfile()); + if (StorageConfig->GetAutomaticShardCreationEnabled()) { + FileStoreConfig = SetupMultiShardFileStorePerformanceAndChannels( + *StorageConfig, + config, + Request.GetPerformanceProfile()); + ShardsToCreate = FileStoreConfig.ShardConfigs.size(); + ShardsToConfigure = ShardsToCreate; + config = FileStoreConfig.MainFileSystemConfig; + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Will 
create filesystem with %u shards", + LogTag.c_str(), + FileStoreConfig.ShardConfigs.size()); + } else { + SetupFileStorePerformanceAndChannels( + false, // do not allocate mixed0 channel + *StorageConfig, + config, + Request.GetPerformanceProfile()); + } auto request = std::make_unique( std::move(config)); @@ -136,22 +176,188 @@ void TCreateFileStoreActor::CreateFileStore(const TActorContext& ctx) NCloud::Send(ctx, MakeSSProxyServiceId(), std::move(request)); } +void TCreateFileStoreActor::CreateShards(const TActorContext& ctx) +{ + for (ui32 i = 0; i < FileStoreConfig.ShardConfigs.size(); ++i) { + auto request = std::make_unique( + FileStoreConfig.ShardConfigs[i]); + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Creating shard %s", + LogTag.c_str(), + request->Config.GetFileSystemId().c_str()); + + NCloud::Send( + ctx, + MakeSSProxyServiceId(), + std::move(request), + i // cookie + ); + } +} + +void TCreateFileStoreActor::ConfigureShards(const TActorContext& ctx) +{ + for (ui32 i = 0; i < FileStoreConfig.ShardConfigs.size(); ++i) { + auto request = + std::make_unique(); + request->Record.SetFileSystemId( + FileStoreConfig.ShardConfigs[i].GetFileSystemId()); + request->Record.SetShardNo(i + 1); + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Configuring shard %s", + LogTag.c_str(), + request->Record.Utf8DebugString().Quote().c_str()); + + NCloud::Send( + ctx, + MakeIndexTabletProxyServiceId(), + std::move(request), + i // cookie + ); + } +} + +void TCreateFileStoreActor::ConfigureMainFileStore(const TActorContext& ctx) +{ + auto request = + std::make_unique(); + request->Record.SetFileSystemId( + FileStoreConfig.MainFileSystemConfig.GetFileSystemId()); + for (const auto& shard: FileStoreConfig.ShardConfigs) { + request->Record.AddShardFileSystemIds(shard.GetFileSystemId()); + } + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Configuring main filesystem %s", + LogTag.c_str(), + 
request->Record.Utf8DebugString().Quote().c_str()); + + NCloud::Send( + ctx, + MakeIndexTabletProxyServiceId(), + std::move(request)); +} + +//////////////////////////////////////////////////////////////////////////////// + void TCreateFileStoreActor::HandleCreateFileStoreResponse( const TEvSSProxy::TEvCreateFileStoreResponse::TPtr& ev, const TActorContext& ctx) { const auto* msg = ev->Get(); - if (FAILED(msg->GetStatus())) { + + if (HasError(msg->GetError())) { + LOG_WARN( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Filesystem creation error: %s", + LogTag.c_str(), + FormatError(msg->GetError()).Quote().c_str()); + ReplyAndDie(ctx, msg->GetError()); return; } + if (MainFileSystemCreated) { + Y_ABORT_UNLESS(ev->Cookie < FileStoreConfig.ShardConfigs.size()); + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Created shard %s", + LogTag.c_str(), + FileStoreConfig.ShardConfigs[ev->Cookie].GetFileSystemId().c_str()); + + Y_DEBUG_ABORT_UNLESS(ShardsToCreate); + if (--ShardsToCreate == 0) { + ConfigureShards(ctx); + } + + return; + } + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Created main filesystem", + LogTag.c_str()); + + MainFileSystemCreated = true; + if (ShardsToCreate) { + CreateShards(ctx); + return; + } + + auto response = std::make_unique(); + // TODO: fill filestore info + + ReplyAndDie(ctx, std::move(response)); +} + +void TCreateFileStoreActor::HandleConfigureShardResponse( + const TEvIndexTablet::TEvConfigureAsShardResponse::TPtr& ev, + const TActorContext& ctx) +{ + const auto* msg = ev->Get(); + if (HasError(msg->GetError())) { + LOG_WARN( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Shard configuration error: %s", + LogTag.c_str(), + FormatError(msg->GetError()).Quote().c_str()); + + ReplyAndDie(ctx, msg->GetError()); + return; + } + + Y_ABORT_UNLESS(ev->Cookie < FileStoreConfig.ShardConfigs.size()); + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Configured shard %s", + LogTag.c_str(), + 
FileStoreConfig.ShardConfigs[ev->Cookie].GetFileSystemId().c_str()); + + Y_DEBUG_ABORT_UNLESS(ShardsToConfigure); + if (--ShardsToConfigure == 0) { + ConfigureMainFileStore(ctx); + } +} + +void TCreateFileStoreActor::HandleConfigureMainFileStoreResponse( + const TEvIndexTablet::TEvConfigureShardsResponse::TPtr& ev, + const TActorContext& ctx) +{ + const auto* msg = ev->Get(); + if (HasError(msg->GetError())) { + ReplyAndDie(ctx, msg->GetError()); + return; + } + + LOG_INFO( + ctx, + TFileStoreComponents::SERVICE, + "[%s] Configured main filesystem", + LogTag.c_str()); + auto response = std::make_unique(); // TODO: fill filestore info ReplyAndDie(ctx, std::move(response)); } +//////////////////////////////////////////////////////////////////////////////// + void TCreateFileStoreActor::HandlePoisonPill( const TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) @@ -164,7 +370,8 @@ void TCreateFileStoreActor::ReplyAndDie( const TActorContext& ctx, const NProto::TError& error) { - auto response = std::make_unique(error); + auto response = + std::make_unique(error); ReplyAndDie(ctx, std::move(response)); } @@ -181,7 +388,15 @@ STFUNC(TCreateFileStoreActor::StateWork) switch (ev->GetTypeRewrite()) { HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); - HFunc(TEvSSProxy::TEvCreateFileStoreResponse, HandleCreateFileStoreResponse); + HFunc( + TEvSSProxy::TEvCreateFileStoreResponse, + HandleCreateFileStoreResponse); + HFunc( + TEvIndexTablet::TEvConfigureAsShardResponse, + HandleConfigureShardResponse); + HFunc( + TEvIndexTablet::TEvConfigureShardsResponse, + HandleConfigureMainFileStoreResponse); default: HandleUnexpectedEvent(ev, TFileStoreComponents::SERVICE_WORKER); @@ -208,7 +423,7 @@ void TStorageServiceActor::HandleCreateFileStore( InitProfileLogRequestInfo(inflight->ProfileLogRequest, msg->Record); auto error = ValidateCreateFileSystemRequest(msg->Record); - if (FAILED(error.GetCode())) { + if (HasError(error)) { auto response = std::make_unique(error); 
inflight->Complete(ctx.Now(), error); NCloud::Reply(ctx, *ev, std::move(response)); diff --git a/cloud/filestore/libs/storage/service/service_actor_destroyfs.cpp b/cloud/filestore/libs/storage/service/service_actor_destroyfs.cpp index 033402e1a9c..48a72bf1873 100644 --- a/cloud/filestore/libs/storage/service/service_actor_destroyfs.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_destroyfs.cpp @@ -152,7 +152,8 @@ void TDestroyFileStoreActor::ReplyAndDie( const TActorContext& ctx, const NProto::TError& error) { - auto response = std::make_unique(error); + auto response = + std::make_unique(error); NCloud::Reply(ctx, *RequestInfo, std::move(response)); Die(ctx); @@ -167,7 +168,9 @@ STFUNC(TDestroyFileStoreActor::StateWork) TEvIndexTablet::TEvDescribeSessionsResponse, HandleDescribeSessionsResponse); - HFunc(TEvSSProxy::TEvDestroyFileStoreResponse, HandleDestroyFileStoreResponse); + HFunc( + TEvSSProxy::TEvDestroyFileStoreResponse, + HandleDestroyFileStoreResponse); default: HandleUnexpectedEvent(ev, TFileStoreComponents::SERVICE_WORKER); diff --git a/cloud/filestore/libs/storage/service/service_ut.cpp b/cloud/filestore/libs/storage/service/service_ut.cpp index 61db286ca4b..4a33d6ffa9e 100644 --- a/cloud/filestore/libs/storage/service/service_ut.cpp +++ b/cloud/filestore/libs/storage/service/service_ut.cpp @@ -5978,6 +5978,264 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest) CheckThreeStageWrites(NProto::STORAGE_MEDIA_SSD, true); CheckTwoStageReads(NProto::STORAGE_MEDIA_SSD, true); } + + void DoTestShardedFileSystemConfigured( + const TString& fsId, + TServiceClient& service) + { + TVector expected = {fsId, fsId + "_s1", fsId + "_s2"}; + + auto response = service.ListFileStores(); + const auto& fsIds = response->Record.GetFileStores(); + TVector ids(fsIds.begin(), fsIds.end()); + Sort(ids); + + UNIT_ASSERT_VALUES_EQUAL(expected, ids); + + auto headers = service.InitSession(fsId, "client"); + + for (const auto& id: ids) { + NProtoPrivate::TDescribeSessionsRequest 
request; + request.SetFileSystemId(id); + + TString buf; + google::protobuf::util::MessageToJsonString(request, &buf); + auto jsonResponse = service.ExecuteAction("describesessions", buf); + NProtoPrivate::TDescribeSessionsResponse response; + UNIT_ASSERT(google::protobuf::util::JsonStringToMessage( + jsonResponse->Record.GetOutput(), &response).ok()); + + const auto& sessions = response.GetSessions(); + UNIT_ASSERT_VALUES_EQUAL(1, sessions.size()); + + UNIT_ASSERT_VALUES_EQUAL( + headers.SessionId, + sessions[0].GetSessionId()); + UNIT_ASSERT_VALUES_EQUAL( + headers.ClientId, + sessions[0].GetClientId()); + UNIT_ASSERT_VALUES_EQUAL("", sessions[0].GetSessionState()); + } + + const TString sessionState = "some_state"; + service.ResetSession(headers, sessionState); + + for (const auto& id: ids) { + NProtoPrivate::TDescribeSessionsRequest request; + request.SetFileSystemId(id); + + TString buf; + google::protobuf::util::MessageToJsonString(request, &buf); + auto jsonResponse = service.ExecuteAction("describesessions", buf); + NProtoPrivate::TDescribeSessionsResponse response; + UNIT_ASSERT(google::protobuf::util::JsonStringToMessage( + jsonResponse->Record.GetOutput(), &response).ok()); + + const auto& sessions = response.GetSessions(); + UNIT_ASSERT_VALUES_EQUAL(1, sessions.size()); + + UNIT_ASSERT_VALUES_EQUAL( + headers.SessionId, + sessions[0].GetSessionId()); + UNIT_ASSERT_VALUES_EQUAL( + headers.ClientId, + sessions[0].GetClientId()); + UNIT_ASSERT_VALUES_EQUAL( + sessionState, + sessions[0].GetSessionState()); + } + + service.DestroySession(headers); + + for (const auto& id: ids) { + NProtoPrivate::TDescribeSessionsRequest request; + request.SetFileSystemId(id); + + TString buf; + google::protobuf::util::MessageToJsonString(request, &buf); + auto jsonResponse = service.ExecuteAction("describesessions", buf); + NProtoPrivate::TDescribeSessionsResponse response; + UNIT_ASSERT(google::protobuf::util::JsonStringToMessage( + jsonResponse->Record.GetOutput(), 
&response).ok()); + + const auto& sessions = response.GetSessions(); + UNIT_ASSERT_VALUES_EQUAL(0, sessions.size()); + } + } + + Y_UNIT_TEST(ShouldConfigureShardsAutomatically) + { + NProto::TStorageConfig config; + config.SetAutomaticShardCreationEnabled(true); + config.SetMaxShardSize(1_GB); + TTestEnv env({}, config); + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + + const TString fsId = "test"; + + TServiceClient service(env.GetRuntime(), nodeIdx); + service.CreateFileStore(fsId, 2_GB / 4_KB); + + DoTestShardedFileSystemConfigured(fsId, service); + } + + Y_UNIT_TEST(ShouldHandleErrorsDuringShardedFileSystemCreation) + { + NProto::TStorageConfig config; + config.SetAutomaticShardCreationEnabled(true); + config.SetMaxShardSize(1_GB); + TTestEnv env({}, config); + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + + const TString fsId = "test"; + const auto blockCount = 2_GB / 4_KB; + + TServiceClient service(env.GetRuntime(), nodeIdx); + + TVector expected = {fsId, fsId + "_s1", fsId + "_s2"}; + + NProto::TError createShardError; + NProto::TError configureShardError; + NProto::TError configureShardsError; + + TAutoPtr toSend; + + env.GetRuntime().SetEventFilter( + [&] (TTestActorRuntimeBase&, TAutoPtr& event) { + switch (event->GetTypeRewrite()) { + case TEvSSProxy::EvCreateFileStoreRequest: { + using TRequest = TEvSSProxy::TEvCreateFileStoreRequest; + using TResponse = + TEvSSProxy::TEvCreateFileStoreResponse; + const auto* msg = event->Get(); + if (msg->Config.GetFileSystemId() != expected[1]) { + break; + } + + if (!HasError(createShardError)) { + break; + } + + auto response = std::make_unique( + createShardError); + + toSend = new IEventHandle( + event->Sender, + event->Recipient, + response.release(), + 0, // flags + event->Cookie); + + return true; + } + + case TEvIndexTablet::EvConfigureAsShardRequest: { + using TRequest = + TEvIndexTablet::TEvConfigureAsShardRequest; + using TResponse = + 
TEvIndexTablet::TEvConfigureAsShardResponse; + const auto* msg = event->Get(); + if (msg->Record.GetFileSystemId() != expected[1]) { + break; + } + + if (!HasError(configureShardError)) { + break; + } + + auto response = std::make_unique( + configureShardError); + + toSend = new IEventHandle( + event->Sender, + event->Recipient, + response.release(), + 0, // flags + event->Cookie); + + return true; + } + + case TEvIndexTablet::EvConfigureShardsRequest: { + using TResponse = + TEvIndexTablet::TEvConfigureShardsResponse; + + if (!HasError(configureShardsError)) { + break; + } + + auto response = std::make_unique( + configureShardsError); + + toSend = new IEventHandle( + event->Sender, + event->Recipient, + response.release(), + 0, // flags + event->Cookie); + + return true; + } + } + + return false; + }); + + createShardError = MakeError(E_REJECTED, "failed to create shard"); + service.SendCreateFileStoreRequest(fsId, blockCount); + env.GetRuntime().DispatchEvents({}, TDuration::MilliSeconds(100)); + UNIT_ASSERT(toSend); + env.GetRuntime().Send(toSend, nodeIdx); + { + auto response = service.RecvCreateFileStoreResponse(); + UNIT_ASSERT_VALUES_EQUAL( + FormatError(createShardError), + FormatError(response->GetError())); + } + + createShardError = {}; + configureShardError = + MakeError(E_REJECTED, "failed to configure shard"); + service.SendCreateFileStoreRequest(fsId, blockCount); + env.GetRuntime().DispatchEvents({}, TDuration::MilliSeconds(100)); + UNIT_ASSERT(toSend); + env.GetRuntime().Send(toSend, nodeIdx); + { + auto response = service.RecvCreateFileStoreResponse(); + UNIT_ASSERT_VALUES_EQUAL( + FormatError(configureShardError), + FormatError(response->GetError())); + } + + configureShardError = {}; + configureShardsError = + MakeError(E_REJECTED, "failed to configure shards"); + service.SendCreateFileStoreRequest(fsId, blockCount); + env.GetRuntime().DispatchEvents({}, TDuration::MilliSeconds(100)); + UNIT_ASSERT(toSend); + env.GetRuntime().Send(toSend, 
nodeIdx); + { + auto response = service.RecvCreateFileStoreResponse(); + UNIT_ASSERT_VALUES_EQUAL( + FormatError(configureShardsError), + FormatError(response->GetError())); + } + + configureShardsError = {}; + service.SendCreateFileStoreRequest(fsId, blockCount); + { + auto response = service.RecvCreateFileStoreResponse(); + UNIT_ASSERT_VALUES_EQUAL( + FormatError(MakeError(S_OK)), + FormatError(response->GetError())); + } + + DoTestShardedFileSystemConfigured(fsId, service); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/model/compaction_map.cpp b/cloud/filestore/libs/storage/tablet/model/compaction_map.cpp index be55a352c8a..63ef16b3819 100644 --- a/cloud/filestore/libs/storage/tablet/model/compaction_map.cpp +++ b/cloud/filestore/libs/storage/tablet/model/compaction_map.cpp @@ -199,7 +199,7 @@ struct TGroup ui32 i = GetTop(CompactedRanges); TopCompactionScore = { GroupIndex + i, - GetCompactionScore(Stats[i])}; + CompactedRanges.Get(i) ? 0 : GetCompactionScore(Stats[i])}; } // 'compacted' flag is deliberately ignored for cleanup score @@ -216,7 +216,9 @@ struct TGroup TopGarbageScore = { rangeId, garbageScore}; } else if (TopGarbageScore.RangeId == rangeId) { ui32 i = GetTop(CompactedRanges); - TopGarbageScore = { GroupIndex + i, GetGarbageScore(Stats[i]) }; + TopGarbageScore = { + GroupIndex + i, + CompactedRanges.Get(i) ? 
0 : GetGarbageScore(Stats[i])}; } return diff; diff --git a/cloud/filestore/libs/storage/tablet/model/compaction_map_ut.cpp b/cloud/filestore/libs/storage/tablet/model/compaction_map_ut.cpp index 41e30bea9ea..32c0b8400d6 100644 --- a/cloud/filestore/libs/storage/tablet/model/compaction_map_ut.cpp +++ b/cloud/filestore/libs/storage/tablet/model/compaction_map_ut.cpp @@ -304,6 +304,22 @@ Y_UNIT_TEST_SUITE(TCompactionMapTest) UNIT_ASSERT_VALUES_EQUAL(10, topRanges[1].Stats.BlobsCount); UNIT_ASSERT_VALUES_EQUAL(100, topRanges[1].Stats.DeletionsCount); UNIT_ASSERT_VALUES_EQUAL(1000, topRanges[1].Stats.GarbageBlocksCount); + + compactionMap.Update(1, 20, 200, 2000, true); + compactionMap.Update(0, 10, 100, 1000, true); + + topRanges = compactionMap.GetTopRangesByCompactionScore(3); + UNIT_ASSERT_VALUES_EQUAL(0, topRanges.size()); + + counter = compactionMap.GetTopCompactionScore(); + UNIT_ASSERT_VALUES_EQUAL(0, counter.Score); + + counter = compactionMap.GetTopCleanupScore(); + UNIT_ASSERT_VALUES_EQUAL(10001, counter.RangeId); + UNIT_ASSERT_VALUES_EQUAL(400, counter.Score); + + counter = compactionMap.GetTopGarbageScore(); + UNIT_ASSERT_VALUES_EQUAL(0, counter.Score); } Y_UNIT_TEST(ShouldReturnNonEmptyRanges) diff --git a/cloud/filestore/libs/storage/tablet/protos/tablet.proto b/cloud/filestore/libs/storage/tablet/protos/tablet.proto index 34d86720d0f..23678b2e352 100644 --- a/cloud/filestore/libs/storage/tablet/protos/tablet.proto +++ b/cloud/filestore/libs/storage/tablet/protos/tablet.proto @@ -44,6 +44,8 @@ message TFileSystem repeated string ShardFileSystemIds = 13; uint32 ShardNo = 14; + bool AutomaticShardCreationEnabled = 15; + uint64 MaxShardSize = 16; } //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createsession.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createsession.cpp index 718f727ec7a..afdaa0890b2 100644 --- 
a/cloud/filestore/libs/storage/tablet/tablet_actor_createsession.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createsession.cpp @@ -142,6 +142,23 @@ void TIndexTabletActor::HandleCreateSession( ev->Cookie, msg->CallContext); + const auto expectedShardCount = CalculateExpectedShardCount(); + const auto actualShardCount = GetFileSystem().ShardFileSystemIdsSize(); + if (actualShardCount < expectedShardCount) { + auto message = TStringBuilder() << "Shard count smaller than expected: " + << actualShardCount << " < " << expectedShardCount; + LOG_INFO(ctx, TFileStoreComponents::TABLET, + "%s CreateSession rejected: %s", + LogTag.c_str(), + message.c_str()); + + using TResponse = TEvIndexTablet::TEvCreateSessionResponse; + auto response = std::make_unique<TResponse>( + MakeError(E_REJECTED, std::move(message))); + NCloud::Reply(ctx, *requestInfo, std::move(response)); + return; + } + AddTransaction(*requestInfo); ExecuteTx( diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_updateconfig.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_updateconfig.cpp index 0adf22913c2..56ce266adc8 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_updateconfig.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_updateconfig.cpp @@ -130,10 +130,18 @@ void TIndexTabletActor::HandleUpdateConfig( Convert(msg->Record.GetConfig(), newConfig); if (!GetFileSystemId()) { + // autosharding params are deliberately applied upon FS creation + newConfig.SetAutomaticShardCreationEnabled( + Config->GetAutomaticShardCreationEnabled()); + newConfig.SetMaxShardSize(Config->GetMaxShardSize()); + LOG_INFO(ctx,TFileStoreComponents::TABLET, - "%s Starting tablet config initialization [txId: %d]", + "%s Starting tablet config initialization [txId: %d]" + ", autosharding [%d, %lu]", LogTag.c_str(), - txId); + txId, + Config->GetAutomaticShardCreationEnabled(), + Config->GetMaxShardSize()); // First config update on tablet creation. No need to validate config.
ExecuteTx( @@ -149,6 +157,9 @@ void TIndexTabletActor::HandleUpdateConfig( *newConfig.MutableShardFileSystemIds() = oldConfig.GetShardFileSystemIds(); newConfig.SetShardNo(oldConfig.GetShardNo()); + newConfig.SetAutomaticShardCreationEnabled( + oldConfig.GetAutomaticShardCreationEnabled()); + newConfig.SetMaxShardSize(oldConfig.GetMaxShardSize()); // Config update occured due to alter/resize. if (auto error = ValidateUpdateConfigRequest(oldConfig, newConfig)) { @@ -264,6 +275,11 @@ void TIndexTabletActor::HandleConfigureShards( " is smaller than prev shard list: " << msg->Record.GetShardFileSystemIds().size() << " < " << shardIds.size()); + } else if (msg->Record.ShardFileSystemIdsSize() > MaxShardCount) { + error = MakeError(E_ARGUMENT, TStringBuilder() << "new shard list" + " is bigger than limit: " + << msg->Record.GetShardFileSystemIds().size() << " > " + << MaxShardCount); } else { for (int i = 0; i < shardIds.size(); ++i) { if (shardIds[i] != msg->Record.GetShardFileSystemIds(i)) { diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.cpp b/cloud/filestore/libs/storage/tablet/tablet_state.cpp index bedcc447452..9531ba9d22d 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state.cpp @@ -182,4 +182,24 @@ TMiscNodeStats TIndexTabletState::GetMiscNodeStats() const }; } +ui64 TIndexTabletState::CalculateExpectedShardCount() const +{ + if (FileSystem.GetShardNo()) { + // sharding is flat: a tablet with a nonzero ShardNo expects no shards of its own + return 0; + } + + const auto currentShardCount = FileSystem.ShardFileSystemIdsSize(); + ui64 autoShardCount = 0; + if (FileSystem.GetAutomaticShardCreationEnabled() + && FileSystem.GetMaxShardSize()) + { + const double fsSize = + FileSystem.GetBlockSize() * FileSystem.GetBlocksCount(); + autoShardCount = ceil(fsSize / FileSystem.GetMaxShardSize()); + } + + return Max(currentShardCount, autoShardCount); +} + } // namespace NCloud::NFileStore::NStorage diff --git 
a/cloud/filestore/libs/storage/tablet/tablet_state.h b/cloud/filestore/libs/storage/tablet/tablet_state.h index 540ae271aa4..f94cf474c9e 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.h +++ b/cloud/filestore/libs/storage/tablet/tablet_state.h @@ -296,6 +296,8 @@ class TIndexTabletState return AllocatorRegistry.GetAllocator(tag); } + ui64 CalculateExpectedShardCount() const; + // // FileSystem Stats // diff --git a/cloud/filestore/public/api/protos/const.proto b/cloud/filestore/public/api/protos/const.proto index 01de57be558..f14262cf5ea 100644 --- a/cloud/filestore/public/api/protos/const.proto +++ b/cloud/filestore/public/api/protos/const.proto @@ -78,4 +78,10 @@ enum EFilestoreLimits // Maximum number of inodes. E_FS_LIMITS_INODES = -2; // 0xFFFFFFFE + + // Shard number is encoded in the highest 8 bits of nodeId and handleId and + // 0 is reserved for the main tablet + // so 1 <= shardNo <= 255 + // so shardCount <= 254 + E_FS_LIMITS_MAX_SHARDS = 254; }; diff --git a/cloud/filestore/tests/client/canondata/test.test_forced_compaction/results.txt b/cloud/filestore/tests/client/canondata/test.test_forced_compaction/results.txt index 979270fd76b..f6563985ac2 100644 --- a/cloud/filestore/tests/client/canondata/test.test_forced_compaction/results.txt +++ b/cloud/filestore/tests/client/canondata/test.test_forced_compaction/results.txt @@ -1,2 +1 @@ -progress: 0/64, last=1177944064 finished