From 2c703b10933b880d70e332862fbc168f35e56645 Mon Sep 17 00:00:00 2001 From: Andrei Strelkovskii Date: Sat, 14 Sep 2024 22:40:09 +0300 Subject: [PATCH] issue-1922: introduced UseMixedBlocksInsteadOfAliveBlocksInCompaction flag (#2031) * issue-1922: introduced UseMixedBlocksInsteadOfAliveBlocksInCompaction flag * issue-1922: added config.md file which describes some important StorageConfig settings --- cloud/filestore/config/storage.proto | 8 +++++ cloud/filestore/libs/storage/core/config.cpp | 1 + cloud/filestore/libs/storage/core/config.h | 1 + .../libs/storage/tablet/tablet_actor.cpp | 16 +++++---- .../libs/storage/tablet/tablet_ut_data.cpp | 1 + .../nfs-storage.txt | 1 + doc/filestore/design/storage/config.md | 34 +++++++++++++++++++ 7 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 doc/filestore/design/storage/config.md diff --git a/cloud/filestore/config/storage.proto b/cloud/filestore/config/storage.proto index f46684b7e1f..bf63d1dce8e 100644 --- a/cloud/filestore/config/storage.proto +++ b/cloud/filestore/config/storage.proto @@ -394,4 +394,12 @@ message TStorageConfig // After reaching this percentage of Compaction/Cleanup backpressure // thresholds BlobIndexOps scheduling falls back to BIOP_FAIR. optional uint32 BackpressurePercentageForFairBlobIndexOpsPriority = 390; + + // If enabled, GarbageCompactionThresholdAverage will be compared to the + // difference between MixedBlocks and UsedBlocks instead of the difference + // between alive blocks and UsedBlocks. This actually should be the default + // behaviour but is implemented via this flag in order not to cause + // uncontrollable behaviour change for production systems. TODO: gradually + // enable this flag everywhere and make this behaviour the new default. 
+ optional bool UseMixedBlocksInsteadOfAliveBlocksInCompaction = 391; } diff --git a/cloud/filestore/libs/storage/core/config.cpp b/cloud/filestore/libs/storage/core/config.cpp index 8973016d31e..1684010e56d 100644 --- a/cloud/filestore/libs/storage/core/config.cpp +++ b/cloud/filestore/libs/storage/core/config.cpp @@ -42,6 +42,7 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases; xxx(CompactionThresholdAverage, ui32, 4 )\ xxx(GarbageCompactionThresholdAverage, ui32, 20 )\ xxx(NewCompactionEnabled, bool, false )\ + xxx(UseMixedBlocksInsteadOfAliveBlocksInCompaction, bool, false )\ xxx(CollectGarbageThreshold, ui32, 4_MB )\ xxx(FlushBytesThreshold, ui64, 4_MB )\ xxx(MaxDeleteGarbageBlobsPerTx, ui32, 16384 )\ diff --git a/cloud/filestore/libs/storage/core/config.h b/cloud/filestore/libs/storage/core/config.h index 1f421fd094b..ae3103c9467 100644 --- a/cloud/filestore/libs/storage/core/config.h +++ b/cloud/filestore/libs/storage/core/config.h @@ -81,6 +81,7 @@ class TStorageConfig ui32 GetCompactionThresholdAverage() const; ui32 GetGarbageCompactionThresholdAverage() const; bool GetNewCompactionEnabled() const; + bool GetUseMixedBlocksInsteadOfAliveBlocksInCompaction() const; ui32 GetCollectGarbageThreshold() const; ui64 GetFlushBytesThreshold() const; ui32 GetMaxDeleteGarbageBlobsPerTx() const; diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor.cpp index ff522c6f2bc..6a5a44f9b12 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor.cpp @@ -440,14 +440,16 @@ TCompactionInfo TIndexTabletActor::GetCompactionInfo() const const auto& stats = GetFileSystemStats(); const auto compactionStats = GetCompactionMapStats(0); const auto used = stats.GetUsedBlocksCount(); - auto alive = stats.GetMixedBlocksCount(); - if (alive > stats.GetGarbageBlocksCount()) { - alive -= stats.GetGarbageBlocksCount(); - } else { - alive = 0; + auto stored 
= stats.GetMixedBlocksCount(); + if (!Config->GetUseMixedBlocksInsteadOfAliveBlocksInCompaction()) { + if (stored > stats.GetGarbageBlocksCount()) { + stored -= stats.GetGarbageBlocksCount(); + } else { + stored = 0; + } } - const auto avgGarbagePercentage = used && alive > used - ? 100 * static_cast(alive - used) / used + const auto avgGarbagePercentage = used && stored > used + ? 100 * static_cast(stored - used) / used : 0; const auto rangeCount = compactionStats.UsedRangesCount; const auto avgCompactionScore = rangeCount diff --git a/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp b/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp index bf5f2b2a8c6..99e5fb9f379 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp @@ -1598,6 +1598,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data) storageConfig.SetGarbageCompactionThresholdAverage(20); storageConfig.SetCompactionThreshold(999'999); storageConfig.SetCleanupThreshold(999'999); + storageConfig.SetUseMixedBlocksInsteadOfAliveBlocksInCompaction(true); storageConfig.SetWriteBlobThreshold(block); TTestEnv env({}, std::move(storageConfig)); diff --git a/cloud/filestore/tests/loadtest/service-kikimr-newfeatures-test/nfs-storage.txt b/cloud/filestore/tests/loadtest/service-kikimr-newfeatures-test/nfs-storage.txt index c4c0b3b4577..ec484a20159 100644 --- a/cloud/filestore/tests/loadtest/service-kikimr-newfeatures-test/nfs-storage.txt +++ b/cloud/filestore/tests/loadtest/service-kikimr-newfeatures-test/nfs-storage.txt @@ -8,3 +8,4 @@ PreferredBlockSizeMultiplier: 64 MultiTabletForwardingEnabled: true GetNodeAttrBatchEnabled: true UnalignedThreeStageWriteEnabled: true +UseMixedBlocksInsteadOfAliveBlocksInCompaction: true diff --git a/doc/filestore/design/storage/config.md b/doc/filestore/design/storage/config.md new file mode 100644 index 00000000000..3c5d11d564a --- /dev/null +++ b/doc/filestore/design/storage/config.md @@ -0,0 +1,34 @@ +# 
Storage layer configuration + +## What it looks like + +StorageConfig is represented by a [proto spec](https://github.com/ydb-platform/nbs/blob/main/cloud/filestore/config/storage.proto) like any other filestore and blockstore config. It is supplied to the daemon as a prototext file - usually called nfs-storage.txt. + +There is a way to override some of the config settings individually for a single tablet (single FS or FS shard). + +Example (overrides CompactionThreshold and CleanupThreshold fields): +``` +filestore-client executeaction --action changestorageconfig --input-json '{"FileSystemId": "your_fs_id", "StorageConfig": {"CompactionThreshold": 100, "CleanupThreshold": 10000}}' +``` + +The overrides can be viewed like this: +``` +filestore-client executeaction --action getstorageconfigfields --input-json '{"FileSystemId": "your_fs_id", "StorageConfigFields": ["CompactionThreshold","CleanupThreshold"]}' +``` + +## Recommended settings + +Most of the fields have reasonable defaults. But in order to keep the behaviour for the existing systems stable from release to release, some of the new features/settings are disabled by default and thus should be manually enabled to get the best performance. 
+ +Here is the list of these settings: +* `TwoStageReadEnabled: true` - makes filestore-vhost fetch only metadata from the tablet and read the data directly from storage nodes +* `ThreeStageWriteEnabled: true` - makes filestore-vhost send only the metadata to the tablet upon writes and write the data directly to the storage nodes +* `NewCompactionEnabled: true` - enables a lot more sophisticated Compaction triggers like per-range and per-FS garbage level triggers and per-FS blob count trigger +* `NewCleanupEnabled: true` - similar thing for Cleanup - enables per-FS deletion marker count trigger +* `ReadAheadCacheRangeSize: 1048576` - enables index readahead for ranges up to 1MiB if a read pattern which is similar to sequential read is spotted (significantly improves performance for small and almost sequential reads) +* `NodeIndexCacheMaxNodes: 128` - enables node metadata index in the tablet (e.g. index for GetNodeAttr (stat) responses) +* `PreferredBlockSizeMultiplier: 64` - scales the recommended BlockSize shown to the guest which makes some apps like `cat` use larger read request sizes which optimizes their read throughput +* `MultiTabletForwardingEnabled: true` - basically enables the multitablet FS (#1350) feature +* `GetNodeAttrBatchEnabled: true` - enables fetching NodeAttr (stat) in large batches for multitablet filesystems +* `UnalignedThreeStageWriteEnabled: true` - causes unaligned writes to follow the efficient ThreeStageWrite datapath +* `UseMixedBlocksInsteadOfAliveBlocksInCompaction: true` - see the description in storage.proto (this flag basically fixes garbage level-based compaction triggers)