Skip to content

Commit

Permalink
issue-1825: deduplicate blobs in collect garbage with a linear complexity (#1826)
Browse files Browse the repository at this point in the history
  • Loading branch information
debnatkh authored and Maxim Deb Natkh committed Aug 21, 2024
1 parent 5bae10c commit 6053800
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 6 deletions.
18 changes: 14 additions & 4 deletions cloud/storage/core/libs/tablet/gc_logic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,23 @@ void RemoveDuplicates(
// we should leave it in garbageBlobs to be sure it is
// finally collected
if (nit->Generation() == genstep.first) {
git = garbageBlobs.erase(git);
gend = garbageBlobs.end();
*git = InvalidPartialBlobId;
++git;
}
nit = newBlobs.erase(nit);
nend = newBlobs.end();
*nit = InvalidPartialBlobId;
++nit;
}
}

auto it =
std::remove(newBlobs.begin(), newBlobs.end(), InvalidPartialBlobId);
newBlobs.erase(it, newBlobs.end());

it = std::remove(
garbageBlobs.begin(),
garbageBlobs.end(),
InvalidPartialBlobId);
garbageBlobs.erase(it, garbageBlobs.end());
}

void FindGarbageVersions(
Expand Down
34 changes: 34 additions & 0 deletions cloud/storage/core/libs/tablet/gc_logic_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,40 @@ Y_UNIT_TEST_SUITE(TGCLogicTest)
UNIT_ASSERT_VALUES_EQUAL(NumberOfNewBlobs, 0);
UNIT_ASSERT_VALUES_EQUAL(NumberOfGarbageBlobs, 0);
}

// Checks RemoveDuplicates on two small lists that overlap in two entries:
// one overlapping entry must survive only in the garbage list, the other must
// vanish from both lists, and the non-overlapping entries must stay put.
Y_UNIT_TEST(ShouldRemoveDuplicates)
{
    const auto fourMiB = 4*1024*1024;

    // The only fixture whose first constructor argument (2) differs from the
    // 3 passed to MakeCommitId below; treated as a previous-generation blob.
    const TPartialBlobId staleGenBlob(2, 10, 3, fourMiB, 0, 0);
    // Listed as both new and garbage.
    const TPartialBlobId sharedBlob(3, 10, 2, fourMiB, 0, 0);
    // Listed as new only.
    const TPartialBlobId freshOnlyBlob(3, 10, 4, fourMiB, 0, 0);
    // Listed as garbage only.
    const TPartialBlobId garbageOnlyBlob(3, 11, 6, fourMiB, 0, 0);

    TVector<TPartialBlobId> newBlobs = {
        staleGenBlob,    // expected to be dropped from newBlobs
        sharedBlob,      // expected to be dropped from both lists
        freshOnlyBlob,   // expected to survive
    };

    TVector<TPartialBlobId> garbageBlobs = {
        staleGenBlob,    // expected to survive in garbageBlobs
        sharedBlob,      // expected to be dropped from both lists
        garbageOnlyBlob, // expected to survive
    };

    RemoveDuplicates(newBlobs, garbageBlobs, MakeCommitId(3, 0));

    // Only the blob unique to newBlobs is left there.
    UNIT_ASSERT_VALUES_EQUAL(1, newBlobs.size());
    UNIT_ASSERT_VALUES_EQUAL(ToString(freshOnlyBlob), ToString(newBlobs[0]));

    // garbageBlobs keeps the stale-generation blob and its own unique blob,
    // in their original relative order.
    UNIT_ASSERT_VALUES_EQUAL(2, garbageBlobs.size());
    UNIT_ASSERT_VALUES_EQUAL(ToString(staleGenBlob), ToString(garbageBlobs[0]));
    UNIT_ASSERT_VALUES_EQUAL(ToString(garbageOnlyBlob), ToString(garbageBlobs[1]));
}
}

} // namespace NCloud::NStorage
8 changes: 6 additions & 2 deletions cloud/storage/core/libs/tablet/model/partial_blob_id.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#pragma once

#include "commit.h"

#include <cloud/storage/core/libs/common/alloc.h>

#include <util/digest/multi.h>
#include <util/generic/hash_set.h>
#include <util/stream/output.h>
#include <util/system/defaults.h>

#include <utility>

namespace NCloud {

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -188,6 +188,10 @@ inline TPartialBlobId NextBlobId(TPartialBlobId blobId, ui64 maxUniqueId)
return MakePartialBlobId(blobId.CommitId(), blobId.UniqueId() + 1);
}

////////////////////////////////////////////////////////////////////////////////

// Sentinel blob id built from InvalidCommitId and 0. RemoveDuplicates in
// gc_logic.cpp overwrites entries with this value and afterwards strips them
// with std::remove, so it must compare equal only to itself.
// NOTE(review): a namespace-scope `const` object defined in a header has
// internal linkage, i.e. every translation unit gets its own copy; consider
// `inline const` / `inline constexpr` if the toolchain baseline is C++17+ and
// the constructor allows it -- confirm before changing.
const TPartialBlobId InvalidPartialBlobId(InvalidCommitId, 0);

} // namespace NCloud

////////////////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit 6053800

Please sign in to comment.