Skip to content

Commit

Permalink
chore(storage/bloom/v1): bind bloom page size to max bloom size
Browse files Browse the repository at this point in the history
Previously, the bloom page size was hard-coded to 256KB. However, 256KB
is much smaller than the normal bloom size, leading to nearly every
bloom page to be overfilled with a single, massive bloom.

Binding the bloom page size to the maximum bloom size allows bloom pages
to contain multiple smaller blooms (which are still larger than 256KB)
or one maxed out bloom.

Signed-off-by: Robert Fratto <[email protected]>
  • Loading branch information
rfratto committed Oct 3, 2024
1 parent f1425b6 commit a266b17
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
15 changes: 11 additions & 4 deletions pkg/storage/bloom/v1/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,25 @@ func NewBlockOptions(enc compression.Codec, maxBlockSizeBytes, maxBloomSizeBytes
opts := NewBlockOptionsFromSchema(Schema{
version: CurrentSchemaVersion,
encoding: enc,
})
}, maxBloomSizeBytes)
opts.BlockSize = maxBlockSizeBytes
opts.UnencodedBlockOptions.MaxBloomSizeBytes = maxBloomSizeBytes
return opts
}

func NewBlockOptionsFromSchema(s Schema) BlockOptions {
func NewBlockOptionsFromSchema(s Schema, maxBloomSizeBytes uint64) BlockOptions {
return BlockOptions{
Schema: s,
// TODO(owen-d): benchmark and find good defaults
SeriesPageSize: 4 << 10, // 4KB, typical page size
BloomPageSize: 256 << 10, // 256KB, no idea what to make this
SeriesPageSize: 4 << 10, // 4KB, typical page size

// Allow one bloom page to fit either several small blooms or one large
// bloom at max size.
//
// Previously this value was fixed at 256KB, which is smaller than most
// blooms. Setting this value less than maxBloomSizeBytes means that most
// pages will consist of a single oversized bloom.
BloomPageSize: maxBloomSizeBytes,
}
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/storage/bloom/v1/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ var blockEncodings = []compression.Codec{
compression.Zstd,
}

func TestBlockOptions_BloomPageSize(t *testing.T) {
t.Parallel()

var (
maxBlockSizeBytes = uint64(50 << 10)
maxBloomSizeBytes = uint64(10 << 10)
)

opts := NewBlockOptions(compression.None, uint64(maxBlockSizeBytes), uint64(maxBloomSizeBytes))

Check failure on line 34 in pkg/storage/bloom/v1/builder_test.go

View workflow job for this annotation

GitHub Actions / check / golangciLint

unnecessary conversion (unconvert)

require.GreaterOrEqual(
t, opts.BloomPageSize, maxBloomSizeBytes,
"opts.BloomPageSize should be greater or equal to the maximum bloom size to avoid having too many overfilled pages",
)
}

func TestBlockOptions_RoundTrip(t *testing.T) {
t.Parallel()
opts := BlockOptions{
Expand Down

0 comments on commit a266b17

Please sign in to comment.