Skip to content

Commit

Permalink
chore: [k207] fix(blooms): ensure tokenizer cache is reset between se…
Browse files Browse the repository at this point in the history
…ries (#13373)

Backport 04bc3a4 from #13370

---

Fixes a bug where n-grams were not added to a bloom because they had already been added previously, potentially to the bloom of a different series. This caused blooms to incorrectly fail membership tests.

Co-authored-by: Owen Diehl <[email protected]>
  • Loading branch information
grafanabot and owen-d authored Jul 2, 2024
1 parent 46fa058 commit 7116a80
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pkg/storage/bloom/v1/bloom_tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,14 @@ func (bt *BloomTokenizer) newBloom() *Bloom {
}
}

// Populate fills one or more bloom filters with the tokens from the given chunks.
// It is called once per series.
func (bt *BloomTokenizer) Populate(
blooms SizedIterator[*Bloom],
chks Iterator[ChunkRefWithIter],
ch chan *BloomCreation,
) {
clear(bt.cache) // MUST always clear the cache before starting a new series
var next bool

// All but the last bloom are considered full -- send back unaltered
Expand Down
39 changes: 39 additions & 0 deletions pkg/storage/bloom/v1/bloom_tokenizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,45 @@ func BenchmarkPopulateSeriesWithBloom(b *testing.B) {
}
}

// TestTokenizerClearsCacheBetweenPopulateCalls runs Populate twice on the same
// tokenizer with an identical log line and verifies that each resulting bloom
// contains every token of that line. If tokens remembered from the first call
// were skipped during the second call, the second bloom would fail the
// membership checks below.
func TestTokenizerClearsCacheBetweenPopulateCalls(t *testing.T) {
	bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, NewMetrics(nil))
	line := "foobarbazz"

	var populated []*Bloom
	for round := 0; round < 2; round++ {
		lineItr, err := chunkRefItrFromLines(line)
		require.NoError(t, err)

		// A single chunk holding the single line; no pre-existing blooms are supplied.
		chks := NewSliceIter([]ChunkRefWithIter{
			{Ref: ChunkRef{}, Itr: lineItr},
		})
		results := make(chan *BloomCreation)
		go bt.Populate(NewEmptyIter[*Bloom](), chks, results)

		// Drain everything Populate emits for this round. Ranging over the
		// channel only ends once it is closed.
		// NOTE(review): presumably Populate closes the channel when done -- confirm.
		before := len(populated)
		for creation := range results {
			populated = append(populated, creation.Bloom)
		}

		// Each Populate call must have produced exactly one bloom.
		require.Equal(t, 1, len(populated)-before)
	}

	// Every bloom, including the one from the second call, must contain all
	// tokens of the line.
	for idx := range populated {
		tokens := bt.lineTokenizer.Tokens(line)
		for tokens.Next() {
			require.True(t, populated[idx].Test(tokens.At()))
		}
		require.NoError(t, tokens.Err())
	}
}

func BenchmarkMapClear(b *testing.B) {
bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics)
for i := 0; i < b.N; i++ {
Expand Down

0 comments on commit 7116a80

Please sign in to comment.