Discuss about MultiGet #334

Closed

Hojun-Cho opened this issue Sep 8, 2022 · 7 comments

@Hojun-Cho
Contributor

What is the issue you are having?
I saw #324 and I'm in the process of implementing MultiGet.
I don't know how to handle cacheShard.hit.

  • Calling hit causes a deadlock (see the sketch after the code below):
// entryData used in GetMulti
type entryData struct {
	hashedKey uint64
	entryKey  string
	index     int
}
...

func (c *BigCache) GetMulti(keys ...string) [][]byte {
	var wg sync.WaitGroup
	entries := make([][]byte, len(keys))
	shardMap := map[*cacheShard][]entryData{}
	for i, key := range keys {
		hashedKey := c.hash.Sum64(key)
		shard := c.getShard(hashedKey)
		shardMap[shard] = append(shardMap[shard],
			entryData{
				hashedKey: hashedKey,
				entryKey:  key,
				index:     i,
			})
	}

	wg.Add(len(shardMap))
	for shard := range shardMap {
		go func(shard *cacheShard) {
			defer wg.Done()
			shard.getMulti(shardMap[shard], entries)
		}(shard)
	}
	wg.Wait()
	return entries
}
...

func (s *cacheShard) getMulti(keys []entryData, entries [][]byte) {
	s.lock.RLock()
	defer s.lock.RUnlock()
	for _, key := range keys {
		wrappedEntry, err := s.getWrappedEntry(key.hashedKey)
		if err != nil {
			continue
		}
		if entryKey := readKeyFromEntry(wrappedEntry); key.entryKey != entryKey {
			s.collision()
			if s.isVerbose {
				s.logger.Printf("Collision detected. Both %q and %q have the same hash %x", key.entryKey, entryKey, key.hashedKey)
			}
			continue
		}
		entry := readEntry(wrappedEntry)
		// Causes a deadlock
		s.hit(key.hashedKey)
		entries[key.index] = entry
	}
}
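
One way to avoid this deadlock (a sketch of mine, not code from the library or a PR) is to record which keys were found while holding the read lock and only call hit once the lock has been released, assuming hit is what re-acquires the shard lock when StatsEnabled is true:

func (s *cacheShard) getMulti(keys []entryData, entries [][]byte) {
	// Collect the hashed keys of found entries so stats can be updated
	// outside the read lock (hit may lock the shard when stats are enabled).
	hitKeys := make([]uint64, 0, len(keys))

	s.lock.RLock()
	for _, key := range keys {
		wrappedEntry, err := s.getWrappedEntry(key.hashedKey)
		if err != nil {
			continue
		}
		if entryKey := readKeyFromEntry(wrappedEntry); key.entryKey != entryKey {
			s.collision() // verbose collision logging omitted for brevity
			continue
		}
		entries[key.index] = readEntry(wrappedEntry)
		hitKeys = append(hitKeys, key.hashedKey)
	}
	s.lock.RUnlock()

	// The read lock is released, so hit can take whatever lock it needs.
	for _, hashedKey := range hitKeys {
		s.hit(hashedKey)
	}
}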

Test

func TestWriteAndGetMultiParallelSameKeyWithStats(t *testing.T) {
	t.Parallel()

	cache, _ := NewBigCache(Config{
		Shards:             4,
		LifeWindow:         0,
		MaxEntriesInWindow: 1000 * 10 * 60,
		MaxEntrySize:       500,
		Verbose:            true,
		StatsEnabled:       true,
	})

	var wg sync.WaitGroup
	ntest := 10
	n := 3
	wg.Add(n)
	keys := []string{"key_1", "key_2", "key_3", "key_4"}
	values := [][]byte{blob('1', 1024), blob('2', 1024),
		blob('3', 1024), blob('4', 1024)}
	for i := 0; i < ntest; i++ {
		for i, key := range keys {
			assertEqual(t, nil, cache.Set(key, values[i]))
		}
	}

	for j := 0; j < n; j++ {
		go func() {
			for i := 0; i < ntest; i++ {
				actual := cache.GetMulti(keys...)
				assertEqual(t, values, actual)
			}
			wg.Done()
		}()
	}

	...
}
  • Or am I taking the GetMulti implementation in the wrong direction?

Environment:

  • Version (git sha or release): v3.0.2
  • go version: 1.18.4
@Hojun-Cho
Contributor Author

Is it correct to proceed sequentially without using a goroutine?

@janisz
Collaborator

janisz commented Sep 8, 2022

Is it correct to proceed sequentially without using a goroutine?

Yes, it depends on how you do the locking. I think we should start with a simple solution that we can iterate on with benchmarks.

@Hojun-Cho
Contributor Author

Thank you so much for the advice.

@Hojun-Cho reopened this Sep 9, 2022
@Hojun-Cho
Contributor Author

@janisz

func (c *BigCache) MultiGet(keys ...string) ([][]byte, error) {
	entries := make([][]byte, len(keys))
	var firstErr error

	for i, key := range keys {
		hashedKey := c.hash.Sum64(key)
		shard := c.getShard(hashedKey)
		entry, err := shard.get(key, hashedKey)
		if firstErr == nil {
			firstErr = err
		}
		entries[i] = entry
	}
	return entries, firstErr
}
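
A minimal usage sketch for the method above (the keys are placeholders; the returned error is the first error encountered, and entries for missing keys stay nil):

entries, err := cache.MultiGet("key_1", "key_2", "key_3")
if err != nil {
	// handle the first lookup error; the other entries may still be filled in
}
for i, entry := range entries {
	fmt.Printf("entry %d: %d bytes\n", i, len(entry))
}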

This is the benchmark test code.

func BenchmarkMultiReadFromCache(b *testing.B) {
	for _, shards := range []int{1, 512, 1024, 8192} {
		for _, readCount := range []int{1, 4, 8, 16} {
			b.Run(fmt.Sprintf("%d-shards,%d-count", shards, readCount), func(b *testing.B) {
				multiReadFromCache(b, shards, true, readCount)
			})
		}
	}
}

func BenchmarkSequentialReadFromCache(b *testing.B) {
	for _, shards := range []int{1, 512, 1024, 8192} {
		for _, readCount := range []int{1, 4, 8, 16} {
			b.Run(fmt.Sprintf("%d-shards,%d-count", shards, readCount), func(b *testing.B) {
				multiReadFromCache(b, shards, false, readCount)
			})
		}
	}
}

func multiReadFromCache(b *testing.B, shards int, isMulti bool, readCount int) {
	cache, _ := NewBigCache(Config{
		Shards:             shards,
		LifeWindow:         1000 * time.Second,
		MaxEntriesInWindow: max(b.N, 100),
		MaxEntrySize:       500,
	})
	for i := 0; i < b.N; i++ {
		cache.Set(strconv.Itoa(i), message)
	}
	b.ResetTimer()

	b.RunParallel(func(pb *testing.PB) {
		b.ReportAllocs()

		for pb.Next() {
			keys := []string{}
			for i := 0; i < readCount; i++ {
				keys = append(keys, strconv.Itoa(rand.Intn(b.N)))
			}
			if isMulti {
				cache.MultiGet(keys...)
			} else {
				for _, key := range keys {
					cache.Get(key)
				}
			}
		}
	})
}
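
Note that max(b.N, 100) is not a builtin in Go 1.18 (message and blob come from the existing test helpers), so I'm assuming a small integer helper along these lines:

// Assumed helper for the MaxEntriesInWindow line above; not part of the public API.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
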
  • The following results are the differences MultiRead - SequentialRead for each shard count:
  • shard 1
    count   ns/op   B/op   allocs/op
    1         546     24           1
    4          35    155           1
    8          41    191           1
  • shard 512
    count   ns/op   B/op   allocs/op
    1          61     24           1
    4         -46     97           1
    8        -105    192           1
  • shard 1024
    count   ns/op   B/op   allocs/op
    1          16     24           1
    4          32     97           1
    8          -3    192           1
  • shard 8192
    count   ns/op   B/op   allocs/op
    1           4     24           1
    4           4    108           1
    8          32    191           1

MultiGet is not performing very well.

os: windows
arch: amd64
cpu: Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz

@Hojun-Cho
Contributor Author

Doing the multi-get at the shard level gives similar results.

type entryData struct {
	hashedKey uint64
	entryKey  string
	index     int
}

func (s *cacheShard) multiGet(entryDatas []entryData, entries [][]byte) {
	s.lock.RLock()
	defer s.lock.RUnlock()
	for _, entryData := range entryDatas {
		wrappedEntry, err := s.getWrappedEntry(entryData.hashedKey)
		if err != nil {
			continue
		}
		if entryKey := readKeyFromEntry(wrappedEntry); entryData.entryKey != entryKey {
			s.collision()
			if s.isVerbose {
				s.logger.Printf("Collision detected. Both %q and %q have the same hash %x", entryData.entryKey, entryKey, entryData.hashedKey)
			}
			continue
		}
		entry := readEntry(wrappedEntry)
		s.hit(entryData.hashedKey)
		entries[entryData.index] = entry
	}
}
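
For context, here is a sketch (mine, not from an eventual PR) of how the cache-level call could group keys by shard and read each shard sequentially with multiGet, combining the grouping from the first comment with the shard-level method above:

func (c *BigCache) MultiGet(keys ...string) [][]byte {
	entries := make([][]byte, len(keys))
	// Group the requested keys by shard so each shard is locked only once.
	shardMap := map[*cacheShard][]entryData{}
	for i, key := range keys {
		hashedKey := c.hash.Sum64(key)
		shard := c.getShard(hashedKey)
		shardMap[shard] = append(shardMap[shard], entryData{
			hashedKey: hashedKey,
			entryKey:  key,
			index:     i,
		})
	}
	// Sequential, per the earlier suggestion to start without goroutines.
	for shard, data := range shardMap {
		shard.multiGet(data, entries)
	}
	return entries
}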

@Hojun-Cho reopened this Sep 9, 2022
@janisz
Collaborator

janisz commented Sep 9, 2022

Why are there negative numbers in the table?

@Hojun-Cho
Contributor Author

Negative numbers mean MultiRead is faster than SequentialRead (the values are MultiRead - SequentialRead).
