Discuss about MultiGet #334

Closed

Hojun-Cho opened this issue Sep 8, 2022 · 7 comments

@Hojun-Cho
Contributor

What is the issue you are having?
I saw #324 and I'm in the process of implementing MultiGet.
I don't know how to handle cacheShard.hit.

  • Calling hit causes a deadlock (see the sketch after the code below):
// entryData used in GetMulti
type entryData struct {
	hashedKey uint64
	entryKey  string
	index     int
}
...

func (c *BigCache) GetMulti(keys ...string) [][]byte {
	var wg sync.WaitGroup
	entries := make([][]byte, len(keys))
	shardMap := map[*cacheShard][]entryData{}
	for i, key := range keys {
		hashedKey := c.hash.Sum64(key)
		shard := c.getShard(hashedKey)
		shardMap[shard] = append(shardMap[shard],
			entryData{
				hashedKey: hashedKey,
				entryKey:  key,
				index:     i,
			})
	}

	wg.Add(len(shardMap))
	for shard := range shardMap {
		go func(shard *cacheShard) {
			defer wg.Done()
			shard.getMulti(shardMap[shard], entries)
		}(shard)
	}
	wg.Wait()
	return entries
}
...

func (s *cacheShard) getMulti(keys []entryData, entries [][]byte) {
	s.lock.RLock()
	defer s.lock.RUnlock()
	for _, key := range keys {
		wrappedEntry, err := s.getWrappedEntry(key.hashedKey)
		if err != nil {
			continue
		}
		if entryKey := readKeyFromEntry(wrappedEntry); key.entryKey != entryKey {
			s.collision()
			if s.isVerbose {
				s.logger.Printf("Collision detected. Both %q and %q have the same hash %x", key.entryKey, entryKey, key.hashedKey)
			}
			continue
		}
		entry := readEntry(wrappedEntry)
		// Causes a deadlock
		s.hit(key.hashedKey)
		entries[key.index] = entry
	}
}
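
One way to avoid this deadlock (a sketch of mine, not code from the library or a PR) is to record which keys were found while holding the read lock and only call hit once the lock has been released, assuming hit is what re-acquires the shard lock when StatsEnabled is true:

func (s *cacheShard) getMulti(keys []entryData, entries [][]byte) {
	// Collect the hashed keys of found entries so stats can be updated
	// outside the read lock (hit may lock the shard when stats are enabled).
	hitKeys := make([]uint64, 0, len(keys))

	s.lock.RLock()
	for _, key := range keys {
		wrappedEntry, err := s.getWrappedEntry(key.hashedKey)
		if err != nil {
			continue
		}
		if entryKey := readKeyFromEntry(wrappedEntry); key.entryKey != entryKey {
			s.collision() // verbose collision logging omitted for brevity
			continue
		}
		entries[key.index] = readEntry(wrappedEntry)
		hitKeys = append(hitKeys, key.hashedKey)
	}
	s.lock.RUnlock()

	// The read lock is released, so hit can take whatever lock it needs.
	for _, hashedKey := range hitKeys {
		s.hit(hashedKey)
	}
}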

Test

func TestWriteAndGetMultiParallelSameKeyWithStats(t *testing.T) {
	t.Parallel()

	cache, _ := NewBigCache(Config{
		Shards:             4,
		LifeWindow:         0,
		MaxEntriesInWindow: 1000 * 10 * 60,
		MaxEntrySize:       500,
		Verbose:            true,
		StatsEnabled:       true,
	})

	var wg sync.WaitGroup
	ntest := 10
	n := 3
	wg.Add(n)
	keys := []string{"key_1", "key_2", "key_3", "key_4"}
	values := [][]byte{blob('1', 1024), blob('2', 1024),
		blob('3', 1024), blob('4', 1024)}
	for i := 0; i < ntest; i++ {
		for i, key := range keys {
			assertEqual(t, nil, cache.Set(key, values[i]))
		}
	}

	for j := 0; j < n; j++ {
		go func() {
			for i := 0; i < ntest; i++ {
				actual := cache.GetMulti(keys...)
				assertEqual(t, values, actual)
			}
			wg.Done()
		}()
	}

	...
}
  • Or am I taking the GetMulti implementation in the wrong direction?

Environment:

  • Version (git sha or release): v3.0.2
  • go version: 1.18.4
@Hojun-Cho
Contributor Author

Is it correct to proceed sequentially without using a goroutine?

@janisz
Collaborator

janisz commented Sep 8, 2022

Is it correct to proceed sequentially without using a goroutine?

Yes, it depends on how you do the locking. I think we should start with a simple solution that we can iterate on with benchmarks.

@Hojun-Cho
Contributor Author

Thank you so much for the advice.

@Hojun-Cho reopened this Sep 9, 2022
@Hojun-Cho
Contributor Author

@janisz

func (c *BigCache) MultiGet(keys ...string) ([][]byte, error) {
	entries := make([][]byte, len(keys))
	var firstErr error

	for i, key := range keys {
		hashedKey := c.hash.Sum64(key)
		shard := c.getShard(hashedKey)
		entry, err := shard.get(key, hashedKey)
		if firstErr == nil {
			firstErr = err
		}
		entries[i] = entry
	}
	return entries, firstErr
}
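
A minimal usage sketch for the method above (the keys are placeholders; the returned error is the first error encountered, and entries for missing keys stay nil):

entries, err := cache.MultiGet("key_1", "key_2", "key_3")
if err != nil {
	// handle the first lookup error; the other entries may still be filled in
}
for i, entry := range entries {
	fmt.Printf("entry %d: %d bytes\n", i, len(entry))
}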

This is the benchmark test code.

func BenchmarkMultiReadFromCache(b *testing.B) {
	for _, shards := range []int{1, 512, 1024, 8192} {
		for _, readCount := range []int{1, 4, 8, 16} {
			b.Run(fmt.Sprintf("%d-shards,%d-count", shards, readCount), func(b *testing.B) {
				multiReadFromCache(b, shards, true, readCount)
			})
		}
	}
}

func BenchmarkSequentialReadFromCache(b *testing.B) {
	for _, shards := range []int{1, 512, 1024, 8192} {
		for _, readCount := range []int{1, 4, 8, 16} {
			b.Run(fmt.Sprintf("%d-shards,%d-count", shards, readCount), func(b *testing.B) {
				multiReadFromCache(b, shards, false, readCount)
			})
		}
	}
}

func multiReadFromCache(b *testing.B, shards int, isMulti bool, readCount int) {
	cache, _ := NewBigCache(Config{
		Shards:             shards,
		LifeWindow:         1000 * time.Second,
		MaxEntriesInWindow: max(b.N, 100),
		MaxEntrySize:       500,
	})
	for i := 0; i < b.N; i++ {
		cache.Set(strconv.Itoa(i), message)
	}
	b.ResetTimer()

	b.RunParallel(func(pb *testing.PB) {
		b.ReportAllocs()

		for pb.Next() {
			keys := []string{}
			for i := 0; i < readCount; i++ {
				keys = append(keys, strconv.Itoa(rand.Intn(b.N)))
			}
			if isMulti {
				cache.MultiGet(keys...)
			} else {
				for _, key := range keys {
					cache.Get(key)
				}
			}
		}
	})
}
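
Note that max(b.N, 100) is not a builtin in Go 1.18 (message and blob come from the existing test helpers), so I'm assuming a small integer helper along these lines:

// Assumed helper for the MaxEntriesInWindow line above; not part of the public API.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
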
  • The following results are the differences MultiRead - SequentialRead for each shard count:
  • shard 1
    count   ns/op   B/op   allocs/op
    1         546     24           1
    4          35    155           1
    8          41    191           1
  • shard 512
    count   ns/op   B/op   allocs/op
    1          61     24           1
    4         -46     97           1
    8        -105    192           1
  • shard 1024
    count   ns/op   B/op   allocs/op
    1          16     24           1
    4          32     97           1
    8          -3    192           1
  • shard 8192
    count   ns/op   B/op   allocs/op
    1           4     24           1
    4           4    108           1
    8          32    191           1

MultiGet is not performing very well.

os: windows
arch: amd64
cpu: Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz

@Hojun-Cho
Contributor Author

Doing the multi-get at the shard level gives similar results.

type entryData struct {
	hashedKey uint64
	entryKey  string
	index     int
}

func (s *cacheShard) multiGet(entryDatas []entryData, entries [][]byte) {
	s.lock.RLock()
	defer s.lock.RUnlock()
	for _, entryData := range entryDatas {
		wrappedEntry, err := s.getWrappedEntry(entryData.hashedKey)
		if err != nil {
			continue
		}
		if entryKey := readKeyFromEntry(wrappedEntry); entryData.entryKey != entryKey {
			s.collision()
			if s.isVerbose {
				s.logger.Printf("Collision detected. Both %q and %q have the same hash %x", entryData.entryKey, entryKey, entryData.hashedKey)
			}
			continue
		}
		entry := readEntry(wrappedEntry)
		s.hit(entryData.hashedKey)
		entries[entryData.index] = entry
	}
}
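
For context, here is a sketch (mine, not from an eventual PR) of how the cache-level call could group keys by shard and read each shard sequentially with multiGet, combining the grouping from the first comment with the shard-level method above:

func (c *BigCache) MultiGet(keys ...string) [][]byte {
	entries := make([][]byte, len(keys))
	// Group the requested keys by shard so each shard is locked only once.
	shardMap := map[*cacheShard][]entryData{}
	for i, key := range keys {
		hashedKey := c.hash.Sum64(key)
		shard := c.getShard(hashedKey)
		shardMap[shard] = append(shardMap[shard], entryData{
			hashedKey: hashedKey,
			entryKey:  key,
			index:     i,
		})
	}
	// Sequential, per the earlier suggestion to start without goroutines.
	for shard, data := range shardMap {
		shard.multiGet(data, entries)
	}
	return entries
}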

@Hojun-Cho reopened this Sep 9, 2022
@janisz
Collaborator

janisz commented Sep 9, 2022

Why are there negative numbers in the table?

@Hojun-Cho
Contributor Author

Negative numbers mean MultiRead is faster than SequentialRead (the values are MultiRead - SequentialRead).
