Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/region: optimize the logic of randomly selecting regions within a single range #8211

Merged
merged 4 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 42 additions & 27 deletions pkg/core/region_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,24 +339,32 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo {
}
// Pre-allocate the variables to reduce the temporary memory allocations.
var (
startKey, endKey []byte
startIndex, endIndex, randIndex int
startItem *regionItem
pivotItem = &regionItem{&RegionInfo{meta: &metapb.Region{}}}
region *RegionInfo
regions = make([]*RegionInfo, 0, n)
rangeLen, curLen = len(ranges), len(regions)
startKey, endKey []byte
// By default, we set the `startIndex` and `endIndex` to the whole tree range.
startIndex, endIndex = 0, treeLen
randIndex int
startItem *regionItem
pivotItem = &regionItem{&RegionInfo{meta: &metapb.Region{}}}
region *RegionInfo
regions = make([]*RegionInfo, 0, n)
rangeLen, curLen = len(ranges), len(regions)
// setStartEndIndices is a helper function to set `startIndex` and `endIndex`
// according to the `startKey` and `endKey`.
// according to the `startKey` and `endKey` and check if the range is invalid
// to skip the iteration.
// TODO: maybe we could cache the `startIndex` and `endIndex` for each range.
setStartEndIndices = func() {
pivotItem.meta.StartKey = startKey
startItem, startIndex = t.tree.GetWithIndex(pivotItem)
if len(endKey) != 0 {
setAndCheckStartEndIndices = func() bool {
HuSharp marked this conversation as resolved.
Show resolved Hide resolved
startKeyLen, endKeyLen := len(startKey), len(endKey)
if startKeyLen == 0 && endKeyLen == 0 {
startIndex, endIndex = 0, treeLen
return false
}
if startKeyLen > 0 {
pivotItem.meta.StartKey = startKey
startItem, startIndex = t.tree.GetWithIndex(pivotItem)
}
if endKeyLen > 0 {
pivotItem.meta.StartKey = endKey
_, endIndex = t.tree.GetWithIndex(pivotItem)
} else {
endIndex = treeLen
}
// Consider that the item in the tree may not be continuous,
// we need to check if the previous item contains the key.
Expand All @@ -366,13 +374,27 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo {
startIndex--
}
}
// Check whether the `startIndex` and `endIndex` are valid.
if endIndex <= startIndex {
if endKeyLen > 0 && bytes.Compare(startKey, endKey) > 0 {
log.Error("wrong range keys",
logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))),
logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))),
errs.ZapError(errs.ErrWrongRangeKeys))
}
return true
}
return false
}
)
// If no ranges specified, select randomly from the whole tree.
// This is a fast path to reduce the unnecessary iterations.
if rangeLen == 0 {
startKey, endKey = []byte(""), []byte("")
setStartEndIndices()
// This is a fast path to reduce the unnecessary iterations when we only have one range.
if rangeLen <= 1 {
if rangeLen == 1 {
startKey, endKey = ranges[0].StartKey, ranges[0].EndKey
if setAndCheckStartEndIndices() {
return regions
}
}
for curLen < n {
randIndex = rand.Intn(endIndex-startIndex) + startIndex
region = t.tree.GetAt(randIndex).RegionInfo
Expand All @@ -393,14 +415,7 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo {
// Shuffle the ranges to increase the randomness.
for _, i := range rand.Perm(rangeLen) {
startKey, endKey = ranges[i].StartKey, ranges[i].EndKey
setStartEndIndices()
if endIndex <= startIndex {
if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 {
log.Error("wrong range keys",
logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))),
logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))),
errs.ZapError(errs.ErrWrongRangeKeys))
}
if setAndCheckStartEndIndices() {
continue
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/core/region_tree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,12 @@ func TestRandomRegion(t *testing.T) {
updateNewItem(tree, regionA)
ra := tree.randomRegion([]KeyRange{NewKeyRange("", "")})
re.Equal(regionA, ra)
ra = tree.randomRegion(nil)
re.Equal(regionA, ra)
ra2 := tree.RandomRegions(2, []KeyRange{NewKeyRange("", "")})
re.Equal([]*RegionInfo{regionA, regionA}, ra2)
ra2 = tree.RandomRegions(2, nil)
re.Equal([]*RegionInfo{regionA, regionA}, ra2)

regionB := NewTestRegionInfo(2, 2, []byte("g"), []byte("n"))
regionC := NewTestRegionInfo(3, 3, []byte("n"), []byte("t"))
Expand All @@ -307,6 +311,7 @@ func TestRandomRegion(t *testing.T) {
rf = tree.randomRegion([]KeyRange{NewKeyRange("z", "")})
re.Nil(rf)

checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil)
checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")})
checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB}, []KeyRange{NewKeyRange("", "n")})
checkRandomRegion(re, tree, []*RegionInfo{regionC, regionD}, []KeyRange{NewKeyRange("n", "")})
Expand Down Expand Up @@ -356,6 +361,7 @@ func TestRandomRegionDiscontinuous(t *testing.T) {
rd := tree.randomRegion([]KeyRange{NewKeyRange("", "b")})
re.Equal(regionD, rd)

checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil)
checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")})
}

Expand Down
Loading