From 3982e503a47444572bae710506dfead13ffa33fa Mon Sep 17 00:00:00 2001
From: Shawn Poulson <92753637+Baliedge@users.noreply.github.com>
Date: Tue, 19 Mar 2024 09:18:24 -0400
Subject: [PATCH] MegaFix global behavior bugs. (#225)

* Fix global behavior `ResetTime` bug.
  Every call to `GetRateLimits` would reset `ResetTime` but not the
  `Remaining` counter. This would cause counters to eventually deplete
  and never fully reset.

* Refine request time propagation.
  Request time is resolved at the first call to `getLocalRateLimit()`,
  then propagated peer-to-peer for global behavior.

* Fix race condition in global behavior.
  `QueueUpdate()` accepted both the request and response when local
  rate limits were updated, but calls to `QueueUpdate()` are not
  guaranteed to happen in chronological order. This allowed stale
  updates to propagate, losing hits. Instead, `QueueUpdate()` now
  passes only the request; the current rate limit state is retrieved
  immediately before propagation.
  Rigorous functional tests added around global behavior.

* Fix intermittent test error caused by `TestHealthCheck`.

* Refactor global behavior and functional tests for stability.
  - Simplify passing of request time across layers.
  - Better handling of metrics in tests.
  - Better detection of global broadcasts, global updates, and idle.
  - Drop redundant metric `guberator_global_broadcast_counter`.
  - Fix metric `gubernator_global_queue_length` for global broadcast.
  - Add metric `gubernator_global_send_queue_length` for global send.

* Maintain backwards compatibility needed for upgrading.

* Don't call the `OnChange()` event from non-owner peers. Non-owners
  shouldn't be persisting rate limit state.

* Simplify cache item expiration check.

* Rename `RequestTime` to `CreatedAt` in protos.
---
 Makefile | 4 +-
 algorithms.go | 87 ++-
 benchmark_test.go | 33 +-
 cache.go | 16 +
 functional_test.go | 1060 ++++++++++++++++++++-------
 global.go | 91 ++-
 go.mod | 13 +-
 go.sum | 26 +-
 gubernator.go | 65 +-
 gubernator.pb.go | 148 ++--
 gubernator.proto | 12 +
 interval_test.go | 2 +-
 lrucache.go | 11 +-
 peer_client.go | 7 +-
 peer_client_test.go | 8 +-
 peers.pb.go | 75 +-
 peers.proto | 44 +-
 python/gubernator/gubernator_pb2.py | 40 +-
 python/gubernator/peers_pb2.py | 12 +-
 workers.go | 17 +-
 20 files changed, 1212 insertions(+), 559 deletions(-)

diff --git a/Makefile b/Makefile
index 192ed39c..5baa4d74 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ $(GOLANGCI_LINT): ## Download Go linter
 
 .PHONY: lint
 lint: $(GOLANGCI_LINT) ## Run Go linter
-	$(GOLANGCI_LINT) run -v --fix -c .golangci.yml ./...
+	$(GOLANGCI_LINT) run -v -c .golangci.yml ./...
 
 .PHONY: test
 test: ## Run unit tests and measure code coverage
@@ -24,7 +24,7 @@ test: ## Run unit tests and measure code coverage
 
 .PHONY: bench
 bench: ## Run Go benchmarks
-	go test ./... -bench . -benchtime 5s -timeout 0 -run=XXX -benchmem
+	go test ./... -bench . -benchtime 5s -timeout 0 -run='^$$' -benchmem
 
 .PHONY: docker
 docker: ## Build Docker image
diff --git a/algorithms.go b/algorithms.go
index f2ed4a82..c9231610 100644
--- a/algorithms.go
+++ b/algorithms.go
@@ -34,8 +34,7 @@ import (
 // with 100 emails and the request will succeed. You can override this default behavior with `DRAIN_OVER_LIMIT`
 // Implements token bucket algorithm for rate limiting.
https://en.wikipedia.org/wiki/Token_bucket -func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - +func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { tokenBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("tokenBucket")) defer tokenBucketTimer.ObserveDuration() @@ -100,7 +99,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, reqState) } // Update the limit if it changed. @@ -133,12 +132,12 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // If our new duration means we are currently expired. - now := MillisecondNow() - if expire <= now { + createdAt := *r.CreatedAt + if expire <= createdAt { // Renew item. span.AddEvent("Limit has expired") - expire = now + r.Duration - t.CreatedAt = now + expire = createdAt + r.Duration + t.CreatedAt = createdAt t.Remaining = t.Limit } @@ -147,7 +146,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * rl.ResetTime = expire } - if s != nil { + if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() @@ -162,7 +161,9 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If we are already at the limit. if rl.Remaining == 0 && r.Hits > 0 { trace.SpanFromContext(ctx).AddEvent("Already over the limit") - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT t.Status = rl.Status return rl, nil @@ -180,7 +181,9 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // without updating the cache. if r.Hits > t.Remaining { trace.SpanFromContext(ctx).AddEvent("Over the limit") - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT if HasBehavior(r.Behavior, Behavior_DRAIN_OVER_LIMIT) { // DRAIN_OVER_LIMIT behavior drains the remaining counter. @@ -196,19 +199,19 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // Item is not found in cache or store, create new. - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, reqState) } // Called by tokenBucket() when adding a new item in the store. -func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - now := MillisecondNow() - expire := now + r.Duration +func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { + createdAt := *r.CreatedAt + expire := createdAt + r.Duration t := &TokenBucketItem{ Limit: r.Limit, Duration: r.Duration, Remaining: r.Limit - r.Hits, - CreatedAt: now, + CreatedAt: createdAt, } // Add a new rate limit to the cache. @@ -236,7 +239,9 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) // Client could be requesting that we always return OVER_LIMIT. 
if r.Hits > r.Limit { trace.SpanFromContext(ctx).AddEvent("Over the limit") - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT rl.Remaining = r.Limit t.Remaining = r.Limit @@ -244,7 +249,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) c.Add(item) - if s != nil { + if s != nil && reqState.IsOwner { s.OnChange(ctx, r, item) } @@ -252,7 +257,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) } // Implements leaky bucket algorithm for rate limiting https://en.wikipedia.org/wiki/Leaky_bucket -func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { leakyBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getRateLimit_leakyBucket")) defer leakyBucketTimer.ObserveDuration() @@ -260,7 +265,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * r.Burst = r.Limit } - now := MillisecondNow() + createdAt := *r.CreatedAt // Get rate limit from cache. hashKey := r.HashKey() @@ -309,7 +314,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, reqState) } if HasBehavior(r.Behavior, Behavior_RESET_REMAINING) { @@ -349,16 +354,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } if r.Hits != 0 { - c.UpdateExpiration(r.HashKey(), now+duration) + c.UpdateExpiration(r.HashKey(), createdAt+duration) } // Calculate how much leaked out of the bucket since the last time we leaked a hit - elapsed := now - b.UpdatedAt + elapsed := createdAt - b.UpdatedAt leak := float64(elapsed) / rate if int64(leak) > 0 { b.Remaining += leak - b.UpdatedAt = now + b.UpdatedAt = createdAt } if int64(b.Remaining) > b.Burst { @@ -369,12 +374,12 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * Limit: b.Limit, Remaining: int64(b.Remaining), Status: Status_UNDER_LIMIT, - ResetTime: now + (b.Limit-int64(b.Remaining))*int64(rate), + ResetTime: createdAt + (b.Limit-int64(b.Remaining))*int64(rate), } // TODO: Feature missing: check for Duration change between item/request. - if s != nil { + if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() @@ -382,7 +387,9 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If we are already at the limit if int64(b.Remaining) == 0 && r.Hits > 0 { - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT return rl, nil } @@ -391,14 +398,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * if int64(b.Remaining) == r.Hits { b.Remaining = 0 rl.Remaining = int64(b.Remaining) - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } // If requested is more than available, then return over the limit // without updating the bucket, unless `DRAIN_OVER_LIMIT` is set. if r.Hits > int64(b.Remaining) { - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT // DRAIN_OVER_LIMIT behavior drains the remaining counter. 
@@ -417,16 +426,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * b.Remaining -= float64(r.Hits) rl.Remaining = int64(b.Remaining) - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, reqState) } // Called by leakyBucket() when adding a new item in the store. -func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - now := MillisecondNow() +func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { + createdAt := *r.CreatedAt duration := r.Duration rate := float64(duration) / float64(r.Limit) if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { @@ -445,7 +454,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) Remaining: float64(r.Burst - r.Hits), Limit: r.Limit, Duration: duration, - UpdatedAt: now, + UpdatedAt: createdAt, Burst: r.Burst, } @@ -453,20 +462,22 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) Status: Status_UNDER_LIMIT, Limit: b.Limit, Remaining: r.Burst - r.Hits, - ResetTime: now + (b.Limit-(r.Burst-r.Hits))*int64(rate), + ResetTime: createdAt + (b.Limit-(r.Burst-r.Hits))*int64(rate), } // Client could be requesting that we start with the bucket OVER_LIMIT if r.Hits > r.Burst { - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT rl.Remaining = 0 - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) b.Remaining = 0 } item := &CacheItem{ - ExpireAt: now + duration, + ExpireAt: createdAt + duration, Algorithm: r.Algorithm, Key: r.HashKey(), Value: &b, @@ -474,7 +485,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) c.Add(item) - if s != nil { + if s != nil && reqState.IsOwner { s.OnChange(ctx, r, item) } diff --git a/benchmark_test.go b/benchmark_test.go index 5a383761..9673cf2b 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -22,6 +22,7 @@ import ( guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" + "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/syncutil" "github.com/stretchr/testify/require" ) @@ -31,8 +32,9 @@ func BenchmarkServer(b *testing.B) { conf := guber.Config{} err := conf.SetDefaults() require.NoError(b, err, "Error in conf.SetDefaults") + createdAt := epochMillis(clock.Now()) - b.Run("GetPeerRateLimit() with no batching", func(b *testing.B) { + b.Run("GetPeerRateLimit", func(b *testing.B) { client, err := guber.NewPeerClient(guber.PeerConfig{ Info: cluster.GetRandomPeer(cluster.DataCenterNone), Behavior: conf.Behaviors, @@ -40,17 +42,17 @@ func BenchmarkServer(b *testing.B) { if err != nil { b.Errorf("Error building client: %s", err) } - b.ResetTimer() for n := 0; n < b.N; n++ { - _, err := client.GetPeerRateLimit(context.Background(), &guber.RateLimitReq{ - Name: "get_peer_rate_limits_benchmark", + _, err := client.GetPeerRateLimit(ctx, &guber.RateLimitReq{ + Name: b.Name(), UniqueKey: guber.RandomString(10), - Behavior: guber.Behavior_NO_BATCHING, + // Behavior: guber.Behavior_NO_BATCHING, Limit: 10, Duration: 5, Hits: 1, + CreatedAt: &createdAt, }) if err != nil { b.Errorf("Error in 
client.GetPeerRateLimit: %s", err) @@ -58,17 +60,16 @@ func BenchmarkServer(b *testing.B) { } }) - b.Run("GetRateLimit()", func(b *testing.B) { + b.Run("GetRateLimits batching", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Limit: 10, Duration: guber.Second * 5, @@ -82,17 +83,16 @@ func BenchmarkServer(b *testing.B) { } }) - b.Run("GetRateLimitGlobal()", func(b *testing.B) { + b.Run("GetRateLimits global", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { - _, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ + _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Behavior: guber.Behavior_GLOBAL, Limit: 10, @@ -110,11 +110,10 @@ func BenchmarkServer(b *testing.B) { b.Run("HealthCheck", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { - if _, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}); err != nil { + if _, err := client.HealthCheck(ctx, &guber.HealthCheckReq{}); err != nil { b.Errorf("Error in client.HealthCheck: %s", err) } } @@ -123,17 +122,15 @@ func BenchmarkServer(b *testing.B) { b.Run("Thundering herd", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() - fan := syncutil.NewFanOut(100) for n := 0; n < b.N; n++ { fan.Run(func(o interface{}) error { - _, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ + _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Limit: 10, Duration: guber.Second * 5, diff --git a/cache.go b/cache.go index 163627d2..0fd431a5 100644 --- a/cache.go +++ b/cache.go @@ -39,3 +39,19 @@ type CacheItem struct { // for the latest rate limit data. 
InvalidAt int64 } + +func (item *CacheItem) IsExpired() bool { + now := MillisecondNow() + + // If the entry is invalidated + if item.InvalidAt != 0 && item.InvalidAt < now { + return true + } + + // If the entry has expired, remove it from the cache + if item.ExpireAt < now { + return true + } + + return false +} diff --git a/functional_test.go b/functional_test.go index 654342b7..400137b2 100644 --- a/functional_test.go +++ b/functional_test.go @@ -24,18 +24,24 @@ import ( "math/rand" "net/http" "os" + "sort" "strings" + "sync" + "sync/atomic" "testing" "time" + "github.com/mailgun/errors" guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" "github.com/mailgun/holster/v4/clock" + "github.com/mailgun/holster/v4/syncutil" "github.com/mailgun/holster/v4/testutil" "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" json "google.golang.org/protobuf/encoding/protojson" @@ -43,23 +49,12 @@ import ( // Setup and shutdown the mock gubernator cluster for the entire test suite func TestMain(m *testing.M) { - if err := cluster.StartWith([]guber.PeerInfo{ - {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985", DataCenter: cluster.DataCenterNone}, - - // DataCenterOne - {GRPCAddress: "127.0.0.1:9890", HTTPAddress: "127.0.0.1:9880", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9891", HTTPAddress: "127.0.0.1:9881", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9892", HTTPAddress: "127.0.0.1:9882", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9893", HTTPAddress: "127.0.0.1:9883", DataCenter: cluster.DataCenterOne}, - }); err != nil { + err := startGubernator() + if err != nil { fmt.Println(err) os.Exit(1) } + code := m.Run() cluster.Stop() @@ -68,8 +63,8 @@ func TestMain(m *testing.M) { } func TestOverTheLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -103,7 +98,7 @@ func TestOverTheLimit(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -123,7 +118,7 @@ func TestMultipleAsync(t *testing.T) { t.Logf("Asking Peer: %s", cluster.GetPeers()[0].GRPCAddress) client, errs := guber.DialV1Server(cluster.GetPeers()[0].GRPCAddress, nil) - require.Nil(t, errs) + require.NoError(t, errs) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ @@ -147,7 +142,7 @@ func TestMultipleAsync(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) require.Len(t, resp.Responses, 2) @@ -166,8 +161,8 @@ func TestTokenBucket(t *testing.T) { defer 
clock.Freeze(clock.Now()).Unfreeze() addr := cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress - client, errs := guber.DialV1Server(addr, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(addr, nil) + require.NoError(t, err) tests := []struct { name string @@ -209,7 +204,7 @@ func TestTokenBucket(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -226,8 +221,8 @@ func TestTokenBucket(t *testing.T) { func TestTokenBucketGregorian(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -284,7 +279,7 @@ func TestTokenBucketGregorian(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -302,8 +297,8 @@ func TestTokenBucketNegativeHits(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() addr := cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress - client, errs := guber.DialV1Server(addr, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(addr, nil) + require.NoError(t, err) tests := []struct { name string @@ -356,7 +351,7 @@ func TestTokenBucketNegativeHits(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -372,8 +367,8 @@ func TestTokenBucketNegativeHits(t *testing.T) { func TestDrainOverLimit(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -482,8 +477,8 @@ func TestTokenBucketRequestMoreThanAvailable(t *testing.T) { func TestLeakyBucket(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -609,8 +604,8 @@ func TestLeakyBucket(t *testing.T) { func TestLeakyBucketWithBurst(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -716,8 +711,8 @@ func TestLeakyBucketWithBurst(t *testing.T) { func TestLeakyBucketGregorian(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -753,14 +748,16 @@ func TestLeakyBucketGregorian(t *testing.T) { now = now.Truncate(1 * time.Minute) // So we don't start on the minute boundary now = now.Add(time.Millisecond * 100) + name := t.Name() + key := guber.RandomString(10) for _, test := range tests { t.Run(test.Name, func(t *testing.T) { resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_greg", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Behavior: 
guber.Behavior_DURATION_IS_GREGORIAN, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.GregorianMinutes, @@ -769,15 +766,13 @@ func TestLeakyBucketGregorian(t *testing.T) { }, }, }) - clock.Freeze(clock.Now()) require.NoError(t, err) rl := resp.Responses[0] - assert.Equal(t, test.Status, rl.Status) assert.Equal(t, test.Remaining, rl.Remaining) assert.Equal(t, int64(60), rl.Limit) - assert.True(t, rl.ResetTime > now.Unix()) + assert.Greater(t, rl.ResetTime, now.Unix()) clock.Advance(test.Sleep) }) } @@ -786,8 +781,8 @@ func TestLeakyBucketGregorian(t *testing.T) { func TestLeakyBucketNegativeHits(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -899,8 +894,8 @@ func TestLeakyBucketRequestMoreThanAvailable(t *testing.T) { } func TestMissingFields(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Req *guber.RateLimitReq @@ -955,29 +950,29 @@ func TestMissingFields(t *testing.T) { resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{test.Req}, }) - require.Nil(t, err) + require.NoError(t, err) assert.Equal(t, test.Error, resp.Responses[0].Error, i) assert.Equal(t, test.Status, resp.Responses[0].Status, i) } } func TestGlobalRateLimits(t *testing.T) { - const ( - name = "test_global" - key = "account:12345" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) + var firstResetTime int64 - sendHit := func(client guber.V1Client, status guber.Status, hits int64, remain int64) { + sendHit := func(client guber.V1Client, status guber.Status, hits, remain int64) { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_global", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, Duration: guber.Minute * 3, @@ -987,11 +982,24 @@ func TestGlobalRateLimits(t *testing.T) { }, }) require.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].Error) - assert.Equal(t, remain, resp.Responses[0].Remaining) - assert.Equal(t, status, resp.Responses[0].Status) - assert.Equal(t, int64(5), resp.Responses[0].Limit) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + assert.Equal(t, remain, item.Remaining) + assert.Equal(t, status, item.Status) + assert.Equal(t, int64(5), item.Limit) + + // ResetTime should not change during test. 
+ if firstResetTime == 0 { + firstResetTime = item.ResetTime + } + assert.Equal(t, firstResetTime, item.ResetTime) + + // ensure that we have a canonical host + assert.NotEmpty(t, item.Metadata["owner"]) } + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + // Our first hit should create the request on the peer and queue for async forward sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1, 4) @@ -1005,8 +1013,6 @@ func TestGlobalRateLimits(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 1, int(m.Value)) }) - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) @@ -1027,18 +1033,15 @@ func TestGlobalRateLimits(t *testing.T) { // either owner or non-owner peer. func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { ctx := context.Background() - const name = "test_global" - key := fmt.Sprintf("key:%016x", rand.Int()) + name := t.Name() + key := guber.RandomString(10) // Determine owner and non-owner peers. - ownerPeerInfo, err := cluster.FindOwningPeer(name, key) + owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) - owner := ownerPeerInfo.GRPCAddress - nonOwner := cluster.PeerAt(0).GRPCAddress - if nonOwner == owner { - nonOwner = cluster.PeerAt(1).GRPCAddress - } - require.NotEqual(t, owner, nonOwner) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + nonOwner := peers[0] // Connect to owner and non-owner peers in round robin. dialOpts := []grpc.DialOption{ @@ -1046,22 +1049,22 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`), } - address := fmt.Sprintf("static:///%s,%s", owner, nonOwner) + address := fmt.Sprintf("static:///%s,%s", owner.PeerInfo.GRPCAddress, nonOwner.PeerInfo.GRPCAddress) conn, err := grpc.DialContext(ctx, address, dialOpts...) require.NoError(t, err) client := guber.NewV1Client(conn) - sendHit := func(status guber.Status, i int) { - ctx, cancel := context.WithTimeout(ctx, 10*clock.Second) + sendHit := func(client guber.V1Client, status guber.Status, i int) { + ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { Name: name, UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, + Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, + Duration: 5 * guber.Minute, Hits: 1, Limit: 2, }, @@ -1069,35 +1072,34 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { }) require.NoError(t, err, i) item := resp.Responses[0] - assert.Equal(t, "", item.GetError(), fmt.Sprintf("mismatch error, iteration %d", i)) - assert.Equal(t, status, item.GetStatus(), fmt.Sprintf("mismatch status, iteration %d", i)) + assert.Equal(t, "", item.Error, fmt.Sprintf("unexpected error, iteration %d", i)) + assert.Equal(t, status, item.Status, fmt.Sprintf("mismatch status, iteration %d", i)) } + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + // Send two hits that should be processed by the owner and non-owner and // deplete the limit consistently. - sendHit(guber.Status_UNDER_LIMIT, 1) - sendHit(guber.Status_UNDER_LIMIT, 2) - - // Sleep to ensure the global broadcast occurs (every 100ms). 
- time.Sleep(150 * time.Millisecond) + sendHit(client, guber.Status_UNDER_LIMIT, 1) + sendHit(client, guber.Status_UNDER_LIMIT, 2) + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 1)) // All successive hits should return OVER_LIMIT. for i := 2; i <= 10; i++ { - sendHit(guber.Status_OVER_LIMIT, i) + sendHit(client, guber.Status_OVER_LIMIT, i) } } func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { - const ( - name = "test_global_token_limit" - key = "account:12345" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) - sendHit := func(expectedStatus guber.Status, hits int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + sendHit := func(expectedStatus guber.Status, hits, expectedRemaining int64) { + ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second) defer cancel() resp, err := peers[0].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ @@ -1106,80 +1108,44 @@ func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, + Duration: 5 * guber.Minute, Hits: hits, Limit: 2, }, }, }) assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) + item := resp.Responses[0] + assert.Equal(t, "", item.Error, "unexpected error") + assert.Equal(t, expectedStatus, item.Status, "mismatch status") + assert.Equal(t, expectedRemaining, item.Remaining, "mismatch remaining") } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - // Send two hits that should be processed by the owner and the broadcast to peer, depleting the remaining - sendHit(guber.Status_UNDER_LIMIT, 1) - sendHit(guber.Status_UNDER_LIMIT, 1) - // Wait for the broadcast from the owner to the peer - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) - // Since the remainder is 0, the peer should set OVER_LIMIT instead of waiting for the owner - // to respond with OVER_LIMIT. - sendHit(guber.Status_OVER_LIMIT, 1) - // Wait for the broadcast from the owner to the peer - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 2)) - // The status should still be OVER_LIMIT - sendHit(guber.Status_OVER_LIMIT, 0) -} + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) -func TestGlobalRateLimitsPeerOverLimitLeaky(t *testing.T) { - const ( - name = "test_global_token_limit_leaky" - key = "account:12345" - ) + // Send two hits that should be processed by the owner and the broadcast to + // peer, depleting the remaining. 
+ sendHit(guber.Status_UNDER_LIMIT, 1, 1) + sendHit(guber.Status_UNDER_LIMIT, 1, 0) - peers, err := cluster.ListNonOwningDaemons(name, key) - require.NoError(t, err) + // Wait for the broadcast from the owner to the peer + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 1)) - sendHit := func(client guber.V1Client, expectedStatus guber.Status, hits int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, - Hits: hits, - Limit: 2, - }, - }, - }) - assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) - } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) + // Since the remainder is 0, the peer should return OVER_LIMIT on next hit. + sendHit(guber.Status_OVER_LIMIT, 1, 0) - // Send two hits that should be processed by the owner and the broadcast to peer, depleting the remaining - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1) - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1) - // Wait for the broadcast from the owner to the peers - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) - // Ask a different peer if the status is over the limit - sendHit(peers[1].MustClient(), guber.Status_OVER_LIMIT, 1) + // Wait for the broadcast from the owner to the peer. + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 2)) + + // The status should still be OVER_LIMIT. + sendHit(guber.Status_OVER_LIMIT, 0, 0) } func TestGlobalRequestMoreThanAvailable(t *testing.T) { - const ( - name = "test_global_more_than_available" - key = "account:123456" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) @@ -1203,11 +1169,9 @@ func TestGlobalRequestMoreThanAvailable(t *testing.T) { assert.Equal(t, "", resp.Responses[0].GetError()) assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) - require.NoError(t, err) + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") // Ensure GRPC has connections to each peer before we start, as we want // the actual test requests to happen quite fast. 
@@ -1238,11 +1202,10 @@ func TestGlobalRequestMoreThanAvailable(t *testing.T) { } func TestGlobalNegativeHits(t *testing.T) { - const ( - name = "test_global_negative_hits" - key = "account:12345" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) @@ -1267,9 +1230,10 @@ func TestGlobalNegativeHits(t *testing.T) { assert.Equal(t, status, resp.Responses[0].GetStatus()) assert.Equal(t, remaining, resp.Responses[0].Remaining) } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") require.NoError(t, err) // Send a negative hit on a rate limit with no hits @@ -1292,11 +1256,10 @@ func TestGlobalNegativeHits(t *testing.T) { } func TestGlobalResetRemaining(t *testing.T) { - const ( - name = "test_global_reset" - key = "account:123456" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) @@ -1321,9 +1284,10 @@ func TestGlobalResetRemaining(t *testing.T) { assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) assert.Equal(t, remaining, resp.Responses[0].Remaining) } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") require.NoError(t, err) for _, p := range peers { @@ -1374,21 +1338,11 @@ func TestGlobalResetRemaining(t *testing.T) { }) require.NoError(t, err) assert.NotEqual(t, 100, resp.Responses[0].Remaining) - -} - -func getMetricRequest(url string, name string) (*model.Sample, error) { - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - return getMetric(resp.Body, name) } func TestChangeLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -1469,7 +1423,7 @@ func TestChangeLimit(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -1482,8 +1436,8 @@ func TestChangeLimit(t *testing.T) { } func TestResetRemaining(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -1542,7 +1496,7 @@ func TestResetRemaining(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -1554,93 +1508,42 @@ func TestResetRemaining(t *testing.T) { } func TestHealthCheck(t *testing.T) { - client, err := guber.DialV1Server(cluster.DaemonAt(0).GRPCListeners[0].Addr().String(), nil) - require.NoError(t, err) - - // Check that the cluster is healthy to start with - healthResp, err := client.HealthCheck(context.Background(), 
&guber.HealthCheckReq{}) - require.NoError(t, err) - - require.Equal(t, "healthy", healthResp.GetStatus()) - - // Create a global rate limit that will need to be sent to all peers in the cluster - _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: "test_health_check", - UniqueKey: "account:12345", - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_BATCHING, - Duration: guber.Second * 3, - Hits: 1, - Limit: 5, - }, - }, - }) - require.Nil(t, err) - - // Stop the rest of the cluster to ensure errors occur on our instance - for i := 1; i < cluster.NumOfDaemons(); i++ { - d := cluster.DaemonAt(i) - require.NotNil(t, d) - d.Close() + // Check that the cluster is healthy to start with. + for _, peer := range cluster.GetDaemons() { + healthResp, err := peer.MustClient().HealthCheck(context.Background(), &guber.HealthCheckReq{}) + require.NoError(t, err) + assert.Equal(t, "healthy", healthResp.Status) } - // Hit the global rate limit again this time causing a connection error - _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: "test_health_check", - UniqueKey: "account:12345", - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Second * 3, - Hits: 1, - Limit: 5, - }, - }, - }) - require.Nil(t, err) + // Stop the cluster to ensure errors occur on our instance. + cluster.Stop() - testutil.UntilPass(t, 20, clock.Millisecond*300, func(t testutil.TestingT) { - // Check the health again to get back the connection error - healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - if assert.Nil(t, err) { - return + // Check the health again to get back the connection error. + testutil.UntilPass(t, 20, 300*clock.Millisecond, func(t testutil.TestingT) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + for _, peer := range cluster.GetDaemons() { + _, err := peer.MustClient().HealthCheck(ctx, &guber.HealthCheckReq{}) + assert.Error(t, err, "connect: connection refused") } - - assert.Equal(t, "unhealthy", healthResp.GetStatus()) - assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") }) - // Restart stopped instances - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*15) - defer cancel() - require.NoError(t, cluster.Restart(ctx)) - - // wait for every peer instance to come back online - for _, peer := range cluster.GetPeers() { - peerClient, err := guber.DialV1Server(peer.GRPCAddress, nil) - require.NoError(t, err) - testutil.UntilPass(t, 10, clock.Millisecond*300, func(t testutil.TestingT) { - healthResp, err = peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - assert.Equal(t, "healthy", healthResp.GetStatus()) - }) - } + // Restart so cluster is ready for next test. 
+ require.NoError(t, startGubernator()) } func TestLeakyBucketDivBug(t *testing.T) { - // Freeze time so we don't leak during the test defer clock.Freeze(clock.Now()).Unfreeze() - + name := t.Name() + key := guber.RandomString(10) client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(t, err) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_div", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 1, @@ -1658,8 +1561,8 @@ func TestLeakyBucketDivBug(t *testing.T) { resp, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_div", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 100, @@ -1683,6 +1586,8 @@ func TestMultiRegion(t *testing.T) { } func TestGRPCGateway(t *testing.T) { + name := t.Name() + key := guber.RandomString(10) address := cluster.GetRandomPeer(cluster.DataCenterNone).HTTPAddress resp, err := http.DefaultClient.Get("http://" + address + "/v1/HealthCheck") require.NoError(t, err) @@ -1702,8 +1607,8 @@ func TestGRPCGateway(t *testing.T) { payload, err := json.Marshal(&guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "requests_per_sec", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Duration: guber.Millisecond * 1000, Hits: 1, Limit: 10, @@ -1731,6 +1636,7 @@ func TestGRPCGateway(t *testing.T) { } func TestGetPeerRateLimits(t *testing.T) { + name := t.Name() ctx := context.Background() peerClient, err := guber.NewPeerClient(guber.PeerConfig{ Info: cluster.GetRandomPeer(cluster.DataCenterNone), @@ -1740,6 +1646,7 @@ func TestGetPeerRateLimits(t *testing.T) { t.Run("Stable rate check request order", func(t *testing.T) { // Ensure response order matches rate check request order. // Try various batch sizes. 
+ createdAt := epochMillis(clock.Now()) testCases := []int{1, 2, 5, 10, 100, 1000} for _, n := range testCases { @@ -1750,13 +1657,14 @@ func TestGetPeerRateLimits(t *testing.T) { } for i := 0; i < n; i++ { req.Requests[i] = &guber.RateLimitReq{ - Name: "Foobar", - UniqueKey: fmt.Sprintf("%08x", i), + Name: name, + UniqueKey: guber.RandomString(10), Hits: 0, Limit: 1000 + int64(i), Duration: 1000, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_BATCHING, + CreatedAt: &createdAt, } } @@ -1779,6 +1687,468 @@ func TestGetPeerRateLimits(t *testing.T) { // TODO: Add a test for sending no rate limits RateLimitReqList.RateLimits = nil +func TestGlobalBehavior(t *testing.T) { + const limit = 1000 + broadcastTimeout := 400 * time.Millisecond + createdAt := epochMillis(clock.Now()) + + makeReq := func(name, key string, hits int64) *guber.RateLimitReq { + return &guber.RateLimitReq{ + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 3, + Hits: hits, + Limit: limit, + CreatedAt: &createdAt, + } + } + + t.Run("Hits on owner peer", func(t *testing.T) { + testCases := []struct { + Name string + Hits int64 + }{ + {Name: "Single hit", Hits: 1}, + {Name: "Multiple hits", Hits: 10}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + t.Logf("Owner peer: %s", owner.InstanceID) + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + + // When + for i := int64(0); i < testCase.Hits; i++ { + sendHit(t, owner, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, 999-i) + } + + // Then + // Expect a single global broadcast to all non-owner peers. + t.Log("Waiting for global broadcasts") + var wg sync.WaitGroup + var didOwnerBroadcast, didNonOwnerBroadcast int + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + didOwnerBroadcast++ + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + didNonOwnerBroadcast++ + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, 1, didOwnerBroadcast) + assert.Zero(t, didNonOwnerBroadcast) + + // Check for global hits update from non-owner to owner peer. + // Expect no global hits update because the hits were given + // directly to the owner peer. 
+ t.Log("Waiting for global broadcasts") + var didOwnerUpdate, didNonOwnerUpdate int + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + didOwnerUpdate++ + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + didNonOwnerUpdate++ + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Zero(t, didNonOwnerUpdate) + + // Assert UpdatePeerGlobals endpoint called once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.Equal(t, expected, upgCounters2[peer.InstanceID]) + } + + // Assert PeerGetRateLimits endpoint not called. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + assert.Equal(t, expected, gprlCounters2[peer.InstanceID]) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, limit-testCase.Hits) + } + }) + } + }) + + t.Run("Hits on non-owner peer", func(t *testing.T) { + testCases := []struct { + Name string + Hits int64 + }{ + {Name: "Single hit", Hits: 1}, + {Name: "Multiple htis", Hits: 10}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + t.Logf("Owner peer: %s", owner.InstanceID) + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + + // When + for i := int64(0); i < testCase.Hits; i++ { + sendHit(t, peers[0], makeReq(name, key, 1), guber.Status_UNDER_LIMIT, 999-i) + } + + // Then + // Check for global hits update from non-owner to owner peer. + // Expect single global hits update from non-owner peer that received hits. 
+ t.Log("Waiting for global hits updates") + var wg sync.WaitGroup + var didOwnerUpdate int + var didNonOwnerUpdate []string + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + didOwnerUpdate++ + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + didNonOwnerUpdate = append(didNonOwnerUpdate, peer.InstanceID) + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Len(t, didNonOwnerUpdate, 1) + assert.Equal(t, []string{peers[0].InstanceID}, didNonOwnerUpdate) + + // Expect a single global broadcast to all non-owner peers. + t.Log("Waiting for global broadcasts") + var didOwnerBroadcast, didNonOwnerBroadcast int + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + didOwnerBroadcast++ + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + didNonOwnerBroadcast++ + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, 1, didOwnerBroadcast) + assert.Empty(t, didNonOwnerBroadcast) + + // Assert UpdatePeerGlobals endpoint called once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.Equal(t, expected, upgCounters2[peer.InstanceID], "upgCounter %s", peer.InstanceID) + } + + // Assert PeerGetRateLimits endpoint called once on owner. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + if peer.InstanceID == owner.InstanceID { + expected++ + } + assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) + } + + // Verify all peers report consistent remaining value value. 
+ for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, limit-testCase.Hits) + } + }) + } + }) + + t.Run("Distributed hits", func(t *testing.T) { + testCases := []struct { + Name string + Hits int + }{ + {Name: "2 hits", Hits: 2}, + {Name: "10 hits", Hits: 10}, + {Name: "100 hits", Hits: 100}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + var localPeers []*guber.Daemon + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + localPeers = append(localPeers, peer) + } + } + t.Logf("Owner peer: %s", owner.InstanceID) + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + expectUpdate := make(map[string]struct{}) + var wg sync.WaitGroup + var mutex sync.Mutex + + // When + wg.Add(testCase.Hits) + for i := 0; i < testCase.Hits; i++ { + peer := localPeers[i%len(localPeers)] + go func(peer *guber.Daemon) { + sendHit(t, peer, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, -1) + if peer.InstanceID != owner.InstanceID { + mutex.Lock() + expectUpdate[peer.InstanceID] = struct{}{} + mutex.Unlock() + } + wg.Done() + }(peer) + } + wg.Wait() + + // Then + // Check for global hits update from non-owner to owner peer. + // Expect single update from each non-owner peer that received + // hits. + t.Log("Waiting for global hits updates") + var didOwnerUpdate int64 + var didNonOwnerUpdate []string + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + atomic.AddInt64(&didOwnerUpdate, 1) + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + mutex.Lock() + didNonOwnerUpdate = append(didNonOwnerUpdate, peer.InstanceID) + mutex.Unlock() + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Len(t, didNonOwnerUpdate, len(expectUpdate)) + expectedNonOwnerUpdate := maps.Keys(expectUpdate) + sort.Strings(expectedNonOwnerUpdate) + sort.Strings(didNonOwnerUpdate) + assert.Equal(t, expectedNonOwnerUpdate, didNonOwnerUpdate) + + // Expect a single global broadcast to all non-owner peers. 
+ t.Log("Waiting for global broadcasts") + var didOwnerBroadcast, didNonOwnerBroadcast int64 + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + atomic.AddInt64(&didOwnerBroadcast, 1) + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + atomic.AddInt64(&didNonOwnerBroadcast, 1) + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, int64(1), didOwnerBroadcast) + assert.Empty(t, didNonOwnerBroadcast) + + // Assert UpdatePeerGlobals endpoint called at least + // once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.GreaterOrEqual(t, upgCounters2[peer.InstanceID], expected, "upgCounter %s", peer.InstanceID) + } + + // Assert PeerGetRateLimits endpoint called on owner + // for each non-owner that received hits. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + if peer.InstanceID == owner.InstanceID { + expected += float64(len(expectUpdate)) + } + assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, int64(limit-testCase.Hits)) + } + }) + } + }) +} + +// Request metrics and parse into map. +// Optionally pass names to filter metrics by name. 
+func getMetrics(HTTPAddr string, names ...string) (map[string]*model.Sample, error) { + url := fmt.Sprintf("http://%s/metrics", HTTPAddr) + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error requesting metrics: %s", resp.Status) + } + decoder := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(resp.Body, expfmt.FmtText), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.Now(), + }, + } + nameSet := make(map[string]struct{}) + for _, name := range names { + nameSet[name] = struct{}{} + } + metrics := make(map[string]*model.Sample) + + for { + var smpls model.Vector + err := decoder.Decode(&smpls) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + for _, smpl := range smpls { + name := smpl.Metric.String() + if _, ok := nameSet[name]; ok || len(nameSet) == 0 { + metrics[name] = smpl + } + } + } + + return metrics, nil +} + +func getMetricRequest(url string, name string) (*model.Sample, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return getMetric(resp.Body, name) +} + func getMetric(in io.Reader, name string) (*model.Sample, error) { dec := expfmt.SampleDecoder{ Dec: expfmt.NewDecoder(in, expfmt.FmtText), @@ -1808,44 +2178,172 @@ func getMetric(in io.Reader, name string) (*model.Sample, error) { return nil, nil } -// getBroadcastCount returns the current broadcast count for use with waitForBroadcast() -// TODO: Replace this with something else, we can call and reset via HTTP/GRPC calls in gubernator v3 -func getBroadcastCount(d *guber.Daemon) (int, error) { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_broadcast_duration_count") - if err != nil { - return 0, err - } +// waitForBroadcast waits until the broadcast count for the daemon changes to +// at least the expected value and the broadcast queue is empty. +// Returns an error if timeout waiting for conditions to be met. +func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect float64) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_broadcast_duration_count", "gubernator_global_queue_length") + if err != nil { + return err + } + gbdc := metrics["gubernator_broadcast_duration_count"] + ggql := metrics["gubernator_global_queue_length"] + + // It's possible a broadcast occurred twice if waiting for multiple + // peers to forward updates to non-owners. + if float64(gbdc.Value) >= expect && ggql.Value == 0 { + return nil + } - return int(m.Value), nil + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } } -// waitForBroadcast waits until the broadcast count for the daemon passed -// changes to the expected value. Returns an error if the expected value is -// not found before the context is cancelled. -func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error { +// waitForUpdate waits until the global hits update count for the daemon +// changes to at least the expected value and the global update queue is empty. +// Returns an error if timeout waiting for conditions to be met. 
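These waitFor* helpers are meant to be combined with a counter snapshot taken before the action under test: record the current value, trigger the behavior, then wait for the counter to move past the snapshot while the corresponding queue drains. A hypothetical test showing that shape (it reuses makeReq, sendHit, broadcastTimeout and the cluster helpers from this file and is not part of the patch):

    func TestOwnerBroadcastAdvances(t *testing.T) {
        const name = "test_owner_broadcast_advances"
        key := fmt.Sprintf("account:%08x", rand.Int())
        owner, err := cluster.FindOwningDaemon(name, key)
        require.NoError(t, err)
        require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...))

        // Snapshot, act, then wait for the owner's broadcast counter to advance.
        before := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count")
        sendHit(t, owner, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, -1)
        require.NoError(t, waitForBroadcast(broadcastTimeout, owner, before[owner.InstanceID]+1))
    }
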
+func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect float64) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() for { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_broadcast_duration_count") + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_global_send_duration_count", "gubernator_global_send_queue_length") if err != nil { return err } + gsdc := metrics["gubernator_global_send_duration_count"] + gsql := metrics["gubernator_global_send_queue_length"] - // It's possible a broadcast occurred twice if waiting for multiple peer to + // It's possible a hit occurred twice if waiting for multiple peers to // forward updates to the owner. - if int(m.Value) >= expect { - // Give the nodes some time to process the broadcasts - clock.Sleep(clock.Millisecond * 500) + if float64(gsdc.Value) >= expect && gsql.Value == 0 { return nil } select { - case <-clock.After(time.Millisecond * 800): + case <-clock.After(100 * clock.Millisecond): case <-ctx.Done(): return ctx.Err() } } } + +// waitForIdle waits until both global broadcast and global hits queues are +// empty. +func waitForIdle(timeout clock.Duration, daemons ...*guber.Daemon) error { + var wg syncutil.WaitGroup + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + for _, d := range daemons { + wg.Run(func(raw any) error { + d := raw.(*guber.Daemon) + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_global_queue_length", "gubernator_global_send_queue_length") + if err != nil { + return err + } + ggql := metrics["gubernator_global_queue_length"] + gsql := metrics["gubernator_global_send_queue_length"] + + if ggql.Value == 0 && gsql.Value == 0 { + return nil + } + + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } + }, d) + } + errs := wg.Wait() + if len(errs) > 0 { + return errs[0] + } + return nil +} + +func getMetricValue(t *testing.T, d *guber.Daemon, name string) float64 { + m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), + name) + require.NoError(t, err) + if m == nil { + return 0 + } + return float64(m.Value) +} + +// Get metric counter values on each peer. 
+func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]float64 { + counters := make(map[string]float64) + for _, peer := range peers { + counters[peer.InstanceID] = getMetricValue(t, peer, name) + } + return counters +} + +func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatus guber.Status, expectRemaining int64) { + if req.Hits != 0 { + t.Logf("Sending %d hits to peer %s", req.Hits, d.InstanceID) + } + client := d.MustClient() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{req}, + }) + require.NoError(t, err) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + if expectRemaining >= 0 { + assert.Equal(t, expectRemaining, item.Remaining) + } + assert.Equal(t, expectStatus, item.Status) + assert.Equal(t, req.Limit, item.Limit) +} + +func epochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} + +func startGubernator() error { + err := cluster.StartWith([]guber.PeerInfo{ + {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985", DataCenter: cluster.DataCenterNone}, + + // DataCenterOne + {GRPCAddress: "127.0.0.1:9890", HTTPAddress: "127.0.0.1:9880", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9891", HTTPAddress: "127.0.0.1:9881", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9892", HTTPAddress: "127.0.0.1:9882", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9893", HTTPAddress: "127.0.0.1:9883", DataCenter: cluster.DataCenterOne}, + }) + if err != nil { + return errors.Wrap(err, "while starting cluster") + } + + // Populate peer clients. Avoids data races when goroutines conflict trying + // to instantiate client singletons. + for _, peer := range cluster.GetDaemons() { + _, err = peer.Client() + if err != nil { + return errors.Wrap(err, "while connecting client") + } + } + return nil +} diff --git a/global.go b/global.go index bd0c1e7c..c5fe1676 100644 --- a/global.go +++ b/global.go @@ -22,28 +22,29 @@ import ( "github.com/mailgun/holster/v4/syncutil" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "google.golang.org/protobuf/proto" ) // globalManager manages async hit queue and updates peers in // the cluster periodically when a global rate limit we own updates. type globalManager struct { - hitsQueue chan *RateLimitReq - broadcastQueue chan *UpdatePeerGlobal - wg syncutil.WaitGroup - conf BehaviorConfig - log FieldLogger - instance *V1Instance // TODO circular import? V1Instance also holds a reference to globalManager - metricGlobalSendDuration prometheus.Summary - metricBroadcastDuration prometheus.Summary - metricBroadcastCounter *prometheus.CounterVec - metricGlobalQueueLength prometheus.Gauge + hitsQueue chan *RateLimitReq + broadcastQueue chan *RateLimitReq + wg syncutil.WaitGroup + conf BehaviorConfig + log FieldLogger + instance *V1Instance // TODO circular import? 
V1Instance also holds a reference to globalManager + metricGlobalSendDuration prometheus.Summary + metricGlobalSendQueueLength prometheus.Gauge + metricBroadcastDuration prometheus.Summary + metricGlobalQueueLength prometheus.Gauge } func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager { gm := globalManager{ log: instance.log, hitsQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), - broadcastQueue: make(chan *UpdatePeerGlobal, conf.GlobalBatchLimit), + broadcastQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), instance: instance, conf: conf, metricGlobalSendDuration: prometheus.NewSummary(prometheus.SummaryOpts{ @@ -51,15 +52,15 @@ func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager Help: "The duration of GLOBAL async sends in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.99: 0.001}, }), + metricGlobalSendQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "gubernator_global_send_queue_length", + Help: "The count of requests queued up for global broadcast. This is only used for GetRateLimit requests using global behavior.", + }), metricBroadcastDuration: prometheus.NewSummary(prometheus.SummaryOpts{ Name: "gubernator_broadcast_duration", Help: "The duration of GLOBAL broadcasts to peers in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.99: 0.001}, }), - metricBroadcastCounter: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "gubernator_broadcast_counter", - Help: "The count of broadcasts.", - }, []string{"condition"}), metricGlobalQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "gubernator_global_queue_length", Help: "The count of requests queued up for global broadcast. This is only used for GetRateLimit requests using global behavior.", @@ -71,14 +72,14 @@ func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager } func (gm *globalManager) QueueHit(r *RateLimitReq) { - gm.hitsQueue <- r + if r.Hits != 0 { + gm.hitsQueue <- r + } } -func (gm *globalManager) QueueUpdate(req *RateLimitReq, resp *RateLimitResp) { - gm.broadcastQueue <- &UpdatePeerGlobal{ - Key: req.HashKey(), - Algorithm: req.Algorithm, - Status: resp, +func (gm *globalManager) QueueUpdate(req *RateLimitReq) { + if req.Hits != 0 { + gm.broadcastQueue <- req } } @@ -108,11 +109,13 @@ func (gm *globalManager) runAsyncHits() { } else { hits[key] = r } + gm.metricGlobalSendQueueLength.Set(float64(len(hits))) // Send the hits if we reached our batch limit if len(hits) == gm.conf.GlobalBatchLimit { gm.sendHits(hits) hits = make(map[string]*RateLimitReq) + gm.metricGlobalSendQueueLength.Set(0) return true } @@ -126,6 +129,7 @@ func (gm *globalManager) runAsyncHits() { if len(hits) != 0 { gm.sendHits(hits) hits = make(map[string]*RateLimitReq) + gm.metricGlobalSendQueueLength.Set(0) } case <-done: interval.Stop() @@ -188,18 +192,19 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { // and in a periodic frequency determined by GlobalSyncWait. 
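runBroadcasts, which follows, drains broadcastQueue into a map keyed by HashKey(), so repeated updates to the same rate limit collapse into a single pending entry, and the batch is flushed either when it reaches GlobalBatchLimit or on the GlobalSyncWait tick. The same coalescing pattern in isolation, as a runnable toy with made-up names (this is not gubernator code):

    package main

    import (
        "fmt"
        "time"
    )

    // coalesce deduplicates incoming keys and flushes them in batches, either
    // when the batch is full or when the interval fires with work pending.
    func coalesce(in <-chan string, batchLimit int, syncWait time.Duration, flush func(map[string]int)) {
        pending := map[string]int{}
        tick := time.NewTicker(syncWait)
        defer tick.Stop()
        for {
            select {
            case key, ok := <-in:
                if !ok {
                    if len(pending) > 0 {
                        flush(pending)
                    }
                    return
                }
                pending[key]++ // a hot key stays a single entry in the batch
                if len(pending) >= batchLimit {
                    flush(pending)
                    pending = map[string]int{}
                }
            case <-tick.C:
                if len(pending) == 0 {
                    continue
                }
                flush(pending)
                pending = map[string]int{}
            }
        }
    }

    func main() {
        in := make(chan string)
        go func() {
            for i := 0; i < 10; i++ {
                in <- "account:123" // ten sends, one batch entry
            }
            close(in)
        }()
        coalesce(in, 100, 50*time.Millisecond, func(batch map[string]int) {
            fmt.Println("flush:", batch)
        })
    }
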
func (gm *globalManager) runBroadcasts() { var interval = NewInterval(gm.conf.GlobalSyncWait) - updates := make(map[string]*UpdatePeerGlobal) + updates := make(map[string]*RateLimitReq) gm.wg.Until(func(done chan struct{}) bool { select { - case updateReq := <-gm.broadcastQueue: - updates[updateReq.Key] = updateReq + case update := <-gm.broadcastQueue: + updates[update.HashKey()] = update + gm.metricGlobalQueueLength.Set(float64(len(updates))) // Send the hits if we reached our batch limit if len(updates) >= gm.conf.GlobalBatchLimit { - gm.metricBroadcastCounter.WithLabelValues("queue_full").Inc() gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]*UpdatePeerGlobal) + updates = make(map[string]*RateLimitReq) + gm.metricGlobalQueueLength.Set(0) return true } @@ -210,13 +215,13 @@ func (gm *globalManager) runBroadcasts() { } case <-interval.C: - if len(updates) != 0 { - gm.metricBroadcastCounter.WithLabelValues("timer").Inc() - gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]*UpdatePeerGlobal) - } else { - gm.metricGlobalQueueLength.Set(0) + if len(updates) == 0 { + break } + gm.broadcastPeers(context.Background(), updates) + updates = make(map[string]*RateLimitReq) + gm.metricGlobalQueueLength.Set(0) + case <-done: interval.Stop() return false @@ -226,14 +231,30 @@ func (gm *globalManager) runBroadcasts() { } // broadcastPeers broadcasts global rate limit statuses to all other peers -func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*UpdatePeerGlobal) { +func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*RateLimitReq) { defer prometheus.NewTimer(gm.metricBroadcastDuration).ObserveDuration() var req UpdatePeerGlobalsReq + reqState := RateLimitReqState{IsOwner: false} gm.metricGlobalQueueLength.Set(float64(len(updates))) - for _, r := range updates { - req.Globals = append(req.Globals, r) + for _, update := range updates { + // Get current rate limit state. 
+ grlReq := proto.Clone(update).(*RateLimitReq) + grlReq.Hits = 0 + status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq, reqState) + if err != nil { + gm.log.WithError(err).Error("while retrieving rate limit status") + continue + } + updateReq := &UpdatePeerGlobal{ + Key: update.HashKey(), + Algorithm: update.Algorithm, + Duration: update.Duration, + Status: status, + CreatedAt: *update.CreatedAt, + } + req.Globals = append(req.Globals, updateReq) } fan := syncutil.NewFanOut(gm.conf.GlobalPeerRequestsConcurrency) diff --git a/go.mod b/go.mod index 93080b32..cb0f9886 100644 --- a/go.mod +++ b/go.mod @@ -23,8 +23,9 @@ require ( go.opentelemetry.io/otel/sdk v1.21.0 go.opentelemetry.io/otel/trace v1.21.0 go.uber.org/goleak v1.3.0 - golang.org/x/net v0.18.0 - golang.org/x/sync v0.3.0 + golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 + golang.org/x/net v0.22.0 + golang.org/x/sync v0.6.0 golang.org/x/time v0.3.0 google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b google.golang.org/grpc v1.59.0 @@ -81,12 +82,12 @@ require ( go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.8.0 // indirect go.uber.org/zap v1.21.0 // indirect - golang.org/x/mod v0.8.0 // indirect + golang.org/x/mod v0.15.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.14.0 // indirect - golang.org/x/term v0.14.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.6.0 // indirect + golang.org/x/tools v0.18.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20231012201019-e917dd12ba7a // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b // indirect diff --git a/go.sum b/go.sum index fea9ef4c..7b2a2004 100644 --- a/go.sum +++ b/go.sum @@ -478,6 +478,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -503,8 +505,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -550,8 +552,8 @@ golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= -golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -579,8 +581,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -643,13 +645,13 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= -golang.org/x/term v0.14.0/go.mod 
h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -721,8 +723,8 @@ golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= +golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/gubernator.go b/gubernator.go index 7ec9a96a..ff6812ae 100644 --- a/gubernator.go +++ b/gubernator.go @@ -21,8 +21,10 @@ import ( "fmt" "strings" "sync" + "time" "github.com/mailgun/errors" + "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/syncutil" "github.com/mailgun/holster/v4/tracing" "github.com/prometheus/client_golang/prometheus" @@ -51,6 +53,10 @@ type V1Instance struct { workerPool *WorkerPool } +type RateLimitReqState struct { + IsOwner bool +} + var ( metricGetRateLimitCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "gubernator_getratelimit_counter", @@ -186,6 +192,7 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G "Requests.RateLimits list too large; max size is '%d'", maxBatchSize) } + createdAt := epochMillis(clock.Now()) resp := GetRateLimitsResp{ Responses: make([]*RateLimitResp, len(r.Requests)), } @@ -198,17 +205,19 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G var peer *PeerClient var err error - if len(req.UniqueKey) == 0 { + if req.UniqueKey == "" { metricCheckErrorCounter.WithLabelValues("Invalid request").Inc() resp.Responses[i] = &RateLimitResp{Error: "field 'unique_key' cannot be empty"} continue } - - if len(req.Name) == 0 { + if req.Name == "" { metricCheckErrorCounter.WithLabelValues("Invalid request").Inc() resp.Responses[i] = &RateLimitResp{Error: "field 'namespace' cannot be empty"} continue } + if req.CreatedAt == nil || *req.CreatedAt == 0 { + req.CreatedAt = &createdAt + } if ctx.Err() != nil { err = errors.Wrap(ctx.Err(), "Error while iterating request items") @@ -235,9 +244,10 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G } // If our server instance is the owner of this rate limit - if peer.Info().IsOwner { + reqState := RateLimitReqState{IsOwner: peer.Info().IsOwner} + if reqState.IsOwner { // Apply our rate limit algorithm to the request - resp.Responses[i], err = s.getLocalRateLimit(ctx, req) + 
resp.Responses[i], err = s.getLocalRateLimit(ctx, req, reqState) if err != nil { err = errors.Wrapf(err, "Error while apply rate limit for '%s'", key) span := trace.SpanFromContext(ctx) @@ -308,6 +318,7 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { funcTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.asyncRequest")) defer funcTimer.ObserveDuration() + reqState := RateLimitReqState{IsOwner: req.Peer.Info().IsOwner} resp := AsyncResp{ Idx: req.Idx, } @@ -326,8 +337,8 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { // If we are attempting again, the owner of this rate limit might have changed to us! if attempts != 0 { - if req.Peer.Info().IsOwner { - resp.Resp, err = s.getLocalRateLimit(ctx, req.Req) + if reqState.IsOwner { + resp.Resp, err = s.getLocalRateLimit(ctx, req.Req, reqState) if err != nil { s.log.WithContext(ctx). WithError(err). @@ -394,12 +405,13 @@ func (s *V1Instance) getGlobalRateLimit(ctx context.Context, req *RateLimitReq) tracing.EndScope(ctx, err) }() - cpy := proto.Clone(req).(*RateLimitReq) - SetBehavior(&cpy.Behavior, Behavior_NO_BATCHING, true) - SetBehavior(&cpy.Behavior, Behavior_GLOBAL, false) + req2 := proto.Clone(req).(*RateLimitReq) + SetBehavior(&req2.Behavior, Behavior_NO_BATCHING, true) + SetBehavior(&req2.Behavior, Behavior_GLOBAL, false) + reqState := RateLimitReqState{IsOwner: false} // Process the rate limit like we own it - resp, err = s.getLocalRateLimit(ctx, cpy) + resp, err = s.getLocalRateLimit(ctx, req2, reqState) if err != nil { return nil, errors.Wrap(err, "during in getLocalRateLimit") } @@ -411,6 +423,7 @@ func (s *V1Instance) getGlobalRateLimit(ctx context.Context, req *RateLimitReq) // UpdatePeerGlobals updates the local cache with a list of global rate limits. This method should only // be called by a peer who is the owner of a global rate limit. func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobalsReq) (*UpdatePeerGlobalsResp, error) { + defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.UpdatePeerGlobals")).ObserveDuration() now := MillisecondNow() for _, g := range r.Globals { item := &CacheItem{ @@ -423,6 +436,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals item.Value = &LeakyBucketItem{ Remaining: float64(g.Status.Remaining), Limit: g.Status.Limit, + Duration: g.Duration, Burst: g.Status.Limit, UpdatedAt: now, } @@ -430,6 +444,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals item.Value = &TokenBucketItem{ Status: g.Status.Status, Limit: g.Status.Limit, + Duration: g.Duration, Remaining: g.Status.Remaining, CreatedAt: now, } @@ -445,6 +460,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals // GetPeerRateLimits is called by other peers to get the rate limits owned by this peer. 
func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimitsReq) (resp *GetPeerRateLimitsResp, err error) { + defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.GetPeerRateLimits")).ObserveDuration() if len(r.Requests) > maxBatchSize { err := fmt.Errorf("'PeerRequest.rate_limits' list too large; max size is '%d'", maxBatchSize) metricCheckErrorCounter.WithLabelValues("Request too large").Inc() @@ -467,6 +483,7 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits respChan := make(chan respOut) var respWg sync.WaitGroup respWg.Add(1) + reqState := RateLimitReqState{IsOwner: true} go func() { // Capture each response and return in the same order @@ -494,7 +511,13 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits SetBehavior(&rin.req.Behavior, Behavior_DRAIN_OVER_LIMIT, true) } - rl, err := s.getLocalRateLimit(ctx, rin.req) + // Assign default to CreatedAt for backwards compatibility. + if rin.req.CreatedAt == nil || *rin.req.CreatedAt == 0 { + createdAt := epochMillis(clock.Now()) + rin.req.CreatedAt = &createdAt + } + + rl, err := s.getLocalRateLimit(ctx, rin.req, reqState) if err != nil { // Return the error for this request err = errors.Wrap(err, "Error in getLocalRateLimit") @@ -562,7 +585,7 @@ func (s *V1Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (health return health, nil } -func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ *RateLimitResp, err error) { +func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, reqState RateLimitReqState) (_ *RateLimitResp, err error) { ctx = tracing.StartNamedScope(ctx, "V1Instance.getLocalRateLimit", trace.WithAttributes( attribute.String("ratelimit.key", r.UniqueKey), attribute.String("ratelimit.name", r.Name), @@ -572,17 +595,19 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - resp, err := s.workerPool.GetRateLimit(ctx, r) + resp, err := s.workerPool.GetRateLimit(ctx, r, reqState) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } - metricGetRateLimitCounter.WithLabelValues("local").Inc() // If global behavior, then broadcast update to all peers. 
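In the block that follows, QueueUpdate receives only the *RateLimitReq; the broadcast loop later obtains the status to send by calling workerPool.GetRateLimit on a clone of the queued request with Hits zeroed (see broadcastPeers above), so the propagated Remaining and ResetTime reflect the state at broadcast time. Within the gubernator package that snapshot step reduces to the following sketch (the helper name is assumed; it simply mirrors broadcastPeers):

    // snapshotReq clones a queued request and zeroes its hits so the rate
    // limit algorithm reports current state without deducting anything.
    func snapshotReq(r *RateLimitReq) *RateLimitReq {
        cpy := proto.Clone(r).(*RateLimitReq) // google.golang.org/protobuf/proto
        cpy.Hits = 0
        return cpy
    }
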
if HasBehavior(r.Behavior, Behavior_GLOBAL) { - s.global.QueueUpdate(r, resp) + s.global.QueueUpdate(r) } + if reqState.IsOwner { + metricGetRateLimitCounter.WithLabelValues("local").Inc() + } return resp, nil } @@ -723,10 +748,10 @@ func (s *V1Instance) Describe(ch chan<- *prometheus.Desc) { metricGetRateLimitCounter.Describe(ch) metricOverLimitCounter.Describe(ch) metricWorkerQueue.Describe(ch) - s.global.metricBroadcastCounter.Describe(ch) s.global.metricBroadcastDuration.Describe(ch) s.global.metricGlobalQueueLength.Describe(ch) s.global.metricGlobalSendDuration.Describe(ch) + s.global.metricGlobalSendQueueLength.Describe(ch) } // Collect fetches metrics from the server for use by prometheus @@ -741,10 +766,10 @@ func (s *V1Instance) Collect(ch chan<- prometheus.Metric) { metricGetRateLimitCounter.Collect(ch) metricOverLimitCounter.Collect(ch) metricWorkerQueue.Collect(ch) - s.global.metricBroadcastCounter.Collect(ch) s.global.metricBroadcastDuration.Collect(ch) s.global.metricGlobalQueueLength.Collect(ch) s.global.metricGlobalSendDuration.Collect(ch) + s.global.metricGlobalSendQueueLength.Collect(ch) } // HasBehavior returns true if the provided behavior is set @@ -785,3 +810,7 @@ func isDeadlineExceeded(err error) bool { } return errors.Is(err, context.DeadlineExceeded) } + +func epochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} diff --git a/gubernator.pb.go b/gubernator.pb.go index 808a8814..3b54288d 100644 --- a/gubernator.pb.go +++ b/gubernator.pb.go @@ -374,6 +374,17 @@ type RateLimitReq struct { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. Metadata map[string]string `protobuf:"bytes,9,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // The exact time this request was created in Epoch milliseconds. Due to + // time drift between systems, it may be advantageous for a client to set the + // exact time the request was created. It possible the system clock for the + // client has drifted from the system clock where gubernator daemon is + // running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. 
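The field declared on the next line is optional: a client that wants reset times computed against its own clock can set it, and when it is left nil the daemon stamps the request on receipt. A hypothetical client-side sketch, using guber as an alias for the generated Go bindings (names and values are illustrative):

    // newGlobalReq builds a request with created_at pinned to the client clock.
    func newGlobalReq(name, key string, hits int64) *guber.RateLimitReq {
        createdAt := time.Now().UnixNano() / 1_000_000 // epoch milliseconds, as in epochMillis()
        return &guber.RateLimitReq{
            Name:      name,
            UniqueKey: key,
            Algorithm: guber.Algorithm_TOKEN_BUCKET,
            Behavior:  guber.Behavior_GLOBAL,
            Duration:  60_000, // one minute, in milliseconds
            Hits:      hits,
            Limit:     100,
            CreatedAt: &createdAt, // optional; leave nil to let the daemon set it
        }
    }
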
+ CreatedAt *int64 `protobuf:"varint,10,opt,name=created_at,json=createdAt,proto3,oneof" json:"created_at,omitempty"` } func (x *RateLimitReq) Reset() { @@ -471,6 +482,13 @@ func (x *RateLimitReq) GetMetadata() map[string]string { return nil } +func (x *RateLimitReq) GetCreatedAt() int64 { + if x != nil && x.CreatedAt != nil { + return *x.CreatedAt + } + return 0 +} + type RateLimitResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -684,7 +702,7 @@ var file_gubernator_proto_rawDesc = []byte{ 0x70, 0x12, 0x3a, 0x0a, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0x8e, 0x03, + 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0xc1, 0x03, 0x0a, 0x0c, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x5f, 0x6b, 0x65, 0x79, @@ -706,68 +724,71 @@ var file_gubernator_proto_rawDesc = []byte{ 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xac, - 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, - 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, - 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, - 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, - 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, - 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x74, 0x69, 0x6d, - 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, 0x74, 0x54, 0x69, - 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x62, 0x2e, - 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, - 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 
0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x10, 0x0a, - 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x22, - 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, - 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, 0x72, 0x43, 0x6f, - 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, - 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, - 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, 0x55, 0x43, 0x4b, - 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, 0x76, 0x69, 0x6f, - 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, - 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x01, - 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, - 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, 0x52, 0x45, 0x47, - 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x53, 0x45, 0x54, - 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, 0x10, 0x0a, 0x0c, - 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x14, - 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, - 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0f, - 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x00, 0x12, - 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x01, 0x32, - 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, - 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, - 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, - 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, - 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, - 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, 0xd3, 0xe4, 0x93, - 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, 0x74, 0x52, 0x61, - 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, - 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, - 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x1e, 0x2e, 0x70, 0x62, 
0x2e, 0x67, 0x75, 0x62, 0x65, - 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, - 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x11, 0x12, 0x0f, - 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x42, - 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, - 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x12, 0x22, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, + 0x41, 0x74, 0x88, 0x01, 0x01, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, + 0x38, 0x01, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, + 0x74, 0x22, 0xac, 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, + 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, + 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, + 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, + 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0x10, 0x0a, 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, + 0x65, 0x71, 0x22, 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, + 0x07, 0x6d, 0x65, 0x73, 
0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, + 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, + 0x74, 0x68, 0x6d, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, + 0x4b, 0x45, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, + 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, + 0x76, 0x69, 0x6f, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, + 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, + 0x47, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, + 0x19, 0x0a, 0x15, 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, + 0x52, 0x45, 0x47, 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, + 0x53, 0x45, 0x54, 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, + 0x10, 0x0a, 0x0c, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, + 0x10, 0x12, 0x14, 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x10, 0x01, 0x32, 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, + 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, + 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, + 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, + 0xd3, 0xe4, 0x93, 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, + 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, + 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, + 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x1e, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, + 0x11, 0x12, 0x0f, 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -895,6 +916,7 @@ func file_gubernator_proto_init() { } } } + file_gubernator_proto_msgTypes[2].OneofWrappers = []interface{}{} type x struct{} out := 
protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/gubernator.proto b/gubernator.proto index fea99a22..52d5e65f 100644 --- a/gubernator.proto +++ b/gubernator.proto @@ -168,6 +168,18 @@ message RateLimitReq { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. map metadata = 9; + + // The exact time this request was created in Epoch milliseconds. Due to + // time drift between systems, it may be advantageous for a client to set the + // exact time the request was created. It possible the system clock for the + // client has drifted from the system clock where gubernator daemon is + // running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. + optional int64 created_at = 10; } enum Status { diff --git a/interval_test.go b/interval_test.go index 68c8b40d..d01d86f3 100644 --- a/interval_test.go +++ b/interval_test.go @@ -19,7 +19,7 @@ package gubernator_test import ( "testing" - "github.com/mailgun/gubernator/v2" + gubernator "github.com/mailgun/gubernator/v2" "github.com/mailgun/holster/v4/clock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/lrucache.go b/lrucache.go index 09bc36ba..03867209 100644 --- a/lrucache.go +++ b/lrucache.go @@ -112,16 +112,7 @@ func (c *LRUCache) GetItem(key string) (item *CacheItem, ok bool) { if ele, hit := c.cache[key]; hit { entry := ele.Value.(*CacheItem) - now := MillisecondNow() - // If the entry is invalidated - if entry.InvalidAt != 0 && entry.InvalidAt < now { - c.removeElement(ele) - metricCacheAccess.WithLabelValues("miss").Add(1) - return - } - - // If the entry has expired, remove it from the cache - if entry.ExpireAt < now { + if entry.IsExpired() { c.removeElement(ele) metricCacheAccess.WithLabelValues("miss").Add(1) return diff --git a/peer_client.go b/peer_client.go index 39c13c14..5e2fef15 100644 --- a/peer_client.go +++ b/peer_client.go @@ -66,9 +66,10 @@ type response struct { } type request struct { - request *RateLimitReq - resp chan *response - ctx context.Context + request *RateLimitReq + reqState RateLimitReqState + resp chan *response + ctx context.Context } type PeerConfig struct { diff --git a/peer_client_test.go b/peer_client_test.go index d739f40a..5f0bc016 100644 --- a/peer_client_test.go +++ b/peer_client_test.go @@ -37,6 +37,7 @@ func TestPeerClientShutdown(t *testing.T) { } const threads = 10 + createdAt := epochMillis(clock.Now()) cases := []test{ {"No batching", gubernator.Behavior_NO_BATCHING}, @@ -71,9 +72,10 @@ func TestPeerClientShutdown(t *testing.T) { wg.Go(func() error { ctx := context.Background() _, err := client.GetPeerRateLimit(ctx, &gubernator.RateLimitReq{ - Hits: 1, - Limit: 100, - Behavior: c.Behavior, + Hits: 1, + Limit: 100, + Behavior: c.Behavior, + CreatedAt: &createdAt, }) if err != nil { diff --git a/peers.pb.go b/peers.pb.go index a805b29a..e69e6fe2 100644 --- a/peers.pb.go +++ b/peers.pb.go @@ -185,9 +185,25 @@ type UpdatePeerGlobal struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` - Status *RateLimitResp `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` - Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" 
json:"algorithm,omitempty"` + // Uniquely identifies this rate limit IE: 'ip:10.2.10.7' or 'account:123445' + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Status *RateLimitResp `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` + // The algorithm used to calculate the rate limit. The algorithm may change on + // subsequent requests, when this occurs any previous rate limit hit counts are reset. + Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" json:"algorithm,omitempty"` + // The duration of the rate limit in milliseconds + Duration int64 `protobuf:"varint,4,opt,name=duration,proto3" json:"duration,omitempty"` + // The exact time the original request was created in Epoch milliseconds. + // Due to time drift between systems, it may be advantageous for a client to + // set the exact time the request was created. It possible the system clock + // for the client has drifted from the system clock where gubernator daemon + // is running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. + CreatedAt int64 `protobuf:"varint,5,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` } func (x *UpdatePeerGlobal) Reset() { @@ -243,6 +259,20 @@ func (x *UpdatePeerGlobal) GetAlgorithm() Algorithm { return Algorithm_TOKEN_BUCKET } +func (x *UpdatePeerGlobal) GetDuration() int64 { + if x != nil { + return x.Duration + } + return 0 +} + +func (x *UpdatePeerGlobal) GetCreatedAt() int64 { + if x != nil { + return x.CreatedAt + } + return 0 +} + type UpdatePeerGlobalsResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -302,7 +332,7 @@ var file_peers_proto_rawDesc = []byte{ 0x39, 0x0a, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, - 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0x92, 0x01, 0x0a, 0x10, 0x55, + 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0xcd, 0x01, 0x0a, 0x10, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x34, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, @@ -311,25 +341,28 @@ var file_peers_proto_rawDesc = []byte{ 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x36, 0x0a, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x67, 0x6f, 0x72, - 0x69, 0x74, 0x68, 0x6d, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x22, - 0x17, 0x0a, 0x15, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, - 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, - 0x72, 0x73, 0x56, 0x31, 0x12, 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, - 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x69, 0x74, 0x68, 0x6d, 0x52, 0x09, 
0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, + 0x1a, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x0a, 0x63, + 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x22, 0x17, 0x0a, 0x15, 0x55, 0x70, + 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, 0x72, 0x73, 0x56, 0x31, 0x12, + 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, + 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, + 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, + 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, - 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, - 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, - 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, - 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, + 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, + 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, + 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, + 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, - 0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, - 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, - 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, - 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, + 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/peers.proto b/peers.proto index 1ce2a431..0dad87d4 100644 --- a/peers.proto +++ b/peers.proto @@ -26,32 +26,48 @@ import "gubernator.proto"; // NOTE: For use by gubernator peers only service PeersV1 { - // Used by peers to relay batches of requests to an owner peer - rpc GetPeerRateLimits (GetPeerRateLimitsReq) returns 
(GetPeerRateLimitsResp) {}
+ // Used by peers to relay batches of requests to an owner peer
+ rpc GetPeerRateLimits (GetPeerRateLimitsReq) returns (GetPeerRateLimitsResp) {}
- // Used by owner peers to send global rate limit updates to non-owner peers
- rpc UpdatePeerGlobals (UpdatePeerGlobalsReq) returns (UpdatePeerGlobalsResp) {}
+ // Used by owner peers to send global rate limit updates to non-owner peers
+ rpc UpdatePeerGlobals (UpdatePeerGlobalsReq) returns (UpdatePeerGlobalsResp) {}
 }
 message GetPeerRateLimitsReq {
- // Must specify at least one RateLimit. The peer that recives this request MUST be authoritative for
- // each rate_limit[x].unique_key provided, as the peer will not forward the request to any other peers
- repeated RateLimitReq requests = 1;
+ // Must specify at least one RateLimit. The peer that receives this request MUST be authoritative for
+ // each rate_limit[x].unique_key provided, as the peer will not forward the request to any other peers
+ repeated RateLimitReq requests = 1;
 }
 message GetPeerRateLimitsResp {
- // Responses are in the same order as they appeared in the PeerRateLimitRequests
- repeated RateLimitResp rate_limits = 1;
+ // Responses are in the same order as they appeared in the PeerRateLimitRequests
+ repeated RateLimitResp rate_limits = 1;
 }
 message UpdatePeerGlobalsReq {
- // Must specify at least one RateLimit
- repeated UpdatePeerGlobal globals = 1;
+ // Must specify at least one RateLimit
+ repeated UpdatePeerGlobal globals = 1;
 }
 message UpdatePeerGlobal {
- string key = 1;
- RateLimitResp status = 2;
- Algorithm algorithm = 3;
+ // Uniquely identifies this rate limit IE: 'ip:10.2.10.7' or 'account:123445'
+ string key = 1;
+ RateLimitResp status = 2;
+ // The algorithm used to calculate the rate limit. The algorithm may change on
+ // subsequent requests; when this occurs, any previous rate limit hit counts are reset.
+ Algorithm algorithm = 3;
+ // The duration of the rate limit in milliseconds
+ int64 duration = 4;
+ // The exact time the original request was created in Epoch milliseconds.
+ // Due to time drift between systems, it may be advantageous for a client to
+ // set the exact time the request was created. It is possible the system clock
+ // for the client has drifted from the system clock where the gubernator daemon
+ // is running.
+ //
+ // The created time is used by gubernator to calculate the reset time for
+ // both token and leaky bucket algorithms. If it is not set by the client,
+ // gubernator will set the created time when it receives the rate limit
+ // request.
+ int64 created_at = 5; } message UpdatePeerGlobalsResp {} diff --git a/python/gubernator/gubernator_pb2.py b/python/gubernator/gubernator_pb2.py index 17351bb6..f1369bd5 100644 --- a/python/gubernator/gubernator_pb2.py +++ b/python/gubernator/gubernator_pb2.py @@ -15,7 +15,7 @@ from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\x8e\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\xc1\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 
\x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x12\"\n\ncreated_at\x18\n \x01(\x03H\x00R\tcreatedAt\x88\x01\x01\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\r\n\x0b_created_at\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -31,28 +31,28 @@ _globals['_V1'].methods_by_name['GetRateLimits']._serialized_options = b'\202\323\344\223\002\026\"\021/v1/GetRateLimits:\001*' _globals['_V1'].methods_by_name['HealthCheck']._options = None _globals['_V1'].methods_by_name['HealthCheck']._serialized_options = b'\202\323\344\223\002\021\022\017/v1/HealthCheck' - _globals['_ALGORITHM']._serialized_start=1045 - _globals['_ALGORITHM']._serialized_end=1092 - _globals['_BEHAVIOR']._serialized_start=1095 - _globals['_BEHAVIOR']._serialized_end=1236 - _globals['_STATUS']._serialized_start=1238 - _globals['_STATUS']._serialized_end=1279 + _globals['_ALGORITHM']._serialized_start=1096 + _globals['_ALGORITHM']._serialized_end=1143 + _globals['_BEHAVIOR']._serialized_start=1146 + _globals['_BEHAVIOR']._serialized_end=1287 + _globals['_STATUS']._serialized_start=1289 + _globals['_STATUS']._serialized_end=1330 _globals['_GETRATELIMITSREQ']._serialized_start=65 _globals['_GETRATELIMITSREQ']._serialized_end=140 _globals['_GETRATELIMITSRESP']._serialized_start=142 _globals['_GETRATELIMITSRESP']._serialized_end=221 _globals['_RATELIMITREQ']._serialized_start=224 - _globals['_RATELIMITREQ']._serialized_end=622 - _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=563 - 
_globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=622 - _globals['_RATELIMITRESP']._serialized_start=625 - _globals['_RATELIMITRESP']._serialized_end=925 - _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=563 - _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=622 - _globals['_HEALTHCHECKREQ']._serialized_start=927 - _globals['_HEALTHCHECKREQ']._serialized_end=943 - _globals['_HEALTHCHECKRESP']._serialized_start=945 - _globals['_HEALTHCHECKRESP']._serialized_end=1043 - _globals['_V1']._serialized_start=1282 - _globals['_V1']._serialized_end=1503 + _globals['_RATELIMITREQ']._serialized_end=673 + _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=599 + _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=658 + _globals['_RATELIMITRESP']._serialized_start=676 + _globals['_RATELIMITRESP']._serialized_end=976 + _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=599 + _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=658 + _globals['_HEALTHCHECKREQ']._serialized_start=978 + _globals['_HEALTHCHECKREQ']._serialized_end=994 + _globals['_HEALTHCHECKRESP']._serialized_start=996 + _globals['_HEALTHCHECKRESP']._serialized_end=1094 + _globals['_V1']._serialized_start=1333 + _globals['_V1']._serialized_end=1554 # @@protoc_insertion_point(module_scope) diff --git a/python/gubernator/peers_pb2.py b/python/gubernator/peers_pb2.py index b1451c7a..97a519d4 100644 --- a/python/gubernator/peers_pb2.py +++ b/python/gubernator/peers_pb2.py @@ -15,7 +15,7 @@ import gubernator_pb2 as gubernator__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\x92\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\xcd\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x1a\n\x08\x64uration\x18\x04 \x01(\x03R\x08\x64uration\x12\x1d\n\ncreated_at\x18\x05 
\x01(\x03R\tcreatedAt\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -30,9 +30,9 @@ _globals['_UPDATEPEERGLOBALSREQ']._serialized_start=217 _globals['_UPDATEPEERGLOBALSREQ']._serialized_end=298 _globals['_UPDATEPEERGLOBAL']._serialized_start=301 - _globals['_UPDATEPEERGLOBAL']._serialized_end=447 - _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=449 - _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=472 - _globals['_PEERSV1']._serialized_start=475 - _globals['_PEERSV1']._serialized_end=680 + _globals['_UPDATEPEERGLOBAL']._serialized_end=506 + _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=508 + _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=531 + _globals['_PEERSV1']._serialized_start=534 + _globals['_PEERSV1']._serialized_end=739 # @@protoc_insertion_point(module_scope) diff --git a/workers.go b/workers.go index 07ba177f..34d99d1d 100644 --- a/workers.go +++ b/workers.go @@ -199,7 +199,7 @@ func (p *WorkerPool) dispatch(worker *Worker) { } resp := new(response) - resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, worker.cache) + resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, req.reqState, worker.cache) select { case req.resp <- resp: // Success. @@ -258,16 +258,17 @@ func (p *WorkerPool) dispatch(worker *Worker) { } // GetRateLimit sends a GetRateLimit request to worker pool. -func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) (retval *RateLimitResp, reterr error) { +func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, reqState RateLimitReqState) (*RateLimitResp, error) { // Delegate request to assigned channel based on request key. worker := p.getWorker(rlRequest.HashKey()) queueGauge := metricWorkerQueue.WithLabelValues("GetRateLimit", worker.name) queueGauge.Inc() defer queueGauge.Dec() handlerRequest := request{ - ctx: ctx, - resp: make(chan *response, 1), - request: rlRequest, + ctx: ctx, + resp: make(chan *response, 1), + request: rlRequest, + reqState: reqState, } // Send request. @@ -289,14 +290,14 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) } // Handle request received by worker. 
-func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, cache Cache) (*RateLimitResp, error) {
+func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, reqState RateLimitReqState, cache Cache) (*RateLimitResp, error) {
 	defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("Worker.handleGetRateLimit")).ObserveDuration()
 	var rlResponse *RateLimitResp
 	var err error
 	switch req.Algorithm {
 	case Algorithm_TOKEN_BUCKET:
-		rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req)
+		rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, reqState)
 		if err != nil {
 			msg := "Error in tokenBucket"
 			countError(err, msg)
@@ -305,7 +306,7 @@ func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq,
 		}
 
 	case Algorithm_LEAKY_BUCKET:
-		rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req)
+		rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, reqState)
 		if err != nil {
 			msg := "Error in leakyBucket"
 			countError(err, msg)
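
The workers.go changes above thread a `RateLimitReqState` through the pool so the algorithms can distinguish the owning peer from non-owners, while the resolved `CreatedAt` travels with the request instead of being re-read from the local clock at every hop. The sketch below is illustrative only and is not part of this patch: the helper name `resolveRequest` and its `pool`/`isOwner` arguments are hypothetical; it simply shows how a caller might wire the two pieces together using the types and functions this PR defines.

```go
package gubernator

import "context"

// resolveRequest is an illustrative sketch (not part of this patch) showing
// how CreatedAt and RateLimitReqState are intended to flow into the worker
// pool. The helper name and its pool/isOwner arguments are hypothetical.
func resolveRequest(ctx context.Context, pool *WorkerPool, r *RateLimitReq, isOwner bool) (*RateLimitResp, error) {
	// Resolve the request time once. A peer that receives a forwarded or
	// broadcast request reuses the propagated CreatedAt rather than its
	// own clock, which keeps the reset time stable across the cluster.
	if r.CreatedAt == nil {
		now := MillisecondNow()
		r.CreatedAt = &now
	}

	// Only the owning peer should persist state via Store.OnChange or count
	// over-limit hits; tokenBucket/leakyBucket gate those paths on IsOwner.
	reqState := RateLimitReqState{IsOwner: isOwner}

	return pool.GetRateLimit(ctx, r, reqState)
}
```

With `duration` and `created_at` now carried in `UpdatePeerGlobal`, a non-owner receiving a broadcast can derive a bucket's expiry from the propagated values rather than from its own clock, consistent with the proto comment above stating that the created time is used to calculate the reset time for both algorithms.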