From 01bf52a2e35b7490e818f813c5e7b0c08e2f75ed Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Fri, 27 Sep 2024 01:24:20 +0530 Subject: [PATCH] [improvement] : Limit post calls to /v4/linode/instances endpoint (#519) * add global lock and token map for counting POST requests per token * set default limits in defaults.go and use them in controller * move rate-limits related functions to separate file and simplify logic * add unittests for ratelimits.go file * fix go.mod file * address review comments * fix updating token after rebasing from PR #507 --- Makefile | 1 + clients/clients.go | 3 + cloud/scope/common.go | 8 + cloud/scope/machine.go | 3 + config/manager/manager.yaml | 7 +- controller/linodemachine_controller.go | 8 +- .../linodemachine_controller_helpers.go | 29 ++- controller/linodemachine_controller_test.go | 30 +++ go.mod | 14 +- go.sum | 27 ++- mock/client.go | 13 + util/errors.go | 26 ++ util/ratelimits.go | 96 ++++++++ util/ratelimits_test.go | 226 ++++++++++++++++++ util/reconciler/defaults.go | 9 + 15 files changed, 477 insertions(+), 23 deletions(-) create mode 100644 util/errors.go create mode 100644 util/ratelimits.go create mode 100644 util/ratelimits_test.go diff --git a/Makefile b/Makefile index 5586e7d78..de1f390cd 100644 --- a/Makefile +++ b/Makefile @@ -268,6 +268,7 @@ clean-release: clean-release-git clean-child-clusters: kubectl $(KUBECTL) delete clusters -A --all --timeout=180s $(KUBECTL) delete linodevpc -A --all --timeout=60s + $(KUBECTL) delete linodefirewall -A --all --timeout=60s ## -------------------------------------- ## Build Dependencies diff --git a/clients/clients.go b/clients/clients.go index fbf4086a0..723d2bead 100644 --- a/clients/clients.go +++ b/clients/clients.go @@ -4,6 +4,7 @@ import ( "context" "github.com/akamai/AkamaiOPEN-edgegrid-golang/v8/pkg/dns" + "github.com/go-resty/resty/v2" "github.com/linode/linodego" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -20,6 +21,8 @@ type LinodeClient interface { LinodePlacementGroupClient LinodeFirewallClient LinodeTokenClient + + OnAfterResponse(m func(response *resty.Response) error) } type AkamClient interface { diff --git a/cloud/scope/common.go b/cloud/scope/common.go index f86677edb..b99d86cf7 100644 --- a/cloud/scope/common.go +++ b/cloud/scope/common.go @@ -2,8 +2,10 @@ package scope import ( "context" + "crypto/sha256" "crypto/tls" "crypto/x509" + "encoding/hex" "errors" "fmt" "net/http" @@ -187,3 +189,9 @@ func toFinalizer(obj client.Object) string { } return fmt.Sprintf("%s.%s/%s.%s", kind, group, namespace, name) } + +// GetHash returns sha256 hash of input string +func GetHash(key string) string { + hash := sha256.Sum256([]byte(key)) + return hex.EncodeToString(hash[:]) +} diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go index 5efecc3a8..b4599914e 100644 --- a/cloud/scope/machine.go +++ b/cloud/scope/machine.go @@ -29,6 +29,7 @@ type MachineScope struct { PatchHelper *patch.Helper Cluster *clusterv1.Cluster Machine *clusterv1.Machine + TokenHash string LinodeClient LinodeClient LinodeCluster *infrav1alpha2.LinodeCluster LinodeMachine *infrav1alpha2.LinodeMachine @@ -71,6 +72,7 @@ func NewMachineScope(ctx context.Context, linodeClientConfig ClientConfig, param PatchHelper: helper, Cluster: params.Cluster, Machine: params.Machine, + TokenHash: GetHash(linodeClientConfig.Token), LinodeClient: linodeClient, LinodeCluster: params.LinodeCluster, LinodeMachine: params.LinodeMachine, @@ -170,5 +172,6 @@ func (s *MachineScope) SetCredentialRefTokenForLinodeClients(ctx context.Context return fmt.Errorf("credentials from secret ref: %w", err) } s.LinodeClient = s.LinodeClient.SetToken(string(apiToken)) + s.TokenHash = GetHash(string(apiToken)) return nil } diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 8d331b557..bebcac933 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -113,10 +113,9 @@ spec: # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ resources: limits: - cpu: 500m - memory: 128Mi + memory: 2Gi requests: - cpu: 10m - memory: 64Mi + cpu: 1000m + memory: 512Mi serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/controller/linodemachine_controller.go b/controller/linodemachine_controller.go index 09244a382..66fdfd966 100644 --- a/controller/linodemachine_controller.go +++ b/controller/linodemachine_controller.go @@ -285,7 +285,12 @@ func (r *LinodeMachineReconciler) reconcilePreflightCreate(ctx context.Context, logger.Error(err, "Failed to create Linode machine InstanceCreateOptions") return retryIfTransient(err) } - linodeInstance, err := machineScope.LinodeClient.CreateInstance(ctx, *createOpts) + + linodeInstance, err := createInstance(ctx, logger, machineScope, createOpts) + if errors.Is(err, util.ErrRateLimit) { + return ctrl.Result{RequeueAfter: reconciler.SecondaryLinodeTooManyPOSTRequestsErrorRetryDelay}, nil + } + if err != nil { logger.Error(err, "Failed to create Linode machine instance") if reconciler.RecordDecayingCondition(machineScope.LinodeMachine, @@ -295,6 +300,7 @@ func (r *LinodeMachineReconciler) reconcilePreflightCreate(ctx context.Context, } return retryIfTransient(err) } + conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightCreated) // Set the provider ID since the instance is successfully created machineScope.LinodeMachine.Spec.ProviderID = util.Pointer(fmt.Sprintf("linode://%d", linodeInstance.ID)) diff --git a/controller/linodemachine_controller_helpers.go b/controller/linodemachine_controller_helpers.go index d73321d48..6f99d02d5 100644 --- a/controller/linodemachine_controller_helpers.go +++ b/controller/linodemachine_controller_helpers.go @@ -59,10 +59,21 @@ var ( errNoPublicIPv6SLAACAddrs = errors.New("no public SLAAC address set") ) +func handleTooManyRequestsError(err error) (ctrl.Result, error) { + newErr := linodego.NewError(err) + if newErr.Response == nil { + return ctrl.Result{RequeueAfter: reconciler.DefaultLinodeTooManyRequestsErrorRetryDelay}, nil + } + if newErr.Response.Request.Method != http.MethodPost { + return ctrl.Result{RequeueAfter: reconciler.DefaultLinodeTooManyRequestsErrorRetryDelay}, nil + } + return ctrl.Result{RequeueAfter: reconciler.DefaultLinodeTooManyPOSTRequestsErrorRetryDelay}, nil +} + func retryIfTransient(err error) (ctrl.Result, error) { if util.IsRetryableError(err) { if linodego.ErrHasStatus(err, http.StatusTooManyRequests) { - return ctrl.Result{RequeueAfter: reconciler.DefaultLinodeTooManyRequestsErrorRetryDelay}, nil + return handleTooManyRequestsError(err) } return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerRetryDelay}, nil } @@ -649,3 +660,19 @@ func getDefaultInstanceConfig(ctx context.Context, machineScope *scope.MachineSc return configs[0], nil } + +// createInstance provisions linode instance after checking if the request will be within the rate-limits +// Note: it takes a lock before checking for the rate limits and releases it after making request to linode API or when returning from function +func createInstance(ctx context.Context, logger logr.Logger, machineScope *scope.MachineScope, createOpts *linodego.InstanceCreateOptions) (*linodego.Instance, error) { + ctr := util.GetPostReqCounter(machineScope.TokenHash) + ctr.Mu.Lock() + defer ctr.Mu.Unlock() + + if ctr.IsPOSTLimitReached() { + logger.Info(fmt.Sprintf("Cannot make more POST requests as rate-limit is reached (%d per %v seconds). Waiting and retrying after %v seconds", reconciler.SecondaryPOSTRequestLimit, reconciler.SecondaryLinodeTooManyPOSTRequestsErrorRetryDelay, reconciler.SecondaryLinodeTooManyPOSTRequestsErrorRetryDelay)) + return nil, util.ErrRateLimit + } + + machineScope.LinodeClient.OnAfterResponse(ctr.ApiResponseRatelimitCounter) + return machineScope.LinodeClient.CreateInstance(ctx, *createOpts) +} diff --git a/controller/linodemachine_controller_test.go b/controller/linodemachine_controller_test.go index 13998d1bd..89f242a86 100644 --- a/controller/linodemachine_controller_test.go +++ b/controller/linodemachine_controller_test.go @@ -157,6 +157,9 @@ var _ = Describe("create", Label("machine", "create"), func() { IPv6: "fd00::", Status: linodego.InstanceOffline, }, nil) + mockLinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() bootInst := mockLinodeClient.EXPECT(). BootInstance(ctx, 123, 0). After(createInst). @@ -240,6 +243,9 @@ var _ = Describe("create", Label("machine", "create"), func() { time.Sleep(time.Microsecond) return nil, errors.New("time is up") }) + mockLinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() mScope := scope.MachineScope{ Client: k8sClient, @@ -283,6 +289,9 @@ var _ = Describe("create", Label("machine", "create"), func() { DoAndReturn(func(_, _ any) (*linodego.Instance, error) { return nil, linodego.NewError(errors.New("context deadline exceeded")) }) + mockLinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() mScope := scope.MachineScope{ Client: k8sClient, LinodeClient: mockLinodeClient, @@ -324,6 +333,9 @@ var _ = Describe("create", Label("machine", "create"), func() { IPv6: "fd00::", Status: linodego.InstanceOffline, }, nil) + mockLinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() listInstConfs := mockLinodeClient.EXPECT(). ListInstanceConfigs(ctx, 123, gomock.Any()). After(createInst). @@ -475,6 +487,9 @@ var _ = Describe("create", Label("machine", "create"), func() { IPv6: "fd00::", Status: linodego.InstanceOffline, }, nil) + mockLinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() listInstConfs := mockLinodeClient.EXPECT(). ListInstanceConfigs(ctx, 123, gomock.Any()). After(createInst). @@ -729,6 +744,9 @@ var _ = Describe("createDNS", Label("machine", "createDNS"), func() { IPv6: "fd00::", Status: linodego.InstanceOffline, }, nil) + mockLinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() bootInst := mockLinodeClient.EXPECT(). BootInstance(ctx, 123, 0). After(createInst). @@ -924,6 +942,9 @@ var _ = Describe("machine-lifecycle", Ordered, Label("machine", "machine-lifecyc mck.LinodeClient.EXPECT().CreateInstance(gomock.Any(), gomock.Any()). After(getImage). Return(nil, &linodego.Error{Code: http.StatusBadGateway}) + mck.LinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() res, err := reconciler.reconcile(ctx, mck.Logger(), mScope) Expect(err).NotTo(HaveOccurred()) Expect(res.RequeueAfter).To(Equal(rutil.DefaultMachineControllerRetryDelay)) @@ -980,6 +1001,9 @@ var _ = Describe("machine-lifecycle", Ordered, Label("machine", "machine-lifecyc mck.LinodeClient.EXPECT().CreateInstance(gomock.Any(), gomock.Any()). After(getImage). Return(nil, &linodego.Error{Code: http.StatusTooManyRequests}) + mck.LinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() res, err := reconciler.reconcile(ctx, mck.Logger(), mScope) Expect(err).NotTo(HaveOccurred()) Expect(res.RequeueAfter).To(Equal(rutil.DefaultLinodeTooManyRequestsErrorRetryDelay)) @@ -1002,6 +1026,9 @@ var _ = Describe("machine-lifecycle", Ordered, Label("machine", "machine-lifecyc IPv6: "fd00::", Status: linodego.InstanceOffline, }, nil) + mck.LinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() listInstConfigs := mck.LinodeClient.EXPECT(). ListInstanceConfigs(ctx, 123, gomock.Any()). After(createInst). @@ -1073,6 +1100,9 @@ var _ = Describe("machine-lifecycle", Ordered, Label("machine", "machine-lifecyc IPv6: "fd00::", Status: linodego.InstanceOffline, }, nil) + mck.LinodeClient.EXPECT(). + OnAfterResponse(gomock.Any()). + Return() bootInst := mck.LinodeClient.EXPECT(). BootInstance(ctx, 123, 0). After(createInst). diff --git a/go.mod b/go.mod index 71b6023f7..d535bc222 100644 --- a/go.mod +++ b/go.mod @@ -25,11 +25,17 @@ require ( k8s.io/api v0.30.3 k8s.io/apimachinery v0.30.3 k8s.io/client-go v0.30.3 - k8s.io/utils v0.0.0-20240102154912-e7106e64919e + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/cluster-api v1.8.0 sigs.k8s.io/controller-runtime v0.18.5 ) +require ( + github.com/google/cel-go v0.20.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect +) + require ( github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 // indirect github.com/apex/log v1.9.0 // indirect @@ -45,9 +51,9 @@ require ( github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.22.3 // indirect + github.com/go-openapi/swag v0.22.4 // indirect github.com/go-ozzo/ozzo-validation/v4 v4.3.0 // indirect - github.com/go-resty/resty/v2 v2.13.1 // indirect + github.com/go-resty/resty/v2 v2.13.1 github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gobuffalo/flect v1.0.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -111,7 +117,7 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.30.3 // indirect - k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect diff --git a/go.sum b/go.sum index fd14c62c7..4b83b9714 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,8 @@ github.com/akamai/AkamaiOPEN-edgegrid-golang/v8 v8.4.0 h1:zZJimNqkV3o7qZqBnprKyH github.com/akamai/AkamaiOPEN-edgegrid-golang/v8 v8.4.0/go.mod h1:2xRRnHx8dnw0i8IZPYOI0I7xbr1gnAN1uIYo7acMIbg= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg= -github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df h1:7RFfzj4SSt6nnvCPbCqijJi1nWCd+TqAT3bYCStRC18= -github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= github.com/apex/log v1.9.0 h1:FHtw/xuaM8AgmvDDTI9fiwoAL25Sq2cxojnZICUU8l0= github.com/apex/log v1.9.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA= github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo= @@ -64,8 +64,9 @@ github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= +github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-ozzo/ozzo-validation/v4 v4.3.0 h1:byhDUpfEwjsVQb1vBunvIjh2BHQ9ead57VkAEY4V+Es= github.com/go-ozzo/ozzo-validation/v4 v4.3.0/go.mod h1:2NKgrcHl3z6cJs+3Oo940FPRiTzuqKbvfrL2RxCj6Ew= github.com/go-resty/resty/v2 v2.13.1 h1:x+LHXBI2nMB1vqndymf26quycC4aggYJ7DECYbiz03g= @@ -82,8 +83,8 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/cel-go v0.17.8 h1:j9m730pMZt1Fc4oKhCLUHfjj6527LuhYcYw0Rl8gqto= -github.com/google/cel-go v0.17.8/go.mod h1:HXZKzB0LXqer5lHHgfWAnlYwJaQBDKMjxjulNQzhwhY= +github.com/google/cel-go v0.20.1 h1:nDx9r8S3L4pE61eDdt8igGj8rf5kjYR3ILxWIpWNi84= +github.com/google/cel-go v0.20.1/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -215,8 +216,8 @@ go.opentelemetry.io/contrib/bridges/prometheus v0.55.0 h1:1oZYcP3wuazG3O1563m8cs go.opentelemetry.io/contrib/bridges/prometheus v0.55.0/go.mod h1:sU48aWFqiqBXo2RBtq7KarczkW8uK6RdIU54y4VzpZs= go.opentelemetry.io/contrib/exporters/autoexport v0.55.0 h1:8kNP8SX9id5TY2feLB+79aFxE0kqzh3KvjF1nAfGxVM= go.opentelemetry.io/contrib/exporters/autoexport v0.55.0/go.mod h1:WhcvzeuTOr58aYsJ7S4ubY1xMs0WXAPaqTQnxr8bRHk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.6.0 h1:WYsDPt0fM4KZaMhLvY+x6TVXd85P/KNl3Ez3t+0+kGs= @@ -402,14 +403,14 @@ k8s.io/cluster-bootstrap v0.30.3 h1:MgxyxMkpaC6mu0BKWJ8985XCOnKU+eH3Iy+biwtDXRk= k8s.io/cluster-bootstrap v0.30.3/go.mod h1:h8BoLDfdD7XEEIXy7Bx9FcMzxHwz29jsYYi34bM5DKU= k8s.io/component-base v0.30.3 h1:Ci0UqKWf4oiwy8hr1+E3dsnliKnkMLZMVbWzeorlk7s= k8s.io/component-base v0.30.3/go.mod h1:C1SshT3rGPCuNtBs14RmVD2xW0EhRSeLvBh7AGk1quA= -k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= -k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ= -k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.0 h1:Tc9rS7JJoZ9sl3OpL4842oIk6lH7gWBb0JOmJ0ute7M= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.0/go.mod h1:1ewhL9l1gkPcU/IU/6rFYfikf+7Y5imWv7ARVbBOzNs= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 h1:2770sDpzrjjsAtVhSeUFseziht227YAWYHLGNM8QPwY= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/cluster-api v1.8.0 h1:xdF9svGCbezxOn9Y6QmlVnNaZ0n9QkRJpNuKJkeorUw= sigs.k8s.io/cluster-api v1.8.0/go.mod h1:iSUcU8rHBNRa6wZJvU6klHKI3EVQC0aMcgjeSofBwKw= sigs.k8s.io/controller-runtime v0.18.5 h1:nTHio/W+Q4aBlQMgbnC5hZb4IjIidyrizMai9P6n4Rk= diff --git a/mock/client.go b/mock/client.go index 011b17312..c8d7091f6 100644 --- a/mock/client.go +++ b/mock/client.go @@ -14,6 +14,7 @@ import ( reflect "reflect" dns "github.com/akamai/AkamaiOPEN-edgegrid-golang/v8/pkg/dns" + resty "github.com/go-resty/resty/v2" linodego "github.com/linode/linodego" gomock "go.uber.org/mock/gomock" meta "k8s.io/apimachinery/pkg/api/meta" @@ -740,6 +741,18 @@ func (mr *MockLinodeClientMockRecorder) ListVPCs(ctx, opts any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListVPCs", reflect.TypeOf((*MockLinodeClient)(nil).ListVPCs), ctx, opts) } +// OnAfterResponse mocks base method. +func (m_2 *MockLinodeClient) OnAfterResponse(m func(*resty.Response) error) { + m_2.ctrl.T.Helper() + m_2.ctrl.Call(m_2, "OnAfterResponse", m) +} + +// OnAfterResponse indicates an expected call of OnAfterResponse. +func (mr *MockLinodeClientMockRecorder) OnAfterResponse(m any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OnAfterResponse", reflect.TypeOf((*MockLinodeClient)(nil).OnAfterResponse), m) +} + // ResizeInstanceDisk mocks base method. func (m *MockLinodeClient) ResizeInstanceDisk(ctx context.Context, linodeID, diskID, size int) error { m.ctrl.T.Helper() diff --git a/util/errors.go b/util/errors.go new file mode 100644 index 000000000..5d76ced8f --- /dev/null +++ b/util/errors.go @@ -0,0 +1,26 @@ +/* +Copyright 2023 Akamai Technologies, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "errors" +) + +var ( + // ErrRateLimit indicates hitting linode API rate limits + ErrRateLimit = errors.New("rate-limit exceeded") +) diff --git a/util/ratelimits.go b/util/ratelimits.go new file mode 100644 index 000000000..9be705e25 --- /dev/null +++ b/util/ratelimits.go @@ -0,0 +1,96 @@ +/* +Copyright 2023 Akamai Technologies, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "net/http" + "strconv" + "strings" + "sync" + "time" + + "github.com/go-resty/resty/v2" + + "github.com/linode/cluster-api-provider-linode/util/reconciler" +) + +// PostRequestCounter keeps track of rate limits for POST to /linode/instances +type PostRequestCounter struct { + Mu sync.RWMutex + ReqRemaining int + RefreshTime int +} + +var ( + // mu is global lock to coordinate access to shared resource postRequestCounters + mu sync.RWMutex + // postRequestCounters stores token hash and pointer to its equivalent PostRequestCounter + postRequestCounters = make(map[string]*PostRequestCounter, 0) +) + +// ApiResponseRatelimitCounter updates ReqRemaining and RefreshTime when a POST call is made to /linode/instances +func (c *PostRequestCounter) ApiResponseRatelimitCounter(resp *resty.Response) error { + if resp.Request.Method != http.MethodPost || !strings.HasSuffix(resp.Request.URL, "/linode/instances") { + return nil + } + + var err error + c.ReqRemaining, err = strconv.Atoi(resp.Header().Get("X-Ratelimit-Remaining")) + if err != nil { + return err + } + + c.RefreshTime, err = strconv.Atoi(resp.Header().Get("X-Ratelimit-Reset")) + if err != nil { + return err + } + return nil +} + +// IsPOSTLimitReached checks whether POST limits have been reached. +func (c *PostRequestCounter) IsPOSTLimitReached() bool { + // TODO: Once linode API adjusts rate-limits, remove secondary rate limit and simplify accordingly + currTime := time.Now().Unix() + + if c.ReqRemaining == 0 { + if currTime <= int64(c.RefreshTime) { + return true + } + } + + secondaryLimitRefreshTime := c.RefreshTime - int(reconciler.SecondaryLinodeTooManyPOSTRequestsErrorRetryDelay.Seconds()) + if c.ReqRemaining <= reconciler.SecondaryPOSTRequestLimit && currTime <= int64(secondaryLimitRefreshTime) { + return true + } + return false +} + +// GetPostReqCounter returns pointer to PostRequestCounter for a given token hash +func GetPostReqCounter(tokenHash string) *PostRequestCounter { + mu.Lock() + defer mu.Unlock() + + ctr, exists := postRequestCounters[tokenHash] + if !exists { + ctr = &PostRequestCounter{ + ReqRemaining: reconciler.DefaultPOSTRequestLimit, + RefreshTime: 0, + } + postRequestCounters[tokenHash] = ctr + } + return ctr +} diff --git a/util/ratelimits_test.go b/util/ratelimits_test.go new file mode 100644 index 000000000..66eefdbcd --- /dev/null +++ b/util/ratelimits_test.go @@ -0,0 +1,226 @@ +/* +Copyright 2023 Akamai Technologies, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "net/http" + "reflect" + "testing" + "time" + + "github.com/go-resty/resty/v2" + + "github.com/linode/cluster-api-provider-linode/util/reconciler" +) + +func TestGetPostReqCounter(t *testing.T) { + t.Parallel() + tests := []struct { + name string + tokenHash string + want *PostRequestCounter + }{ + { + name: "provide hash which exists in map", + tokenHash: "abcdef", + want: &PostRequestCounter{ + ReqRemaining: 5, + RefreshTime: 3, + }, + }, + { + name: "provide hash which doesn't exist", + tokenHash: "uvwxyz", + want: &PostRequestCounter{ + ReqRemaining: reconciler.DefaultPOSTRequestLimit, + RefreshTime: 0, + }, + }, + } + for _, tt := range tests { + postRequestCounters["abcdef"] = &PostRequestCounter{ + ReqRemaining: reconciler.SecondaryPOSTRequestLimit, + RefreshTime: 3, + } + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := GetPostReqCounter(tt.tokenHash); !reflect.DeepEqual(got, tt.want) { + t.Errorf("GetPostReqCounter() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPostRequestCounter_IsPOSTLimitReached(t *testing.T) { + t.Parallel() + tests := []struct { + name string + fields *PostRequestCounter + want bool + }{ + { + name: "not reached rate limits", + fields: &PostRequestCounter{ + ReqRemaining: 7, + RefreshTime: int(time.Now().Unix()), + }, + want: false, + }, + { + name: "reached token rate limit", + fields: &PostRequestCounter{ + ReqRemaining: reconciler.SecondaryPOSTRequestLimit, + RefreshTime: int(time.Now().Unix() + 100), + }, + want: true, + }, + { + name: "reached account rate limits", + fields: &PostRequestCounter{ + ReqRemaining: 0, + RefreshTime: int(time.Now().Unix() + 100), + }, + want: true, + }, + { + name: "refresh time smaller than current time", + fields: &PostRequestCounter{ + ReqRemaining: reconciler.SecondaryPOSTRequestLimit, + RefreshTime: int(time.Now().Unix() - 100), + }, + want: false, + }, + { + name: "refresh time smaller than current time", + fields: &PostRequestCounter{ + ReqRemaining: 0, + RefreshTime: int(time.Now().Unix() - 100), + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + c := &PostRequestCounter{ + ReqRemaining: tt.fields.ReqRemaining, + RefreshTime: tt.fields.RefreshTime, + } + if got := c.IsPOSTLimitReached(); got != tt.want { + t.Errorf("PostRequestCounter.IsPOSTLimitReached() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPostRequestCounter_ApiResponseRatelimitCounter(t *testing.T) { + t.Parallel() + tests := []struct { + name string + fields *PostRequestCounter + args *resty.Response + wantErr bool + }{ + { + name: "not a POST call", + fields: &PostRequestCounter{ + ReqRemaining: 6, + RefreshTime: int(time.Now().Unix()), + }, + args: &resty.Response{ + Request: &resty.Request{ + Method: http.MethodGet, + }, + }, + wantErr: false, + }, + { + name: "endpoint different than /linode/instances", + fields: &PostRequestCounter{ + ReqRemaining: 6, + RefreshTime: int(time.Now().Unix()), + }, + args: &resty.Response{ + Request: &resty.Request{ + Method: http.MethodPost, + URL: "/v4/vpc/ips", + }, + }, + wantErr: false, + }, + { + name: "no headers in response", + fields: &PostRequestCounter{ + ReqRemaining: 6, + RefreshTime: int(time.Now().Unix()), + }, + args: &resty.Response{ + Request: &resty.Request{ + Method: http.MethodPost, + URL: "/v4/linode/instances", + }, + }, + wantErr: true, + }, + { + name: "missing one value in response header", + fields: &PostRequestCounter{ + ReqRemaining: 6, + RefreshTime: int(time.Now().Unix()), + }, + args: &resty.Response{ + Request: &resty.Request{ + Method: http.MethodPost, + URL: "/v4/linode/instances", + }, + RawResponse: &http.Response{ + Header: http.Header{"X-Ratelimit-Remaining": []string{"10"}}, + }, + }, + wantErr: true, + }, + { + name: "correct headers in response", + fields: &PostRequestCounter{ + ReqRemaining: 6, + RefreshTime: int(time.Now().Unix()), + }, + args: &resty.Response{ + Request: &resty.Request{ + Method: http.MethodPost, + URL: "/v4/linode/instances", + }, + RawResponse: &http.Response{ + Header: http.Header{"X-Ratelimit-Remaining": []string{"10"}, "X-Ratelimit-Reset": []string{"10"}}, + }, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + c := &PostRequestCounter{ + ReqRemaining: tt.fields.ReqRemaining, + RefreshTime: tt.fields.RefreshTime, + } + if err := c.ApiResponseRatelimitCounter(tt.args); (err != nil) != tt.wantErr { + t.Errorf("PostRequestCounter.ApiResponseRatelimitCounter() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/util/reconciler/defaults.go b/util/reconciler/defaults.go index ca0689faa..aefb1860e 100644 --- a/util/reconciler/defaults.go +++ b/util/reconciler/defaults.go @@ -65,6 +65,15 @@ const ( // DefaultDNSTTLSec is the default TTL used for DNS entries for api server loadbalancing DefaultDNSTTLSec = 30 + + // DefaultLinodeTooManyPOSTRequestsErrorRetryDelay is the default requeue delay if there is Linode API error for POST request. Currently, it is set to 10 requests per 30 seconds + DefaultLinodeTooManyPOSTRequestsErrorRetryDelay = 30 * time.Second + // SecondaryLinodeTooManyPOSTRequestsErrorRetryDelay is the secondary requeue delay if there is Linode API error for POST request. Currently, it is set to 5 requests per 15 seconds + SecondaryLinodeTooManyPOSTRequestsErrorRetryDelay = 15 * time.Second + // DefaultPOSTRequestLimit is the default limit of how many POST requests can be made to /linode/instances endpoint in 30 seconds before rate-limit reset. + DefaultPOSTRequestLimit = 10 + // SecondaryPOSTRequestLimit is the secondary limit of how many POST requests can be made to /linode/instances endpoint in 15 seconds before rate-limit kicks in. + SecondaryPOSTRequestLimit = 5 ) // DefaultedLoopTimeout will default the timeout if it is zero-valued.