From 06eb4e0571fc967c4a0fe0c3f77e417690cc0973 Mon Sep 17 00:00:00 2001 From: Marcin Wyszynski Date: Mon, 11 Dec 2023 12:22:05 +0100 Subject: [PATCH] Work with instances not in ASG (#7) --- internal/controller.go | 13 +++++++++++-- internal/controller_test.go | 37 +++++++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/internal/controller.go b/internal/controller.go index 3ce6913..1a6961f 100644 --- a/internal/controller.go +++ b/internal/controller.go @@ -246,12 +246,21 @@ func (c *Controller) KillInstance(ctx context.Context, instanceID string) (err e ShouldDecrementDesiredCapacity: aws.Bool(true), }) - if err != nil { + // A special instance of the error is when the instance is not part of + // the autoscaling group. This can happen when the instance successfully + // detached but for some reason the termination request failed. + // + // This will fix one-off errors and machines manually connected to the + // worker pool (as long as they terminate upon request), but if there + // are multiple ASGs connected to the same worker pool, this will be a + // common occurrence and will break the entire autoscaling logic. + if err != nil && !strings.Contains(err.Error(), "is not part of Auto Scaling group") { err = fmt.Errorf("could not detach instance from autoscaling group: %v", err) return err } - // Now that the instance is detached from the ASG, we can terminate it. + // Now that the instance is detached from the ASG (or was never part of + // the ASG), we can terminate it. _, err = c.EC2.TerminateInstances(ctx, &ec2.TerminateInstancesInput{ InstanceIds: []string{instanceID}, }) diff --git a/internal/controller_test.go b/internal/controller_test.go index 8d8dc44..2453531 100644 --- a/internal/controller_test.go +++ b/internal/controller_test.go @@ -440,13 +440,11 @@ func TestController(t *testing.T) { }) }) - g.Describe("when the detach call succeeds", func() { + g.Describe("when the detach call does not fail", func() { var terminateCall *mock.Call var terminateInput *ec2.TerminateInstancesInput g.BeforeEach(func() { - detachCall.Return(nil, nil) - terminateInput = nil terminateCall = mockEC2.On( @@ -460,12 +458,10 @@ func TestController(t *testing.T) { ) }) - g.Describe("when the terminate call fails", func() { - g.BeforeEach(func() { terminateCall.Return(nil, errors.New("bacon")) }) - - g.It("send the correct input", func() { - Expect(terminateInput).NotTo(BeNil()) - Expect(terminateInput.InstanceIds).To(ConsistOf(instanceID)) + g.Describe("when the instance is not part of the ASG", func() { + g.BeforeEach(func() { + detachCall.Return(nil, errors.New("instance is not part of Auto Scaling group")) + terminateCall.Return(nil, errors.New("bacon")) }) g.It("should return an error", func() { @@ -473,10 +469,27 @@ func TestController(t *testing.T) { }) }) - g.Describe("when the terminate call succeeds", func() { - g.BeforeEach(func() { terminateCall.Return(nil, nil) }) + g.Describe("when the detach call succeeds", func() { + g.BeforeEach(func() { detachCall.Return(nil, nil) }) + + g.Describe("when the terminate call fails", func() { + g.BeforeEach(func() { terminateCall.Return(nil, errors.New("bacon")) }) + + g.It("send the correct input", func() { + Expect(terminateInput).NotTo(BeNil()) + Expect(terminateInput.InstanceIds).To(ConsistOf(instanceID)) + }) - g.It("succeeds", func() { Expect(err).NotTo(HaveOccurred()) }) + g.It("should return an error", func() { + Expect(err).To(MatchError("could not terminate detached instance: bacon")) + }) + }) + + g.Describe("when the terminate call succeeds", func() { + g.BeforeEach(func() { terminateCall.Return(nil, nil) }) + + g.It("succeeds", func() { Expect(err).NotTo(HaveOccurred()) }) + }) }) }) })