Skip to content

Commit

Permalink
feat: Enable update in place for node groups with cluster placement g…
Browse files Browse the repository at this point in the history
…roup strategy (#3045)

* feat(eks):added subnet az filter for eks nodegroup placement groups

* fix: Correct logice for restricting placement group to AZ

* fix: Ensure node group args are passed from root module

---------

Co-authored-by: Bryant Biggs <[email protected]>
  • Loading branch information
Josephuss and bryantbiggs authored Aug 5, 2024
1 parent bfa5821 commit 75db486
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 25 deletions.
3 changes: 2 additions & 1 deletion modules/eks-managed-node-group/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ module "eks_managed_node_group" {
| [aws_iam_policy_document.role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
| [aws_ssm_parameter.ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameter) | data source |
| [aws_subnets.efa](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |
| [aws_subnets.placement_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |

## Inputs

Expand Down Expand Up @@ -169,6 +169,7 @@ module "eks_managed_node_group" {
| <a name="input_name"></a> [name](#input\_name) | Name of the EKS managed node group | `string` | `""` | no |
| <a name="input_network_interfaces"></a> [network\_interfaces](#input\_network\_interfaces) | Customize network interfaces to be attached at instance boot time | `list(any)` | `[]` | no |
| <a name="input_placement"></a> [placement](#input\_placement) | The placement of the instance | `map(string)` | `{}` | no |
| <a name="input_placement_group_az"></a> [placement\_group\_az](#input\_placement\_group\_az) | Availability zone where placement group is created (ex. `eu-west-1c`) | `string` | `null` | no |
| <a name="input_placement_group_strategy"></a> [placement\_group\_strategy](#input\_placement\_group\_strategy) | The placement group strategy | `string` | `"cluster"` | no |
| <a name="input_platform"></a> [platform](#input\_platform) | [DEPRECATED - use `ami_type` instead. Will be removed in `v21.0`] Identifies the OS platform as `bottlerocket`, `linux` (AL2), `al2023`, or `windows` | `string` | `"linux"` | no |
| <a name="input_post_bootstrap_user_data"></a> [post\_bootstrap\_user\_data](#input\_post\_bootstrap\_user\_data) | User data that is appended to the user data script after of the EKS bootstrap script. Not used when `ami_type` = `BOTTLEROCKET_*` | `string` | `""` | no |
Expand Down
43 changes: 33 additions & 10 deletions modules/eks-managed-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ data "aws_ec2_instance_type" "this" {
}

locals {
enable_efa_support = var.create && var.enable_efa_support

efa_instance_type = try(element(var.instance_types, 0), "")
num_network_cards = try(data.aws_ec2_instance_type.this[0].maximum_network_cards, 0)

Expand All @@ -52,7 +54,7 @@ locals {
}
]

network_interfaces = var.enable_efa_support ? local.efa_network_interfaces : var.network_interfaces
network_interfaces = local.enable_efa_support ? local.efa_network_interfaces : var.network_interfaces
}

################################################################################
Expand All @@ -63,7 +65,7 @@ locals {
launch_template_name = coalesce(var.launch_template_name, "${var.name}-eks-node-group")
security_group_ids = compact(concat([var.cluster_primary_security_group_id], var.vpc_security_group_ids))

placement = var.create && (var.enable_efa_support || var.create_placement_group) ? { group_name = aws_placement_group.this[0].name } : var.placement
placement = local.create_placement_group ? { group_name = aws_placement_group.this[0].name } : var.placement
}

resource "aws_launch_template" "this" {
Expand Down Expand Up @@ -390,7 +392,7 @@ resource "aws_eks_node_group" "this" {
# Required
cluster_name = var.cluster_name
node_role_arn = var.create_iam_role ? aws_iam_role.this[0].arn : var.iam_role_arn
subnet_ids = var.enable_efa_support ? data.aws_subnets.efa[0].ids : var.subnet_ids
subnet_ids = local.create_placement_group ? data.aws_subnets.placement_group[0].ids : var.subnet_ids

scaling_config {
min_size = var.min_size
Expand Down Expand Up @@ -605,8 +607,12 @@ resource "aws_iam_role_policy" "this" {
# Placement Group
################################################################################

locals {
create_placement_group = var.create && (local.enable_efa_support || var.create_placement_group)
}

resource "aws_placement_group" "this" {
count = var.create && (var.enable_efa_support || var.create_placement_group) ? 1 : 0
count = local.create_placement_group ? 1 : 0

name = "${var.cluster_name}-${var.name}"
strategy = var.placement_group_strategy
Expand All @@ -624,8 +630,11 @@ resource "aws_placement_group" "this" {
################################################################################

# Find the availability zones supported by the instance type
# TODO - remove at next breaking change
# Force users to be explicit about which AZ to use when using placement groups,
# with or without EFA support
data "aws_ec2_instance_type_offerings" "this" {
count = var.create && var.enable_efa_support ? 1 : 0
count = local.enable_efa_support ? 1 : 0

filter {
name = "instance-type"
Expand All @@ -637,17 +646,31 @@ data "aws_ec2_instance_type_offerings" "this" {

# Reverse the lookup to find one of the subnets provided based on the availability
# availability zone ID of the queried instance type (supported)
data "aws_subnets" "efa" {
count = var.create && var.enable_efa_support ? 1 : 0
data "aws_subnets" "placement_group" {
count = local.create_placement_group ? 1 : 0

filter {
name = "subnet-id"
values = var.subnet_ids
}

filter {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
# The data source can lookup the first available AZ or you can specify an AZ (next filter)
dynamic "filter" {
for_each = var.enable_efa_support && var.placement_group_az == null ? [1] : []

content {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
}
}

dynamic "filter" {
for_each = var.placement_group_az != null ? [var.placement_group_az] : []

content {
name = "availability-zone"
values = [filter.value]
}
}
}

Expand Down
7 changes: 7 additions & 0 deletions modules/eks-managed-node-group/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ variable "create_placement_group" {
default = false
}

# TODO - remove at next breaking change
variable "placement_group_strategy" {
description = "The placement group strategy"
type = string
Expand Down Expand Up @@ -337,6 +338,12 @@ variable "subnet_ids" {
default = null
}

variable "placement_group_az" {
description = "Availability zone where placement group is created (ex. `eu-west-1c`)"
type = string
default = null
}

variable "min_size" {
description = "Minimum number of instances/nodes"
type = number
Expand Down
4 changes: 3 additions & 1 deletion modules/self-managed-node-group/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module "self_managed_node_group" {
| [aws_iam_policy_document.role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
| [aws_ssm_parameter.ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameter) | data source |
| [aws_subnets.efa](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |
| [aws_subnets.placement_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |

## Inputs

Expand Down Expand Up @@ -110,6 +110,7 @@ module "self_managed_node_group" {
| <a name="input_create_iam_instance_profile"></a> [create\_iam\_instance\_profile](#input\_create\_iam\_instance\_profile) | Determines whether an IAM instance profile is created or to use an existing IAM instance profile | `bool` | `true` | no |
| <a name="input_create_iam_role_policy"></a> [create\_iam\_role\_policy](#input\_create\_iam\_role\_policy) | Determines whether an IAM role policy is created or not | `bool` | `true` | no |
| <a name="input_create_launch_template"></a> [create\_launch\_template](#input\_create\_launch\_template) | Determines whether to create launch template or not | `bool` | `true` | no |
| <a name="input_create_placement_group"></a> [create\_placement\_group](#input\_create\_placement\_group) | Determines whether a placement group is created & used by the node group | `bool` | `false` | no |
| <a name="input_create_schedule"></a> [create\_schedule](#input\_create\_schedule) | Determines whether to create autoscaling group schedule or not | `bool` | `true` | no |
| <a name="input_credit_specification"></a> [credit\_specification](#input\_credit\_specification) | Customize the credit specification of the instance | `map(string)` | `{}` | no |
| <a name="input_default_cooldown"></a> [default\_cooldown](#input\_default\_cooldown) | The amount of time, in seconds, after a scaling activity completes before another scaling activity can start | `number` | `null` | no |
Expand Down Expand Up @@ -170,6 +171,7 @@ module "self_managed_node_group" {
| <a name="input_network_interfaces"></a> [network\_interfaces](#input\_network\_interfaces) | Customize network interfaces to be attached at instance boot time | `list(any)` | `[]` | no |
| <a name="input_placement"></a> [placement](#input\_placement) | The placement of the instance | `map(string)` | `{}` | no |
| <a name="input_placement_group"></a> [placement\_group](#input\_placement\_group) | The name of the placement group into which you'll launch your instances, if any | `string` | `null` | no |
| <a name="input_placement_group_az"></a> [placement\_group\_az](#input\_placement\_group\_az) | Availability zone where placement group is created (ex. `eu-west-1c`) | `string` | `null` | no |
| <a name="input_platform"></a> [platform](#input\_platform) | [DEPRECATED - must use `ami_type` instead. Will be removed in `v21.0`] | `string` | `null` | no |
| <a name="input_post_bootstrap_user_data"></a> [post\_bootstrap\_user\_data](#input\_post\_bootstrap\_user\_data) | User data that is appended to the user data script after of the EKS bootstrap script. Not used when `ami_type` = `BOTTLEROCKET_*` | `string` | `""` | no |
| <a name="input_pre_bootstrap_user_data"></a> [pre\_bootstrap\_user\_data](#input\_pre\_bootstrap\_user\_data) | User data that is injected into the user data script ahead of the EKS bootstrap script. Not used when `ami_type` = `BOTTLEROCKET_*` | `string` | `""` | no |
Expand Down
35 changes: 28 additions & 7 deletions modules/self-managed-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ resource "aws_autoscaling_group" "this" {

target_group_arns = var.target_group_arns
termination_policies = var.termination_policies
vpc_zone_identifier = local.enable_efa_support ? data.aws_subnets.efa[0].ids : var.subnet_ids
vpc_zone_identifier = local.enable_efa_support ? data.aws_subnets.placement_group[0].ids : var.subnet_ids
wait_for_capacity_timeout = var.wait_for_capacity_timeout
wait_for_elb_capacity = var.wait_for_elb_capacity

Expand Down Expand Up @@ -930,8 +930,12 @@ resource "aws_iam_role_policy" "this" {
# Placement Group
################################################################################

locals {
create_placement_group = var.create && (local.enable_efa_support || var.create_placement_group)
}

resource "aws_placement_group" "this" {
count = local.enable_efa_support ? 1 : 0
count = local.create_placement_group ? 1 : 0

name = "${var.cluster_name}-${var.name}"
strategy = "cluster"
Expand All @@ -949,6 +953,9 @@ resource "aws_placement_group" "this" {
################################################################################

# Find the availability zones supported by the instance type
# TODO - remove at next breaking change
# Force users to be explicit about which AZ to use when using placement groups,
# with or without EFA support
data "aws_ec2_instance_type_offerings" "this" {
count = local.enable_efa_support ? 1 : 0

Expand All @@ -962,17 +969,31 @@ data "aws_ec2_instance_type_offerings" "this" {

# Reverse the lookup to find one of the subnets provided based on the availability
# availability zone ID of the queried instance type (supported)
data "aws_subnets" "efa" {
count = local.enable_efa_support ? 1 : 0
data "aws_subnets" "placement_group" {
count = local.create_placement_group ? 1 : 0

filter {
name = "subnet-id"
values = var.subnet_ids
}

filter {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
# The data source can lookup the first available AZ or you can specify an AZ (next filter)
dynamic "filter" {
for_each = local.create_placement_group && var.placement_group_az == null ? [1] : []

content {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
}
}

dynamic "filter" {
for_each = var.placement_group_az != null ? [var.placement_group_az] : []

content {
name = "availability-zone"
values = [filter.value]
}
}
}

Expand Down
12 changes: 12 additions & 0 deletions modules/self-managed-node-group/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,12 @@ variable "placement" {
default = {}
}

variable "create_placement_group" {
description = "Determines whether a placement group is created & used by the node group"
type = bool
default = false
}

variable "private_dns_name_options" {
description = "The options for the instance hostname. The default values are inherited from the subnet"
type = map(string)
Expand Down Expand Up @@ -384,6 +390,12 @@ variable "availability_zones" {
default = null
}

variable "placement_group_az" {
description = "Availability zone where placement group is created (ex. `eu-west-1c`)"
type = string
default = null
}

variable "subnet_ids" {
description = "A list of subnet IDs to launch resources in. Subnets automatically determine which availability zones the group will reside. Conflicts with `availability_zones`"
type = list(string)
Expand Down
5 changes: 4 additions & 1 deletion node_groups.tf
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,10 @@ module "eks_managed_node_group" {
enable_monitoring = try(each.value.enable_monitoring, var.eks_managed_node_group_defaults.enable_monitoring, true)
enable_efa_support = try(each.value.enable_efa_support, var.eks_managed_node_group_defaults.enable_efa_support, false)
create_placement_group = try(each.value.create_placement_group, var.eks_managed_node_group_defaults.create_placement_group, false)
placement = try(each.value.placement, var.eks_managed_node_group_defaults.placement, {})
placement_group_az = try(each.value.placement_group_az, var.eks_managed_node_group_defaults.placement_group_az, null)
placement_group_strategy = try(each.value.placement_group_strategy, var.eks_managed_node_group_defaults.placement_group_strategy, "cluster")
network_interfaces = try(each.value.network_interfaces, var.eks_managed_node_group_defaults.network_interfaces, [])
placement = try(each.value.placement, var.eks_managed_node_group_defaults.placement, {})
maintenance_options = try(each.value.maintenance_options, var.eks_managed_node_group_defaults.maintenance_options, {})
private_dns_name_options = try(each.value.private_dns_name_options, var.eks_managed_node_group_defaults.private_dns_name_options, {})

Expand Down Expand Up @@ -444,7 +445,9 @@ module "self_managed_node_group" {
context = try(each.value.context, var.self_managed_node_group_defaults.context, null)

target_group_arns = try(each.value.target_group_arns, var.self_managed_node_group_defaults.target_group_arns, [])
create_placement_group = try(each.value.create_placement_group, var.self_managed_node_group_defaults.create_placement_group, false)
placement_group = try(each.value.placement_group, var.self_managed_node_group_defaults.placement_group, null)
placement_group_az = try(each.value.placement_group_az, var.self_managed_node_group_defaults.placement_group_az, null)
health_check_type = try(each.value.health_check_type, var.self_managed_node_group_defaults.health_check_type, null)
health_check_grace_period = try(each.value.health_check_grace_period, var.self_managed_node_group_defaults.health_check_grace_period, null)

Expand Down
13 changes: 8 additions & 5 deletions tests/eks-managed-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,16 @@ module "eks" {
}
}

placement_group = {
create_placement_group = true
# forces the subnet lookup to be restricted to this availability zone
placement_group_az = element(local.azs, 3)
}

# AL2023 node group utilizing new user data format which utilizes nodeadm
# to join nodes to the cluster (instead of /etc/eks/bootstrap.sh)
al2023_nodeadm = {
ami_type = "AL2023_x86_64_STANDARD"

ami_type = "AL2023_x86_64_STANDARD"
use_latest_ami_release_version = true

cloudinit_pre_nodeadm = [
Expand Down Expand Up @@ -376,9 +381,7 @@ module "eks_managed_node_group" {

subnet_ids = module.vpc.private_subnets
cluster_primary_security_group_id = module.eks.cluster_primary_security_group_id
vpc_security_group_ids = [
module.eks.node_security_group_id,
]
vpc_security_group_ids = [module.eks.node_security_group_id]

ami_type = "BOTTLEROCKET_x86_64"

Expand Down

0 comments on commit 75db486

Please sign in to comment.