From 9e59dfb615b20c1afd13197f3d53460f0ab9d65c Mon Sep 17 00:00:00 2001 From: Ryan Emerson Date: Wed, 13 Mar 2024 15:30:35 +0000 Subject: [PATCH] Use Hosted Control Planes for ROSA to speed up cluster creation - Remove multi-az cluster create options - Remove need for ccoctl when provisioning EFS as the cloud-credential-operator is not installed on HCP clusters Resolves #673 Signed-off-by: Ryan Emerson --- .github/workflows/rosa-cluster-create.yml | 23 +- .github/workflows/rosa-cluster-delete.yml | 8 + .../rosa-multi-az-cluster-create.yml | 4 +- .gitignore | 4 + .../pages/prerequisite/prerequisite-rosa.adoc | 3 +- provision/aws/efs/.gitignore | 1 + ...aws-efs-csi-driver-credential-request.yaml | 20 -- provision/aws/efs/iam-policy.json | 44 +++ .../rds/aurora_create_peering_connection.sh | 2 +- provision/aws/rosa_create_cluster.sh | 134 ++++----- provision/aws/rosa_delete_cluster.sh | 33 +-- provision/aws/rosa_efs_create.sh | 80 ++++-- provision/aws/rosa_efs_delete.sh | 18 +- provision/aws/rosa_machine_cidr.sh | 26 -- provision/opentofu/README.md | 44 +++ provision/opentofu/destroy.sh | 17 ++ provision/opentofu/modules/rosa/README.md | 7 + .../modules/rosa/account-roles/main.tf | 96 +++++++ .../modules/rosa/account-roles/provider.tf | 17 ++ .../modules/rosa/account-roles/variables.tf | 21 ++ provision/opentofu/modules/rosa/hcp/main.tf | 221 +++++++++++++++ provision/opentofu/modules/rosa/hcp/output.tf | 9 + .../opentofu/modules/rosa/hcp/provider.tf | 37 +++ .../rosa/hcp/scripts/rosa_machine_cidr.sh | 35 +++ .../rosa/hcp/scripts}/rosa_recreate_admin.sh | 0 .../rosa/hcp/scripts/rosa_verify_network.sh | 15 ++ .../opentofu/modules/rosa/hcp/variables.tf | 82 ++++++ .../rosa/oidc-provider-operator-roles/main.tf | 255 ++++++++++++++++++ .../oidc-provider-operator-roles/output.tf | 4 + .../oidc-provider-operator-roles/provider.tf | 22 ++ .../oidc-provider-operator-roles/variables.tf | 20 ++ provision/opentofu/modules/rosa/vpc/main.tf | 159 +++++++++++ .../opentofu/modules/rosa/vpc/outputs.tf | 14 + .../opentofu/modules/rosa/vpc/variables.tf | 73 +++++ provision/opentofu/reaper.sh | 15 ++ provision/opentofu/remote-state/README.md | 7 + provision/opentofu/remote-state/main.tf | 66 +++++ 37 files changed, 1436 insertions(+), 200 deletions(-) delete mode 100644 provision/aws/efs/credentialRequests/openshift-aws-efs-csi-driver-credential-request.yaml create mode 100644 provision/aws/efs/iam-policy.json delete mode 100755 provision/aws/rosa_machine_cidr.sh create mode 100644 provision/opentofu/README.md create mode 100755 provision/opentofu/destroy.sh create mode 100644 provision/opentofu/modules/rosa/README.md create mode 100644 provision/opentofu/modules/rosa/account-roles/main.tf create mode 100644 provision/opentofu/modules/rosa/account-roles/provider.tf create mode 100644 provision/opentofu/modules/rosa/account-roles/variables.tf create mode 100644 provision/opentofu/modules/rosa/hcp/main.tf create mode 100644 provision/opentofu/modules/rosa/hcp/output.tf create mode 100644 provision/opentofu/modules/rosa/hcp/provider.tf create mode 100755 provision/opentofu/modules/rosa/hcp/scripts/rosa_machine_cidr.sh rename provision/{aws => opentofu/modules/rosa/hcp/scripts}/rosa_recreate_admin.sh (100%) create mode 100755 provision/opentofu/modules/rosa/hcp/scripts/rosa_verify_network.sh create mode 100644 provision/opentofu/modules/rosa/hcp/variables.tf create mode 100644 provision/opentofu/modules/rosa/oidc-provider-operator-roles/main.tf create mode 100644 
provision/opentofu/modules/rosa/oidc-provider-operator-roles/output.tf create mode 100644 provision/opentofu/modules/rosa/oidc-provider-operator-roles/provider.tf create mode 100644 provision/opentofu/modules/rosa/oidc-provider-operator-roles/variables.tf create mode 100644 provision/opentofu/modules/rosa/vpc/main.tf create mode 100644 provision/opentofu/modules/rosa/vpc/outputs.tf create mode 100644 provision/opentofu/modules/rosa/vpc/variables.tf create mode 100755 provision/opentofu/reaper.sh create mode 100644 provision/opentofu/remote-state/README.md create mode 100644 provision/opentofu/remote-state/main.tf diff --git a/.github/workflows/rosa-cluster-create.yml b/.github/workflows/rosa-cluster-create.yml index fc2dd66ed..b1ff23354 100644 --- a/.github/workflows/rosa-cluster-create.yml +++ b/.github/workflows/rosa-cluster-create.yml @@ -8,7 +8,6 @@ on: type: string computeMachineType: description: 'Instance type for the compute nodes' - default: m5.xlarge type: string multiAz: description: 'Deploy to multiple availability zones in the region' @@ -20,7 +19,6 @@ on: type: string replicas: description: 'Number of worker nodes to provision' - default: '2' type: string region: description: 'The AWS region to create the cluster in. Defaults to "vars.AWS_DEFAULT_REGION" if omitted.' @@ -33,23 +31,10 @@ on: type: string computeMachineType: description: 'Instance type for the compute nodes' - required: true - default: m5.xlarge - type: string - multiAz: - description: 'Deploy to multiple availability zones in the region' - required: true - default: false - type: boolean - availabilityZones: - description: 'Availability zones to deploy to' - required: false - default: '' type: string replicas: description: 'Number of worker nodes to provision' required: true - default: '2' type: string region: description: 'The AWS region to create the cluster in. Defaults to "vars.AWS_DEFAULT_REGION" if omitted.' 
@@ -76,6 +61,11 @@ jobs: aws-default-region: ${{ vars.AWS_DEFAULT_REGION }} rosa-token: ${{ secrets.ROSA_TOKEN }} + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_wrapper: false + - name: Create ROSA Cluster run: ./rosa_create_cluster.sh working-directory: provision/aws @@ -83,9 +73,8 @@ jobs: VERSION: ${{ env.OPENSHIFT_VERSION }} CLUSTER_NAME: ${{ inputs.clusterName || format('gh-{0}', github.repository_owner) }} COMPUTE_MACHINE_TYPE: ${{ inputs.computeMachineType }} - MULTI_AZ: ${{ inputs.multiAz }} - AVAILABILITY_ZONES: ${{ inputs.availabilityZones }} REPLICAS: ${{ inputs.replicas }} + TF_VAR_rhcs_token: ${{ secrets.ROSA_TOKEN }} - name: Archive ROSA logs uses: actions/upload-artifact@v4 diff --git a/.github/workflows/rosa-cluster-delete.yml b/.github/workflows/rosa-cluster-delete.yml index cf679a6b9..cce824051 100644 --- a/.github/workflows/rosa-cluster-delete.yml +++ b/.github/workflows/rosa-cluster-delete.yml @@ -45,6 +45,11 @@ jobs: with: clusterName: ${{ inputs.clusterName || format('gh-{0}', github.repository_owner) }} + - name: Setup OpenTofu + uses: opentofu/setup-opentofu@v1 + with: + tofu_wrapper: false + - name: Delete a ROSA Cluster if: ${{ inputs.deleteAll == 'no' }} shell: bash @@ -52,12 +57,15 @@ jobs: working-directory: provision/aws env: CLUSTER_NAME: ${{ inputs.clusterName || format('gh-{0}', github.repository_owner) }} + TF_VAR_rhcs_token: ${{ secrets.ROSA_TOKEN }} - name: Delete all ROSA Clusters if: ${{ inputs.deleteAll == 'yes' }} shell: bash run: ./rosa_cluster_reaper.sh working-directory: provision/aws + env: + TF_VAR_rhcs_token: ${{ secrets.ROSA_TOKEN }} - name: Archive ROSA logs uses: actions/upload-artifact@v4 diff --git a/.github/workflows/rosa-multi-az-cluster-create.yml b/.github/workflows/rosa-multi-az-cluster-create.yml index 6fb9a2a8b..d8209ffba 100644 --- a/.github/workflows/rosa-multi-az-cluster-create.yml +++ b/.github/workflows/rosa-multi-az-cluster-create.yml @@ -86,8 +86,8 @@ jobs: - name: Scale ROSA clusters run: | - rosa edit machinepool -c ${{ env.CLUSTER_PREFIX }}-a --min-replicas 3 scaling - rosa edit machinepool -c ${{ env.CLUSTER_PREFIX }}-b --min-replicas 3 scaling + rosa edit machinepool -c ${{ env.CLUSTER_PREFIX }}-a --min-replicas 3 --max-replicas 10 scaling + rosa edit machinepool -c ${{ env.CLUSTER_PREFIX }}-b --min-replicas 3 --max-replicas 10 scaling - name: Setup Go Task uses: ./.github/actions/task-setup diff --git a/.gitignore b/.gitignore index bb8279359..e4ba4e1cd 100644 --- a/.gitignore +++ b/.gitignore @@ -96,3 +96,7 @@ quarkus/data/*.db # Horreum # ########### provision/environment_data.json + +# OpenTofu / Terraform +**/*.tfstate* +**/*.terraform* diff --git a/doc/kubernetes/modules/ROOT/pages/prerequisite/prerequisite-rosa.adoc b/doc/kubernetes/modules/ROOT/pages/prerequisite/prerequisite-rosa.adoc index b85ed4f88..9ddd1d4b9 100644 --- a/doc/kubernetes/modules/ROOT/pages/prerequisite/prerequisite-rosa.adoc +++ b/doc/kubernetes/modules/ROOT/pages/prerequisite/prerequisite-rosa.adoc @@ -14,6 +14,7 @@ See <> for more information. == Prerequisites . xref:prerequisite/prerequisite-awscli.adoc[] +. Install https://opentofu.org/docs/intro/install/[OpenTofu] . Perform the steps outlined in the https://console.redhat.com/openshift/create/rosa/getstarted[ROSA installation guide]: .. Enable ROSA Service in AWS account .. Download and install the ROSA command line tool @@ -47,8 +48,6 @@ If no `ADMIN_PASSWORD` is provided in the configuration, it reads it from the AW `VERSION`:: OpenShift cluster version.
`REGION`:: AWS region where the cluster should run. `COMPUTE_MACHINE_TYPE`:: https://aws.amazon.com/ec2/instance-types/[AWS instance type] for the default OpenShift worker machine pool. -`MULTI_AZ`:: Boolean parameter to indicate whether the OpenShift cluster should span many Availability Zones within the selected region. -`AVAILABILITY_ZONES`:: Comma separated list of Availability Zones to use for the cluster. For example, `eu-central-1a,eu-central-1b`. `REPLICAS`:: Number of worker nodes. If multi-AZ installation is selected, then this needs to be a multiple of the number of AZs available in the region. For example, if the region has 3 AZs, then replicas need to be set to some multiple of 3. diff --git a/provision/aws/efs/.gitignore b/provision/aws/efs/.gitignore index 2da744985..4ba66b0fe 100644 --- a/provision/aws/efs/.gitignore +++ b/provision/aws/efs/.gitignore @@ -1,2 +1,3 @@ manifests ccoctl +iam-trust.json diff --git a/provision/aws/efs/credentialRequests/openshift-aws-efs-csi-driver-credential-request.yaml b/provision/aws/efs/credentialRequests/openshift-aws-efs-csi-driver-credential-request.yaml deleted file mode 100644 index 9c9cba5a3..000000000 --- a/provision/aws/efs/credentialRequests/openshift-aws-efs-csi-driver-credential-request.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: cloudcredential.openshift.io/v1 -kind: CredentialsRequest -metadata: - name: openshift-aws-efs-csi-driver - namespace: openshift-cloud-credential-operator -spec: - providerSpec: - apiVersion: cloudcredential.openshift.io/v1 - kind: AWSProviderSpec - statementEntries: - - action: - - elasticfilesystem:* - effect: Allow - resource: '*' - secretRef: - name: aws-efs-cloud-credentials - namespace: openshift-cluster-csi-drivers - serviceAccountNames: - - aws-efs-csi-driver-operator - - aws-efs-csi-driver-controller-sa diff --git a/provision/aws/efs/iam-policy.json b/provision/aws/efs/iam-policy.json new file mode 100644 index 000000000..a05a723ad --- /dev/null +++ b/provision/aws/efs/iam-policy.json @@ -0,0 +1,44 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "elasticfilesystem:DescribeMountTargets", + "elasticfilesystem:DescribeAccessPoints", + "elasticfilesystem:DescribeFileSystems", + "elasticfilesystem:ClientMount", + "elasticfilesystem:ClientWrite", + "elasticfilesystem:CreateTags", + "elasticfilesystem:CreateMountTarget", + "elasticfilesystem:DeleteMountTarget", + "elasticfilesystem:DeleteTags", + "elasticfilesystem:TagResource", + "elasticfilesystem:UntagResource" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticfilesystem:CreateAccessPoint" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/efs.csi.aws.com/cluster": "true" + } + } + }, + { + "Effect": "Allow", + "Action": "elasticfilesystem:DeleteAccessPoint", + "Resource": "*", + "Condition": { + "StringEquals": { + "aws:ResourceTag/efs.csi.aws.com/cluster": "true" + } + } + } + ] +} diff --git a/provision/aws/rds/aurora_create_peering_connection.sh b/provision/aws/rds/aurora_create_peering_connection.sh index ab2935fdb..f6cd9fbc0 100755 --- a/provision/aws/rds/aurora_create_peering_connection.sh +++ b/provision/aws/rds/aurora_create_peering_connection.sh @@ -64,7 +64,7 @@ aws ec2 accept-vpc-peering-connection \ # Update the ROSA Cluster VPC's Route Table ROSA_PUBLIC_ROUTE_TABLE_ID=$(aws ec2 describe-route-tables \ - --filters "Name=vpc-id,Values=${ROSA_VPC}" "Name=association.main,Values=true" \ + --filters "Name=vpc-id,Values=${ROSA_VPC}" 
"Name=tag:Name,Values=*public*" \ --query "RouteTables[*].RouteTableId" \ --output text ) diff --git a/provision/aws/rosa_create_cluster.sh b/provision/aws/rosa_create_cluster.sh index 33bfd28bb..5159e8bfd 100755 --- a/provision/aws/rosa_create_cluster.sh +++ b/provision/aws/rosa_create_cluster.sh @@ -1,100 +1,74 @@ #!/usr/bin/env bash set -e +if [[ "$RUNNER_DEBUG" == "1" ]]; then + set -x +fi + if [ -f ./.env ]; then source ./.env fi +function requiredEnv() { + for ENV in $@; do + if [ -z "${!ENV}" ]; then + echo "${ENV} variable must be set" + exit 1 + fi + done +} + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + AWS_ACCOUNT=${AWS_ACCOUNT:-$(aws sts get-caller-identity --query "Account" --output text)} -if [ -z "$AWS_ACCOUNT" ]; then echo "Variable AWS_ACCOUNT needs to be set."; exit 1; fi - -if [ -z "$VERSION" ]; then echo "Variable VERSION needs to be set."; exit 1; fi -CLUSTER_NAME=${CLUSTER_NAME:-$(whoami)} -if [ -z "$CLUSTER_NAME" ]; then echo "Variable CLUSTER_NAME needs to be set."; exit 1; fi -if [ -z "$REGION" ]; then echo "Variable REGION needs to be set."; exit 1; fi -if [ -z "$COMPUTE_MACHINE_TYPE" ]; then echo "Variable COMPUTE_MACHINE_TYPE needs to be set."; exit 1; fi - -if [ "$MULTI_AZ" = "true" ]; then MULTI_AZ_PARAM="--multi-az"; else MULTI_AZ_PARAM=""; fi -if [ -z "$AVAILABILITY_ZONES" ]; then AVAILABILITY_ZONES_PARAM=""; else AVAILABILITY_ZONES_PARAM="--availability-zones $AVAILABILITY_ZONES"; fi -if [ -z "$REPLICAS" ]; then echo "Variable REPLICAS needs to be set."; exit 1; fi - -echo "Checking if cluster ${CLUSTER_NAME} already exists." -if rosa describe cluster --cluster="${CLUSTER_NAME}"; then - echo "Cluster ${CLUSTER_NAME} already exists." -else - echo "Verifying ROSA prerequisites." - echo "Check if AWS CLI is installed."; aws --version - echo "Check if ROSA CLI is installed."; rosa version - echo "Check if ELB service role is enabled." - if ! aws iam get-role --role-name "AWSServiceRoleForElasticLoadBalancing" --no-cli-pager; then - aws iam create-service-linked-role --aws-service-name "elasticloadbalancing.amazonaws.com" - fi - rosa whoami - rosa verify quota - - echo "Installing ROSA cluster ${CLUSTER_NAME}" - - MACHINE_CIDR=$(./rosa_machine_cidr.sh) - - ROSA_CMD="rosa create cluster \ - --sts \ - --cluster-name ${CLUSTER_NAME} \ - --version ${VERSION} \ - --role-arn arn:aws:iam::${AWS_ACCOUNT}:role/ManagedOpenShift-Installer-Role \ - --support-role-arn arn:aws:iam::${AWS_ACCOUNT}:role/ManagedOpenShift-Support-Role \ - --controlplane-iam-role arn:aws:iam::${AWS_ACCOUNT}:role/ManagedOpenShift-ControlPlane-Role \ - --worker-iam-role arn:aws:iam::${AWS_ACCOUNT}:role/ManagedOpenShift-Worker-Role \ - --operator-roles-prefix ${CLUSTER_NAME} \ - --region ${REGION} ${MULTI_AZ_PARAM} ${AVAILABILITY_ZONES_PARAM} \ - --replicas ${REPLICAS} \ - --compute-machine-type ${COMPUTE_MACHINE_TYPE} \ - --machine-cidr ${MACHINE_CIDR} \ - --service-cidr 172.30.0.0/16 \ - --pod-cidr 10.128.0.0/14 \ - --host-prefix 23" - - echo $ROSA_CMD - $ROSA_CMD + +requiredEnv AWS_ACCOUNT CLUSTER_NAME REGION + +export CLUSTER_NAME=${CLUSTER_NAME:-$(whoami)} + +echo "Verifying ROSA prerequisites." +echo "Check if AWS CLI is installed."; aws --version +echo "Check if ROSA CLI is installed."; rosa version +echo "Check if ELB service role is enabled." +if ! 
aws iam get-role --role-name "AWSServiceRoleForElasticLoadBalancing" --no-cli-pager; then + aws iam create-service-linked-role --aws-service-name "elasticloadbalancing.amazonaws.com" fi +rosa whoami +rosa verify quota -mkdir -p "logs/${CLUSTER_NAME}" +echo "Installing ROSA cluster ${CLUSTER_NAME}" -function custom_date() { - date '+%Y%m%d-%H%M%S' -} +cd ${SCRIPT_DIR}/../opentofu/modules/rosa/hcp +tofu init +tofu workspace new ${CLUSTER_NAME} || true +export TF_WORKSPACE=${CLUSTER_NAME} + +TOFU_CMD="tofu apply -auto-approve \ + -var cluster_name=${CLUSTER_NAME} \ + -var region=${REGION}" + +if [ -n "${COMPUTE_MACHINE_TYPE}" ]; then + TOFU_CMD+=" -var instance_type=${COMPUTE_MACHINE_TYPE}" +fi + +if [ -n "${VERSION}" ]; then + TOFU_CMD+=" -var openshift_version=${VERSION}" +fi + +if [ -n "${REPLICAS}" ]; then + TOFU_CMD+=" -var replicas=${REPLICAS}" +fi -echo "Creating operator roles." -rosa create operator-roles --cluster "${CLUSTER_NAME}" --mode auto --yes > "logs/${CLUSTER_NAME}/$(custom_date)_create-operator-roles.log" - -echo "Creating OIDC provider." -rosa create oidc-provider --cluster "${CLUSTER_NAME}" --mode auto --yes > "logs/${CLUSTER_NAME}/$(custom_date)_create-oidc-provider.log" - -echo "Waiting for cluster installation to finish." -# There have been failures with 'ERR: Failed to watch logs for cluster ... connection reset by peer' probably because services in the cluster were restarting during the cluster initialization. -# Those errors don't show an installation problem, and installation will continue asynchronously. Therefore, retry. -TIMEOUT=$(($(date +%s) + 3600)) -while true ; do - if (rosa logs install --cluster "${CLUSTER_NAME}" --watch --tail=1000000 >> "logs/${CLUSTER_NAME}/$(custom_date)_create-cluster.log"); then - break - fi - if (( TIMEOUT < $(date +%s))); then - echo "Timeout exceeded" - exit 1 - fi - echo "retrying watching logs after failure" - sleep 1 -done - -echo "Cluster installation complete." 
-echo - -./rosa_recreate_admin.sh +echo ${TOFU_CMD} +${TOFU_CMD} SCALING_MACHINE_POOL=$(rosa list machinepools -c "${CLUSTER_NAME}" -o json | jq -r '.[] | select(.id == "scaling") | .id') if [[ "${SCALING_MACHINE_POOL}" != "scaling" ]]; then - rosa create machinepool -c "${CLUSTER_NAME}" --instance-type m5.4xlarge --max-replicas 10 --min-replicas 0 --name scaling --enable-autoscaling + rosa create machinepool -c "${CLUSTER_NAME}" --instance-type m5.4xlarge --max-replicas 10 --min-replicas 1 --name scaling --enable-autoscaling fi +cd ${SCRIPT_DIR} +./rosa_oc_login.sh ./rosa_efs_create.sh ../infinispan/install_operator.sh diff --git a/provision/aws/rosa_delete_cluster.sh b/provision/aws/rosa_delete_cluster.sh index 7fb0cc8e6..1a8ad7024 100755 --- a/provision/aws/rosa_delete_cluster.sh +++ b/provision/aws/rosa_delete_cluster.sh @@ -9,6 +9,8 @@ if [ -f ./.env ]; then source ./.env fi +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + CLUSTER_NAME=${CLUSTER_NAME:-$(whoami)} if [ -z "$CLUSTER_NAME" ]; then echo "Variable CLUSTER_NAME needs to be set."; exit 1; fi @@ -16,32 +18,5 @@ if [ -z "$CLUSTER_NAME" ]; then echo "Variable CLUSTER_NAME needs to be set."; e ./rds/aurora_delete_peering_connection.sh || true ./rosa_efs_delete.sh || true -function custom_date() { - echo "$(date '+%Y%m%d-%H%M%S')" -} - -CLUSTER_ID=$(rosa describe cluster --cluster "$CLUSTER_NAME" | grep -oPm1 "^ID:\s*\K\w+") -echo "CLUSTER_ID: $CLUSTER_ID" - -rosa delete admin --cluster $CLUSTER_ID --yes || true - -rosa delete cluster --cluster $CLUSTER_ID --yes - -mkdir -p "logs/${CLUSTER_NAME}" - -echo "Waiting for cluster uninstallation to finish." -rosa logs uninstall --cluster $CLUSTER_ID --watch --tail=1000000 > "logs/${CLUSTER_NAME}/$(custom_date)_delete-cluster.log" - -echo "Cluster uninstalled." - -# Avoid error message 'Cluster 'xxx' is in 'uninstalling' state.' for the following commands by waiting until it is gone -n=0 -until [ "$n" -ge 20 ] -do - rosa describe cluster --cluster $CLUSTER_ID || break - n=$((n+1)) - sleep 10 -done - -rosa delete operator-roles --cluster $CLUSTER_ID --mode auto --yes > "logs/${CLUSTER_NAME}/$(custom_date)_delete-operator-roles.log" || true -rosa delete oidc-provider --cluster $CLUSTER_ID --mode auto --yes > "logs/${CLUSTER_NAME}/$(custom_date)_delete-oidc-provider.log" +cd ${SCRIPT_DIR}/../opentofu/modules/rosa/hcp +./../../../destroy.sh ${CLUSTER_NAME} diff --git a/provision/aws/rosa_efs_create.sh b/provision/aws/rosa_efs_create.sh index 22c85fa5a..f8d188f5f 100755 --- a/provision/aws/rosa_efs_create.sh +++ b/provision/aws/rosa_efs_create.sh @@ -24,22 +24,70 @@ cd efs echo "Installing EFS CSI driver operator." oc apply -f aws-efs-csi-driver-operator.yaml -# We've seen that the 'oc get...' has returned two entries in the past. 
Let's make sure that everything settled before we retrieve the one pod which is ready -kubectl wait --for=condition=Available --timeout=300s -n openshift-cloud-credential-operator deployment/cloud-credential-operator -CCO_POD_NAME=$(oc get po -n openshift-cloud-credential-operator -l app=cloud-credential-operator -o jsonpath='{range .items[*]}{.status.containerStatuses[*].ready.true}{.metadata.name}{ "\n"}{end}') - -oc cp -c cloud-credential-operator openshift-cloud-credential-operator/${CCO_POD_NAME}:/usr/bin/ccoctl ./ccoctl --retries=999 - -chmod 775 ./ccoctl - -./ccoctl aws create-iam-roles --name=${CLUSTER_NAME} --region=${AWS_REGION} --credentials-requests-dir=credentialRequests/ --identity-provider-arn=arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER} +cat << EOF > iam-trust.json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "${OIDC_PROVIDER}:sub": [ + "system:serviceaccount:openshift-cluster-csi-drivers:aws-efs-csi-driver-operator", + "system:serviceaccount:openshift-cluster-csi-drivers:aws-efs-csi-driver-controller-sa" + ] + } + } + } + ] +} +EOF -# if the CredentialsRequest was processed, manifests/openshift-cluster-csi-drivers-aws-efs-cloud-credentials-credentials.yaml file should be created -# create credentials if present -if [[ -f manifests/openshift-cluster-csi-drivers-aws-efs-cloud-credentials-credentials.yaml ]]; then - oc apply -f manifests/openshift-cluster-csi-drivers-aws-efs-cloud-credentials-credentials.yaml +ROLE_NAME="${CLUSTER_NAME}-aws-efs-csi-operator" +ROLE_ARN=$(aws iam get-role \ + --role-name ${ROLE_NAME} \ + --query "Role.Arn" \ + --output text \ + || echo "" +) +if [ -z "${ROLE_ARN}" ]; then + ROLE_ARN=$(aws iam create-role \ + --role-name ${ROLE_NAME} \ + --assume-role-policy-document file://iam-trust.json \ + --query "Role.Arn" \ + --output text + ) + + POLICY_ARN=$(aws iam create-policy \ + --policy-name "${CLUSTER_NAME}-rosa-efs-csi" \ + --policy-document file://iam-policy.json \ + --query 'Policy.Arn' \ + --output text + ) + + aws iam attach-role-policy \ + --role-name ${ROLE_NAME} \ + --policy-arn ${POLICY_ARN} fi +cat < 63 )); then - echo "Maximum number of unique machine CIDRS reached" - echo ${EXISTING_MACHINE_CIDRS} - exit 1 -fi - -while true; do - CIDR="10.0.$(shuf -i 0-63 -n 1).0/24" - if [[ "${EXISTING_MACHINE_CIDRS}" != *"${CIDR}"* ]]; then - break - fi -done -echo ${CIDR} diff --git a/provision/opentofu/README.md b/provision/opentofu/README.md new file mode 100644 index 000000000..d81fd5639 --- /dev/null +++ b/provision/opentofu/README.md @@ -0,0 +1,44 @@ +# OpenTofu +This is the root directory for managing all OpenTofu state. + +## Modules +All OpenTofu modules should be created in the `./modules` folder + +## State +All root modules should use a [S3 Backend](https://opentofu.org/docs/language/settings/backends/s3/) to +store state so that it's possible for centralised management of resources created by the team. A root module should add +the following to their `providers.tf` file: + +```terraform +terraform { + backend "s3" { + bucket = "kcb-tf-state" + key = + region = "eu-west-1" + encrypt = true + dynamodb_table = "app-state" + } +} +``` +The `key` field should be String that's unique to the module and relates to the module's functionality. 
+ +To pull remote state for a given module to your local machine, execute: + +- `tofu state pull` + +### Workspaces +To isolate state created by different team members, [OpenTofu Workspaces](https://opentofu.org/docs/cli/workspaces/) +should be used where appropriate. For example, if user-specific ROSA clusters are required, a dedicated workspace should +be created for the cluster and any resources deployed to it. + +Workspace CRUD: + +- `tofu workspace list` +- `tofu workspace new/delete/select ` + +When an existing workspace is selected, you must then execute `tofu state pull` to ensure that you have the latest state +on your local machine. + +### Cleaning up old resources +The `./reaper.sh ` script calls `tofu destroy` for all workspaces associated with the module directory passed to +the script. diff --git a/provision/opentofu/destroy.sh b/provision/opentofu/destroy.sh new file mode 100755 index 000000000..3b60b72ca --- /dev/null +++ b/provision/opentofu/destroy.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -e + +if [[ "$RUNNER_DEBUG" == "1" ]]; then + set -x +fi + +WORKSPACE=$1 +echo ${WORKSPACE} +tofu init +tofu workspace select ${WORKSPACE} +tofu state pull +INPUTS=$(tofu output | sed -n 's/input_//p' | sed 's/ //g' | sed 's/^/-var /' | tr -d '"') +tofu destroy -auto-approve ${INPUTS} -lock-timeout=15m +tofu state list +tofu workspace select default +tofu workspace delete ${WORKSPACE} diff --git a/provision/opentofu/modules/rosa/README.md b/provision/opentofu/modules/rosa/README.md new file mode 100644 index 000000000..e74605036 --- /dev/null +++ b/provision/opentofu/modules/rosa/README.md @@ -0,0 +1,7 @@ +# ROSA OpenTofu Modules +In order to use the modules in this directory, it's necessary for the Red Hat Cloud Services provider token to be set +via the `rhcs_token` variable. The value of this variable should be an OpenShift Cluster Manager token, which can be accessed +in the [Red Hat Hybrid Cloud Console](https://console.redhat.com/openshift/token). + +Instead of providing the token as a variable every time `tofu` is called, it's possible to set the value via the +`TF_VAR_rhcs_token` environment variable. diff --git a/provision/opentofu/modules/rosa/account-roles/main.tf b/provision/opentofu/modules/rosa/account-roles/main.tf new file mode 100644 index 000000000..e233ba5dd --- /dev/null +++ b/provision/opentofu/modules/rosa/account-roles/main.tf @@ -0,0 +1,96 @@ +locals { + path = coalesce(var.path, "/") + account_roles_properties = [ + { + role_name = "HCP-ROSA-Installer" + role_type = "installer" + policy_details = data.rhcs_hcp_policies.all_policies.account_role_policies["sts_hcp_installer_permission_policy"] + principal_type = "AWS" + principal_identifier = "arn:aws:iam::710019948333:role/RH-Managed-OpenShift-Installer" + }, + { + role_name = "HCP-ROSA-Support" + role_type = "support" + policy_details = data.rhcs_hcp_policies.all_policies.account_role_policies["sts_hcp_support_permission_policy"] + principal_type = "AWS" + principal_identifier = "arn:aws:iam::710019948333:role/RH-Technical-Support-Access" + }, + { + role_name = "HCP-ROSA-Worker" + role_type = "instance_worker" + policy_details = data.rhcs_hcp_policies.all_policies.account_role_policies["sts_hcp_instance_worker_permission_policy"] + principal_type = "Service" + principal_identifier = "ec2.amazonaws.com" + }, + ] + account_roles_count = length(local.account_roles_properties) + account_role_prefix_valid = var.account_role_prefix != null ?
( + var.account_role_prefix + ) : ( + "account-role-${random_string.default_random[0].result}" + ) +} + +data "aws_iam_policy_document" "custom_trust_policy" { + count = local.account_roles_count + + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + principals { + type = local.account_roles_properties[count.index].principal_type + identifiers = [local.account_roles_properties[count.index].principal_identifier] + } + } +} + +module "account_iam_role" { + source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" + version = ">=5.34.0" + count = local.account_roles_count + + create_role = true + + role_name = "${local.account_role_prefix_valid}-${local.account_roles_properties[count.index].role_name}-Role" + + role_path = local.path + role_permissions_boundary_arn = "" + + create_custom_role_trust_policy = true + custom_role_trust_policy = data.aws_iam_policy_document.custom_trust_policy[count.index].json + + custom_role_policy_arns = [ + "${local.account_roles_properties[count.index].policy_details}" + ] + + tags = merge(var.tags, { + rosa_hcp_policies = true + red-hat-managed = true + rosa_role_prefix = "${local.account_role_prefix_valid}" + rosa_role_type = "${local.account_roles_properties[count.index].role_type}" + rosa_managed_policies = true + }) +} + +data "rhcs_hcp_policies" "all_policies" {} + +resource "random_string" "default_random" { + count = var.account_role_prefix != null ? 0 : 1 + + length = 4 + special = false + upper = false +} + +data "rhcs_info" "current" {} + +resource "time_sleep" "wait_10_seconds" { + destroy_duration = "10s" + create_duration = "10s" + triggers = { + account_iam_role_name = jsonencode([for value in module.account_iam_role : value.iam_role_name]) + account_roles_arn = jsonencode({ for idx, value in module.account_iam_role : local.account_roles_properties[idx].role_name => value.iam_role_arn }) + account_role_prefix = local.account_role_prefix_valid + path = var.path + } +} diff --git a/provision/opentofu/modules/rosa/account-roles/provider.tf b/provision/opentofu/modules/rosa/account-roles/provider.tf new file mode 100644 index 000000000..41b220c08 --- /dev/null +++ b/provision/opentofu/modules/rosa/account-roles/provider.tf @@ -0,0 +1,17 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.38.0" + } + rhcs = { + source = "terraform-redhat/rhcs" + version = "1.6.0-prerelease.1" + } + } +} + +provider "rhcs" { + token = var.rhcs_token + url = "https://api.stage.openshift.com" +} diff --git a/provision/opentofu/modules/rosa/account-roles/variables.tf b/provision/opentofu/modules/rosa/account-roles/variables.tf new file mode 100644 index 000000000..c0b2bff61 --- /dev/null +++ b/provision/opentofu/modules/rosa/account-roles/variables.tf @@ -0,0 +1,21 @@ +variable "rhcs_token" { + type = string + sensitive = true +} + +variable "account_role_prefix" { + type = string + default = null +} + +variable "path" { + description = "(Optional) The arn path for the account/operator roles as well as their policies." + type = string + default = "/" +} + +variable "tags" { + description = "List of AWS resource tags to apply." 
+ type = map(string) + default = null +} diff --git a/provision/opentofu/modules/rosa/hcp/main.tf b/provision/opentofu/modules/rosa/hcp/main.tf new file mode 100644 index 000000000..7b1f04fa7 --- /dev/null +++ b/provision/opentofu/modules/rosa/hcp/main.tf @@ -0,0 +1,221 @@ +data "aws_availability_zones" "available" { + filter { + name = "opt-in-status" + values = ["opt-in-not-required"] + } +} + +data "aws_availability_zones" "available_azs" { + state = "available" + filter { + name = "opt-in-status" + # Currently, no support for Local Zones, Wavelength, or Outpost + values = ["opt-in-not-required"] + } +} + +locals { + supported_regions = { + "ap-northeast-1" = "apne1" + "ap-southeast-1" = "apse1" + "ap-southeast-2" = "apse2" + "ap-southeast-3" = "apse3" + "ap-southeast-4" = "apse4" + "ca-central-1" = "cac1" + "eu-central-1" = "euc1" + "eu-west-1" = "euw1" + "us-east-1" = "use1" + "us-east-2" = "use2" + "us-west-2" = "usw2" + "ap-south-2" = "aps2" + } + well_known_az_ids = { + us-east-1 = [2, 4, 6] + ap-northeast-1 = [1, 2, 4] + } + azs = slice(data.aws_availability_zones.available.names, 0, 1) + # If an AZ has a single hyphen, it's an AZ ID + az_ids = [for az in local.azs : az if length(split("-", az)) == 2] + # If an AZ has a two hyphens, it's an AZ name + az_names = [for az in local.azs : az if length(split("-", az)) == 3] + + vpc_cidr_prefix = tonumber(split("/", var.vpc_cidr)[1]) + subnet_newbits = var.subnet_cidr_prefix - local.vpc_cidr_prefix + subnet_count = var.private_subnets_only ? length(local.azs) : length(local.azs) * 2 + + account_role_prefix = var.cluster_name + operator_role_prefix = var.cluster_name +} + +# Performing multi-input validations in null_resource block +# https://github.com/hashicorp/terraform/issues/25609 +resource "null_resource" "validations" { + lifecycle { + precondition { + condition = lookup(local.supported_regions, var.region, null) != null + error_message = <<-EOT + ROSA with hosted control planes is currently only available in these regions: + ${join(", ", keys(local.supported_regions))}. + EOT + } + + precondition { + condition = local.vpc_cidr_prefix <= var.subnet_cidr_prefix + error_message = "Subnet CIDR prefix must be smaller prefix (larger number) than the VPC CIDR prefix." + } + + precondition { + condition = (length(local.az_ids) > 0 && length(local.az_names) == 0) || (length(local.az_ids) == 0 && length(local.az_names) > 0) + error_message = <<-EOT + Make sure to provide subnet_azs in either name format OR zone ID, do not mix and match. 
+ E.g., us-east-1a,us-east-1b OR use1-az1,use1-az2 + EOT + } + + precondition { + condition = local.subnet_count <= pow(2, local.subnet_newbits) + error_message = <<-EOT + The size of available IP space is not enough to accomodate the expected number of subnets: + Try increasing the size of your VPC CIDR, e.g., 10.0.0.0/16 -> 10.0.0.0/14 + Or try decreasing the size of your Subnet Prefix, e.g., 24 -> 28 + EOT + } + + precondition { + condition = alltrue([ + for name in local.az_names :contains(data.aws_availability_zones.available_azs.names, name) + ]) + error_message = <<-EOT + ROSA with hosted control planes in region ${var.region} does not currently support availability zone name(s): + ${join(", ", [for name in local.az_names : name if !contains(data.aws_availability_zones.available_azs.names, name)])} + EOT + } + + precondition { + condition = alltrue([ + for id in local.az_ids :contains(data.aws_availability_zones.available_azs.zone_ids, id) + ]) + error_message = <<-EOT + ROSA with hosted control planes in region ${var.region} does not currently support availability zone ID(s): + ${join(", ", [for id in local.az_ids : id if !contains(data.aws_availability_zones.available_azs.zone_ids, id)])} + EOT + } + } +} + +module "account-roles" { + source = "../account-roles" + + account_role_prefix = local.account_role_prefix + path = var.path + rhcs_token = var.rhcs_token +} + +module "operator-roles" { + source = "../oidc-provider-operator-roles" + + oidc_config = "managed" + operator_role_prefix = local.operator_role_prefix + path = var.path +} + +data "external" "rosa" { + program = [ + "bash", "${path.module}/scripts/rosa_machine_cidr.sh" + ] + query = { + cluster_name = var.cluster_name + } +} + +module "vpc" { + source = "../vpc" + # This module doesn't really depend on these modules, but ensuring these are executed first lets us fail-fast if there + # are issues with the roles and prevents us having to wait for a VPC to be provisioned before errors are reported + depends_on = [module.account-roles, module.operator-roles] + + cluster_name = var.cluster_name + region = var.region + subnet_azs = local.azs + subnet_cidr_prefix = 28 + vpc_cidr = data.external.rosa.result.cidr +} + +data "aws_caller_identity" "current" { +} + +locals { + account_role_path = coalesce(var.path, "/") + sts_roles = { + role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.account_role_path}${local.account_role_prefix}-HCP-ROSA-Installer-Role", + support_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.account_role_path}${local.account_role_prefix}-HCP-ROSA-Support-Role", + instance_iam_roles = { + worker_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.account_role_path}${local.account_role_prefix}-HCP-ROSA-Worker-Role" + } + operator_role_prefix = local.operator_role_prefix, + oidc_config_id = module.operator-roles.oidc_config_id + } +} + +resource "rhcs_cluster_rosa_hcp" "rosa_hcp_cluster" { + availability_zones = local.azs + aws_account_id = data.aws_caller_identity.current.account_id + aws_billing_account_id = data.aws_caller_identity.current.account_id + aws_subnet_ids = module.vpc.cluster-subnets + cloud_region = var.region + machine_cidr = data.external.rosa.result.cidr + name = var.cluster_name + properties = merge( + { + rosa_creator_arn = data.aws_caller_identity.current.arn + }, + ) + sts = local.sts_roles + replicas = var.replicas + version = var.openshift_version + wait_for_create_complete = true +} + 
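+# The cluster above consumes the account/operator roles and the VPC created by the modules earlier in this file; +# 'wait_for_create_complete = true' makes the apply block until cluster creation has finished, while the admin user +# and network verification are handled by the local-exec provisioners defined below.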
+# TODO +#resource "rhcs_hcp_machine_pool" "my_pool" { +# cluster = rhcs_cluster_rosa_hcp.rosa_hcp_cluster.name +# name = "scaling" +# aws_node_pool = { +# instance_type = var.instance_type +# } +# autoscaling = { +# enabled = true, +# max_replicas = 10, +# min_replicas = 0 +# } +# subnet_id = module.vpc.cluster-public-subnets[0] +#} + +resource "rhcs_cluster_wait" "rosa_cluster" { + cluster = rhcs_cluster_rosa_hcp.rosa_hcp_cluster.id + timeout = 30 +} + +resource "null_resource" "create_admin" { + depends_on = [rhcs_cluster_wait.rosa_cluster] + provisioner "local-exec" { + command = "./scripts/rosa_recreate_admin.sh" + environment = { + CLUSTER_NAME = var.cluster_name + } + interpreter = ["bash"] + working_dir = path.module + } +} + +resource "null_resource" "rosa_verify_network" { + depends_on = [null_resource.create_admin] + provisioner "local-exec" { + command = "./scripts/rosa_verify_network.sh" + environment = { + CLUSTER_NAME = var.cluster_name + } + interpreter = ["bash"] + working_dir = path.module + } +} diff --git a/provision/opentofu/modules/rosa/hcp/output.tf b/provision/opentofu/modules/rosa/hcp/output.tf new file mode 100644 index 000000000..fe2c1d417 --- /dev/null +++ b/provision/opentofu/modules/rosa/hcp/output.tf @@ -0,0 +1,9 @@ +output "input_cluster_name" { + value = var.cluster_name + description = "The name of the created ROSA hosted control planes cluster." +} + +output "input_region" { + value = var.region + description = "The region AWS resources created in." +} diff --git a/provision/opentofu/modules/rosa/hcp/provider.tf b/provision/opentofu/modules/rosa/hcp/provider.tf new file mode 100644 index 000000000..41741d690 --- /dev/null +++ b/provision/opentofu/modules/rosa/hcp/provider.tf @@ -0,0 +1,37 @@ +terraform { + backend "s3" { + bucket = "kcb-tf-state" + key = "vpc" + region = "eu-west-1" + encrypt = true + dynamodb_table = "app-state" + } + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.38.0" + } + null = { + source = "hashicorp/null" + version = "~> 3.2" + } + rhcs = { + source = "terraform-redhat/rhcs" + version = "1.6.0-prerelease.2" + } + } + + required_version = ">= 1.4.0" +} + +provider "aws" { + region = var.region + # Force set sts_region to preventing hanging on invalid regions + sts_region = "us-east-1" +} + +provider "rhcs" { + token = var.rhcs_token + url = "https://api.openshift.com" +} diff --git a/provision/opentofu/modules/rosa/hcp/scripts/rosa_machine_cidr.sh b/provision/opentofu/modules/rosa/hcp/scripts/rosa_machine_cidr.sh new file mode 100755 index 000000000..d5cdfd442 --- /dev/null +++ b/provision/opentofu/modules/rosa/hcp/scripts/rosa_machine_cidr.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -e + +if [[ "$RUNNER_DEBUG" == "1" ]]; then + set -x +fi + +eval "$(jq -r '@sh "CLUSTER_NAME=\(.cluster_name)"')" + +if [ -z "$CLUSTER_NAME" ]; then echo "'cluster_name' needs to be present in input JSON."; exit 1; fi + +# https://access.redhat.com/documentation/en-us/red_hat_openshift_service_on_aws/4/html/networking/cidr-range-definitions +# Must not overlap with Pod CDR: 10.128.0.0/14 +# Must not overlap with OVN Kubernetes: 100.64.0.0/16 + +CLUSTERS=$(rosa list clusters -o json) +CLUSTER=$(echo $CLUSTERS | jq ".[] | select(.name == \"${CLUSTER_NAME}\")") +if [ -n "${CLUSTER}" ]; then + CIDR=$(echo ${CLUSTER} | jq -r .network.machine_cidr) +else + EXISTING_MACHINE_CIDRS=$(echo ${CLUSTERS} | jq -r ".[].network.machine_cidr" | sort -u) + if (( $(echo ${EXISTING_MACHINE_CIDRS} | wc -l) > 63 )); then + echo "Maximum 
number of unique machine CIDRS reached" + echo ${EXISTING_MACHINE_CIDRS} + exit 1 + fi + + while true; do + CIDR="10.0.$(shuf -i 0-63 -n 1).0/24" + if [[ "${EXISTING_MACHINE_CIDRS}" != *"${CIDR}"* ]]; then + break + fi + done +fi +echo "{\"cidr\":\"${CIDR}\"}" diff --git a/provision/aws/rosa_recreate_admin.sh b/provision/opentofu/modules/rosa/hcp/scripts/rosa_recreate_admin.sh similarity index 100% rename from provision/aws/rosa_recreate_admin.sh rename to provision/opentofu/modules/rosa/hcp/scripts/rosa_recreate_admin.sh diff --git a/provision/opentofu/modules/rosa/hcp/scripts/rosa_verify_network.sh b/provision/opentofu/modules/rosa/hcp/scripts/rosa_verify_network.sh new file mode 100755 index 000000000..6cb29fcb3 --- /dev/null +++ b/provision/opentofu/modules/rosa/hcp/scripts/rosa_verify_network.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +if [[ "$RUNNER_DEBUG" == "1" ]]; then + set -x +fi + +if [ -z "$CLUSTER_NAME" ]; then echo "Variable CLUSTER_NAME needs to be set."; exit 1; fi + +CLUSTER=$(rosa describe cluster -c ${CLUSTER_NAME} -o json) +REGION=$(echo ${CLUSTER} | jq -r '.region.id') +SUBNETS=$(echo ${CLUSTER} | jq -r '.aws.subnet_ids | join(",")') +# Explicitly verify network as the initial "Inflight check" for egress regularly fails, but passes on subsequent verification +rosa verify network --cluster ${CLUSTER_NAME} +rosa verify network --watch --status-only --region ${REGION} --subnet-ids ${SUBNETS} diff --git a/provision/opentofu/modules/rosa/hcp/variables.tf b/provision/opentofu/modules/rosa/hcp/variables.tf new file mode 100644 index 000000000..a0133d22e --- /dev/null +++ b/provision/opentofu/modules/rosa/hcp/variables.tf @@ -0,0 +1,82 @@ +variable "rhcs_token" { + type = string + sensitive = true +} + +variable "region" { + description = <<-EOT + Region to create AWS infrastructure resources for a + ROSA with hosted control planes cluster. (required) + EOT + type = string +} + +variable "path" { + description = "(Optional) The arn path for the account/operator roles as well as their policies." + type = string + default = null +} + +variable "cluster_name" { + description = "Name of the ROSA hosted control planes cluster to be created." + type = string + default = "rosa-hcp" + + validation { + condition = can(regex("^[a-z][-a-z0-9]{0,13}[a-z0-9]$", var.cluster_name)) + error_message = <<-EOT + ROSA cluster names must be less than 16 characters. + May only contain lower case, alphanumeric, or hyphens characters. + EOT + } +} + +variable "vpc_cidr" { + description = <<-EOT + IPv4 CIDR netmask for the VPC resource. + This should equal or include the cluster's Machine CIDR netmask. + EOT + type = string + default = "10.0.0.0/16" + + validation { + condition = can(cidrnetmask(var.vpc_cidr)) + error_message = "VPC CIDR must be a valid CIDR netmask, e.g., '10.0.0.0/16'." + } +} + +variable "subnet_cidr_prefix" { + description = <<-EOT + The CIDR prefix value to use when dividing up the VPC CIDR range into subnet ranges. + E.g., 24 to create equal subnets of size "24": 10.0.1.0/24, 10.0.2.0/24, etc. 
+ EOT + type = number + default = 24 +} + +variable "private_subnets_only" { + description = "Only create private subnets" + type = bool + default = false +} + +variable "extra_tags" { + description = "Extra tags to apply to AWS resources" + type = map + default = {} +} + +variable "openshift_version" { + type = string + default = "4.14.5" +} + +variable "instance_type" { + type = string + default = "m5.4xlarge" +} + +variable "replicas" { + type = number + default = 2 +} diff --git a/provision/opentofu/modules/rosa/oidc-provider-operator-roles/main.tf b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/main.tf new file mode 100644 index 000000000..528758c2b --- /dev/null +++ b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/main.tf @@ -0,0 +1,255 @@ +data "aws_caller_identity" "current" {} + +########################################################################### +## Below is taken from rhcs-hcp module `oidc-config-and-provider` +########################################################################### + +locals { + path = coalesce(var.path, "/") + managed = var.oidc_config == "managed" +} + +resource "rhcs_rosa_oidc_config" "oidc_config" { + managed = local.managed + secret_arn = local.managed ? null : module.aws_secrets_manager[0].secret_arn + issuer_url = local.managed ? null : rhcs_rosa_oidc_config_input.oidc_input[0].issuer_url +} + +resource "aws_iam_openid_connect_provider" "oidc_provider" { + url = "https://${rhcs_rosa_oidc_config.oidc_config.oidc_endpoint_url}" + + client_id_list = [ + "openshift", + "sts.amazonaws.com" + ] + + tags = var.tags + + thumbprint_list = [rhcs_rosa_oidc_config.oidc_config.thumbprint] +} + +module "aws_s3_bucket" { + source = "terraform-aws-modules/s3-bucket/aws" + version = ">=4.1.0" + + count = local.managed ? 0 : 1 + + bucket = rhcs_rosa_oidc_config_input.oidc_input[count.index].bucket_name + tags = merge(var.tags, { + red-hat-managed = true + }) + + block_public_acls = true + ignore_public_acls = true + block_public_policy = false + restrict_public_buckets = false + + attach_policy = true + policy = data.aws_iam_policy_document.allow_access_from_another_account[count.index].json +} + +data "aws_iam_policy_document" "allow_access_from_another_account" { + count = local.managed ? 0 : 1 + + statement { + principals { + identifiers = ["*"] + type = "*" + } + sid = "AllowReadPublicAccess" + effect = "Allow" + actions = [ + "s3:GetObject", + ] + + resources = [ + format("arn:aws:s3:::%s/*", rhcs_rosa_oidc_config_input.oidc_input[count.index].bucket_name), + ] + } +} + +resource "rhcs_rosa_oidc_config_input" "oidc_input" { + count = local.managed ? 0 : 1 + + region = data.aws_region.current.name +} + +module "aws_secrets_manager" { + source = "terraform-aws-modules/secrets-manager/aws" + version = ">=1.1.1" + + count = local.managed ? 0 : 1 + + create = true + name = rhcs_rosa_oidc_config_input.oidc_input[count.index].private_key_secret_name + description = format("Secret for %s", rhcs_rosa_oidc_config_input.oidc_input[count.index].private_key_secret_name) + + tags = merge(var.tags, { + red-hat-managed = true + }) + + secret_string = rhcs_rosa_oidc_config_input.oidc_input[count.index].private_key +} + +resource "aws_s3_object" "discrover_doc_object" { + count = local.managed ? 
0 : 1 + + bucket = module.aws_s3_bucket[count.index].s3_bucket_id + key = ".well-known/openid-configuration" + content = rhcs_rosa_oidc_config_input.oidc_input[count.index].discovery_doc + content_type = "application/json" + + tags = merge(var.tags, { + red-hat-managed = true + }) +} + +resource "aws_s3_object" "s3_object" { + count = local.managed ? 0 : 1 + + bucket = module.aws_s3_bucket[count.index].s3_bucket_id + key = "keys.json" + content = rhcs_rosa_oidc_config_input.oidc_input[count.index].jwks + content_type = "application/json" + + tags = merge(var.tags, { + red-hat-managed = true + }) +} + +data "aws_region" "current" {} + +resource "time_sleep" "wait_10_seconds" { + create_duration = "10s" + destroy_duration = "10s" + triggers = { + oidc_config_id = rhcs_rosa_oidc_config.oidc_config.id + oidc_endpoint_url = rhcs_rosa_oidc_config.oidc_config.oidc_endpoint_url + oidc_provider_url = aws_iam_openid_connect_provider.oidc_provider.url + discrover_doc_object = local.managed ? null : aws_s3_object.discrover_doc_object[0].checksum_sha1 + s3_object = local.managed ? null : aws_s3_object.s3_object[0].checksum_sha1 + } +} + +########################################################################### +## Below is taken from rhcs-hcp module `operator-roles` +########################################################################### + +locals { + operator_roles_properties = [ + { + operator_name = "installer-cloud-credentials" + operator_namespace = "openshift-image-registry" + role_name = "openshift-image-registry-installer-cloud-credentials" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_image_registry_installer_cloud_credentials_policy"] + service_accounts = ["system:serviceaccount:openshift-image-registry:cluster-image-registry-operator", "system:serviceaccount:openshift-image-registry:registry"] + }, + { + operator_name = "cloud-credentials" + operator_namespace = "openshift-ingress-operator" + role_name = "openshift-ingress-operator-cloud-credentials" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_ingress_operator_cloud_credentials_policy"] + service_accounts = ["system:serviceaccount:openshift-ingress-operator:ingress-operator"] + }, + { + operator_name = "ebs-cloud-credentials" + operator_namespace = "openshift-cluster-csi-drivers" + role_name = "openshift-cluster-csi-drivers-ebs-cloud-credentials" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_cluster_csi_drivers_ebs_cloud_credentials_policy"] + service_accounts = ["system:serviceaccount:openshift-cluster-csi-drivers:aws-ebs-csi-driver-operator", "system:serviceaccount:openshift-cluster-csi-drivers:aws-ebs-csi-driver-controller-sa"] + }, + { + operator_name = "cloud-credentials" + operator_namespace = "openshift-cloud-network-config-controller" + role_name = "openshift-cloud-network-config-controller-cloud-credentials" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_cloud_network_config_controller_cloud_credentials_policy"] + service_accounts = ["system:serviceaccount:openshift-cloud-network-config-controller:cloud-network-config-controller"] + }, + { + operator_name = "kube-controller-manager" + operator_namespace = "kube-system" + role_name = "kube-system-kube-controller-manager" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_kube_controller_manager_credentials_policy"] + service_accounts = 
["system:serviceaccount:kube-system:kube-controller-manager"] + }, + { + operator_name = "capa-controller-manager" + operator_namespace = "kube-system" + role_name = "kube-system-capa-controller-manager" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_capa_controller_manager_credentials_policy"] + service_accounts = ["system:serviceaccount:kube-system:capa-controller-manager"] + }, + { + operator_name = "control-plane-operator" + operator_namespace = "kube-system" + role_name = "kube-system-control-plane-operator" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_control_plane_operator_credentials_policy"] + service_accounts = ["system:serviceaccount:kube-system:control-plane-operator"] + }, + { + operator_name = "kms-provider" + operator_namespace = "kube-system" + role_name = "kube-system-kms-provider" + policy_details = data.rhcs_hcp_policies.all_policies.operator_role_policies["openshift_hcp_kms_provider_credentials_policy"] + service_accounts = ["system:serviceaccount:kube-system:kms-provider"] + }, + ] + operator_roles_count = length(local.operator_roles_properties) +} + +data "aws_iam_policy_document" "custom_trust_policy" { + count = local.operator_roles_count + + statement { + effect = "Allow" + actions = ["sts:AssumeRoleWithWebIdentity"] + principals { + type = "Federated" + identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${rhcs_rosa_oidc_config.oidc_config.oidc_endpoint_url}"] + } + condition { + test = "StringEquals" + variable = "${rhcs_rosa_oidc_config.oidc_config.oidc_endpoint_url}:sub" + values = local.operator_roles_properties[count.index].service_accounts + } + } +} + +module "operator_iam_role" { + source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" + version = ">=5.34.0" + count = local.operator_roles_count + + create_role = true + + role_name = substr("${var.operator_role_prefix}-${local.operator_roles_properties[count.index].operator_namespace}-${local.operator_roles_properties[count.index].operator_name}", 0, 64) + + role_path = var.path + role_permissions_boundary_arn = "" + + create_custom_role_trust_policy = true + custom_role_trust_policy = data.aws_iam_policy_document.custom_trust_policy[count.index].json + + custom_role_policy_arns = [ + "${local.operator_roles_properties[count.index].policy_details}" + ] + + tags = merge(var.tags, { + rosa_managed_policies = true + rosa_hcp_policies = true + red-hat-managed = true + operator_namespace = "${local.operator_roles_properties[count.index].operator_namespace}" + operator_name = "${local.operator_roles_properties[count.index].operator_name}" + }) +} + +data "rhcs_hcp_policies" "all_policies" {} +data "rhcs_info" "current" {} + +resource "time_sleep" "role_resources_propagation" { + create_duration = "20s" + triggers = { + operator_role_prefix = var.operator_role_prefix + operator_role_arns = jsonencode([for value in module.operator_iam_role : value.iam_role_arn]) + } +} diff --git a/provision/opentofu/modules/rosa/oidc-provider-operator-roles/output.tf b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/output.tf new file mode 100644 index 000000000..8d2b57637 --- /dev/null +++ b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/output.tf @@ -0,0 +1,4 @@ +output "oidc_config_id" { + value = time_sleep.wait_10_seconds.triggers["oidc_config_id"] + description = "The unique identifier associated with users authenticated through OpenID Connect (OIDC) 
generated by this OIDC config." +} diff --git a/provision/opentofu/modules/rosa/oidc-provider-operator-roles/provider.tf b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/provider.tf new file mode 100644 index 000000000..0e7179d54 --- /dev/null +++ b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/provider.tf @@ -0,0 +1,22 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.38.0" + } + rhcs = { + source = "terraform-redhat/rhcs" + version = "1.6.0-prerelease.1" + } + time = { + source = "hashicorp/time" + version = ">= 0.9" + } + null = { + source = "hashicorp/null" + version = ">= 3.0.0" + } + } +} diff --git a/provision/opentofu/modules/rosa/oidc-provider-operator-roles/variables.tf b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/variables.tf new file mode 100644 index 000000000..87eb6b789 --- /dev/null +++ b/provision/opentofu/modules/rosa/oidc-provider-operator-roles/variables.tf @@ -0,0 +1,20 @@ +variable "operator_role_prefix" { + type = string +} + +variable "oidc_config" { + type = string + default = "" +} + +variable "path" { + description = "(Optional) The arn path for the account/operator roles as well as their policies." + type = string + default = null +} + +variable "tags" { + type = map(string) + default = null + description = "List of AWS resource tags to apply." +} diff --git a/provision/opentofu/modules/rosa/vpc/main.tf b/provision/opentofu/modules/rosa/vpc/main.tf new file mode 100644 index 000000000..d7d7566da --- /dev/null +++ b/provision/opentofu/modules/rosa/vpc/main.tf @@ -0,0 +1,159 @@ +locals { + supported_regions = { + "ap-northeast-1" = "apne1" + "ap-southeast-1" = "apse1" + "ap-southeast-2" = "apse2" + "ap-southeast-3" = "apse3" + "ap-southeast-4" = "apse4" + "ca-central-1" = "cac1" + "eu-central-1" = "euc1" + "eu-west-1" = "euw1" + "us-east-1" = "use1" + "us-east-2" = "use2" + "us-west-2" = "usw2" + "ap-south-2" = "aps2" + } + well_known_az_ids = { + us-east-1 = [2, 4, 6] + ap-northeast-1 = [1, 2, 4] + } + az_id_prefix = lookup(local.supported_regions, var.region, null) != null ? "${local.supported_regions[var.region]}-az" : "unknown-az" + azs = ( + length(var.subnet_azs) > 0 ? + (var.single_az_only ? [var.subnet_azs[0]] : var.subnet_azs) : + (var.single_az_only ? + ["${local.az_id_prefix}${lookup(local.well_known_az_ids, var.region, [1, 2, 3])[0]}"] : + [for id in lookup(local.well_known_az_ids, var.region, [1, 2, 3]) : "${local.az_id_prefix}${id}"] + ) + ) + # If an AZ has a single hyphen, it's an AZ ID + az_ids = [for az in local.azs : az if length(split("-", az)) == 2] + # If an AZ has a two hyphens, it's an AZ name + az_names = [for az in local.azs : az if length(split("-", az)) == 3] + + vpc_cidr_prefix = tonumber(split("/", var.vpc_cidr)[1]) + subnet_newbits = var.subnet_cidr_prefix - local.vpc_cidr_prefix + subnet_count = var.private_subnets_only ? length(local.azs) : length(local.azs) * 2 + all_subnets = ( + var.private_subnets_only ? + [ + local.subnet_newbits == 0 ? + [var.vpc_cidr] : + cidrsubnets(var.vpc_cidr, [for i in range(length(local.azs)) : local.subnet_newbits]...), [] + ] : + [ + for cidr_block in cidrsubnets(var.vpc_cidr, 1, 1) : + local.subnet_newbits == 1 ? + [cidr_block] : + cidrsubnets(cidr_block, [for i in range(length(local.azs)) : local.subnet_newbits - 1]...) 
+    ]
+  )
+}
+
+
+# Performing multi-input validations in null_resource block
+# https://github.com/hashicorp/terraform/issues/25609
+resource "null_resource" "validations" {
+  lifecycle {
+    precondition {
+      condition     = lookup(local.supported_regions, var.region, null) != null
+      error_message = <<-EOT
+      ROSA with hosted control planes is currently only available in these regions:
+      ${join(", ", keys(local.supported_regions))}.
+      EOT
+    }
+
+    precondition {
+      condition     = local.vpc_cidr_prefix <= var.subnet_cidr_prefix
+      error_message = "The subnet CIDR prefix must be numerically greater than or equal to the VPC CIDR prefix, i.e., each subnet must be the same size as or smaller than the VPC."
+    }
+
+    precondition {
+      condition     = !(var.single_az_only && length(var.subnet_azs) > 1)
+      error_message = <<-EOT
+      It is invalid to supply more than one entry in `subnet_azs` while also specifying `single_az_only=true` (the default).
+      To use more than one availability zone, set `-var single_az_only=false`.
+      Or set `-var 'subnet_azs=["${length(var.subnet_azs) > 0 ? var.subnet_azs[0] : "none"}"]'`
+      EOT
+    }
+
+    precondition {
+      condition     = (length(local.az_ids) > 0 && length(local.az_names) == 0) || (length(local.az_ids) == 0 && length(local.az_names) > 0)
+      error_message = <<-EOT
+      Provide subnet_azs either as AZ names OR as AZ IDs; do not mix the two formats.
+      E.g., us-east-1a,us-east-1b OR use1-az1,use1-az2
+      EOT
+    }
+
+    precondition {
+      condition     = local.subnet_count <= pow(2, local.subnet_newbits)
+      error_message = <<-EOT
+      The available IP space is not large enough to accommodate the expected number of subnets:
+      Try increasing the size of your VPC CIDR, e.g., 10.0.0.0/16 -> 10.0.0.0/14
+      Or try decreasing the size of your Subnet Prefix, e.g., 24 -> 28
+      EOT
+    }
+
+    precondition {
+      condition     = alltrue([for name in local.az_names : contains(data.aws_availability_zones.available_azs.names, name)])
+      error_message = <<-EOT
+      ROSA with hosted control planes in region ${var.region} does not currently support availability zone name(s):
+      ${join(", ", [for name in local.az_names : name if !contains(data.aws_availability_zones.available_azs.names, name)])}
+      EOT
+    }
+
+    precondition {
+      condition     = alltrue([for id in local.az_ids : contains(data.aws_availability_zones.available_azs.zone_ids, id)])
+      error_message = <<-EOT
+      ROSA with hosted control planes in region ${var.region} does not currently support availability zone ID(s):
+      ${join(", ", [for id in local.az_ids : id if !contains(data.aws_availability_zones.available_azs.zone_ids, id)])}
+      EOT
+    }
+  }
+}
+
+
+data "aws_availability_zones" "available_azs" {
+  state = "available"
+  filter {
+    name = "opt-in-status"
+    # Currently, no support for Local Zones, Wavelength, or Outpost
+    values = ["opt-in-not-required"]
+  }
+}
+
+
+module "vpc" {
+  depends_on = [resource.null_resource.validations]
+
+  source  = "terraform-aws-modules/vpc/aws"
+  version = "~> 4.0.0"
+
+  name = "${var.cluster_name}-vpc"
+  cidr = var.vpc_cidr
+
+  azs             = local.azs
+  private_subnets = local.all_subnets[0]
+  public_subnets  = local.all_subnets[1]
+  # Tags defined per https://repost.aws/knowledge-center/eks-vpc-subnet-discovery
+  private_subnet_tags = merge(var.extra_tags,
+    {
+      "kubernetes.io/role/internal-elb" = "1"
+  })
+  public_subnet_tags = merge(var.extra_tags,
+    {
+      "kubernetes.io/role/elb" = "1"
+  })
+
+  enable_nat_gateway            = true
+  enable_dns_hostnames          = true
+  enable_dns_support            = true
+  manage_default_security_group = false
+
+  tags = merge(var.extra_tags,
+    {
+      Terraform    = "true"
+      service      = "ROSA"
+      cluster_name = var.cluster_name
+  })
+}
diff --git a/provision/opentofu/modules/rosa/vpc/outputs.tf b/provision/opentofu/modules/rosa/vpc/outputs.tf
new file mode 100644
index 000000000..4b9d7e016
--- /dev/null
+++ b/provision/opentofu/modules/rosa/vpc/outputs.tf
@@ -0,0 +1,14 @@
+output "cluster-private-subnets" {
+  value       = module.vpc.private_subnets
+  description = "List of private subnet IDs created."
+}
+
+output "cluster-public-subnets" {
+  value       = module.vpc.public_subnets
+  description = "List of public subnet IDs created."
+}
+
+output "cluster-subnets" {
+  value       = concat(module.vpc.public_subnets, module.vpc.private_subnets)
+  description = "List of both public and private subnets."
+}
diff --git a/provision/opentofu/modules/rosa/vpc/variables.tf b/provision/opentofu/modules/rosa/vpc/variables.tf
new file mode 100644
index 000000000..665bd38c2
--- /dev/null
+++ b/provision/opentofu/modules/rosa/vpc/variables.tf
@@ -0,0 +1,73 @@
+variable "region" {
+  description = <<-EOT
+  Region to create AWS infrastructure resources for a
+  ROSA with hosted control planes cluster. (required)
+  EOT
+  type        = string
+}
+
+variable "subnet_azs" {
+  # Usage: -var 'subnet_azs=["us-east-1a"]' or -var 'subnet_azs=["use1-az1"]'
+  description = <<-EOT
+  List of availability zone names or IDs that subnets can get deployed into.
+  If not provided, defaults to well known AZ IDs for each region.
+  Does not currently support Local Zones, Outpost, or Wavelength.
+  EOT
+  type        = list(string)
+  default     = []
+}
+
+variable "cluster_name" {
+  description = "Name of the created ROSA with hosted control planes cluster."
+  type        = string
+  default     = "rosa-hcp"
+
+  validation {
+    condition     = can(regex("^[a-z][-a-z0-9]{0,13}[a-z0-9]$", var.cluster_name))
+    error_message = <<-EOT
+    ROSA cluster names must be less than 16 characters.
+    They may only contain lower case alphanumeric characters and hyphens.
+    EOT
+  }
+}
+
+variable "vpc_cidr" {
+  description = <<-EOT
+  IPv4 CIDR netmask for the VPC resource.
+  This should equal or include the cluster's Machine CIDR netmask.
+  EOT
+  type        = string
+  default     = "10.0.0.0/16"
+
+  validation {
+    condition     = can(cidrnetmask(var.vpc_cidr))
+    error_message = "VPC CIDR must be a valid CIDR netmask, e.g., '10.0.0.0/16'."
+  }
+}
+
+variable "subnet_cidr_prefix" {
+  description = <<-EOT
+  The CIDR prefix value to use when dividing up the VPC CIDR range into subnet ranges.
+  E.g., 24 to create equal subnets of size "24": 10.0.1.0/24, 10.0.2.0/24, etc.
+  EOT
+  type        = number
+  default     = 24
+}
+
+variable "private_subnets_only" {
+  description = "Only create private subnets"
+  type        = bool
+  default     = false
+}
+
+variable "single_az_only" {
+  description = "Only create subnets in a single availability zone"
+  type        = bool
+  default     = true
+}
+
+variable "extra_tags" {
+  description = "Extra tags to apply to AWS resources"
+  type        = map(string)
+  default     = {}
+}
\ No newline at end of file
diff --git a/provision/opentofu/reaper.sh b/provision/opentofu/reaper.sh
new file mode 100755
index 000000000..bfe931861
--- /dev/null
+++ b/provision/opentofu/reaper.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -e
+
+if [[ "$RUNNER_DEBUG" == "1" ]]; then
+  set -x
+fi
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+cd "$1"
+tofu init
+declare -a WORKSPACES=($(tofu workspace list | sed 's/*//' | grep -v "default"))
+for WORKSPACE in "${WORKSPACES[@]}"; do
+  bash "${SCRIPT_DIR}/destroy.sh" "${WORKSPACE}"
+done
diff --git a/provision/opentofu/remote-state/README.md b/provision/opentofu/remote-state/README.md
new file mode 100644
index 000000000..da2be8150
--- /dev/null
+++ b/provision/opentofu/remote-state/README.md
@@ -0,0 +1,7 @@
+To create the remote S3 bucket and DynamoDB table that store the team's OpenTofu workspaces,
+execute the following commands in this directory:
+
+1. `tofu init`
+2. `tofu apply`
+
+NOTE: This should only be performed once, to create or recreate the team's S3 state bucket.
diff --git a/provision/opentofu/remote-state/main.tf b/provision/opentofu/remote-state/main.tf
new file mode 100644
index 000000000..6aa968bb7
--- /dev/null
+++ b/provision/opentofu/remote-state/main.tf
@@ -0,0 +1,66 @@
+provider "aws" {
+  region = "eu-west-1"
+}
+
+resource "aws_s3_bucket" "terraform_state" {
+  bucket = "kcb-tf-state"
+
+  lifecycle {
+    prevent_destroy = true
+  }
+}
+
+resource "aws_s3_bucket_versioning" "terraform_state" {
+  bucket = aws_s3_bucket.terraform_state.id
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+resource "aws_dynamodb_table" "terraform_state_lock" {
+  name           = "app-state"
+  read_capacity  = 1
+  write_capacity = 1
+  hash_key       = "LockID"
+
+  attribute {
+    name = "LockID"
+    type = "S"
+  }
+}
+
+resource "aws_s3_bucket_policy" "terraform_state" {
+  bucket = "${aws_s3_bucket.terraform_state.id}"
+  policy =<