Kava-Labs · sesheffield · Sep 18, 2024 · Sep 18, 2024 · Sep 19, 2024 · Sep 20, 2024
@@ -2,7 +2,7 @@
 set -x
 
 # get all the node's ec2 instance ids for the specified chain id
-chain_node_instance_ids=$(aws ec2 describe-instances --filters "Name=tag:KavaChainId,Values=$CHAIN_ID" | jq -r '[.Reservations | .[] | .Instances | .[] | .InstanceId] | join(" ")')
+chain_node_instance_ids=$(aws ec2 describe-instances --filters "Name=tag:${AWS_CHAIN_ID_TAG_NAME:-KavaChainId},Values=$CHAIN_ID" | jq -r '[.Reservations | .[] | .Instances | .[] | .InstanceId] | join(" ")') # tag name falls back to KavaChainId when AWS_CHAIN_ID_TAG_NAME is unset or empty
 
 for chain_node_instance_id in ${chain_node_instance_ids}
 do
@@ -17,6 +17,25 @@ do
         aws autoscaling exit-standby \
             --instance-ids "$chain_node_instance_id" \
             --auto-scaling-group-name "$autoscaling_group_name"
+
+        # wait for the instance to return to service; bound the wait so a stuck
+        # instance fails the job instead of polling until the runner times out
+        max_wait_attempts=${MAX_WAIT_ATTEMPTS:-60}
+        wait_attempt=0
+        while true; do
+            autoscaling_group_state=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$chain_node_instance_id" | jq -r '[.AutoScalingInstances | .[].LifecycleState] | join(" ")')
+            if [ "$autoscaling_group_state" == "InService" ]; then
+                echo "instance ($chain_node_instance_id) is now in inService state"
+                break
+            fi
+            wait_attempt=$((wait_attempt + 1))
+            if [ "$wait_attempt" -ge "$max_wait_attempts" ]; then
+                echo "instance ($chain_node_instance_id) did not reach InService state after $wait_attempt checks (current state: $autoscaling_group_state), giving up" >&2
+                exit 1
+            fi
+            echo "instance ($chain_node_instance_id) not in InService state yet (current state: $autoscaling_group_state), waiting 10 seconds"
+            sleep 10
+        done
         ;;
     *)
         echo "instance ($chain_node_instance_id) not in an elgible state ($autoscaling_group_state) for exiting standby, skipping"

@@ -1,8 +1,8 @@
 #!/bin/bash
 set -x
 
-# get all the node's ec2 instance ids for the specified chain id
-chain_node_instance_ids=$(aws ec2 describe-instances --filters "Name=tag:KavaChainId,Values=$CHAIN_ID" | jq -r '[.Reservations | .[] | .Instances | .[] | .InstanceId] | join(" ")')
+# get all the node's ec2 instance ids tagged with the specified chain id (tag name defaults to KavaChainId)
+chain_node_instance_ids=$(aws ec2 describe-instances --filters "Name=tag:${AWS_CHAIN_ID_TAG_NAME:-KavaChainId},Values=$CHAIN_ID" | jq -r '[.Reservations | .[] | .Instances | .[] | .InstanceId] | join(" ")')
 
 for chain_node_instance_id in ${chain_node_instance_ids}
 do
@@ -19,6 +19,25 @@ do
             --instance-ids "$chain_node_instance_id" \
             --auto-scaling-group-name "$autoscaling_group_name" \
             --should-decrement-desired-capacity
+
+        # wait for the instance to reach standby; bound the wait so a stuck
+        # instance fails the job instead of polling until the runner times out
+        max_wait_attempts=${MAX_WAIT_ATTEMPTS:-60}
+        wait_attempt=0
+        while true; do
+            autoscaling_group_state=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$chain_node_instance_id" | jq -r '[.AutoScalingInstances | .[].LifecycleState] | join(" ")')
+            if [ "$autoscaling_group_state" == "Standby" ]; then
+                echo "instance ($chain_node_instance_id) is now in standby state"
+                break
+            fi
+            wait_attempt=$((wait_attempt + 1))
+            if [ "$wait_attempt" -ge "$max_wait_attempts" ]; then
+                echo "instance ($chain_node_instance_id) did not reach standby state after $wait_attempt checks (current state: $autoscaling_group_state), giving up" >&2
+                exit 1
+            fi
+            echo "instance ($chain_node_instance_id) not in standby state yet (current state: $autoscaling_group_state), waiting 10 seconds"
+            sleep 10
+        done
         ;;
     *)
         echo "instance ($chain_node_instance_id) not in an elgible state ($autoscaling_group_state) for going on standby, skipping"

@@ -1,41 +1,64 @@
 name: Continuous Deployment (Protonet)
-## run after every successful CI job of new commits to the master branch
-#on:
-#  workflow_run:
-#    workflows: [Continuous Integration (Kava Master)]
-#    types:
-#      - completed
+# run after every successful CI job of new commits to the master branch
+on:
+  workflow_run:
+    workflows: [Continuous Integration (Kava Master)]
+    types:
+      - completed
 
 jobs:
+  changed_files:
+    runs-on: ubuntu-latest
+    # expose whether any protonet config file changed so downstream jobs can gate on it
+    outputs:
+      changedProtonetConfig: ${{ steps.changed-protonet-config.outputs.any_changed }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # OR "2" -> To retrieve the preceding commit.
+      - name: Get all changed protonet files
+        id: changed-protonet-config # must match the step id referenced in this job's outputs
+        uses: tj-actions/changed-files@v42
+        with:
+          # Avoid using single or double quotes for multiline patterns
+          files: |
+            ci/env/kava-protonet/**
+
   # in order:
   # enter standby (prevents autoscaling group from killing node during deploy)
-  # stop kava
-  # take ebs + zfs snapshots
+  # stop doctor and kava
   # download updated binary and genesis
-  # reset application database state (only done on internal testnet)
+  # reset application database state
+  # restart all once all have been reset
+  # start kava and doctor
+  # enter inService
   reset-chain-to-zero-state:
     # only start cd pipeline if last ci run was successful
-    if: ${{ github.event.workflow_run.conclusion == 'success' }}
-    uses: ./.github/workflows/cd-reset-internal-testnet.yml
+    if: ${{ github.event.workflow_run.conclusion == 'success' && needs.changed_files.outputs.changedProtonetConfig == 'true' }}
+    uses: ./.github/workflows/cd-reset-protonet.yml
     with:
       aws-region: us-east-1
+      aws-chain-id-tag-name: ChainId
       chain-id: proto_2221-17000
-      ssm-document-name: kava-testnet-internal-node-update
-      playbook-name: reset-protonet-playbook.yml
+      auto_scaling_group_names: kava-protonet-iavl-v1-validator-api-node,kava-protonet-iavl-v1-rpc-api-node
+      ssm-document-name: kava-protonet-iavl-v1-instance-update
+      playbook-name: reset-chain.yml
       playbook-infrastructure-branch: master
     secrets: inherit
+    needs: [changed_files]
 
-  # start kava with new binary and genesis state on api, peer and seed nodes, place nodes in service once they start and are synched to live
-  start-chain-api:
-    uses: ./.github/workflows/cd-start-chain.yml
-    with:
-      aws-region: us-east-1
-      chain-id: proto_2221-17000
-      ssm-document-name: kava-testnet-internal-node-update
-      playbook-name: start-chain-api-playbook.yml
-      playbook-infrastructure-branch: master
-    secrets: inherit
-    needs: [reset-chain-to-zero-state]
+  restart-chain:
+    uses: ./.github/workflows/cd-restart-protonet.yml
+    with:
+      aws-region: us-east-1
+      aws-chain-id-tag-name: ChainId
+      chain-id: proto_2221-17000
+      auto_scaling_group_names: kava-protonet-iavl-v1-validator-api-node,kava-protonet-iavl-v1-rpc-api-node
+      ssm-document-name: kava-protonet-iavl-v1-instance-update
+      playbook-name: restart-chain.yml
+      playbook-infrastructure-branch: master
+    secrets: inherit
+    needs: [reset-chain-to-zero-state]
 
   # setup test and development accounts and balances, deploy contracts by calling the chain's api
   seed-chain-state:
@@ -45,16 +68,17 @@ jobs:
       chain-id: proto_2221-17000
       seed-script-filename: seed-protonet.sh
       erc20-deployer-network-name: protonet
-      genesis_validator_addresses: "kavavaloper14w4avgdvqrlpww6l5dhgj4egfn6ln7gmtp7r2m"
+      genesis_validator_addresses: "kavavaloper1jaw3g097lq9jdrnscchspr2233yhpsxdlq7ula"
       kava_version_filepath:  ./ci/env/kava-protonet/KAVA.VERSION
     secrets: inherit
-    needs: [start-chain-api]
+    needs: [restart-chain]
+
   post-pipeline-metrics:
     uses: ./.github/workflows/metric-pipeline.yml
     if: always() # always run so we metric failures and successes
     with:
       aws-region: us-east-1
-      metric-name: kava.deploys.testnet.proto
+      metric-name: kava.deploys.protonet.proto
       namespace: Kava/ContinuousDeployment
     secrets: inherit
     needs: [seed-chain-state]
@@ -40,6 +40,7 @@ jobs:
       - name: take the chain offline
         run: bash ${GITHUB_WORKSPACE}/.github/scripts/put-all-chain-nodes-on-standby.sh
         env:
+          AWS_CHAIN_ID_TAG_NAME: ${{ inputs.aws-chain-id-tag-name }}
           CHAIN_ID: ${{ inputs.chain-id }}
           AWS_REGION: ${{ inputs.aws-region }}
           AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }}

@@ -0,0 +1,94 @@
+name: Reset Protonet
+
+on:
+  workflow_call:
+    inputs:
+      auto_scaling_group_names:
+        required: true
+        type: string
+        description: 'Comma-separated list of auto-scaling group names'
+      aws-chain-id-tag-name:
+        required: false
+        type: string
+        default: 'KavaChainId'
+      chain-id:
+        required: true
+        type: string
+      aws-region:
+        required: true
+        type: string
+      ssm-document-name:
+        required: true
+        type: string
+      playbook-name:
+        required: true
+        type: string
+      playbook-infrastructure-branch:
+        required: true
+        type: string
+    secrets:
+      CI_AWS_KEY_ID:
+        required: true
+      CI_AWS_KEY_SECRET:
+        required: true
+      KAVA_PRIVATE_GITHUB_ACCESS_TOKEN:
+        required: true
+
+# in order:
+# enter standby (prevents autoscaling group from killing node during deploy)
+# stop doctor and kava
+# download updated binary and genesis
+# reset application database state
+# start kava and doctor
+# enter inService
+jobs:
+  place-chain-nodes-on-standby:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo from current commit
+        uses: actions/checkout@v4
+      - name: take the chain offline
+        run: bash ${GITHUB_WORKSPACE}/.github/scripts/put-all-chain-nodes-on-standby.sh
+        env:
+          AWS_CHAIN_ID_TAG_NAME: ${{ inputs.aws-chain-id-tag-name }}
+          CHAIN_ID: ${{ inputs.chain-id }}
+          AWS_REGION: ${{ inputs.aws-region }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }}
+      - name: checkout infrastructure repo
+        uses: actions/checkout@v4
+        with:
+          repository: Kava-Labs/infrastructure
+          token: ${{ secrets.KAVA_PRIVATE_GITHUB_ACCESS_TOKEN }}
+          path: infrastructure
+          ref: master
+      - name: Set up Go
+        uses: actions/setup-go@v4
+        with:
+          go-version-file: go.mod
+      - name: build kava node updater
+        run: cd infrastructure/cli/kava-node-updater && make install && cd ../../../
+      - name: run reset playbook on all chain nodes
+        run: |
+          IFS=',' read -r -a auto_scaling_group_names <<< "$AUTO_SCALING_GROUP_NAMES"
+          for auto_scaling_group_name in "${auto_scaling_group_names[@]}"; do
+            kava-node-updater \
+              --debug \
+              --max-retries=2 \
+              --aws-ssm-document-name="$SSM_DOCUMENT_NAME" \
+              --infrastructure-git-pointer="$PLAYBOOK_INFRASTRUCTURE_BRANCH" \
+              --update-playbook-filename="$PLAYBOOK_NAME" \
+              --autoscaling-group-name="$auto_scaling_group_name" \
+              --max-upgrade-batch-size=0 \
+              --node-types="" \
+              --wait-for-node-sync-after-upgrade=true
+          done
+        env:
+          SSM_DOCUMENT_NAME: ${{ inputs.ssm-document-name }}
+          PLAYBOOK_NAME: ${{ inputs.playbook-name }}
+          AUTO_SCALING_GROUP_NAMES: ${{ inputs.auto_scaling_group_names }}
+          AWS_REGION: ${{ inputs.aws-region }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }}
+          AWS_SDK_LOAD_CONFIG: 1
+          PLAYBOOK_INFRASTRUCTURE_BRANCH: ${{ inputs.playbook-infrastructure-branch }}
@@ -0,0 +1,102 @@
+name: Restart Protonet
+
+on:
+  workflow_call:
+    inputs:
+      auto_scaling_group_names:
+        required: true
+        type: string
+        description: 'Comma-separated list of auto-scaling group names'
+      aws-chain-id-tag-name:
+        required: false
+        type: string
+        default: 'KavaChainId'
+      chain-id:
+        required: true
+        type: string
+      aws-region:
+        required: true
+        type: string
+      ssm-document-name:
+        required: true
+        type: string
+      playbook-name:
+        required: true
+        type: string
+      playbook-infrastructure-branch:
+        required: true
+        type: string
+    secrets:
+      CI_AWS_KEY_ID:
+        required: true
+      CI_AWS_KEY_SECRET:
+        required: true
+      KAVA_PRIVATE_GITHUB_ACCESS_TOKEN:
+        required: true
+
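+# in order:
+# enter standby (prevents autoscaling group from killing node during deploy)
+# checkout the infrastructure repo and build kava-node-updater
+# run the requested playbook against each auto-scaling group
+# (what the playbook does on each node, e.g. starting kava and doctor,
+# is defined in the infrastructure repo, not in this workflow)
+# exit standby so the nodes return to inService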
+jobs:
+  restart-traffic:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo from current commit
+        uses: actions/checkout@v4
+      - name: take the chain offline
+        run: bash ${GITHUB_WORKSPACE}/.github/scripts/put-all-chain-nodes-on-standby.sh
+        env:
+          AWS_CHAIN_ID_TAG_NAME: ${{ inputs.aws-chain-id-tag-name }}
+          CHAIN_ID: ${{ inputs.chain-id }}
+          AWS_REGION: ${{ inputs.aws-region }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }}
+      - name: checkout infrastructure repo
+        uses: actions/checkout@v4
+        with:
+          repository: Kava-Labs/infrastructure
+          token: ${{ secrets.KAVA_PRIVATE_GITHUB_ACCESS_TOKEN }}
+          path: infrastructure
+          ref: master
+      - name: Set up Go
+        uses: actions/setup-go@v4
+        with:
+          go-version-file: go.mod
+      - name: build kava node updater
+        run: cd infrastructure/cli/kava-node-updater && make install && cd ../../../
+      - name: run restart playbook on all chain nodes
+        run: |
+          IFS=',' read -r -a auto_scaling_group_names <<< "$AUTO_SCALING_GROUP_NAMES" # split the comma-separated input into an array
+          for auto_scaling_group_name in "${auto_scaling_group_names[@]}"; do
+            kava-node-updater \
+              --debug \
+              --max-retries=2 \
+              --aws-ssm-document-name="$SSM_DOCUMENT_NAME" \
+              --infrastructure-git-pointer="$PLAYBOOK_INFRASTRUCTURE_BRANCH" \
+              --update-playbook-filename="$PLAYBOOK_NAME" \
+              --autoscaling-group-name="$auto_scaling_group_name" \
+              --max-upgrade-batch-size=0 \
+              --node-types="" \
+              --wait-for-node-sync-after-upgrade=true
+          done
+        env:
+          SSM_DOCUMENT_NAME: ${{ inputs.ssm-document-name }}
+          PLAYBOOK_NAME: ${{ inputs.playbook-name }}
+          AUTO_SCALING_GROUP_NAMES: ${{ inputs.auto_scaling_group_names }}
+          AWS_REGION: ${{ inputs.aws-region }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }}
+          AWS_SDK_LOAD_CONFIG: 1
+          PLAYBOOK_INFRASTRUCTURE_BRANCH: ${{ inputs.playbook-infrastructure-branch }}
+      - name: bring the chain online
+        run: bash ${GITHUB_WORKSPACE}/.github/scripts/exit-standby-all-chain-nodes.sh
+        env:
+          AWS_CHAIN_ID_TAG_NAME: ${{ inputs.aws-chain-id-tag-name }}
+          CHAIN_ID: ${{ inputs.chain-id }}
+          AWS_REGION: ${{ inputs.aws-region }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }}
@@ -38,7 +38,9 @@ jobs:
       - name: get desired version of network
         id: kava-version
         run: |
-          echo "KAVA_VERSION=$(cat ./ci/env/kava-internal-testnet/KAVA.VERSION)" >> $GITHUB_OUTPUT
+          KAVA_VERSION=$(cat "$KAVA_VERSION_FILEPATH") # read the path from the step env instead of interpolating the input into the script
+          echo "KAVA_VERSION=$KAVA_VERSION" >> "$GITHUB_OUTPUT"
+          echo "Kava Version: $KAVA_VERSION"
         env:
           KAVA_VERSION_FILEPATH: ${{ inputs.kava_version_filepath }}
       - name: checkout version of kava used by network