Skip to content
This repository has been archived by the owner on May 23, 2024. It is now read-only.

Implement Health Checks w/ Rollback #29

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions deployments/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
!deploy.sh
!deploy-types.sh
!spam-deploys.sh
!health-checker.sh
!README.md
# Also, the project configurations!
!project-configs.sh
Expand Down
4 changes: 4 additions & 0 deletions deployments/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ The `port` is mandatory, but the `env_file_path` and `docker_flags` are optional

The paths provided in the said file **MUST** be **absolute** (trust me you don't want to handle bash's path spaghetti :upside_down_face: :wink:).

#### Deployment Health Checks and Rollbacks

You can set a `project_health_check_url` per project, which will be used to assert that the service is running properly after deployment. The deploy script queries the URL (using cURL) every 10 seconds until it returns HTTP 200, for a maximum of 5 minutes. If the service never returns HTTP 200 within that time, the new container is stopped and the deployment is rolled back to the previous container. It is advised that you pass a URL served by the respective service that only returns 200 when the service is in a good operating state. This variable is optional: if it is not provided, no health check is performed.

## Notes

`docker system prune` should be run periodically to clean up dangling images and containers. The deployment scripts attempt to minimize the number of these, but some are left on purpose in order to speed up multi-stage builds.
Expand Down
64 changes: 59 additions & 5 deletions deployments/deploy-types.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@ function to_lower_case() {
echo "$1" | tr '[:upper:]' '[:lower:]'
}

# Adaptation of https://stackoverflow.com/questions/192292/how-best-to-include-other-scripts
# Work out the directory this file lives in, so that sibling scripts can be
# sourced regardless of the caller's working directory.
deploy_curr_dir="${BASH_SOURCE[0]%/*}"
if [[ ! -d "$deploy_curr_dir" ]]; then deploy_curr_dir="${0%/*}"; fi
# When neither path has a directory component (e.g. `bash deploy-types.sh` run
# from inside this directory) the expansions above yield the file name itself,
# which is not a directory: fall back to the current working directory.
if [[ ! -d "$deploy_curr_dir" ]]; then deploy_curr_dir="$PWD"; fi

# Brings in `health_checker`, used to validate a freshly started container.
# shellcheck source=/dev/null
source "$deploy_curr_dir/health-checker.sh"

# For deploying stuff with docker, simply put.
function deploy_default() {
# (dotenv_location is not mandatory)
local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}"
# (dotenv_location, docker_flags and health_check_url are not mandatory)
local project="$1" branch="$2" image_tag port="$3" dotenv_location="${4:-}" docker_flags="${5:-}" health_check_url="${6:-}"
image_tag="$(to_lower_case "$project---$branch" )"

# If we have a dotenv file specified, copy it into the current directory (in case of error, `cp` prints something so no need to echo anything)
Expand All @@ -28,7 +35,10 @@ function deploy_default() {
old_container_id="$(docker ps -aq --filter ancestor="$image_tag")"

echo -e "Starting docker build\n"
docker build -f Dockerfile-prod -t "$image_tag" .

local new_image_id
new_image_id=$(docker build -q -f Dockerfile-prod -t "$image_tag" .)

local build_status="$?"

# Disabled as this meant that no dependencies could be cached. Instead run `docker system prune` periodically to clear up disk space if necessary.
Expand Down Expand Up @@ -57,7 +67,10 @@ function deploy_default() {
else
echo -e "#-> No docker flags specified.\n"
fi
echo "${docker_flags:-} -d --restart=unless-stopped --env PORT=80 -p $port:80 $image_tag" | xargs docker run

local new_container_id
new_container_id=$(echo "${docker_flags:-} -d --restart=unless-stopped --env PORT=80 -p $port:80 $image_tag" | xargs docker run)

local run_status="$?"
if [ "$run_status" != 0 ]; then
>&2 echo -e "\n###-> ERROR! Run failed!"
Expand All @@ -75,6 +88,47 @@ function deploy_default() {
return "$run_status"
fi

local health_check_result=0
if [[ -n "$health_check_url" ]]; then
echo -e "###->Starting health check...\n"
# This is done this way due to the use of set -e above.
# If the command is successful, the || won't run, so the default value is 0
# if the command is not successful, we need the || so that the script does not exit immediately
health_checker "$health_check_url" || health_check_result="$?"

if [ "$health_check_result" != 0 ]; then
>&2 echo -e "\n###-> ERROR! Service did not pass the health check! Rolling back to previous container!"

docker stop "$new_container_id" &>/dev/null
docker wait "$new_container_id"
echo -e "\n###-> New container stopped.\n"

>&2 echo "###-> Retagging old image and starting old container back up"
if [[ -n "$old_image_id" ]]; then
docker tag "$old_image_id" "$image_tag"
else
>&2 echo "###->> No old image found for retagging!!"
fi
if [[ -n "$old_container_id" ]]; then
docker start "$old_container_id"
else
>&2 echo "###->> No old container found for starting back up!!"
fi

>&2 echo "###->> Removing new (un-healthy) container"
docker rm "$new_container_id"

if [[ "$(docker images -q "$image_tag")" == "$new_image_id" ]]; then
>&2 echo "###-> Not removing image, as the container was run using the same one (build did a full cache hit)"
else
printf "Removed new image with id: "
docker rmi "$new_image_id"
fi

return 1
fi
fi

# Cleanup
echo -e "\n###-> New container now running successfuly, removing old container and image!"
if [[ -n "$old_container_id" ]]; then
Expand All @@ -89,7 +143,7 @@ function deploy_default() {
echo "###-> Not removing image, as the container was run using the same one (build did a full cache hit)"
else
printf "old image id: "
docker rmi "$old_image_id"
docker rmi "$old_image_id"
fi
else
echo "###-> No old image found, so none removed."
Expand Down
4 changes: 2 additions & 2 deletions deployments/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ branch="${2:-master}"
# shellcheck source=utils/utils.sh
source "$deploy_curr_dir/../utils/utils.sh"

# Getting project configurations (configured_projects, project_port and project_dotenv_location)
# Getting project configurations (configured_projects, project_port, project_dotenv_location and project_health_check_url)
# shellcheck source=deployments/project-configs.sh
source "$deploy_curr_dir/project-configs.sh"

Expand Down Expand Up @@ -111,7 +111,7 @@ set +e
echo

# Passing in the configs from ./project-configs.sh. dotenv_location, docker_flags and health_check_url might not be set, so an empty string ("") is sent instead so that the 'unbound variable' error does not occur
deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}"
deploy_default "$project" "$branch" "${project_port[$project---$branch]}" "${project_dotenv_location[$project---$branch]:-}" "${project_docker_flags[$project---$branch]:-}" "${project_health_check_url[$project---$branch]:-}"
) 2>&1 | tee "$logfile"

# This gets the return status of the first element of the previous pipe, aka the subshell executing the deployment commands
Expand Down
66 changes: 66 additions & 0 deletions deployments/health-checker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env bash

### Check for service availability after container startup
### To be used by deploy-types.sh

# See https://sipb.mit.edu/doc/safe-shell/
set -ueo pipefail

# Calls a given url once and reports whether it answered with HTTP 200.
# The verdict is written to stdout (callers capture it with `$(...)`):
#   0 - the response status was HTTP 200
#   1 - any other status, or curl could not complete the request
# Arguments: url - URL to call
#            timeout_seconds - (optional, default 10) upper bound for the whole
#                              request, so a hung endpoint cannot block forever
function is_healthy_url() {
    local url="$1"
    local timeout_seconds="${2:-10}"

    local response_code
    # curl exits non-zero when the request cannot be completed (connection
    # refused, timeout, ...). That must count as "unhealthy" instead of aborting
    # the caller under `set -e`, hence the `|| true`.
    response_code="$(curl -s -o /dev/null -w "%{http_code}" --max-time "$timeout_seconds" "$url")" || true

    if [[ "$response_code" == "200" ]]; then
        echo 0
    else
        echo 1
    fi
}

# Periodically calls a given url until it returns HTTP 200, or the maximum
# number of attempts is reached. Progress messages are written to stdout.
# Returns 0 if the health check was successful, 1 otherwise.
# Arguments: url - URL to call
#            max_attempts - (optional, default 31) how many times to try
#            retry_interval_seconds - (optional, default 10) pause between tries
# With the defaults, 31 attempts spaced 10 seconds apart cover 5 minutes.
function health_checker() {
    local url="$1"
    local -r max_attempts="${2:-31}"
    local -r retry_interval_seconds="${3:-10}"

    local is_healthy_result
    local retry_count

    for ((retry_count = 0; retry_count < max_attempts; retry_count++)); do
        echo -e "[Health Checker] Attempt $retry_count\n"

        # is_healthy_url prints "0" for healthy and "1" otherwise. It is compared
        # as a string so that an unexpected or empty answer never counts as healthy.
        is_healthy_result="$(is_healthy_url "$url")"

        if [[ "$is_healthy_result" == "0" ]]; then
            echo -e "[Health Checker] Health Check successfull!\n"
            return 0
        fi

        echo -e "[Health Checker] Attempt ${retry_count} failed.\n"

        # Only wait when another attempt will follow: sleeping after the last
        # failure would just delay the caller's rollback.
        if ((retry_count + 1 < max_attempts)); then
            sleep "$retry_interval_seconds"
        fi
    done

    echo -e "[Health Checker] Max number of retries reached. Health Check failed.\n"
    return 1
}

# health_checker calls is_healthy_url, so both must be exported for the
# function to be usable from a child bash process.
export -f is_healthy_url health_checker
5 changes: 5 additions & 0 deletions deployments/project-configs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ configured_projects="Website-NIAEFEUP tts-fe nijobs-fe nijobs-be"
declare -A project_port
declare -A project_dotenv_location
declare -A project_docker_flags
declare -A project_health_check_url

# Website-NIAEFEUP
project_port[Website-NIAEFEUP---master]=3000
Expand All @@ -38,15 +39,19 @@ project_dotenv_location[nijobs-fe---experimental]='/home/ni/niployments/deployme
project_port[nijobs-be---master]=4010
project_dotenv_location[nijobs-be---master]='/home/ni/niployments/deployments/env-files/nijobs-be/master/.env.local'
project_docker_flags[nijobs-be---master]='-v /home/ni/niployments/deployments/volumes-data/nijobs:/usr/src/app/static'
# Optional: URL polled after deployment; if it never answers HTTP 200 the deployment is rolled back.
# NOTE(review): the deploy script publishes the container with `-p $port:80` and `PORT=80`;
# confirm the service really speaks TLS on that port, otherwise this should be http:// — TODO confirm
project_health_check_url[nijobs-be---master]="https://localhost:${project_port[nijobs-be---master]}/"
## nijobs-be staging
project_port[nijobs-be---develop]=4011
project_dotenv_location[nijobs-be---develop]='/home/ni/niployments/deployments/env-files/nijobs-be/develop/.env.local'
project_docker_flags[nijobs-be---develop]='-v /home/ni/niployments/deployments/volumes-data/nijobs-beta:/usr/src/app/static'
# Optional health check URL for staging (same caveat about https:// as for master — TODO confirm).
project_health_check_url[nijobs-be---develop]="https://localhost:${project_port[nijobs-be---develop]}/"

# debug example:
# project_dotenv_location[nijobs-be---develop]='/home/miguel/Coding/NIAEFEUP/niployments/deployments/env-files/nijobs-be/develop/.env.local'

# Essential, duh! :)
export project_port
export project_dotenv_location
export project_docker_flags
export project_health_check_url
export configured_projects