diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index fa904d2..0bf231d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -3,7 +3,7 @@ name: Run Tests on: push jobs: - test: + test-python: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -26,4 +26,22 @@ jobs: - name: Run tests env: CI: "true" - run: poetry run pytest \ No newline at end of file + run: poetry run pytest + + test-bash: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup Bats and bats libs + id: setup-bats + uses: bats-core/bats-action@3.0.0 + - name: Setup Shell Mock + run: wget -P /opt/ https://github.com/boschresearch/shellmock/releases/download/0.9.1/shellmock.bash + - run: ls /opt + - name: Run tests + shell: bash + env: + BATS_LIB_PATH: ${{ steps.setup-bats.outputs.lib-path }} + SHELLMOCK_PATH: /opt/shellmock.bash + TERM: xterm + run: bats ./bin/**/*_test.sh diff --git a/.gitignore b/.gitignore index bf95a1b..61b6a30 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,6 @@ htmlcov/ .coverage .gnupg requirements.txt -docs/_build \ No newline at end of file + +docs/_build +bin/digifeeds/*.config \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 6786a78..7385197 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,7 +68,18 @@ RUN poetry export --without dev -f requirements.txt --output requirements.txt # We want poetry on in development FROM poetry AS development RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ - git + git \ + bats \ + bats-assert \ + bats-file\ + wget\ + zip\ + unzip + +RUN wget -P /opt/ https://github.com/boschresearch/shellmock/releases/download/0.9.1/shellmock.bash && \ + chown ${UID}:${GID} /opt/shellmock.bash + +ENV SHELLMOCK_PATH=/opt/shellmock.bash # Switch to the non-root user "user" USER app @@ -84,4 +95,4 @@ COPY --chown=${UID}:${GID} --from=build "/app/requirements.txt" /app/requirement RUN pip install -r /app/requirements.txt -USER 
app \ No newline at end of file +USER app diff --git a/bin/digifeeds/upload_to_s3.config.example b/bin/digifeeds/upload_to_s3.config.example new file mode 100644 index 0000000..2bed0d5 --- /dev/null +++ b/bin/digifeeds/upload_to_s3.config.example @@ -0,0 +1,3 @@ +input_directory="some/path/to/input/directory" +processed_directory="some/path/to/processed/directory" +digifeeds_bucket="rclone_remote_to_s3_bucket" \ No newline at end of file diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh new file mode 100755 index 0000000..e63b766 --- /dev/null +++ b/bin/digifeeds/upload_to_s3.sh @@ -0,0 +1,242 @@ +#! /bin/bash + +########### +# CONSTANTS +########### + +# For push gateway +START_TIME=$(date '+%s') + +# Directory this script lives in +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + +APP_ENV=${APP_ENV:-"production"} + +if [[ $APP_ENV != "test" ]]; then + # CONFIG + # Variables contained in the config file: + # + # input_directory: path to the input directory + # processed_directory: path to the directory of processed files + # digifeeds_bucket: rclone remote for the digifeeds bucket + # + # timestamp: used for testing timestamps; should be omitted in production + # send_metrics: when "false" metrics don't get sent; + # APP_ENV: when "test" the main script is not executed + CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config} + # shellcheck source=/dev/null + source "$CONFIG_FILE" +fi +if ! input_directory=${input_directory:?}; then exit 1; fi +if ! processed_directory=${processed_directory:?}; then exit 1; fi +if ! 
digifeeds_bucket=${digifeeds_bucket:?}; then exit 1; fi +send_metrics=${send_metrics:-"true"} + +# matches .tif and .jp2 files with 8 digit file names that start with 0 OR +# checksum.md5 files +# examples that match: +# 01234567.tif +# 01234567.jp2 +# checksum.md5 +IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' + +# For push gateway +JOB_NAME="aim_digifeeds_upload_to_aws" + +########### +# COUNTERS +########### +files_processed_total=0 +image_order_errors_total=0 +upload_errors_total=0 +errors_total=0 + +########### +# FUNCTIONS +########### + +log_info() { + echo "$(date --rfc-3339=seconds) - INFO: $*" +} + +log_error() { + echo "$(date --rfc-3339=seconds) - ERROR: $*" +} +log_debug() { + [[ ${DEBUG:-false} == "true" ]] && echo "$(date --rfc-3339=seconds) - DEBUG: $*" +} + +#equivalent to ls +list_files() { + local path=$1 + find "$path" -maxdepth 1 ! -printf '%P\n' +} + +# Gets the last count from a job in the push gateway +last_count() { + local metric=$1 + pushgateway_advanced -j $JOB_NAME -q "${metric}" +} + +verify_image_order() { + #Sort the array + mapfile -t sorted < <(printf '%s\n' "$@" | sort) + + local cnt=0 + for arg in "${sorted[@]}"; do + cnt=$((cnt + 1)) + int=${arg:0:8} + [ $((10#$int)) != $cnt ] && return 1 + done + return 0 +} + +zip_it() { + local barcode_path=$1 + cd "$barcode_path" || return 1 + list_files . | awk "$IMGAWK" | xargs zip -rq "$barcode_path".zip + local zip_return=$? + #Go back to previous directory; Don't print the output. + cd - >/dev/null || return 1 + return $zip_return +} + +verify_zip() { + local barcode_path=$1 + + local files_in_dir + if ! files_in_dir=$(list_files "$barcode_path" | awk "$IMGAWK" | sort); then + return 1 + fi + local files_in_zip + if ! 
files_in_zip=$(zipinfo -1 "$barcode_path".zip | sort); then + return 1 + fi + + if [ "$files_in_dir" == "$files_in_zip" ]; then + return 0 + else + return 1 + fi +} + +print_metrics() { + local fp_current_total=$1 + local image_order_errors_current_total=$2 + local upload_errors_current_total=$3 + local errors_current_total=$4 + + local fp_metric="${JOB_NAME}_files_processed_total" + local fp_last + fp_last=$(last_count $fp_metric) + local fp_total=$((fp_last + fp_current_total)) + + local image_order_errors_metric="${JOB_NAME}_image_order_errors_total" + local image_order_errors_last + image_order_errors_last=$(last_count $image_order_errors_metric) + local image_order_errors_total=$((image_order_errors_last + image_order_errors_current_total)) + + local upload_errors_metric="${JOB_NAME}_upload_errors_total" + local upload_errors_last + upload_errors_last=$(last_count $upload_errors_metric) + local upload_errors_total=$((upload_errors_last + upload_errors_current_total)) + + local errors_metric="${JOB_NAME}_errors_total" + local errors_last + errors_last=$(last_count $errors_metric) + local errors_total=$((errors_last + errors_current_total)) + + cat <