From 6e1b78afedfedf2e3540193d7cb174894337cf25 Mon Sep 17 00:00:00 2001
From: Monique Rio
Date: Tue, 15 Oct 2024 16:40:04 -0400
Subject: [PATCH 01/16] hello world bash script

---
 bin/digifeeds/test.sh | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100755 bin/digifeeds/test.sh

diff --git a/bin/digifeeds/test.sh b/bin/digifeeds/test.sh
new file mode 100755
index 0000000..008f7d4
--- /dev/null
+++ b/bin/digifeeds/test.sh
@@ -0,0 +1,2 @@
+#! /bin/bash
+echo "hello world!"

From 86a21ab2ab61fd4dad1656dec90b5e691181387a Mon Sep 17 00:00:00 2001
From: Monique Rio
Date: Fri, 18 Oct 2024 16:57:16 +0000
Subject: [PATCH 02/16] WIP started working on the real script

---
 .gitignore                                |  4 +-
 Dockerfile                                |  8 ++-
 bin/digifeeds/test.sh                     |  2 -
 bin/digifeeds/upload_to_s3.config.example |  4 ++
 bin/digifeeds/upload_to_s3.sh             | 33 ++++++++++
 bin/digifeeds/upload_to_s3_test.sh        | 79 +++++++++++++++++++++++
 6 files changed, 126 insertions(+), 4 deletions(-)
 delete mode 100755 bin/digifeeds/test.sh
 create mode 100644 bin/digifeeds/upload_to_s3.config.example
 create mode 100755 bin/digifeeds/upload_to_s3.sh
 create mode 100755 bin/digifeeds/upload_to_s3_test.sh

diff --git a/.gitignore b/.gitignore
index bf95a1b..61b6a30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,6 @@ htmlcov/
 .coverage
 .gnupg
 requirements.txt
-docs/_build
\ No newline at end of file
+
+docs/_build
+bin/digifeeds/*.config
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 6786a78..7a9a9a5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -68,7 +68,13 @@ RUN poetry export --without dev -f requirements.txt --output requirements.txt
 # We want poetry on in development
 FROM poetry AS development
 RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \
-    git
+    git \
+    bats \
+    bats-assert \
+    bats-file\
+    zip\
+    unzip
+
 
 # Switch to the non-root user "user"
 USER app
diff --git a/bin/digifeeds/test.sh b/bin/digifeeds/test.sh
deleted file mode 100755
index 008f7d4..0000000
--- a/bin/digifeeds/test.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /bin/bash
-echo "hello world!"
diff --git a/bin/digifeeds/upload_to_s3.config.example b/bin/digifeeds/upload_to_s3.config.example
new file mode 100644
index 0000000..1a825b2
--- /dev/null
+++ b/bin/digifeeds/upload_to_s3.config.example
@@ -0,0 +1,4 @@
+input_directory="some/path/to/input/directory"
+processed_directory="some/path/to/processed/directory"
+work_directory="some/path/to/work/directory"
+log_directory="some/path/to/log/directory"
\ No newline at end of file
diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh
new file mode 100755
index 0000000..0a00f03
--- /dev/null
+++ b/bin/digifeeds/upload_to_s3.sh
@@ -0,0 +1,33 @@
+#! /bin/bash
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+
+###########
+# CONFIG
+# Variables contained in the config file:
+#
+# input_directory: path to the input directory
+# processed_directory: path to the directory of processed files
+# work_directory: path to the directory to do the processing of files
+# log_directory: path to store log and metrics files
+CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config}
+
+source $CONFIG_FILE
+TIMESTAMP=${timestamp:-$(date +%F_%H-%M-%S)} #YYY-MM-DD_hh-mm-ss
+
+#This is so that the script works on empty directories.
+# shopt -s nullglob
+
+barcode_directories=($input_directory/*/)
+for barcode_path in "${barcode_directories[@]}"; do
+    # turns "/some/path/to/some_barcode/" into "some_barcode"
+    barcode=$(basename ${barcode_path%%/})
+    mv $input_directory/$barcode $work_directory/$barcode
+
+    zip -rq $work_directory/$barcode $work_directory/$barcode
+
+    mv $work_directory/$barcode.zip $processed_directory
+    mv $work_directory/$barcode $processed_directory/
+done
+
+echo $TIMESTAMP
diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh
new file mode 100755
index 0000000..857e13d
--- /dev/null
+++ b/bin/digifeeds/upload_to_s3_test.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/bats
+load '/usr/lib/bats/bats-support/load'
+load '/usr/lib/bats/bats-assert/load'
+load '/usr/lib/bats/bats-file/load'
+
+setup() {
+    SCRATCH_PATH="/tmp/upload_to_s3"
+    CONFIG_PATH=$SCRATCH_PATH/upload_to_s3.config
+    SUBJECT="$BATS_TEST_DIRNAME/upload_to_s3.sh $CONFIG_PATH"
+
+    mkdir $SCRATCH_PATH
+
+    INPUT_DIR=$SCRATCH_PATH/input
+    PROCESSED_DIR=$SCRATCH_PATH/processed
+    LOG_DIR=$SCRATCH_PATH/log
+    WORK_DIR=$SCRATCH_PATH/work
+
+    BARCODE_1="30123456789012"
+    BARCODE_2="40123456789012"
+    TIMESTAMP="YYYY-MM-DD_hh-mm-ss"
+
+    mkdir $INPUT_DIR
+    mkdir $PROCESSED_DIR
+    mkdir $LOG_DIR
+    mkdir $WORK_DIR
+
+#IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/'
+#ls | awk "$IMGAWK" | xargs zip -r "$zipfile
+    mkdir $INPUT_DIR/$BARCODE_1
+    touch $INPUT_DIR/$BARCODE_1/01234567.tif
+    touch $INPUT_DIR/$BARCODE_1/01234567.jp2
+    touch $INPUT_DIR/$BARCODE_1/checksum.md5
+    touch $INPUT_DIR/$BARCODE_1/Thumbs.db
+    touch $INPUT_DIR/$BARCODE_1/some_other_file.txt
+
+    mkdir $INPUT_DIR/$BARCODE_2
+    touch $INPUT_DIR/$BARCODE_2/01234567.tif
+
+    cat <<EOF >$CONFIG_PATH
+input_directory="$INPUT_DIR"
+processed_directory="$PROCESSED_DIR"
+work_directory="$WORK_DIR"
+log_directory="$LOG_DIR"
+timestamp=$TIMESTAMP
+EOF
+
+}
+
+teardown() {
+    rm -r $SCRATCH_PATH
+}
+
+@test "It Works" {
+    run $SUBJECT
+
+    assert_success
+
+    assert_file_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}.zip
+    assert_file_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_2}.zip
+
+    assert_dir_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}
+    assert_dir_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_2}
+}
+
+# @test "It filters the appropriate files" {
+#     run $SUBJECT
+#     mv $PROCESSED_DIR/$BARCODE_1.zip $WORK_DIR/
+#     unzip $WORK_DIR/$BARCODE_1.zip
+# }
+
+# This test shows that `shopt -s nullglob` in necessary`
+@test "Emtpy input directory works" {
+    rm -r $INPUT_DIR/$BARCODE_1
+    rm -r $INPUT_DIR/$BARCODE_2
+
+    run $SUBJECT
+    assert_success
+}
+

From dfc35b90171119edc455b18a7887d4915b7f11e4 Mon Sep 17 00:00:00 2001
From: Monique Rio
Date: Fri, 18 Oct 2024 18:12:27 +0000
Subject: [PATCH 03/16] only appropriate files get added to zip

---
 bin/digifeeds/upload_to_s3.sh      | 32 +++++++++++++++++++++++-------
 bin/digifeeds/upload_to_s3_test.sh | 18 +++++++++++------
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh
index 0a00f03..328d237 100755
--- a/bin/digifeeds/upload_to_s3.sh
+++ b/bin/digifeeds/upload_to_s3.sh
@@ -10,24 +10,42 @@ SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
 # processed_directory: path to the directory of processed files
 # work_directory: path to the directory to do the processing of files
 # log_directory: path to store log and metrics files
+#
+# timestamp: used for testing timestamps; should be ommited in production
CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config} - source $CONFIG_FILE + TIMESTAMP=${timestamp:-$(date +%F_%H-%M-%S)} #YYY-MM-DD_hh-mm-ss +# matches .tif and .jp2 files with 8 digit file names that start with 0 OR +# checksum.md5 files +# examples that match: +# 01234567.tif +# 01234567.jp2 +# checksum.md5 +IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' + #This is so that the script works on empty directories. -# shopt -s nullglob +shopt -s nullglob barcode_directories=($input_directory/*/) + +# move directories; Limit chance of users interacting with files to be moved. for barcode_path in "${barcode_directories[@]}"; do # turns "/some/path/to/some_barcode/" into "some_barcode" barcode=$(basename ${barcode_path%%/}) + mv $input_directory/$barcode $work_directory/$barcode +done - zip -rq $work_directory/$barcode $work_directory/$barcode +# works on barcodes +for barcode_path in "${barcode_directories[@]}"; do + barcode=$(basename ${barcode_path%%/}) - mv $work_directory/$barcode.zip $processed_directory - mv $work_directory/$barcode $processed_directory/ -done + cd $work_directory/$barcode + ls | awk "$IMGAWK" | xargs zip -rq $work_directory/$barcode.zip + cd - -echo $TIMESTAMP + mv $work_directory/$barcode.zip $processed_directory/${TIMESTAMP}_${barcode}.zip + mv $work_directory/$barcode $processed_directory/${TIMESTAMP}_${barcode} +done diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index 857e13d..2720daf 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -31,7 +31,7 @@ setup() { touch $INPUT_DIR/$BARCODE_1/01234567.jp2 touch $INPUT_DIR/$BARCODE_1/checksum.md5 touch $INPUT_DIR/$BARCODE_1/Thumbs.db - touch $INPUT_DIR/$BARCODE_1/some_other_file.txt + touch $INPUT_DIR/$BARCODE_1/some_other_file.tif mkdir $INPUT_DIR/$BARCODE_2 touch $INPUT_DIR/$BARCODE_2/01234567.tif @@ -62,11 +62,17 @@ teardown() { assert_dir_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_2} } -# @test "It filters the appropriate files" { -# run $SUBJECT -# mv $PROCESSED_DIR/$BARCODE_1.zip $WORK_DIR/ -# unzip $WORK_DIR/$BARCODE_1.zip -# } +@test "It filters the appropriate files" { + run $SUBJECT + cd $BATS_TEST_TMPDIR + mv $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}.zip ./ + unzip -q ${TIMESTAMP}_${BARCODE_1}.zip + assert_file_exists '01234567.jp2' + assert_file_exists '01234567.tif' + assert_file_exists 'checksum.md5' + assert_file_not_exists 'Thumbs.db' + assert_file_not_exists 'some_other_file.tif' +} # This test shows that `shopt -s nullglob` in necessary` @test "Emtpy input directory works" { From 92df082dca3d0007af965fd79633c2a026e920d6 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Fri, 18 Oct 2024 19:59:57 +0000 Subject: [PATCH 04/16] adds shellmock --- Dockerfile | 7 ++++++- bin/digifeeds/upload_to_s3.sh | 7 ++++++- bin/digifeeds/upload_to_s3_test.sh | 15 ++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7a9a9a5..7385197 100644 --- a/Dockerfile +++ b/Dockerfile @@ -72,9 +72,14 @@ RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ bats \ bats-assert \ bats-file\ + wget\ zip\ unzip +RUN wget -P /opt/ https://github.com/boschresearch/shellmock/releases/download/0.9.1/shellmock.bash && \ + chown ${UID}:${GID} /opt/shellmock.bash + +ENV SHELLMOCK_PATH=/opt/shellmock.bash # Switch to the non-root user "user" USER app @@ -90,4 +95,4 @@ COPY --chown=${UID}:${GID} --from=build "/app/requirements.txt" /app/requirement RUN pip install -r 
/app/requirements.txt -USER app \ No newline at end of file +USER app diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 328d237..3726b76 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -1,4 +1,5 @@ -#! /bin/bash +#! /bin/bash +set -e #exit on error SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) @@ -10,6 +11,7 @@ SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) # processed_directory: path to the directory of processed files # work_directory: path to the directory to do the processing of files # log_directory: path to store log and metrics files +# digifeeds_bucket: rclone remote for the digifeeds bucket # # timestamp: used for testing timestamps; should be ommited in production CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config} @@ -42,10 +44,13 @@ done for barcode_path in "${barcode_directories[@]}"; do barcode=$(basename ${barcode_path%%/}) + echo "hello" cd $work_directory/$barcode ls | awk "$IMGAWK" | xargs zip -rq $work_directory/$barcode.zip cd - + rclone copy $workdirectory/$barcode.zip $digifeeds_bucket:$barcode.zip + mv $work_directory/$barcode.zip $processed_directory/${TIMESTAMP}_${barcode}.zip mv $work_directory/$barcode $processed_directory/${TIMESTAMP}_${barcode} done diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index 2720daf..bd7db6f 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -3,7 +3,9 @@ load '/usr/lib/bats/bats-support/load' load '/usr/lib/bats/bats-assert/load' load '/usr/lib/bats/bats-file/load' + setup() { + load $SHELLMOCK_PATH SCRATCH_PATH="/tmp/upload_to_s3" CONFIG_PATH=$SCRATCH_PATH/upload_to_s3.config SUBJECT="$BATS_TEST_DIRNAME/upload_to_s3.sh $CONFIG_PATH" @@ -41,6 +43,7 @@ input_directory="$INPUT_DIR" processed_directory="$PROCESSED_DIR" work_directory="$WORK_DIR" log_directory="$LOG_DIR" +digifeeds_bucket="digifeeds_bucket" timestamp=$TIMESTAMP EOF @@ -48,11 +51,15 @@ EOF teardown() { rm -r $SCRATCH_PATH + } + @test "It Works" { + shellmock new rclone + shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: run $SUBJECT - + assert_success assert_file_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}.zip @@ -60,9 +67,13 @@ teardown() { assert_dir_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1} assert_dir_exists $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_2} + shellmock assert expectations rclone } @test "It filters the appropriate files" { + shellmock new rclone + shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + run $SUBJECT cd $BATS_TEST_TMPDIR mv $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}.zip ./ @@ -72,6 +83,8 @@ teardown() { assert_file_exists 'checksum.md5' assert_file_not_exists 'Thumbs.db' assert_file_not_exists 'some_other_file.tif' + + shellmock assert expectations rclone } # This test shows that `shopt -s nullglob` in necessary` From bc8186ca315b4f18fe95ad82567d1bab83d35d06 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Fri, 18 Oct 2024 20:22:28 +0000 Subject: [PATCH 05/16] set up automated bash tests --- .github/workflows/tests.yaml | 22 ++++++++++++++++++++-- bin/digifeeds/upload_to_s3_test.sh | 9 ++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index fa904d2..0bf231d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -3,7 +3,7 @@ name: Run Tests on: push jobs: - test: + test-python: runs-on: ubuntu-latest steps: - uses: 
actions/checkout@v4 @@ -26,4 +26,22 @@ jobs: - name: Run tests env: CI: "true" - run: poetry run pytest \ No newline at end of file + run: poetry run pytest + + test-bash: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup Bats and bats libs + id: setup-bats + uses: bats-core/bats-action@3.0.0 + - name: Setup Shell Mock + run: wget -P /opt/ https://github.com/boschresearch/shellmock/releases/download/0.9.1/shellmock.bash + - run: ls /opt + - name: Run tests + shell: bash + env: + BATS_LIB_PATH: ${{ steps.setup-bats.outputs.lib-path }} + SHELLMOCK_PATH: /opt/shellmock.bash + TERM: xterm + run: bats ./bin/**/*_test.sh diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index bd7db6f..5cc3772 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -1,7 +1,8 @@ #!/usr/bin/bats -load '/usr/lib/bats/bats-support/load' -load '/usr/lib/bats/bats-assert/load' -load '/usr/lib/bats/bats-file/load' +export BATS_LIB_PATH=${BATS_LIB_PATH:-"/usr/lib/bats"} +bats_load_library bats-support +bats_load_library bats-assert +bats_load_library bats-file setup() { @@ -26,8 +27,6 @@ setup() { mkdir $LOG_DIR mkdir $WORK_DIR -#IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' -#ls | awk "$IMGAWK" | xargs zip -r "$zipfile mkdir $INPUT_DIR/$BARCODE_1 touch $INPUT_DIR/$BARCODE_1/01234567.tif touch $INPUT_DIR/$BARCODE_1/01234567.jp2 From dfceb422a75996c1bc92c8c506efb24fa1ed914b Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Mon, 21 Oct 2024 21:18:13 +0000 Subject: [PATCH 06/16] moves most logic to functions --- bin/digifeeds/upload_to_s3.sh | 68 +++-------- bin/digifeeds/upload_to_s3_functions.sh | 148 ++++++++++++++++++++++++ bin/digifeeds/upload_to_s3_test.sh | 74 +++++++++++- 3 files changed, 235 insertions(+), 55 deletions(-) create mode 100644 bin/digifeeds/upload_to_s3_functions.sh diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 3726b76..bf33723 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -1,56 +1,26 @@ -#! /bin/bash -set -e #exit on error +#! /bin/bash -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +# For push gateway +START_TIME=$(date '+%s') ########### -# CONFIG -# Variables contained in the config file: -# -# input_directory: path to the input directory -# processed_directory: path to the directory of processed files -# work_directory: path to the directory to do the processing of files -# log_directory: path to store log and metrics files -# digifeeds_bucket: rclone remote for the digifeeds bucket -# -# timestamp: used for testing timestamps; should be ommited in production -CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config} -source $CONFIG_FILE - -TIMESTAMP=${timestamp:-$(date +%F_%H-%M-%S)} #YYY-MM-DD_hh-mm-ss - -# matches .tif and .jp2 files with 8 digit file names that start with 0 OR -# checksum.md5 files -# examples that match: -# 01234567.tif -# 01234567.jp2 -# checksum.md5 -IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' - -#This is so that the script works on empty directories. -shopt -s nullglob - -barcode_directories=($input_directory/*/) - -# move directories; Limit chance of users interacting with files to be moved. 
-for barcode_path in "${barcode_directories[@]}"; do - # turns "/some/path/to/some_barcode/" into "some_barcode" - barcode=$(basename ${barcode_path%%/}) - - mv $input_directory/$barcode $work_directory/$barcode -done +# Directory this script lives in +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -# works on barcodes -for barcode_path in "${barcode_directories[@]}"; do - barcode=$(basename ${barcode_path%%/}) +source $SCRIPT_DIR/upload_to_s3_functions.sh - echo "hello" - cd $work_directory/$barcode - ls | awk "$IMGAWK" | xargs zip -rq $work_directory/$barcode.zip - cd - - rclone copy $workdirectory/$barcode.zip $digifeeds_bucket:$barcode.zip +if [[ $APP_ENV != "test" ]]; then + # CONFIG + # Variables contained in the config file: + # + # input_directory: path to the input directory + # processed_directory: path to the directory of processed files+% + # digifeeds_bucket: rclone remote for the digifeeds bucket + # + # timestamp: used for testing timestamps; should be ommited in production + CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config} + source $CONFIG_FILE - mv $work_directory/$barcode.zip $processed_directory/${TIMESTAMP}_${barcode}.zip - mv $work_directory/$barcode $processed_directory/${TIMESTAMP}_${barcode} -done + main +fi \ No newline at end of file diff --git a/bin/digifeeds/upload_to_s3_functions.sh b/bin/digifeeds/upload_to_s3_functions.sh new file mode 100644 index 0000000..dadc8e1 --- /dev/null +++ b/bin/digifeeds/upload_to_s3_functions.sh @@ -0,0 +1,148 @@ +### Constants + +# matches .tif and .jp2 files with 8 digit file names that start with 0 OR +# checksum.md5 files +# examples that match: +# 01234567.tif +# 01234567.jp2 +# checksum.md5 +IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' + +JOB_NAME="aim_digifeeds_upload_to_s3" + +### FUNCTIONS + +log_info() { + echo "$(date --rfc-3339=seconds) - INFO: ${@}" +} + +log_error() { + echo "$(date --rfc-3339=seconds) - ERROR: ${@}" +} + +# Gets the last count from a job in the push gateway push gateway +last_count() { + local metric=$1 + pushgateway_advanced -j $JOB_NAME -q ${metric} +} + +zip_it() { + local barcode_path=$1 + cd $barcode_path + ls | awk "$IMGAWK" | xargs zip -rq $barcode_path.zip + local zip_return=$? + #Go back to previous directory; Don't print the output. + cd - >/dev/null + return $zip_return +} + +verify_zip() { + local barcode_path=$1 + + local files_in_dir=$(ls $barcode_path | awk "$IMGAWK" | sort) + [ $? != 0 ] && return 1 + local files_in_zip=$(zipinfo -1 $barcode_path.zip | sort) + [ $? 
!= 0 ] && return 1 + + if [ "$files_in_dir" == "$files_in_zip" ]; then + return 0 + else + return 1 + fi +} + + +print_metrics() { + local fp_current_total=$1 + local upload_errors_current_total=$2 + local errors_current_total=$3 + + local fp_metric="${JOB_NAME}_files_processed_total" + local fp_last=$(last_count $fp_metric) + local fp_total=$((fp_last + fp_current_total)) + + local upload_errors_metric="${JOB_NAME}_upload_errors_total" + local upload_errors_last=$(last_count $upload_errors_metric) + local upload_errors_total=$((upload_errors_last + upload_errors_current_total)) + + local errors_metric="${JOB_NAME}_errors_total" + local errors_last=$(last_count $errors_metric) + local errors_total=$((errors_last + errors_current_total)) + + cat <$CONFIG_PATH input_directory="$INPUT_DIR" processed_directory="$PROCESSED_DIR" -work_directory="$WORK_DIR" -log_directory="$LOG_DIR" digifeeds_bucket="digifeeds_bucket" timestamp=$TIMESTAMP +send_metrics="false" EOF } teardown() { rm -r $SCRATCH_PATH +} +@test "Back to Basics" { + run log_info "Hello" + assert_output --partial "Hello" } @test "It Works" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:$INPUT_DIR regex-3:^digifeeds_bucket: run $SUBJECT assert_success @@ -72,6 +74,7 @@ teardown() { @test "It filters the appropriate files" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:$INPUT_DIR regex-3:^digifeeds_bucket: run $SUBJECT cd $BATS_TEST_TMPDIR @@ -95,3 +98,62 @@ teardown() { assert_success } +@test "Failed zip" { + shellmock new zip + shellmock config zip 1 + run $SUBJECT + assert_output --partial "ERROR: Failed to zip $BARCODE_1" + assert_output --partial "ERROR: Failed to zip $BARCODE_2" + assert_output --partial "INFO: Total files processed:\t0" + assert_output --partial "INFO: Total errors:\t2" + assert_output --partial "INFO: Total errors uploading to S3:\t0" + shellmock assert expectations zip +} + +@test "Failed copy records error and moves on" { + shellmock new rclone + shellmock config rclone 1 1:copy regex-3:^digifeeds_bucket: <<< "Rclone error mock: Failed to copy" + run $SUBJECT + assert_output --partial "ERROR: Failed to copy $BARCODE_1" + assert_output --partial "ERROR: Failed to copy $BARCODE_2" + assert_output --partial "INFO: Total files processed:\t0" + assert_output --partial "INFO: Total errors:\t2" + assert_output --partial "INFO: Total errors uploading to S3:\t2" + shellmock assert expectations rclone +} + +@test "Failed on S3 verification and moves on" { + shellmock new rclone + shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + shellmock config rclone 1 1:check regex-2:$INPUT_DIR regex-3:^digifeeds_bucket: + run $SUBJECT + assert_output --partial "ERROR: $BARCODE_1 not found in S3" + assert_output --partial "ERROR: $BARCODE_2 not found in S3" + assert_output --partial "INFO: Total files processed:\t0" + assert_output --partial "INFO: Total errors:\t2" + assert_output --partial "INFO: Total errors uploading to S3:\t2" + shellmock assert expectations rclone +} +@test "print_metrics" { + shellmock new pushgateway_advanced + shellmock config pushgateway_advanced 0 <<< 5 + run print_metrics 1 2 3 + assert_output --partial "aim_digifeeds_upload_to_s3_files_processed_total 6" + assert_output --partial "aim_digifeeds_upload_to_s3_upload_errors_total 7" + assert_output --partial "aim_digifeeds_upload_to_s3_errors_total 8" + shellmock assert expectations 
pushgateway_advanced + +} + +@test "verify_zip success" { + zip_it $INPUT_DIR/$BARCODE_1 + run verify_zip $INPUT_DIR/$BARCODE_1 + assert_success +} + +@test "verify_zip fail" { + zip_it $INPUT_DIR/$BARCODE_2 + mv $INPUT_DIR/$BARCODE_2.zip $INPUT_DIR/$BARCODE_1.zip + run verify_zip $INPUT_DIR/$BARCODE_1 + assert_failure +} \ No newline at end of file From 5eec504cf9e59fddf6d2048a8dfb26926e17112d Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Mon, 21 Oct 2024 21:42:25 +0000 Subject: [PATCH 07/16] able to load upload_to_s3.sh in test file --- bin/digifeeds/upload_to_s3.config.example | 3 +- bin/digifeeds/upload_to_s3.sh | 156 +++++++++++++++++++++- bin/digifeeds/upload_to_s3_functions.sh | 148 -------------------- bin/digifeeds/upload_to_s3_test.sh | 25 ++-- 4 files changed, 165 insertions(+), 167 deletions(-) delete mode 100644 bin/digifeeds/upload_to_s3_functions.sh diff --git a/bin/digifeeds/upload_to_s3.config.example b/bin/digifeeds/upload_to_s3.config.example index 1a825b2..2bed0d5 100644 --- a/bin/digifeeds/upload_to_s3.config.example +++ b/bin/digifeeds/upload_to_s3.config.example @@ -1,4 +1,3 @@ input_directory="some/path/to/input/directory" processed_directory="some/path/to/processed/directory" -work_directory="some/path/to/work/directory" -log_directory="some/path/to/log/directory" \ No newline at end of file +digifeeds_bucket="rclone_remote_to_s3_bucket" \ No newline at end of file diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index bf33723..edcc0e4 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -1,13 +1,155 @@ #! /bin/bash +########### +# CONSTANTS +########### + # For push gateway START_TIME=$(date '+%s') -########### # Directory this script lives in SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -source $SCRIPT_DIR/upload_to_s3_functions.sh + +# matches .tif and .jp2 files with 8 digit file names that start with 0 OR +# checksum.md5 files +# examples that match: +# 01234567.tif +# 01234567.jp2 +# checksum.md5 +IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' + +# For push gateway +JOB_NAME="aim_digifeeds_upload_to_s3" + +########### +# FUNCTIONS +########### + +log_info() { + echo "$(date --rfc-3339=seconds) - INFO: ${@}" +} + +log_error() { + echo "$(date --rfc-3339=seconds) - ERROR: ${@}" +} + +# Gets the last count from a job in the push gateway push gateway +last_count() { + local metric=$1 + pushgateway_advanced -j $JOB_NAME -q ${metric} +} + +zip_it() { + local barcode_path=$1 + cd $barcode_path + ls | awk "$IMGAWK" | xargs zip -rq $barcode_path.zip + local zip_return=$? + #Go back to previous directory; Don't print the output. + cd - >/dev/null + return $zip_return +} + +verify_zip() { + local barcode_path=$1 + + local files_in_dir=$(ls $barcode_path | awk "$IMGAWK" | sort) + [ $? != 0 ] && return 1 + local files_in_zip=$(zipinfo -1 $barcode_path.zip | sort) + [ $? 
!= 0 ] && return 1 + + if [ "$files_in_dir" == "$files_in_zip" ]; then + return 0 + else + return 1 + fi +} + +print_metrics() { + local fp_current_total=$1 + local upload_errors_current_total=$2 + local errors_current_total=$3 + + local fp_metric="${JOB_NAME}_files_processed_total" + local fp_last=$(last_count $fp_metric) + local fp_total=$((fp_last + fp_current_total)) + + local upload_errors_metric="${JOB_NAME}_upload_errors_total" + local upload_errors_last=$(last_count $upload_errors_metric) + local upload_errors_total=$((upload_errors_last + upload_errors_current_total)) + + local errors_metric="${JOB_NAME}_errors_total" + local errors_last=$(last_count $errors_metric) + local errors_total=$((errors_last + errors_current_total)) + + cat </dev/null - return $zip_return -} - -verify_zip() { - local barcode_path=$1 - - local files_in_dir=$(ls $barcode_path | awk "$IMGAWK" | sort) - [ $? != 0 ] && return 1 - local files_in_zip=$(zipinfo -1 $barcode_path.zip | sort) - [ $? != 0 ] && return 1 - - if [ "$files_in_dir" == "$files_in_zip" ]; then - return 0 - else - return 1 - fi -} - - -print_metrics() { - local fp_current_total=$1 - local upload_errors_current_total=$2 - local errors_current_total=$3 - - local fp_metric="${JOB_NAME}_files_processed_total" - local fp_last=$(last_count $fp_metric) - local fp_total=$((fp_last + fp_current_total)) - - local upload_errors_metric="${JOB_NAME}_upload_errors_total" - local upload_errors_last=$(last_count $upload_errors_metric) - local upload_errors_total=$((upload_errors_last + upload_errors_current_total)) - - local errors_metric="${JOB_NAME}_errors_total" - local errors_last=$(last_count $errors_metric) - local errors_total=$((errors_last + errors_current_total)) - - cat <$CONFIG_PATH -input_directory="$INPUT_DIR" -processed_directory="$PROCESSED_DIR" -digifeeds_bucket="digifeeds_bucket" -timestamp=$TIMESTAMP -send_metrics="false" -EOF + ## Config that's in main. + input_directory="$INPUT_DIR" + processed_directory="$PROCESSED_DIR" + digifeeds_bucket="digifeeds_bucket" + timestamp=$TIMESTAMP + send_metrics="false" + APP_ENV="test" + load "$BATS_TEST_DIRNAME/upload_to_s3.sh" } teardown() { rm -r $SCRATCH_PATH } -@test "Back to Basics" { - run log_info "Hello" - assert_output --partial "Hello" -} - - @test "It Works" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: From 30b1faef23c6add6588c49afb5ed116a6a19bc52 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Tue, 22 Oct 2024 20:36:54 +0000 Subject: [PATCH 08/16] change job name to not have a number --- bin/digifeeds/upload_to_s3.sh | 4 ++-- bin/digifeeds/upload_to_s3_test.sh | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index edcc0e4..68dcf38 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -1,4 +1,4 @@ -#! /bin/bash +# ! 
/bin/bash ########### # CONSTANTS @@ -20,7 +20,7 @@ SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' # For push gateway -JOB_NAME="aim_digifeeds_upload_to_s3" +JOB_NAME="aim_digifeeds_upload_to_aws" ########### # FUNCTIONS diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index 053b08b..6608ebb 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -9,7 +9,6 @@ setup() { load $SHELLMOCK_PATH SCRATCH_PATH="/tmp/upload_to_s3" CONFIG_PATH=$SCRATCH_PATH/upload_to_s3.config - #SUBJECT="$BATS_TEST_DIRNAME/upload_to_s3.sh $CONFIG_PATH" SUBJECT=main @@ -133,9 +132,9 @@ teardown() { shellmock new pushgateway_advanced shellmock config pushgateway_advanced 0 <<< 5 run print_metrics 1 2 3 - assert_output --partial "aim_digifeeds_upload_to_s3_files_processed_total 6" - assert_output --partial "aim_digifeeds_upload_to_s3_upload_errors_total 7" - assert_output --partial "aim_digifeeds_upload_to_s3_errors_total 8" + assert_output --partial "aim_digifeeds_upload_to_aws_files_processed_total 6" + assert_output --partial "aim_digifeeds_upload_to_aws_upload_errors_total 7" + assert_output --partial "aim_digifeeds_upload_to_aws_errors_total 8" shellmock assert expectations pushgateway_advanced } From bab4c617e14cab715727d0c80cad07b6a54fdf7f Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Tue, 22 Oct 2024 21:21:25 +0000 Subject: [PATCH 09/16] add more logs; fix rclone copy command --- bin/digifeeds/upload_to_s3.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 68dcf38..c36363d 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -109,6 +109,7 @@ main() { log_info "Copying $barcode" + log_info "Zipping $barcode" zip_it $input_directory/$barcode if [[ $? != 0 ]]; then log_error "Failed to zip $barcode" @@ -116,6 +117,7 @@ main() { continue fi + log_info "Verifying zip of $barcode" verify_zip $input_directory/$barcode if [[ $? != 0 ]]; then log_error "$barcode.zip does not contain the correct files" @@ -123,8 +125,8 @@ main() { continue fi - - rclone copy $input_directory/$barcode.zip $digifeeds_bucket:$barcode.zip + log_info "Sending $barcode to S3" + rclone copy $input_directory/$barcode.zip $digifeeds_bucket: if [[ $? != 0 ]]; then log_error "Failed to copy $barcode" upload_errors_total=$((upload_errors_total + 1)) @@ -132,6 +134,7 @@ main() { continue fi + log_info "Verifying barcode in S3" rclone check $input_directory/$barcode.zip $digifeeds_bucket: if [[ $? 
!= 0 ]]; then log_error "$barcode not found in S3" From 7c6d021508e780c5a9db72abd7ba8cee1cc180de Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Tue, 22 Oct 2024 21:35:43 +0000 Subject: [PATCH 10/16] getting rid of tabs in summary output --- bin/digifeeds/upload_to_s3.sh | 6 +++--- bin/digifeeds/upload_to_s3_test.sh | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index c36363d..1d69d30 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -149,9 +149,9 @@ main() { files_processed_total=$((files_processed_total + 1)) done - log_info "Total files processed:\t$files_processed_total" - log_info "Total errors uploading to S3:\t$upload_errors_total" - log_info "Total errors:\t$errors_total" + log_info "Total files processed: $files_processed_total" + log_info "Total errors uploading to S3: $upload_errors_total" + log_info "Total errors: $errors_total" } diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index 6608ebb..cafe0d0 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -98,9 +98,9 @@ teardown() { run $SUBJECT assert_output --partial "ERROR: Failed to zip $BARCODE_1" assert_output --partial "ERROR: Failed to zip $BARCODE_2" - assert_output --partial "INFO: Total files processed:\t0" - assert_output --partial "INFO: Total errors:\t2" - assert_output --partial "INFO: Total errors uploading to S3:\t0" + assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors: 2" + assert_output --partial "INFO: Total errors uploading to S3: 0" shellmock assert expectations zip } @@ -110,9 +110,9 @@ teardown() { run $SUBJECT assert_output --partial "ERROR: Failed to copy $BARCODE_1" assert_output --partial "ERROR: Failed to copy $BARCODE_2" - assert_output --partial "INFO: Total files processed:\t0" - assert_output --partial "INFO: Total errors:\t2" - assert_output --partial "INFO: Total errors uploading to S3:\t2" + assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors: 2" + assert_output --partial "INFO: Total errors uploading to S3: 2" shellmock assert expectations rclone } @@ -123,9 +123,9 @@ teardown() { run $SUBJECT assert_output --partial "ERROR: $BARCODE_1 not found in S3" assert_output --partial "ERROR: $BARCODE_2 not found in S3" - assert_output --partial "INFO: Total files processed:\t0" - assert_output --partial "INFO: Total errors:\t2" - assert_output --partial "INFO: Total errors uploading to S3:\t2" + assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors: 2" + assert_output --partial "INFO: Total errors uploading to S3: 2" shellmock assert expectations rclone } @test "print_metrics" { From f0e9531c1acafaeb98c3047890cceb4b326f5e85 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Tue, 22 Oct 2024 21:59:13 +0000 Subject: [PATCH 11/16] counters need to be global and passed to print_metrics --- bin/digifeeds/upload_to_s3.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 1d69d30..fb9793b 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -21,6 +21,13 @@ IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' # For push gateway JOB_NAME="aim_digifeeds_upload_to_aws" + +########### +# COUNTERS +########### 
+files_processed_total=0 +upload_errors_total=0 +errors_total=0 ########### # FUNCTIONS @@ -97,9 +104,6 @@ EOMETRICS main() { TIMESTAMP=${timestamp:-$(date +%F_%H-%M-%S)} #YYY-MM-DD_hh-mm-ss - local files_processed_total=0 - local upload_errors_total=0 - local errors_total=0 #This is so that the script works on empty directories. shopt -s nullglob @@ -174,7 +178,7 @@ if [[ $APP_ENV != "test" ]]; then main if [ "$send_metrics" != "false" ]; then - print_metrics | /usr/local/bin/pushgateway_advanced -j $JOB_NAME + print_metrics $files_processed_total $upload_errors_total $errors_total | /usr/local/bin/pushgateway_advanced -j $JOB_NAME /usr/local/bin/pushgateway -j $JOB_NAME -b $START_TIME fi log_info "=====End $(date)=====" From 52fdbd1ea083afb7a9f10745fc8e237b872d9990 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Wed, 23 Oct 2024 16:49:07 +0000 Subject: [PATCH 12/16] checks image order --- bin/digifeeds/upload_to_s3.sh | 43 ++++++++++++++++++++++++-- bin/digifeeds/upload_to_s3_test.sh | 48 +++++++++++++++++++++++------- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index fb9793b..128e454 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -26,6 +26,7 @@ JOB_NAME="aim_digifeeds_upload_to_aws" # COUNTERS ########### files_processed_total=0 +image_order_errors_total=0 upload_errors_total=0 errors_total=0 @@ -47,6 +48,19 @@ last_count() { pushgateway_advanced -j $JOB_NAME -q ${metric} } +verify_image_order(){ + #Sort the array + sorted=($(printf '%s\n' "$@" | sort )) + + local cnt=0 + for arg in "${sorted[@]}"; do + cnt=$((cnt+1)) + int=${arg:0:8} + [ $((int)) != $cnt ] && return 1 + done + return 0 +} + zip_it() { local barcode_path=$1 cd $barcode_path @@ -74,16 +88,22 @@ verify_zip() { print_metrics() { local fp_current_total=$1 - local upload_errors_current_total=$2 - local errors_current_total=$3 + local image_order_errors_current_total=$2 + local upload_errors_current_total=$3 + local errors_current_total=$4 local fp_metric="${JOB_NAME}_files_processed_total" local fp_last=$(last_count $fp_metric) local fp_total=$((fp_last + fp_current_total)) + local image_order_errors_metric="${JOB_NAME}_image_order_errors_total" + local image_order_errors_last=$(last_count $image_order_errors_metric) + local image_order_errors_total=$((image_order_errors_last + image_order_errors_current_total)) + local upload_errors_metric="${JOB_NAME}_upload_errors_total" local upload_errors_last=$(last_count $upload_errors_metric) local upload_errors_total=$((upload_errors_last + upload_errors_current_total)) + local errors_metric="${JOB_NAME}_errors_total" local errors_last=$(last_count $errors_metric) @@ -93,6 +113,9 @@ print_metrics() { # HELP ${fp_metric} Count of digifeeds zip files sent to S3 # TYPE ${fp_metric} counter $fp_metric $fp_total +# HELP ${image_order_errors_metric} Count of folders where there are missing pages of images +# TYPE ${image_order_errors_metric} counter +${image_order_errors_metric} $image_order_errors_total # HELP ${upload_errors_metric} Count of errors when uploading digifeeds zip files to S3 # TYPE ${upload_errors_metric} counter ${upload_errors_metric} $upload_errors_total @@ -112,6 +135,19 @@ main() { local barcode=$(basename ${barcode_path%%/}) log_info "Copying $barcode" + + log_info "Verifying image order $barcode" + #8 digits, ends in .tif or .jp2 + filter_regex='[[:digit:]]{8}\.tif$|[[:digit:]]{8}\.jp2$' + local image_list=$(cd $barcode_path && ls | egrep 
"$filter_regex") + verify_image_order $image_list + if [[ $? != 0 ]]; then + log_error "Image order incorrect for $barcode" + image_order_errors_total=$((image_order_errors_total + 1)) + errors_total=$((errors_total + 1)) + continue + fi + log_info "Zipping $barcode" zip_it $input_directory/$barcode @@ -154,6 +190,7 @@ main() { done log_info "Total files processed: $files_processed_total" + log_info "Total errors image order: $image_order_errors_total " log_info "Total errors uploading to S3: $upload_errors_total" log_info "Total errors: $errors_total" } @@ -178,7 +215,7 @@ if [[ $APP_ENV != "test" ]]; then main if [ "$send_metrics" != "false" ]; then - print_metrics $files_processed_total $upload_errors_total $errors_total | /usr/local/bin/pushgateway_advanced -j $JOB_NAME + print_metrics $files_processed_total $image_order_errors_total $upload_errors_total $errors_total | /usr/local/bin/pushgateway_advanced -j $JOB_NAME /usr/local/bin/pushgateway -j $JOB_NAME -b $START_TIME fi log_info "=====End $(date)=====" diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index cafe0d0..661d57d 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -17,22 +17,22 @@ setup() { INPUT_DIR=$SCRATCH_PATH/input PROCESSED_DIR=$SCRATCH_PATH/processed - BARCODE_1="30123456789012" - BARCODE_2="40123456789012" + BARCODE_1="30000000189012" + BARCODE_2="40000000189012" TIMESTAMP="YYYY-MM-DD_hh-mm-ss" mkdir $INPUT_DIR mkdir $PROCESSED_DIR mkdir $INPUT_DIR/$BARCODE_1 - touch $INPUT_DIR/$BARCODE_1/01234567.tif - touch $INPUT_DIR/$BARCODE_1/01234567.jp2 + touch $INPUT_DIR/$BARCODE_1/00000001.tif + touch $INPUT_DIR/$BARCODE_1/00000002.jp2 touch $INPUT_DIR/$BARCODE_1/checksum.md5 touch $INPUT_DIR/$BARCODE_1/Thumbs.db touch $INPUT_DIR/$BARCODE_1/some_other_file.tif mkdir $INPUT_DIR/$BARCODE_2 - touch $INPUT_DIR/$BARCODE_2/01234567.tif + touch $INPUT_DIR/$BARCODE_2/00000001.tif ## Config that's in main. 
input_directory="$INPUT_DIR" @@ -74,8 +74,8 @@ teardown() { cd $BATS_TEST_TMPDIR mv $PROCESSED_DIR/${TIMESTAMP}_${BARCODE_1}.zip ./ unzip -q ${TIMESTAMP}_${BARCODE_1}.zip - assert_file_exists '01234567.jp2' - assert_file_exists '01234567.tif' + assert_file_exists '00000001.tif' + assert_file_exists '00000002.jp2' assert_file_exists 'checksum.md5' assert_file_not_exists 'Thumbs.db' assert_file_not_exists 'some_other_file.tif' @@ -92,6 +92,30 @@ teardown() { assert_success } +@test "verify_image_order sucess" { + run verify_image_order 00000001.tif 00000003.jp2 00000002.tif + assert_success +} + +@test "verify_image_order failure" { + run verify_image_order 00000001.tif 00000003.tif 00000004.jp2 + assert_failure +} + +@test "Failed image order" { + shellmock new rclone + shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket: + shellmock config rclone 0 1:check regex-2:$INPUT_DIR regex-3:^digifeeds_bucket: + touch $INPUT_DIR/$BARCODE_1/00000004.jp2 + run $SUBJECT + assert_output --partial "ERROR: Image order incorrect for $BARCODE_1" + assert_output --partial "INFO: Total files processed: 1" + assert_output --partial "INFO: Total errors: 1" + assert_output --partial "INFO: Total errors image order: 1" + assert_output --partial "INFO: Total errors uploading to S3: 0" + shellmock assert expectations rclone +} + @test "Failed zip" { shellmock new zip shellmock config zip 1 @@ -99,6 +123,7 @@ teardown() { assert_output --partial "ERROR: Failed to zip $BARCODE_1" assert_output --partial "ERROR: Failed to zip $BARCODE_2" assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors image order: 0" assert_output --partial "INFO: Total errors: 2" assert_output --partial "INFO: Total errors uploading to S3: 0" shellmock assert expectations zip @@ -111,6 +136,7 @@ teardown() { assert_output --partial "ERROR: Failed to copy $BARCODE_1" assert_output --partial "ERROR: Failed to copy $BARCODE_2" assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors image order: 0" assert_output --partial "INFO: Total errors: 2" assert_output --partial "INFO: Total errors uploading to S3: 2" shellmock assert expectations rclone @@ -124,6 +150,7 @@ teardown() { assert_output --partial "ERROR: $BARCODE_1 not found in S3" assert_output --partial "ERROR: $BARCODE_2 not found in S3" assert_output --partial "INFO: Total files processed: 0" + assert_output --partial "INFO: Total errors image order: 0" assert_output --partial "INFO: Total errors: 2" assert_output --partial "INFO: Total errors uploading to S3: 2" shellmock assert expectations rclone @@ -131,10 +158,11 @@ teardown() { @test "print_metrics" { shellmock new pushgateway_advanced shellmock config pushgateway_advanced 0 <<< 5 - run print_metrics 1 2 3 + run print_metrics 1 2 3 4 assert_output --partial "aim_digifeeds_upload_to_aws_files_processed_total 6" - assert_output --partial "aim_digifeeds_upload_to_aws_upload_errors_total 7" - assert_output --partial "aim_digifeeds_upload_to_aws_errors_total 8" + assert_output --partial "aim_digifeeds_upload_to_aws_image_order_errors_total 7" + assert_output --partial "aim_digifeeds_upload_to_aws_upload_errors_total 8" + assert_output --partial "aim_digifeeds_upload_to_aws_errors_total 9" shellmock assert expectations pushgateway_advanced } From 77aaa5fbc9c938b1f256db050778a793a50b21bc Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Wed, 23 Oct 2024 16:52:24 +0000 Subject: [PATCH 13/16] add debug logging --- 
bin/digifeeds/upload_to_s3.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 128e454..5abbe8e 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -41,6 +41,10 @@ log_info() { log_error() { echo "$(date --rfc-3339=seconds) - ERROR: ${@}" } +log_debug() +{ + [[ ${DEBUG:-false} == "true" ]] && echo "$(date --rfc-3339=seconds) - DEBUG: ${@}" +} # Gets the last count from a job in the push gateway push gateway last_count() { @@ -136,7 +140,7 @@ main() { log_info "Copying $barcode" - log_info "Verifying image order $barcode" + log_debug "Verifying image order $barcode" #8 digits, ends in .tif or .jp2 filter_regex='[[:digit:]]{8}\.tif$|[[:digit:]]{8}\.jp2$' local image_list=$(cd $barcode_path && ls | egrep "$filter_regex") @@ -149,7 +153,7 @@ main() { fi - log_info "Zipping $barcode" + log_debug "Zipping $barcode" zip_it $input_directory/$barcode if [[ $? != 0 ]]; then log_error "Failed to zip $barcode" @@ -157,7 +161,7 @@ main() { continue fi - log_info "Verifying zip of $barcode" + log_debug "Verifying zip of $barcode" verify_zip $input_directory/$barcode if [[ $? != 0 ]]; then log_error "$barcode.zip does not contain the correct files" @@ -165,7 +169,7 @@ main() { continue fi - log_info "Sending $barcode to S3" + log_debug "Sending $barcode to S3" rclone copy $input_directory/$barcode.zip $digifeeds_bucket: if [[ $? != 0 ]]; then log_error "Failed to copy $barcode" @@ -174,7 +178,7 @@ main() { continue fi - log_info "Verifying barcode in S3" + log_debug "Verifying barcode in S3" rclone check $input_directory/$barcode.zip $digifeeds_bucket: if [[ $? != 0 ]]; then log_error "$barcode not found in S3" From bd0a6c19b6b118e4d6340d72d047112a2f9298f4 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Wed, 23 Oct 2024 17:00:29 +0000 Subject: [PATCH 14/16] explictly convert integer string to a decimal value --- bin/digifeeds/upload_to_s3.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 5abbe8e..39a071f 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -60,7 +60,7 @@ verify_image_order(){ for arg in "${sorted[@]}"; do cnt=$((cnt+1)) int=${arg:0:8} - [ $((int)) != $cnt ] && return 1 + [ $((10#$int)) != $cnt ] && return 1 done return 0 } From 10a1dd26482819f645cac867db7cf4a4bcc6fdb5 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Wed, 23 Oct 2024 19:07:17 +0000 Subject: [PATCH 15/16] fix shellcheck complaints --- bin/digifeeds/upload_to_s3.sh | 145 ++++++++++++++++------------- bin/digifeeds/upload_to_s3_test.sh | 87 +++++++++-------- 2 files changed, 122 insertions(+), 110 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 39a071f..75042cb 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -1,4 +1,4 @@ -# ! /bin/bash +#! 
/bin/bash ########### # CONSTANTS @@ -10,6 +10,26 @@ START_TIME=$(date '+%s') # Directory this script lives in SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +${APP_ENV:-"production"} +if [[ $APP_ENV != "test" ]]; then + # CONFIG + # Variables contained in the config file: + # + # input_directory: path to the input directory + # processed_directory: path to the directory of processed files+% + # digifeeds_bucket: rclone remote for the digifeeds bucket + # + # timestamp: used for testing timestamps; should be ommited in production + # send_metrics: when "false" metrics don't get sent; + # APP_ENV: when "test" the main script is not executed + CONFIG_FILE=${1:-$SCRIPT_DIR/upload_to_s3.config} + # shellcheck source=/dev/null + source "$CONFIG_FILE" +fi +${input_directory:?} +${processed_directory:?} +${digifeeds_bucket:?} +${send_metrics:-"true"} # matches .tif and .jp2 files with 8 digit file names that start with 0 OR # checksum.md5 files @@ -21,7 +41,7 @@ IMGAWK='/^(0[0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.(tif|jp2)|checksum\.md5)$/' # For push gateway JOB_NAME="aim_digifeeds_upload_to_aws" - + ########### # COUNTERS ########### @@ -35,58 +55,67 @@ errors_total=0 ########### log_info() { - echo "$(date --rfc-3339=seconds) - INFO: ${@}" + echo "$(date --rfc-3339=seconds) - INFO: $*" } log_error() { - echo "$(date --rfc-3339=seconds) - ERROR: ${@}" + echo "$(date --rfc-3339=seconds) - ERROR: $*" } -log_debug() -{ - [[ ${DEBUG:-false} == "true" ]] && echo "$(date --rfc-3339=seconds) - DEBUG: ${@}" +log_debug() { + [[ ${DEBUG:-false} == "true" ]] && echo "$(date --rfc-3339=seconds) - DEBUG: $*" +} + +#equivalent to ls +list_files() { + local path=$1 + find "$path" -maxdepth 1 ! -printf '%P\n' } # Gets the last count from a job in the push gateway push gateway last_count() { local metric=$1 - pushgateway_advanced -j $JOB_NAME -q ${metric} + pushgateway_advanced -j $JOB_NAME -q "${metric}" } -verify_image_order(){ +verify_image_order() { #Sort the array - sorted=($(printf '%s\n' "$@" | sort )) + mapfile -t sorted < <(printf '%s\n' "$@" | sort) local cnt=0 for arg in "${sorted[@]}"; do - cnt=$((cnt+1)) + cnt=$((cnt + 1)) int=${arg:0:8} - [ $((10#$int)) != $cnt ] && return 1 + [ $((10#$int)) != $cnt ] && return 1 done return 0 } zip_it() { local barcode_path=$1 - cd $barcode_path - ls | awk "$IMGAWK" | xargs zip -rq $barcode_path.zip + cd "$barcode_path" || return 1 + list_files . | awk "$IMGAWK" | xargs zip -rq "$barcode_path".zip local zip_return=$? #Go back to previous directory; Don't print the output. - cd - >/dev/null + cd - >/dev/null || return 1 return $zip_return } verify_zip() { local barcode_path=$1 - local files_in_dir=$(ls $barcode_path | awk "$IMGAWK" | sort) - [ $? != 0 ] && return 1 - local files_in_zip=$(zipinfo -1 $barcode_path.zip | sort) - [ $? != 0 ] && return 1 + local files_in_dir + if ! files_in_dir=$(list_files "$barcode_path" | awk "$IMGAWK" | sort); then + return 1 + fi + local files_in_zip + if ! 
files_in_zip=$(zipinfo -1 "$barcode_path".zip | sort); then + return 1 + fi if [ "$files_in_dir" == "$files_in_zip" ]; then return 0 else - return 1 + return 1 fi } @@ -97,20 +126,23 @@ print_metrics() { local errors_current_total=$4 local fp_metric="${JOB_NAME}_files_processed_total" - local fp_last=$(last_count $fp_metric) + local fp_last + fp_last=$(last_count $fp_metric) local fp_total=$((fp_last + fp_current_total)) local image_order_errors_metric="${JOB_NAME}_image_order_errors_total" - local image_order_errors_last=$(last_count $image_order_errors_metric) + local image_order_errors_last + image_order_errors_last=$(last_count $image_order_errors_metric) local image_order_errors_total=$((image_order_errors_last + image_order_errors_current_total)) local upload_errors_metric="${JOB_NAME}_upload_errors_total" - local upload_errors_last=$(last_count $upload_errors_metric) + local upload_errors_last + upload_errors_last=$(last_count $upload_errors_metric) local upload_errors_total=$((upload_errors_last + upload_errors_current_total)) - local errors_metric="${JOB_NAME}_errors_total" - local errors_last=$(last_count $errors_metric) + local errors_last + errors_last=$(last_count $errors_metric) local errors_total=$((errors_last + errors_current_total)) cat < Date: Wed, 23 Oct 2024 19:51:26 +0000 Subject: [PATCH 16/16] better handles configuration variables --- bin/digifeeds/upload_to_s3.sh | 11 ++++++----- bin/digifeeds/upload_to_s3_test.sh | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/bin/digifeeds/upload_to_s3.sh b/bin/digifeeds/upload_to_s3.sh index 75042cb..e63b766 100755 --- a/bin/digifeeds/upload_to_s3.sh +++ b/bin/digifeeds/upload_to_s3.sh @@ -10,7 +10,8 @@ START_TIME=$(date '+%s') # Directory this script lives in SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -${APP_ENV:-"production"} +APP_ENV=${APP_ENV:-"production"} + if [[ $APP_ENV != "test" ]]; then # CONFIG # Variables contained in the config file: @@ -26,10 +27,10 @@ if [[ $APP_ENV != "test" ]]; then # shellcheck source=/dev/null source "$CONFIG_FILE" fi -${input_directory:?} -${processed_directory:?} -${digifeeds_bucket:?} -${send_metrics:-"true"} +if ! input_directory=${input_directory:?}; then exit 1; fi +if ! processed_directory=${processed_directory:?}; then exit 1; fi +if ! digifeeds_bucket=${digifeeds_bucket:?}; then exit 1; fi +send_metrics=${send_metrics:-"true"} # matches .tif and .jp2 files with 8 digit file names that start with 0 OR # checksum.md5 files diff --git a/bin/digifeeds/upload_to_s3_test.sh b/bin/digifeeds/upload_to_s3_test.sh index 64dfa2f..9657d20 100755 --- a/bin/digifeeds/upload_to_s3_test.sh +++ b/bin/digifeeds/upload_to_s3_test.sh @@ -46,6 +46,22 @@ teardown() { rm -r "$SCRATCH_PATH" } +@test "exits without input_directory" { + unset input_directory + run "${BATS_TEST_DIRNAME}/upload_to_s3.sh" + assert_failure +} +@test "exits without processed_directory" { + unset processed_directory + run "${BATS_TEST_DIRNAME}/upload_to_s3.sh" + assert_failure +} +@test "exits without digifeeds_bucket" { + unset digifeeds_bucket + run "${BATS_TEST_DIRNAME}/upload_to_s3.sh" + assert_failure +} + @test "It Works" { shellmock new rclone shellmock config rclone 0 1:copy regex-3:^digifeeds_bucket:
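
For reference, a minimal sketch of how the finished script and its tests are run after this series; the paths below are placeholders, and the rclone remote "digifeeds_bucket" is assumed to be configured separately:

# Example config file (placeholder values), saved as bin/digifeeds/upload_to_s3.config:
#   input_directory="/path/to/input"
#   processed_directory="/path/to/processed"
#   digifeeds_bucket="digifeeds_bucket"
#   send_metrics="false"   # optional; defaults to "true"
#
# Run the script, passing the config path as the first argument
# (defaults to upload_to_s3.config next to the script):
#   ./bin/digifeeds/upload_to_s3.sh bin/digifeeds/upload_to_s3.config
#
# Run the bats tests inside the development image, where bats, bats-assert,
# bats-file, and shellmock are installed and SHELLMOCK_PATH is already set:
#   bats bin/digifeeds/upload_to_s3_test.sh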