diff --git a/.github/workflow_scripts/build_paddle.sh b/.github/workflow_scripts/build_paddle.sh
new file mode 100644
index 000000000..e931f76b3
--- /dev/null
+++ b/.github/workflow_scripts/build_paddle.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Evaluate the paddle tab of the book with d2lbook and sync the build cache with S3.
+#
+# Usage: build_paddle.sh REPO_NAME TARGET_BRANCH CACHE_DIR
+# Environment:
+#   DISABLE_CACHE  the cached eval output is restored from S3 only when this
+#                  is exactly "false"
+
+set -ex
+
+# Used to capture status exit of build eval command
+ss=0
+
+REPO_NAME="${1:?REPO_NAME is required}"         # Eg. 'd2l-zh'
+TARGET_BRANCH="${2:?TARGET_BRANCH is required}" # Eg. 'master' ; if PR raised to master
+CACHE_DIR="${3:?CACHE_DIR is required}"         # Eg. 'ci_cache_pr' or 'ci_cache_push'
+
+pip3 install d2l==0.17.6
+# -p: do not abort under `set -e` when _build already exists
+mkdir -p _build
+
+# NOTE(review): utils.sh presumably defines measure_command_time (it is not defined in this script) - confirm
+source "$(dirname "$0")/utils.sh"
+
+# Move sanity check outside
+d2lbook build outputcheck tabcheck
+
+# Move aws copy commands for cache restore outside
+if [ "$DISABLE_CACHE" = "false" ]; then
+    echo "Retrieving paddle build cache from $CACHE_DIR"
+    measure_command_time "aws s3 sync s3://preview.d2l.ai/${CACHE_DIR}/${REPO_NAME}-${TARGET_BRANCH}/_build/eval_paddle/ _build/eval_paddle/ --delete --quiet --exclude 'data/*'"
+fi
+
+# Continue the script even if some notebooks in build fail to
+# make sure that cache is copied to s3 for the successful notebooks
+d2lbook build eval --tab paddle || ss=1
+
+# Move aws copy commands for cache store outside
+echo "Upload paddle build cache to s3"
+measure_command_time "aws s3 sync _build s3://preview.d2l.ai/${CACHE_DIR}/${REPO_NAME}-${TARGET_BRANCH}/_build --acl public-read --quiet --exclude 'eval*/data/*'"
+
+# Exit with a non-zero status if evaluation failed
+if [ "$ss" -ne 0 ]; then
+    exit 1
+fi
diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index 1d51302ec..71e1183c8 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -12,6 +12,9 @@ on:
       image_mxnet:
         type: boolean
         description: Build MXNet Image
+      image_paddle:
+        type: boolean
+        description: Build Paddle Image
       image_builder:
         type: boolean
         description: Build D2L Builder Image
@@ -66,6 +69,16 @@ jobs:
           # Clean up to reclaim space
           echo "y" | docker system prune -a
 
+      - name: Build D2L Paddle Image
+        if: github.event.inputs.image_paddle == 'true'
+        run: |
+          chmod +x ./login_ecr.sh; ./login_ecr.sh
+          docker build -f Dockerfile.d2l-zh-paddle -t d2l-containers:d2l-zh-paddle-latest .
+          docker tag d2l-containers:d2l-zh-paddle-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-zh-paddle-latest
+          docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-zh-paddle-latest
+          # Clean up to reclaim space
+          echo "y" | docker system prune -a
+
       - name: Build D2L CPU Builder Image
         if: github.event.inputs.image_builder == 'true'
         run: |
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 301e52904..49e161fb5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -83,16 +83,38 @@ jobs:
           echo "Terminating Submitted AWS Batch Job: "${{ env.Batch_JobID }}""
           aws batch terminate-job --job-id "${{ env.Batch_JobID }}" --reason "Job terminated by cancelled workflow"
 
+  build_paddle:
+    name: Build Paddle
+    if: "github.repository == 'd2l-ai/d2l-zh' && !contains(github.event.head_commit.message, '[skip paddle]') && !contains(github.event.head_commit.message, '[skip frameworks]')"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Setup Env Vars
+        uses: ./.github/actions/setup_env_vars
+      - name: Evaluate Paddle on AWS Batch
+        uses: ./.github/actions/submit-job
+        with:
+          job-type: ci-gpu-paddle
+          job-name: D2L-Build-Paddle
+          command: chmod +x ./.github/workflow_scripts/build_paddle.sh && ./.github/workflow_scripts/build_paddle.sh "${{ env.REPO_NAME }}" "${{ env.TARGET_BRANCH }}" "${{ env.CACHE_DIR }}"
+      - name: Terminate Batch Job on Cancellation
+        if: ${{ cancelled() && env.Batch_JobID }}
+        run: |
+          echo "Terminating Submitted AWS Batch Job: "${{ env.Batch_JobID }}""
+          aws batch terminate-job --job-id "${{ env.Batch_JobID }}" --reason "Job terminated by cancelled workflow"
+
   build_and_deploy:
     name: Build Website/PDF & Publish
-    needs: [build_torch, build_tf, build_mxnet]
+    needs: [build_torch, build_tf, build_mxnet, build_paddle]
     if: |
       always() &&
       github.repository == 'd2l-ai/d2l-zh' &&
       !contains(github.event.head_commit.message, '[skip builder]') &&
       (needs.build_torch.result == 'success' || needs.build_torch.result == 'skipped') &&
       (needs.build_tf.result == 'success' || needs.build_tf.result == 'skipped') &&
-      (needs.build_mxnet.result == 'success' || needs.build_mxnet.result == 'skipped')
+      (needs.build_mxnet.result == 'success' || needs.build_mxnet.result == 'skipped') &&
+      (needs.build_paddle.result == 'success' || needs.build_paddle.result == 'skipped')
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
diff --git a/ci/docker/Dockerfile.d2l-zh-paddle b/ci/docker/Dockerfile.d2l-zh-paddle
new file mode 100644
index 000000000..3815cc8e4
--- /dev/null
+++ b/ci/docker/Dockerfile.d2l-zh-paddle
@@ -0,0 +1,28 @@
+# Use Paddle 2.3.2 (Dec 2022)
+FROM nvcr.io/nvidia/paddlepaddle:22.12-py3
+
+RUN adduser --disabled-password --disabled-login ci
+WORKDIR /home/ci
+
+# Copy d2l_job script
+ADD d2l_job.sh .
+RUN chmod +x d2l_job.sh; chown ci d2l_job.sh
+
+# Copy git timesync for caching
+ADD git-timesync /home/ci/.local/bin/
+RUN chmod +x /home/ci/.local/bin/git-timesync
+
+# Allow permissions for pip installations and git-timesync
+RUN chown -R ci:ci /home/ci/.local
+
+USER ci
+
+ENV PATH="/home/ci/.local/bin:$PATH"
+
+# Install d2lbook using pip + paddlepaddle dependencies
+RUN pip3 install git+https://github.com/d2l-ai/d2l-book opencv-python==4.6.0.66
+
+# Python script to print framework versions
+ADD print_versions.py .
+
+CMD ["/bin/bash"]
diff --git a/ci/submit-job.py b/ci/submit-job.py
index e288a6e45..bd1b6a367 100644
--- a/ci/submit-job.py
+++ b/ci/submit-job.py
@@ -35,6 +35,10 @@
     'ci-gpu-mxnet': {
         'job_definition': 'd2l-ci-zh-gpu-mxnet:1',
         'job_queue': 'D2L-CI-GPU'
+    },
+    'ci-gpu-paddle': {
+        'job_definition': 'd2l-ci-zh-gpu-paddle:1',
+        'job_queue': 'D2L-CI-GPU'
     }
 }
 