diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build_upstream.yaml similarity index 100% rename from .github/workflows/_build.yaml rename to .github/workflows/_build_upstream.yaml diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml index 522c51474..8d7962ece 100644 --- a/.github/workflows/_ci.yaml +++ b/.github/workflows/_ci.yaml @@ -37,7 +37,7 @@ jobs: build-jax: needs: build-base - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: artifact-jax-build @@ -52,7 +52,7 @@ jobs: build-triton: needs: build-jax if: inputs.ARCHITECTURE == 'amd64' # Triton does not seem to support arm64 - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: artifact-triton-build @@ -65,7 +65,7 @@ jobs: build-equinox: needs: build-jax - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: artifact-equinox-build @@ -79,7 +79,7 @@ jobs: build-upstream-maxtext: needs: build-jax if: inputs.ARCHITECTURE == 'amd64' # Triton does not seem to support arm64 - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: artifact-maxtext-build @@ -102,7 +102,7 @@ jobs: build-levanter: needs: [build-jax] - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: "artifact-levanter-build" @@ -115,7 +115,7 @@ jobs: build-upstream-t5x: needs: build-jax - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: "artifact-t5x-build" @@ -128,7 +128,7 @@ jobs: build-upstream-pax: needs: build-jax - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: artifact-pax-build @@ -161,7 +161,7 @@ jobs: build-grok: needs: [build-jax] - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: ${{ inputs.ARCHITECTURE }} ARTIFACT_NAME: "artifact-grok-build" diff --git a/.github/workflows/_sandbox.yaml b/.github/workflows/_sandbox.yaml index af3918e32..78ccced41 100644 --- a/.github/workflows/_sandbox.yaml +++ b/.github/workflows/_sandbox.yaml @@ -114,7 +114,7 @@ jobs: build-jax: needs: build-base - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: amd64 ARTIFACT_NAME: artifact-jax-build @@ -128,7 +128,7 @@ jobs: build-upstream-maxtext: needs: build-jax - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: amd64 ARTIFACT_NAME: artifact-maxtext-build diff --git a/.github/workflows/mjx-build-test.yaml b/.github/workflows/mjx-build-test.yaml index 3cca4ee5a..395276c86 100644 --- a/.github/workflows/mjx-build-test.yaml +++ b/.github/workflows/mjx-build-test.yaml @@ -3,18 +3,18 @@ run-name: MJX build (${{ github.event_name == 'workflow_run' && format('nightly on: schedule: - - cron: '30 9 * * *' # Pacific Time 01:30 AM in UTC + - cron: "30 9 * * *" # Pacific Time 01:30 AM in UTC workflow_dispatch: inputs: BASE_IMAGE_AMD64: type: string - description: 'JAX mealkit AMD64 image built by NVIDIA/JAX-Toolbox' - default: '' + description: "JAX mealkit AMD64 image built by NVIDIA/JAX-Toolbox" + default: "" required: false BASE_IMAGE_ARM64: type: string - description: 'JAX mealkit AMD64 image built by NVIDIA/JAX-Toolbox' - default: '' + description: "JAX mealkit AMD64 image built by NVIDIA/JAX-Toolbox" + default: "" required: false PUBLISH: type: boolean @@ -22,19 +22,16 @@ on: default: false required: false - env: DOCKER_REGISTRY: ghcr.io/nvidia DEFAULT_BASE_IMAGE: ghcr.io/nvidia/jax-mealkit:jax - permissions: - contents: read # to fetch code - actions: write # to cancel previous workflows + contents: read # to fetch code + actions: write # to cancel previous workflows packages: write # to upload container jobs: - metadata: runs-on: ubuntu-22.04 outputs: @@ -42,9 +39,8 @@ jobs: BASE_IMAGE_AMD64: ${{ steps.base-image.outputs.BASE_IMAGE_AMD64 }} BASE_IMAGE_ARM64: ${{ steps.base-image.outputs.BASE_IMAGE_ARM64 }} BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }} - - steps: + steps: - name: Cancel workflow if upstream workflow did not success if: ${{ steps.if-upstream-failed.outputs.UPSTREAM_FAILED == 'true' }} run: | @@ -83,10 +79,10 @@ jobs: fi echo "BASE_IMAGE_AMD64=${BASE_IMAGE_AMD64}" >> $GITHUB_OUTPUT echo "BASE_IMAGE_ARM64=${BASE_IMAGE_ARM64}" >> $GITHUB_OUTPUT - + amd64: needs: metadata - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: amd64 ARTIFACT_NAME: artifact-mjx-build @@ -99,7 +95,7 @@ jobs: arm64: needs: metadata - uses: ./.github/workflows/_build.yaml + uses: ./.github/workflows/_build_upstream.yaml with: ARCHITECTURE: arm64 ARTIFACT_NAME: artifact-mjx-build @@ -158,13 +154,17 @@ jobs: matrix: GPU_ARCH: [A100] # ensures A100 job lands on dedicated runner for this particular job - runs-on: [self-hosted, "${{ matrix.GPU_ARCH == 'A100' && format('{0}:{1}', matrix.GPU_ARCH, github.run_id) || matrix.GPU_ARCH }}"] + runs-on: + [ + self-hosted, + "${{ matrix.GPU_ARCH == 'A100' && format('{0}:{1}', matrix.GPU_ARCH, github.run_id) || matrix.GPU_ARCH }}", + ] steps: - name: Print environment variables run: env - name: Print GPU information - run: nvidia-smi + run: nvidia-smi - name: Check out repository uses: actions/checkout@v4 @@ -186,7 +186,7 @@ jobs: continue-on-error: true run: | docker run --gpus=all --shm-size=1g ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} bash -ec "mjx-testspeed --mjcf=humanoid/humanoid.xml --batch_size=8192 --unroll=4 --output=tsv" | tee -a test-mjx.log - + - name: Save perf to summary shell: bash -x -e {0} continue-on-error: true