From d3bb883f636c1646844add496aecebf9af3883f7 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Mon, 21 Oct 2024 18:29:06 +0000 Subject: [PATCH] Fix release_build and gpu ci, use cu124 as default to be consistent with torch Update actions/checkout Update actions/checkout 2 Migrate release_build to use pytorch test-infra linux_job.yml Fix typo Fix typo 2 CUDA 12.4 update as default for pypi and gpu ci Add env env variable pypi token env variable pypi token 2 env variable pypi token 3 env variable pypi token 4 env variable pypi token 5 env variable pypi token 6 env variable pypi token 7 remove upgrade pip remove upgrade pip 2 remove upgrade pip 3 --- .github/workflows/release_build.yml | 36 ++++++++++++----------------- .github/workflows/unittest_ci.yml | 16 +++++++++++++ 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/.github/workflows/release_build.yml b/.github/workflows/release_build.yml index 4dd841c02..8be8ea628 100644 --- a/.github/workflows/release_build.yml +++ b/.github/workflows/release_build.yml @@ -18,37 +18,32 @@ jobs: strategy: matrix: include: - - os: linux.2xlarge - python-version: 3.8 - python-tag: "py38" - cuda-tag: "cu121" - os: linux.2xlarge python-version: 3.9 python-tag: "py39" - cuda-tag: "cu121" + cuda-tag: "cu124" - os: linux.2xlarge python-version: '3.10' python-tag: "py310" - cuda-tag: "cu121" + cuda-tag: "cu124" - os: linux.2xlarge python-version: '3.11' python-tag: "py311" - cuda-tag: "cu121" + cuda-tag: "cu124" - os: linux.2xlarge python-version: '3.12' python-tag: "py312" - cuda-tag: "cu121" + cuda-tag: "cu124" steps: # Checkout the repository to the GitHub Actions runner - name: Check ldd --version run: ldd --version - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Update pip run: | sudo yum update -y sudo yum -y install git python3-pip - sudo pip3 install --upgrade pip - name: Setup conda run: | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh @@ -73,12 +68,12 @@ jobs: - name: Install PyTorch and CUDA shell: bash run: | - conda run -n build_binary pip install torch --index-url https://download.pytorch.org/whl/test/cu121 + conda run -n build_binary pip install torch - name: Install fbgemm shell: bash run: | conda run -n build_binary pip install numpy - conda run -n build_binary pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/cu121 + conda run -n build_binary pip install fbgemm-gpu - name: Install Dependencies shell: bash run: | @@ -102,7 +97,7 @@ jobs: python setup.py bdist_wheel \ --python-tag=${{ matrix.python-tag }} - name: Upload wheel as GHA artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: torchrec_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl path: dist/torchrec-*.whl @@ -112,9 +107,9 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [linux.4xlarge.nvidia.gpu] - python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] - cuda-tag: ["cu121"] + os: [linux.g5.12xlarge.nvidia.gpu] + python-version: [3.9, "3.10", "3.11", "3.12"] + cuda-tag: ["cu124"] needs: build_on_cpu # the glibc version should match the version of the one we used to build the binary # for this case, it's 2.26 @@ -149,12 +144,11 @@ jobs: sudo lshw -C display # Checkout the repository to the GitHub Actions runner - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Update pip run: | sudo yum update -y sudo yum -y install git python3-pip - sudo pip3 install --upgrade pip - name: Setup conda run: | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh @@ -179,19 +173,19 @@ jobs: - name: Install PyTorch and CUDA shell: bash run: | - conda run -n build_binary pip install torch --index-url https://download.pytorch.org/whl/test/cu121 + conda run -n build_binary pip install torch # download wheel from GHA - name: Install fbgemm shell: bash run: | conda run -n build_binary pip install numpy - conda run -n build_binary pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/cu121 + conda run -n build_binary pip install fbgemm-gpu - name: Install torchmetrics shell: bash run: | conda run -n build_binary pip install torchmetrics==1.0.3 - name: Download wheel - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: torchrec_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl - name: Display structure of downloaded files diff --git a/.github/workflows/unittest_ci.yml b/.github/workflows/unittest_ci.yml index 2e120457a..8865acee4 100644 --- a/.github/workflows/unittest_ci.yml +++ b/.github/workflows/unittest_ci.yml @@ -23,6 +23,10 @@ jobs: python-version: 3.9 python-tag: "py39" cuda-tag: "cu121" + - os: linux.g5.12xlarge.nvidia.gpu + python-version: 3.9 + python-tag: "py39" + cuda-tag: "cu124" - os: linux.g5.12xlarge.nvidia.gpu python-version: '3.10' python-tag: "py310" @@ -31,6 +35,10 @@ jobs: python-version: '3.10' python-tag: "py310" cuda-tag: "cu121" + - os: linux.g5.12xlarge.nvidia.gpu + python-version: '3.10' + python-tag: "py310" + cuda-tag: "cu124" - os: linux.g5.12xlarge.nvidia.gpu python-version: '3.11' python-tag: "py311" @@ -39,6 +47,10 @@ jobs: python-version: '3.11' python-tag: "py311" cuda-tag: "cu121" + - os: linux.g5.12xlarge.nvidia.gpu + python-version: '3.11' + python-tag: "py311" + cuda-tag: "cu124" - os: linux.g5.12xlarge.nvidia.gpu python-version: '3.12' python-tag: "py312" @@ -47,6 +59,10 @@ jobs: python-version: '3.12' python-tag: "py312" cuda-tag: "cu121" + - os: linux.g5.12xlarge.nvidia.gpu + python-version: '3.12' + python-tag: "py312" + cuda-tag: "cu124" uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: runner: ${{ matrix.os }}