From b1ee2c5228ef8a88169f1e34205ef05a50410d01 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Wed, 23 Oct 2024 12:27:33 -0700 Subject: [PATCH] package version validation (#2513) Summary: We encountered a case where the TorchRec package version did not match the FBGEMM package version in a release. This change checks for that mismatch when validating the binaries. Reviewed By: iamzainhuda Differential Revision: D64840358 --- .github/scripts/validate_binaries.sh | 71 ++++++++++++++++------------ 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 22009dac3..85ad0de47 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -40,40 +40,33 @@ else export CUDA_VERSION="cpu" fi -if [[ ${MATRIX_CHANNEL} = 'pypi_release' ]]; then - echo "checking pypi release" - pip install torch - pip install fbgemm-gpu - pip install torchrec -else - # figure out URL - if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then - export PYTORCH_URL="https://download.pytorch.org/whl/nightly/${CUDA_VERSION}" - elif [[ ${MATRIX_CHANNEL} = 'test' ]]; then - export PYTORCH_URL="https://download.pytorch.org/whl/test/${CUDA_VERSION}" - elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then - export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}" - fi +# figure out URL +if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then + export PYTORCH_URL="https://download.pytorch.org/whl/nightly/${CUDA_VERSION}" +elif [[ ${MATRIX_CHANNEL} = 'test' ]]; then + export PYTORCH_URL="https://download.pytorch.org/whl/test/${CUDA_VERSION}" +elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then + export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}" +fi - # install pytorch - # switch back to conda once torch nightly is fixed - # if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then - # export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}" - # fi - conda run -n build_binary pip install torch --index-url "$PYTORCH_URL" +# install pytorch 
+# switch back to conda once torch nightly is fixed +# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then +# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}" +# fi +conda run -n build_binary pip install torch --index-url "$PYTORCH_URL" - # install fbgemm - conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL" +# install fbgemm +conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL" - # install requirements from pypi - conda run -n build_binary pip install torchmetrics==1.0.3 +# install requirements from pypi +conda run -n build_binary pip install torchmetrics==1.0.3 - # install torchrec - conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL" +# install torchrec +conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL" - # Run small import test - conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec" -fi +# Run small import test +conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec" # check directory ls -R @@ -98,13 +91,22 @@ fi if [[ ${MATRIX_CHANNEL} != 'release' ]]; then exit 0 +else + # Check version matches only for release binaries + torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2) + fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2) + + if [ "$torchrec_version" != "$fbgemm_version" ]; then + echo "Error: TorchRec package version does not match FBGEMM package version" + exit 1 + fi fi conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}" conda run -n build_binary python --version -if [[ ${MATRIX_GPU_ARCH_VERSION} != '12.1' ]]; then +if [[ ${MATRIX_GPU_ARCH_VERSION} != '12.4' ]]; then exit 0 fi @@ -113,6 +115,15 @@ conda run -n build_binary pip install torch conda run -n build_binary pip install fbgemm-gpu conda run -n build_binary pip install torchrec +# Check version matching again for PyPI 
+torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2) +fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2) + +if [ "$torchrec_version" != "$fbgemm_version" ]; then + echo "Error: TorchRec package version does not match FBGEMM package version" + exit 1 +fi + # check directory ls -R