diff --git a/.github/workflows/rocm6.yml b/.github/workflows/rocm6.yml
new file mode 100644
index 0000000..c64f16a
--- /dev/null
+++ b/.github/workflows/rocm6.yml
@@ -0,0 +1,80 @@
+# Builds the OpenSplat Docker image from Dockerfile.rocm6 (ROCm 6.x, HIP) to
+# check that the project compiles. The image is built but never pushed.
+name: OpenSplat (Docker ROCm 6.x)
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [ assigned, opened, synchronize, reopened ]
+  release:
+    types: [ published, edited ]
+
+jobs:
+  build:
+    name: ${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-22.04]  # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04]
+        arch: [x64]  # [x64, x86]
+        # Quoted so YAML always reads the versions as strings.
+        torch-version: ["2.1.2"]  # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1]
+        rocm-version: ["6.0.2"]
+        cmake-build-type: [Release]  # [Debug, ClangTidy]
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      # Delete preinstalled toolchains, then relocate Docker's data directory
+      # to /mnt (symlinked back) to free disk space for the image build.
+      - name: Free disk space
+        run: |
+          df -hT
+          # https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /usr/share/dotnet
+          # delete libraries for Android (12G), CodeQL (5.3G), PowerShell (1.3G), Swift (1.7G)
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "${AGENT_TOOLSDIRECTORY}"
+          sudo rm -rf /usr/local/share/powershell
+          sudo rm -rf /usr/share/swift
+          sudo rm -rf /usr/local/lib/android/sdk
+          sudo rm -rf /opt/hostedtoolcache/Python
+          sudo rm -rf /opt/hostedtoolcache/go
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /var/lib/gems
+          sudo swapoff -a
+          sudo rm -f /swapfile
+          sudo apt-get autoremove -y
+          sudo apt-get autoclean -y
+          sudo snap set system refresh.retain=2
+          docker images -qf "dangling=true" | xargs -r sudo docker rmi
+          docker system prune -a -f
+          echo "Disk usage after cleanup:"
+          df -hT
+          sudo service docker stop
+          echo "Docker Stopped"
+          sudo mv /var/lib/docker /mnt/docker-data
+          echo "Docker data moved"
+          sudo ln -s /mnt/docker-data /var/lib/docker
+          echo "Docker data relinked"
+          sudo service docker start
+          echo "Docker Restarted"
+
+      - name: Build Docker Image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile.rocm6
+          build-args: |
+            ROCM_VERSION=${{ matrix.rocm-version }}
+            TORCH_VERSION=${{ matrix.torch-version }}
+            CMAKE_BUILD_TYPE=${{ matrix.cmake-build-type }}
+          push: false
+          tags: opensplat:${{ matrix.os }}-libtorch-${{ matrix.torch-version }}-rocm-${{ matrix.rocm-version }}
diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index 60f1612..3a48d9a 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -7,7 +7,7 @@ ARG TORCH_VERSION=2.2.1
 ARG CUDA_VERSION=12.1.1
 ARG ROCM_VERSION=5.7.1
 ARG LLVM_VERSION=16
-ARG PYTORCH_ROCM_ARCH=gfx906
+ARG PYTORCH_ROCM_ARCH=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942
 ARG CMAKE_BUILD_TYPE=Release
 
 ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
@@ -66,7 +66,17 @@ RUN ROCM_VER_FULL=${ROCM_VERSION} && \
     unzip -q libtorch.zip -d . && \
     rm ./libtorch.zip
 
-# Configure and build \
+# Configure and build
 RUN source .github/workflows/cuda/Linux-env.sh cu"${CUDA_VERSION%%.*}"$(echo $CUDA_VERSION | cut -d'.' -f2) && \
     export PATH=$PATH:/opt/rocm/bin && \
-    ln -s -f /opt/rocm/llvm/lib/clang/${LLVM_VERSION}.0.0 /opt/rocm/llvm/lib/clang/${LLVM_VERSION}
+    ln -s -f /opt/rocm/llvm/lib/clang/${LLVM_VERSION}.0.0 /opt/rocm/llvm/lib/clang/${LLVM_VERSION} && \
+    mkdir build && \
+    cd build && \
+    cmake .. \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DGPU_RUNTIME=HIP \
+    -DHIP_ROOT_DIR=/opt/rocm \
+    -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON \
+    -DCMAKE_PREFIX_PATH=/code/libtorch \
+    -DCMAKE_INSTALL_PREFIX=/code/install && \
+    make
diff --git a/Dockerfile.rocm6 b/Dockerfile.rocm6
new file mode 100644
index 0000000..b273083
--- /dev/null
+++ b/Dockerfile.rocm6
@@ -0,0 +1,41 @@
+# Builds OpenSplat with HIP on top of AMD's rocm/pytorch image (ROCm 6.x).
+# ROCM_VERSION and TORCH_VERSION select the base image tag.
+ARG UBUNTU_VERSION=22.04
+ARG TORCH_VERSION=2.1.2
+ARG ROCM_VERSION=6.0.2
+FROM rocm/pytorch:rocm${ROCM_VERSION}_ubuntu22.04_py3.10_pytorch_${TORCH_VERSION}
+
+ARG LLVM_VERSION=17
+ARG CMAKE_BUILD_TYPE=Release
+
+SHELL ["/bin/bash", "-c"]
+
+# Env variables
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Prepare directories
+WORKDIR /code
+
+# Copy everything
+COPY . ./
+
+# Install build dependencies
+RUN apt-get update && \
+    apt-get install -y \
+    libopencv-dev && \
+    apt-get autoremove -y --purge && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Configure and build against the PyTorch install in the py_3.10 conda env
+RUN source activate py_3.10 && \
+    mkdir build && \
+    cd build && \
+    cmake .. \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DGPU_RUNTIME=HIP \
+    -DHIP_ROOT_DIR=/opt/rocm \
+    -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON \
+    -DCMAKE_PREFIX_PATH=/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch \
+    -DCMAKE_INSTALL_PREFIX=/code/install && \
+    make
diff --git a/README.md b/README.md
index 8c82b86..298bedc 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,9 @@ docker build \
 ## Docker Build (ROCm via HIP)
 Navigate to the root directory of OpenSplat repo that has Dockerfile and run the following command to build the Docker image:
 ```bash
-docker build -t opensplat -f Dockerfile.rocm .
+docker build \
+  -t opensplat \
+  -f Dockerfile.rocm .
 ```
 
 The `-t` flag and other `--build-arg` let you tag and further customize your image across different ubuntu versions, CUDA/libtorch stacks, and hardware accelerators.
@@ -89,7 +91,12 @@ docker build \
   --build-arg PYTORCH_ROCM_ARCH="gfx906" \
   --build-arg CMAKE_BUILD_TYPE=Release .
 ```
-
+Note: To build with ROCm 6.x, use `Dockerfile.rocm6`, which uses AMD's PyTorch Docker image as its base layer:
+```bash
+docker build \
+  -t opensplat:ubuntu-22.04-libtorch-2.1.2-rocm-6.0.2 \
+  -f Dockerfile.rocm6 .
+```
 
 ## Run
 