Skip to content

Commit

Permalink
Merge pull request #40 from pfxuan/docker-rocm6
Browse files Browse the repository at this point in the history
Add Docker build for ROCm 6.x
  • Loading branch information
pierotofy committed Mar 16, 2024
2 parents 1d59c7d + 194eec9 commit 9597d60
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 5 deletions.
75 changes: 75 additions & 0 deletions .github/workflows/rocm6.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Workflow identity as shown in the GitHub Actions UI.
name: OpenSplat (Docker ROCm 6.x)

# Run on pushes to main, on pull-request activity, and when a release is
# published or edited.
on:
  push:
    branches: [main]
  pull_request:
    types:
      - assigned
      - opened
      - synchronize
      - reopened
  release:
    types:
      - published
      - edited

jobs:
  # Builds the ROCm 6.x Docker image (Dockerfile.rocm6) without pushing it.
  build:
    # The label reports the ROCm version. The matrix defines no `cuda-version`
    # key, so referencing it here would render as an empty string.
    name: ${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04]
        arch: [x64] # [x64, x86]
        # Versions are quoted so they are always read as strings.
        torch-version: ["2.1.2"] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1]
        rocm-version: ["6.0.2"]
        cmake-build-type: [Release] # [Debug, ClangTidy]
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Reclaims space on the hosted runner and relocates Docker's data
      # directory to /mnt before the image build.
      - name: Free disk space
        run: |
          df -hT
          # https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
          sudo rm -rf /opt/ghc
          sudo rm -rf /usr/share/dotnet
          # delete libraries for Android (12G), CodeQL (5.3G), PowerShell (1.3G), Swift (1.7G)
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/local/share/boost
          sudo rm -rf "${AGENT_TOOLSDIRECTORY}"
          sudo rm -rf /usr/local/share/powershell
          sudo rm -rf /usr/share/swift
          sudo rm -rf /opt/hostedtoolcache/Python
          sudo rm -rf /opt/hostedtoolcache/go
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo rm -rf /var/lib/gems
          sudo swapoff -a
          sudo rm -f /swapfile
          sudo apt-get autoremove -y
          sudo apt-get autoclean -y
          sudo snap set system refresh.retain=2
          docker images -qf "dangling=true" | xargs -r sudo docker rmi
          docker system prune -a -f
          echo "Disk usage after cleanup:"
          df -hT
          # Docker must be stopped while its data directory is moved and
          # symlinked back into place.
          sudo service docker stop
          echo "Docker Stopped"
          sudo mv /var/lib/docker /mnt/docker-data
          echo "Docker data moved"
          sudo ln -s /mnt/docker-data /var/lib/docker
          echo "Docker data relinked"
          sudo service docker start
          echo "Docker Restarted"

      - name: Build Docker Image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.rocm6
          build-args: |
            ROCM_VERSION=${{ matrix.rocm-version }}
            TORCH_VERSION=${{ matrix.torch-version }}
            CMAKE_BUILD_TYPE=${{ matrix.cmake-build-type }}
          push: false
          # Tag follows the versions passed as build args above, so it cannot
          # drift from what was actually built.
          tags: opensplat:ubuntu-22.04-libtorch-${{ matrix.torch-version }}-rocm-${{ matrix.rocm-version }}
16 changes: 13 additions & 3 deletions Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ARG TORCH_VERSION=2.2.1
ARG CUDA_VERSION=12.1.1
ARG ROCM_VERSION=5.7.1
ARG LLVM_VERSION=16
# Semicolon-separated default list of gfx targets; override with
# --build-arg PYTORCH_ROCM_ARCH=... to narrow it.
ARG PYTORCH_ROCM_ARCH=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942
ARG CMAKE_BUILD_TYPE=Release

# Export the build argument as an environment variable for later build steps.
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
Expand Down Expand Up @@ -66,7 +66,17 @@ RUN ROCM_VER_FULL=${ROCM_VERSION} && \
unzip -q libtorch.zip -d . && \
rm ./libtorch.zip

# Configure and build
# The env script is sourced with a `cu<major><minor>` argument derived from
# CUDA_VERSION. The symlink makes the versioned clang directory
# (<LLVM_VERSION>.0.0) also reachable under the bare major version.
# Every step is chained with `&&` so a failure aborts the whole RUN.
RUN source .github/workflows/cuda/Linux-env.sh cu"${CUDA_VERSION%%.*}"$(echo $CUDA_VERSION | cut -d'.' -f2) && \
    export PATH=$PATH:/opt/rocm/bin && \
    ln -s -f /opt/rocm/llvm/lib/clang/${LLVM_VERSION}.0.0 /opt/rocm/llvm/lib/clang/${LLVM_VERSION} && \
    mkdir build && \
    cd build && \
    cmake .. \
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
    -DGPU_RUNTIME=HIP \
    -DHIP_ROOT_DIR=/opt/rocm \
    -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON \
    -DCMAKE_PREFIX_PATH=/code/libtorch \
    -DCMAKE_INSTALL_PREFIX=/code/install && \
    make
39 changes: 39 additions & 0 deletions Dockerfile.rocm6
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Builds OpenSplat on top of the rocm/pytorch base image.
# ARGs declared before FROM select the base image tag.
ARG UBUNTU_VERSION=22.04
ARG TORCH_VERSION=2.1.2
ARG ROCM_VERSION=6.0.2
FROM rocm/pytorch:rocm${ROCM_VERSION}_ubuntu${UBUNTU_VERSION}_py3.10_pytorch_${TORCH_VERSION}

ARG LLVM_VERSION=17
ARG CMAKE_BUILD_TYPE=Release

# bash is required because the build step below uses `source`.
SHELL ["/bin/bash", "-c"]

# Env variables
ENV DEBIAN_FRONTEND=noninteractive

# Install build dependencies
# Done before copying the sources so that source edits do not invalidate this
# cached layer.
RUN apt-get update && \
    apt-get install -y \
    libopencv-dev && \
    apt-get autoremove -y --purge && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Prepare directories
WORKDIR /code

# Copy everything
COPY . ./

# Configure and build
# CMAKE_PREFIX_PATH points at the torch package inside the image's py_3.10
# conda environment.
RUN source activate py_3.10 && \
    mkdir build && \
    cd build && \
    cmake .. \
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
    -DGPU_RUNTIME=HIP \
    -DHIP_ROOT_DIR=/opt/rocm \
    -DOPENSPLAT_BUILD_SIMPLE_TRAINER=ON \
    -DCMAKE_PREFIX_PATH=/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch \
    -DCMAKE_INSTALL_PREFIX=/code/install && \
    make
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ docker build \
## Docker Build (ROCm via HIP)
Navigate to the root directory of the OpenSplat repo, which contains the Dockerfiles, and run the following command to build the Docker image:
```bash
docker build -t opensplat -f Dockerfile.rocm .
docker build \
-t opensplat \
-f Dockerfile.rocm .
```

The `-t` flag and the `--build-arg` options let you tag and further customize your image across different Ubuntu versions, CUDA/libtorch stacks, and hardware accelerators.
Expand All @@ -89,7 +91,12 @@ docker build \
--build-arg PYTORCH_ROCM_ARCH="gfx906" \
--build-arg CMAKE_BUILD_TYPE=Release .
```

Note: To use ROCm 6.x, build from `Dockerfile.rocm6` instead, which uses AMD's ROCm PyTorch Docker image as its base layer:
```bash
docker build \
-t opensplat:ubuntu-22.04-libtorch-2.1.2-rocm-6.0.2 \
-f Dockerfile.rocm6 .
```

## Run

Expand Down

0 comments on commit 9597d60

Please sign in to comment.