Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WiP] Provide a dev container for DALI #5017

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ set(CMAKE_C_STANDARD 11)
set(CMAKE_CUDA_STANDARD 17)

# CXX flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-free-nonheap-object -Wno-unused-variable -Wno-unused-function -Wno-strict-overflow -fno-strict-aliasing -fPIC -fvisibility=hidden")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-free-nonheap-object -Wno-unused-variable -Wno-unused-function -Wno-strict-overflow -Wno-nonnull -fno-strict-aliasing -fPIC -fvisibility=hidden")

if (WERROR)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
Expand Down
80 changes: 80 additions & 0 deletions docker/Dockerfile.deps.ubuntu
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

# Base image the dependency layer is built on top of.
ARG FROM_IMAGE_NAME=ubuntu:22.04
# Image whose whole filesystem is copied over the final image by the last
# instruction of this file. The default `scratch` is Docker's empty image.
ARG BUILDER_EXTRA_DEPS=scratch
FROM ${BUILDER_EXTRA_DEPS} as extra_deps
FROM ${FROM_IMAGE_NAME}

# Keep package configuration from prompting for input during the image build.
ENV DEBIAN_FRONTEND=noninteractive
# Install the system toolchain and libraries, then the Python tooling, in a single
# layer. apt-get is used instead of apt because apt's command-line interface is not
# meant for scripts. The apt lists and the pip cache are removed at the end so they
# do not end up in the layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-distutils \
        python3-dev \
        python3-pip \
        gcc \
        g++ \
        make \
        cmake \
        git \
        git-lfs \
        wget \
        autoconf \
        automake \
        libtool \
        libtool-bin \
        patchelf \
        nasm \
        python-is-python3 \
        patch \
        zlib1g-dev \
        libxml2-dev \
        dkms dpkg-dev \
        libudev-dev \
        liburcu-dev \
        libmount-dev \
        libnuma-dev \
        libssl-dev \
        libjsoncpp-dev \
        libasan6 \
        libunwind-dev \
        curl \
        libncurses5-dev \
        lsb-core \
        unzip \
        llvm && \
    git lfs install && \
    # create generic (unversioned) links libasan.so and libstdc++.so next to the
    # versioned libraries; plain `ln` makes hard links, not symbolic ones
    ln /usr/lib/x86_64-linux-gnu/libasan.so.6 /usr/lib/x86_64-linux-gnu/libasan.so && \
    ln /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libstdc++.so && \
    pip install --upgrade pip && \
    pip install --upgrade \
        future \
        setuptools \
        wheel \
        flake8 \
        clang && \
    pip install libclang && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /root/.cache/pip/

# File with the default DALI_deps revision; it is read by the RUN step below when
# DALI_DEPS_VERSION_SHA is empty, and deleted at the end of that step.
COPY DALI_DEPS_VERSION /tmp

# Repository the dependencies are cloned from. Falls back to the NVIDIA/DALI_deps
# URL when the build argument is unset or empty.
ARG DALI_DEPS_REPO
ENV DALI_DEPS_REPO=${DALI_DEPS_REPO:-https://github.com/NVIDIA/DALI_deps}

# Optional revision to check out instead of the one stored in /tmp/DALI_DEPS_VERSION.
ARG DALI_DEPS_VERSION_SHA
ENV DALI_DEPS_VERSION_SHA=${DALI_DEPS_VERSION_SHA}

# Clone DALI_deps at the selected revision, fetch its submodules, run its
# build_deps.sh and remove the checkout and the version file afterwards.
# CC_COMP/CXX_COMP are exported from CC/CXX (presumably read by build_deps.sh
# to pick the compilers - confirm against that script).
# run in /bin/bash to have more advanced features supported like list
RUN /bin/bash -c 'DALI_DEPS_VERSION_SHA=${DALI_DEPS_VERSION_SHA:-$(cat /tmp/DALI_DEPS_VERSION)} && \
git clone ${DALI_DEPS_REPO} /tmp/dali_deps && \
cd /tmp/dali_deps && \
git checkout ${DALI_DEPS_VERSION_SHA} && \
git submodule init && \
git submodule update --depth 1 --init --recursive && \
export CC_COMP=${CC} && \
export CXX_COMP=${CXX} && \
/tmp/dali_deps/build_scripts/build_deps.sh && rm -rf /tmp/dali_deps && rm -rf /tmp/DALI_DEPS_VERSION'

# extra deps: overlay the whole filesystem of the extra_deps stage (the
# BUILDER_EXTRA_DEPS image) onto this image
COPY --from=extra_deps / /
70 changes: 70 additions & 0 deletions docker/Dockerfile.ubuntu
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Image the builder stage starts from. It has no default here; docker/bake.hcl
# passes it for the builder_image target.
ARG DEPS_IMAGE_NAME
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that we should consider skipping this container, unless we want to have an option to tinker with different targets in dev container.
Still, this is mostly an artifact of how we use it in CI, and most of those options can be controlled directly with CMake invocation inside the dev container.

The next step would be a layer that adds the tools for testing/debugging instead of this one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, I would leave it as it is and use it to create test env. When it is done, we can merge it with Dockerfile.deps.ubuntu (unless there is a part that needs to have cuda installed already).

# clean builder without source code inside
FROM ${DEPS_IMAGE_NAME} as builder

# Single layer that:
#  1. registers the CUDA library directories with the dynamic linker,
#  2. installs the Python packages astunparse, gast and dm-tree,
#  3. installs the X11/GL/audio runtime libraries needed by Python wheels,
#  4. adds the NVIDIA CUDA apt repository and installs pinned cuDNN, NCCL and cuBLAS
#     packages plus the CUDA 11.8 cuFFT, cuSPARSE and NVRTC packages.
# make sure that linker discovers cuda libs
RUN echo "/usr/local/cuda/targets/x86_64-linux/lib" > /etc/ld.so.conf.d/999_cuda.conf && \
    echo "/usr/local/cuda-11.8/targets/x86_64-linux/lib" >> /etc/ld.so.conf.d/999_cuda.conf && \
    ldconfig && \
    pip install --upgrade \
        astunparse \
        gast \
        dm-tree && \
    rm -rf /root/.cache/pip/ && \
    # Install dependencies: opencv-python from 3.3.0.10 onwards uses QT which requires
    # X11 and other libraries that are not present in clean docker images or bundled there
    # 4.3.0.38 uses libGL.so as well so install libgl1-mesa-dev
    apt-get update && \
    apt-get install -y --no-install-recommends libsm6 \
        libice6 \
        libxrender1 \
        libxext6 \
        libx11-6 \
        glib-2.0 \
        libgl1-mesa-dev && \
    # for simple audio python wheel
    apt-get install -y --no-install-recommends libasound2-dev && \
    apt-get update && \
    apt-get install wget software-properties-common -y && \
    CUDNN_VERSION=8.8.1.3-1 && \
    CUBLASS_VERSION_11=11.11.3.6-1 && \
    CUBLASS_VERSION_12=12.0.2.224-1 && \
    NCCL_VERSION=2.16.5-1 && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
    add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" && \
    # the pin file comes from the same ubuntu2204 repository as the key and the
    # package source above, so all three stay consistent with the 22.04 base image
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \
    mv cuda-*.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        libcudnn8=${CUDNN_VERSION}+cuda12.0 \
        libcudnn8-dev=${CUDNN_VERSION}+cuda12.0 \
        libnccl2=${NCCL_VERSION}+cuda12.0 \
        libcublas-11-8=${CUBLASS_VERSION_11} \
        libcublas-12-0=${CUBLASS_VERSION_12} \
        libcufft-11-8 \
        libcusparse-11-8 \
        cuda-nvrtc-11-8 && \
    rm -rf /var/lib/apt/lists/*

# Run qa/download_pip_packages.sh with qa/ bind-mounted from the build context for
# this step only. With INSTALL set, that script runs `pip install` instead of
# `pip download`. CUDA_VERSION is the nvcc release number with the dot removed
# (major and minor digits concatenated).
RUN --mount=type=bind,source=qa/,target=/opt/dali/qa/ \
export INSTALL=YES && \
export CUDA_VERSION=$(echo $(nvcc --version) | sed 's/.*\(release \)\([0-9]\+\)\.\([0-9]\+\).*/\2\3/') && \
cd /opt/dali/qa/ && ./download_pip_packages.sh

# Source qa/setup_dali_extra.sh from inside qa/ (presumably it fetches DALI_extra at
# the revision stored in DALI_EXTRA_VERSION - confirm against that script).
# Each bind mount needs its own --mount flag: one for the DALI_EXTRA_VERSION file and
# one for the qa/ directory. Both are available for this step only.
RUN --mount=type=bind,source=./DALI_EXTRA_VERSION,target=/opt/dali/DALI_EXTRA_VERSION \
    --mount=type=bind,source=qa/,target=/opt/dali/qa/ \
    /bin/bash -c 'pushd /opt/dali/qa/ && \
    source ./setup_dali_extra.sh && \
    popd'

# Default environment for the dev container: build switches, Python search path and
# the DALI_extra location.
ENV WITH_DYNAMIC_CUDA_TOOLKIT=ON \
WITH_DYNAMIC_NVJPEG=ON \
WITH_DYNAMIC_CUFFT=ON \
WITH_DYNAMIC_NPP=ON \
BUILD_CUFILE=ON \
NVIDIA_BUILD_ID=0 \
# to be adjusted later: the first entry is for running from the build dir, the second
# is for nose tests, so the user can run `python3 -m nose_wrapper --verbose`
PYTHONPATH=/opt/dali/test_comp/dali/python/:/opt/dali/qa \
DALI_EXTRA_PATH=/opt/dali_extra

# Working directory for later instructions and for containers started from the image.
WORKDIR /opt/dali
57 changes: 57 additions & 0 deletions docker/bake.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# CPU architecture; used in the image tags and in the name of the CUDA toolkit
# Dockerfile.
variable "ARCH" {
default = "x86_64"
}

# CUDA version string; used in the image tags and in the name of the CUDA toolkit
# Dockerfile (presumably major and minor without the dot, e.g. "122" - confirm).
variable "CUDA_VERSION" {
default = "122"
}

# CUDA toolkit image, built from the Dockerfile matching CUDA_VERSION and ARCH.
target "cuda_toolkit" {
dockerfile = "docker/Dockerfile.cuda${CUDA_VERSION}.${ARCH}.deps"
tags = [
"nvidia/dali:cuda${CUDA_VERSION}_${ARCH}.toolkit"
]
}

# Dependency image without CUDA, built from docker/Dockerfile.deps.ubuntu on top of
# ubuntu:22.04.
target "deps_ubuntu" {
args = {
FROM_IMAGE_NAME = "ubuntu:22.04"
}
dockerfile = "docker/Dockerfile.deps.ubuntu"
tags = [
"nvidia/dali:${ARCH}.deps"
]
}

# Dependency image with CUDA. `contexts` maps the two image names to the outputs of
# the deps_ubuntu and cuda_toolkit targets, so references to those names in
# docker/Dockerfile.cuda.deps resolve to the freshly built targets rather than to
# images pulled from a registry.
target "deps_with_cuda" {
contexts = {
"nvidia/dali:${ARCH}.deps" = "target:deps_ubuntu",
"nvidia/dali:cuda${CUDA_VERSION}_${ARCH}.toolkit" = "target:cuda_toolkit"
}
args = {
FROM_IMAGE_NAME = "nvidia/dali:${ARCH}.deps",
CUDA_IMAGE = "nvidia/dali:cuda${CUDA_VERSION}_${ARCH}.toolkit"
}
dockerfile = "docker/Dockerfile.cuda.deps"
tags = [
"nvidia/dali:cu${CUDA_VERSION}_${ARCH}.deps"
]
}

# Builder image: the `builder` stage of docker/Dockerfile.ubuntu, based on the output
# of the deps_with_cuda target (passed in as DEPS_IMAGE_NAME).
target "builder_image" {
contexts = {
"nvidia/dali:cu${CUDA_VERSION}_${ARCH}.deps" = "target:deps_with_cuda"
}
args = {
DEPS_IMAGE_NAME = "nvidia/dali:cu${CUDA_VERSION}_${ARCH}.deps"
}
dockerfile = "docker/Dockerfile.ubuntu"
tags = [
"nvidia/dali:cu${CUDA_VERSION}_${ARCH}.build"
]
target = "builder"
}

# Built when bake is invoked without an explicit target.
group "default" {
targets = ["builder_image"]
}
15 changes: 8 additions & 7 deletions qa/TL0_python-self-test-base-cuda/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ version_eq "$DALI_CUDA_MAJOR_VERSION" "12" && \
mv /usr/local/cuda /usr/local/cuda_bak && \
ln -s cuda-12.0 /usr/local/cuda
version_ge "$DALI_CUDA_MAJOR_VERSION" "11" && \
pip uninstall -y `pip list | grep nvidia-cu | cut -d " " -f1` `pip list | grep nvidia-n | cut -d " " -f1` \
pip uninstall -y `pip list | grep nvidia-cu | cut -d " " -f1` `pip list | grep nvidia-n | cut -d " " -f1` && CUDA_WHEELS_REMOVED=1 \
|| true

export DO_NOT_INSTALL_CUDA_WHEEL="TRUE"
Expand All @@ -33,10 +33,11 @@ bash -e ./test.sh
popd

# restore old CUDA symlink, reinstall the latest CUDA wheels
version_eq "$DALI_CUDA_MAJOR_VERSION" "11" && \
rm -rf /usr/local/cuda && mv /usr/local/cuda_bak /usr/local/cuda
version_ge "$DALI_CUDA_MAJOR_VERSION" "11" && \
pip install nvidia-cufft-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-npp-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-nvjpeg-cu${DALI_CUDA_MAJOR_VERSION} \
|| true
rm -rf /usr/local/cuda && mv /usr/local/cuda_bak /usr/local/cuda && \
if [ -n "$CUDA_WHEELS_REMOVED" ]; then
pip install nvidia-cufft-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-npp-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-nvjpeg-cu${DALI_CUDA_MAJOR_VERSION} && \
unset CUDA_WHEELS_REMOVED;
fi || true
20 changes: 16 additions & 4 deletions qa/download_pip_packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,28 @@ do
# check all test files inside
for test_file in $(ls -f *.sh);
do
# don't parse files that don't source test_template.sh directly, as variables
# are not set up properly
if [ -z "$(grep 'test_template.sh' $test_file)" ]; then
continue
fi
export pip_packages=""
echo "Checking file: " ${test_file}
source ${test_file}
echo "PIPs to install: " ${pip_packages}
echo "Packagess to install: " ${pip_packages}
if test -n "$pip_packages"
then
last_config_index=$(python ../setup_packages.py -n -u $pip_packages --cuda ${CUDA_VERSION})

# get extra index url for given packages - PEP 503 Python Package Index
extra_indices=$($topdir/qa/setup_packages.py -u $pip_packages --cuda ${CUDA_VERSION} -e)
extra_indices=$(python ../setup_packages.py -u $pip_packages --cuda ${CUDA_VERSION} -e)
extra_indices_string=""
for e in ${extra_indices}; do
extra_indices_string="${extra_indices_string} --extra-index-url=${e}"
done

# get link index url for given packages - a URL or path to an html file with links to archives
link_indices=$($topdir/qa/setup_packages.py -u $pip_packages --cuda ${CUDA_VERSION} -k)
link_indices=$(python ../setup_packages.py -u $pip_packages --cuda ${CUDA_VERSION} -k)
link_indices_string=""
for e in ${link_indices}; do
link_indices_string="${link_indices_string} -f ${e}"
Expand All @@ -37,7 +42,14 @@ do
inst=$(python ../setup_packages.py -i $i -u $pip_packages --cuda ${CUDA_VERSION})
if [ -n "$inst" ]
then
pip download $inst -d /pip-packages ${link_indices_string} ${extra_indices_string}
if [ -n "$INSTALL" ]
then
pip install $inst ${link_indices_string} ${extra_indices_string}
# only one test variant is needed
break
else
pip download $inst -d /pip-packages ${link_indices_string} ${extra_indices_string}
fi
fi
done
fi
Expand Down
2 changes: 1 addition & 1 deletion qa/setup_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def get_name(self, cuda_version=None, idx=None):

def get_all_versions(self, cuda_version):
cuda_version = self.max_cuda_version(cuda_version)
return self.filter_versions(self.versions[cuda_version])
return self.filter_versions(self.versions.get(cuda_version, []))

def max_cuda_version(self, cuda_version):
"""Gets a compatible, available cuda version to one asked for.
Expand Down
9 changes: 8 additions & 1 deletion qa/test_template.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
#!/bin/bash

set -o pipefail
source qa/test_template_impl.sh 2>&1 | perl -pe 'use POSIX strftime;
if [ -n "$gather_pip_packages" ]
then
# perl breaks the population of the outside variables from the inside of the sourced
# script. Turn this off for the gather_pip_packages process
source qa/test_template_impl.sh
else
source qa/test_template_impl.sh 2>&1 | perl -pe 'use POSIX strftime;
$|=1;
select((select(STDERR), $| = 1)[0]);
print strftime "[%Y-%m-%d %H:%M:%S] ", localtime'
fi