diff --git a/.github/workflows/dockerization.yml b/.github/workflows/dockerization.yml index d92bda8dc9..2ffc005b94 100644 --- a/.github/workflows/dockerization.yml +++ b/.github/workflows/dockerization.yml @@ -1,6 +1,4 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - +# Tests an FL experiment in a Dockerized environment. name: Dockerization on: @@ -10,13 +8,10 @@ on: permissions: contents: read -env: - # A workaround for long FQDN names provided by GitHub actions. - FQDN: "localhost" - jobs: build: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - uses: actions/checkout@v3 @@ -28,7 +23,76 @@ jobs: run: | python -m pip install --upgrade pip pip install . - - name: Dockerization test + + - name: Build base image + run: | + docker build -t openfl -f openfl-docker/Dockerfile.base . + + - name: Create workspace image + run: | + fx workspace create --prefix example_workspace --template keras_cnn_mnist + cd example_workspace + fx plan initialize -a localhost + fx workspace dockerize --base_image openfl + + - name: Create certificate authority for workspace + run: | + cd example_workspace + fx workspace certify + + - name: Create signed cert for collaborator + run: | + cd example_workspace + fx collaborator create -d 1 -n charlie --silent + fx collaborator generate-cert-request -n charlie --silent + fx collaborator certify --request-pkg col_charlie_to_agg_cert_request.zip --silent + + # Pack the collaborator's private key, signed cert, and data.yaml into a tarball + tarfiles="plan/data.yaml agg_to_col_charlie_signed_cert.zip" + for entry in cert/client/*; do + if [[ "$entry" == *.key ]]; then + tarfiles="$tarfiles $entry" + fi + done + + tar -cf cert_col_charlie.tar $tarfiles + + # Clean up + rm -f $tarfiles + rm -f col_charlie_to_agg_cert_request.zip + + - name: Create signed cert for aggregator + run: | + cd example_workspace + fx aggregator generate-cert-request --fqdn localhost + fx aggregator certify --fqdn localhost --silent + + # Pack all files that aggregator needs to start training + tar -cf cert_agg.tar plan cert save + + # Remove the directories after archiving + rm -rf plan cert save + + - name: Load workspace image run: | - python -m tests.github.dockerization_test - + cd example_workspace + docker load -i example_workspace_image.tar + + - name: Run aggregator and collaborator + run: | + cd example_workspace + + set -x + docker run --rm \ + --network host \ + --mount type=bind,source=./cert_agg.tar,target=/certs.tar \ + -e CONTAINER_TYPE=aggregator \ + example_workspace /home/openfl/openfl-docker/start_actor_in_container.sh & + + # TODO: Run with two collaborators instead. + docker run --rm \ + --network host \ + --mount type=bind,source=./cert_col_charlie.tar,target=/certs.tar \ + -e CONTAINER_TYPE=collaborator \ + -e COL=charlie \ + example_workspace /home/openfl/openfl-docker/start_actor_in_container.sh diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index f6109b51eb..0212823850 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -44,23 +44,6 @@ jobs: run: | coverage run -m pytest -rA coverage report - - dockerization: # from dockerization.yml - needs: [lint, pytest-coverage] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v3 - with: - python-version: "3.8" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install . - - name: Dockerization test - run: | - python -m tests.github.dockerization_test interactive-kvasir: # from interactive-kvasir.yml needs: [lint, pytest-coverage, dockerization] diff --git a/openfl-docker/Dockerfile.base b/openfl-docker/Dockerfile.base index 3e8c89c56c..b45a7315cb 100644 --- a/openfl-docker/Dockerfile.base +++ b/openfl-docker/Dockerfile.base @@ -1,122 +1,38 @@ -# Copyright (C) 2020-2023 Intel Corporation +# Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +# ------------------------------------ +# OpenFL Base Image +# ------------------------------------ +FROM ubuntu:22.04 as base -# If your machine is behind a proxy, make sure you set it up in ~/.docker/config.json - -ARG IMAGE_NAME=ubuntu -ARG IMAGE_TAG=22.04 - -# Base image to be used everywhere -FROM ${IMAGE_NAME}:${IMAGE_TAG} as base -RUN apt-get clean && \ - apt-get update && \ - apt-get upgrade -y && \ - apt-get clean autoclean && \ - apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* - -# Python image -FROM base as python-dev - -ARG PYTHON=python3.10 -ARG PIP=pip3.10 - -RUN apt-get update && \ - apt-get install --no-install-recommends --fix-missing -y \ - curl \ - gpg-agent \ - software-properties-common && \ - add-apt-repository -y ppa:deadsnakes/ppa && \ - apt-get update && \ - DEBIAN_FRONTEND=noninteractive \ - apt-get install --no-install-recommends --fix-missing -y \ - ${PYTHON} \ - ${PYTHON}-distutils && \ - curl -s https://bootstrap.pypa.io/get-pip.py | ${PYTHON} && \ - apt-get purge -y \ - curl \ - gpg-agent \ - software-properties-common && \ - apt-get autoclean && \ - apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* - -RUN ln -sf /usr/bin/${PYTHON} /usr/bin/python && \ - ln -sf /usr/bin/${PYTHON} /usr/bin/python3 && \ - ln -sf /usr/local/bin/${PIP} /usr/local/bin/pip && \ - ln -sf /usr/local/bin/${PIP} /usr/local/bin/pip3 - -FROM base as python-base - -ARG PYTHON=python3.10 - -COPY --from=python-dev /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu -COPY --from=python-dev /usr/lib/${PYTHON} /usr/lib/${PYTHON} -COPY --from=python-dev /usr/local/lib/${PYTHON} /usr/local/lib/${PYTHON} -COPY --from=python-dev /usr/bin /usr/bin -COPY --from=python-dev /usr/local/bin /usr/local/bin - -FROM python-base as openfl - +# Configure network proxy, if required, in ~/.docker/config.json +ENV DEBIAN_FRONTEND=noninteractive SHELL ["/bin/bash", "-o", "pipefail", "-c"] -ARG INSTALL_SOURCES="yes" -WORKDIR /zlib -#zlib install to 1.2.13 -RUN apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing wget build-essential -RUN wget --no-check-certificate https://github.com/madler/zlib/archive/refs/tags/v1.2.13.tar.gz && tar -xvf ./v1.2.13.tar.gz && cd zlib-1.2.13 && ./configure --prefix=/usr && make && make install -RUN rm -rf zlib-1.2.13 && rm -rf v1.2.13.tar.gz -RUN apt-get remove --purge -y wget build-essential && \ - apt-get autoclean -y && \ - apt-get auto-remove -y - -RUN dpkg --get-selections | grep -v deinstall | awk '{print $1}' > base_packages.txt && \ +# Install dependencies. +RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \ + apt-get update && \ + apt-get install -y \ + python3-pip \ + python3.10-dev \ + ca-certificates \ + build-essential \ + --no-install-recommends && \ rm -rf /var/lib/apt/lists/* -RUN apt-get update && \ - apt-get install -y --no-install-recommends --fix-missing \ - openssh-server=\* \ - curl=\* \ - ca-certificates=\* && \ - rm -rf /etc/ssh/ssh_host_*_key && \ - if [ "$INSTALL_SOURCES" = "yes" ]; then \ - dpkg --get-selections | grep -v deinstall | awk '{print $1}' > all_packages.txt && \ - sed -Ei 's/# deb-src /deb-src /' /etc/apt/sources.list && \ - apt-get update && \ - grep -v -f base_packages.txt all_packages.txt | while read -r line; do \ - package=$line; \ - unparsed_name=("${package//:/ }"); \ - name=$(echo $unparsed_name | awk '{ print $1 }'); \ - echo "${name}" >> all_dependencies.txt; \ - echo "${name}" >> licenses.txt;\ - cat /usr/share/doc/"${name}"/copyright >> licenses.txt; \ - grep -lE 'GPL|MPL|EPL' /usr/share/doc/"${name}"/copyright; \ - exit_status=$?; \ - if [ $exit_status -eq 0 ]; then \ - apt-get source -q --download-only "$package"; \ - fi \ - done && rm -rf ./*packages.txt && \ - echo "Download source for $(find . | wc -l) third-party packages: $(du -sh)"; fi && \ - rm -rf /var/lib/apt/lists/* +# Create an unprivileged user. +RUN groupadd -g 1001 default && \ + useradd -m -u 1001 -g default openfl +USER openfl -WORKDIR /openfl +# Install OpenFL. +WORKDIR /home/openfl COPY . . +ENV PATH=/home/openfl/.local/bin:$PATH +RUN pip install --no-cache-dir -U pip setuptools wheel && \ + pip install --no-cache-dir -e . -# Install OpenFL -RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \ - pip install --no-cache-dir . - -WORKDIR /thirdparty -RUN if [ "$INSTALL_SOURCES" = "yes" ]; then \ - pip install --no-cache-dir pip-licenses; \ - pip-licenses -l >> licenses.txt; \ - pip-licenses | awk '{for(i=1;i<=NF;i++) if(i!=2) printf $i" "; print ""}' | tee -a all_dependencies.txt; \ - pip-licenses | grep -E 'GPL|MPL|EPL' | awk '{OFS="=="} {print $1,$2}' | xargs pip download --no-binary :all:; \ -fi -WORKDIR /openfl - -HEALTHCHECK --interval=30m --timeout=3s \ - CMD echo "Container works" || exit 1 +# Download thirdparty licenses. +RUN INSTALL_SOURCES=yes /home/openfl/openfl-docker/licenses.sh -CMD [ "/bin/bash" ] +CMD ["/bin/bash"] \ No newline at end of file diff --git a/openfl-docker/Dockerfile.workspace b/openfl-docker/Dockerfile.workspace index 3819fafacb..faa9566231 100644 --- a/openfl-docker/Dockerfile.workspace +++ b/openfl-docker/Dockerfile.workspace @@ -1,25 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# ------------------------------------ +# Workspace Image +# ------------------------------------ ARG BASE_IMAGE=openfl:latest FROM ${BASE_IMAGE} -# Create unprivileged user to limit changes to mounted volumes -ENV username=user -ARG USER_ID=10001 -ARG GROUP_ID=1001 -RUN addgroup --gid $GROUP_ID $username -RUN adduser --disabled-password --gecos '' --uid $USER_ID --gid $GROUP_ID $username - -WORKDIR /home/user -# Allow user to work in home dir -RUN chmod -R a+rw . -# Allow pip to work with existing packages (?) -RUN chmod -R a+rwx /usr/local -USER user +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +USER openfl ARG WORKSPACE_NAME COPY ${WORKSPACE_NAME}.zip . -RUN fx workspace import --archive ${WORKSPACE_NAME}.zip -# Unifying the workspace folder name -RUN mv ${WORKSPACE_NAME} workspace -WORKDIR /home/user/workspace -RUN pip install -r requirements.txt +WORKDIR /home/openfl +RUN fx workspace import --archive ${WORKSPACE_NAME}.zip && \ + pip install -r ${WORKSPACE_NAME}/requirements.txt + +WORKDIR /home/openfl/${WORKSPACE_NAME} +CMD ["/bin/bash"] \ No newline at end of file diff --git a/openfl-docker/licenses.sh b/openfl-docker/licenses.sh new file mode 100755 index 0000000000..005a8bccfa --- /dev/null +++ b/openfl-docker/licenses.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Downloads thirdparty licenses. +# Save the list of installed packages to base_packages.txt +dpkg --get-selections | grep -v deinstall | awk '{print $1}' > base_packages.txt + +# If INSTALL_SOURCES is set to "yes", perform additional operations +if [ "$INSTALL_SOURCES" = "yes" ]; then + # Save the list of all installed packages to all_packages.txt + dpkg --get-selections | grep -v deinstall | awk '{print $1}' > all_packages.txt + + # Enable source repositories in APT sources list + sed -Ei 's/# deb-src /deb-src /' /etc/apt/sources.list + + # Update the package list again after enabling source repositories + apt-get update + + # Process each package and download source if it matches specific licenses + grep -v -f base_packages.txt all_packages.txt | while read -r package; do + name=$(echo "${package//:/ }" | awk '{print $1}') + echo "$name" >> all_dependencies.txt + echo "$name" >> licenses.txt + cat /usr/share/doc/"$name"/copyright >> licenses.txt + if grep -lE 'GPL|MPL|EPL' /usr/share/doc/"$name"/copyright; then + apt-get source -q --download-only "$package" + fi + done + + # Clean up + rm -rf ./*packages.txt + echo "Download source for $(find . | wc -l) third-party packages: $(du -sh)" + + # Clean up APT lists again + rm -rf /var/lib/apt/lists/* +fi + +mkdir -p thirdparty +cd thirdparty + +# If INSTALL_SOURCES is set to "yes", perform additional operations +if [ "$INSTALL_SOURCES" = "yes" ]; then + # Install pip-licenses and generate license files + pip install --no-cache-dir pip-licenses + pip-licenses -l >> licenses.txt + + # Append dependency list to all_dependencies.txt + pip-licenses | awk '{for(i=1;i<=NF;i++) if(i!=2) printf $i" "; print ""}' | tee -a all_dependencies.txt + + # Download source packages for Python packages with specific licenses + pip-licenses | grep -E 'GPL|MPL|EPL' | awk '{OFS="=="} {print $1,$2}' | xargs pip download --no-binary :all: +fi diff --git a/tests/github/dockerization_test.py b/tests/github/dockerization_test.py deleted file mode 100644 index f39845a5b5..0000000000 --- a/tests/github/dockerization_test.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from argparse import ArgumentParser -import socket -from subprocess import check_call -import shutil -import os -from pathlib import Path -import tarfile -import time -from concurrent.futures import ProcessPoolExecutor - -from tests.github.utils import start_aggregator_container -from tests.github.utils import start_collaborator_container -from tests.github.utils import create_signed_cert_for_collaborator -from openfl.utilities.utils import getfqdn_env - -if __name__ == '__main__': - # 1. Create the workspace - parser = ArgumentParser() - workspace_choice = [] - with os.scandir('openfl-workspace') as iterator: - for entry in iterator: - if entry.name not in ['__init__.py', 'workspace', 'default']: - workspace_choice.append(entry.name) - parser.add_argument('--template', default='keras_cnn_mnist', choices=workspace_choice) - parser.add_argument('--fed_workspace', default='fed_work12345alpha81671') - parser.add_argument('--col', default='one123dragons') - parser.add_argument('--data_path', default='1') - parser.add_argument('--base_image_tag', default='openfl') - args = parser.parse_args() - base_image_tag = args.base_image_tag - fed_workspace = args.fed_workspace - col = args.col - - # If an aggregator container will run on another machine - # a relevant FQDN should be provided - fqdn = getfqdn_env() - # Build base image - check_call([ - 'docker', 'build', '-t', base_image_tag, '-f', 'openfl-docker/Dockerfile.base', '.' - ]) - - # Create FL workspace - shutil.rmtree(fed_workspace, ignore_errors=True) - check_call([ - 'fx', 'workspace', 'create', '--prefix', fed_workspace, '--template', args.template - ]) - os.chdir(fed_workspace) - fed_directory = Path().resolve() # Get the absolute directory path for the workspace - - # Initialize FL plan - check_call(['fx', 'plan', 'initialize', '-a', fqdn]) - - # 2. Build the workspace image and save it to a tarball - - # This commant builds an image tagged $FED_WORKSPACE - # Then it saves it to a ${FED_WORKSPACE}_image.tar - - check_call(['fx', 'workspace', 'dockerize', '--base_image', base_image_tag]) - - # We remove the base OpenFL image as well - # as built workspace image to simulate starting - # on another machine - workspace_image_name = fed_workspace - check_call(['docker', 'image', 'rm', '-f', base_image_tag, workspace_image_name]) - - # 3. Generate certificates for the aggregator and the collaborator - - # Create certificate authority for the workspace - check_call(['fx', 'workspace', 'certify']) - - # Prepare a tarball with the collab's private key, the singed cert, - # and data.yaml for collaborator container - # This step can be repeated for each collaborator - create_signed_cert_for_collaborator(args.col, args.data_path) - - # Also perform certificate generation for the aggregator. - # Create aggregator certificate - check_call(['fx', 'aggregator', 'generate-cert-request', '--fqdn', fqdn]) - # Sign aggregator certificate - # Remove '--silent' if you run this manually - check_call(['fx', 'aggregator', 'certify', '--fqdn', fqdn, '--silent']) - - # Pack all files that aggregator need to start training - aggregator_required_files = 'cert_agg.tar' - with tarfile.open(aggregator_required_files, 'w') as f: - for d in ['plan', 'cert', 'save']: - f.add(d) - shutil.rmtree(d) - - # 4. Load the image - image_tar = f'{fed_workspace}_image.tar' - check_call(['docker', 'load', '--input', image_tar]) - time.sleep(5) - with ProcessPoolExecutor(max_workers=2) as executor: - executor.submit(start_aggregator_container, args=( - workspace_image_name, - aggregator_required_files - )) - time.sleep(5) - executor.submit(start_collaborator_container, args=( - workspace_image_name, - col - )) - # If containers are started but collaborator will fail to - # conect the aggregator, the pipeline will go to the infinite loop diff --git a/tests/github/experimental/workspace/utils.py b/tests/github/experimental/workspace/utils.py index 7f7da4496f..24b9ddea15 100644 --- a/tests/github/experimental/workspace/utils.py +++ b/tests/github/experimental/workspace/utils.py @@ -118,26 +118,3 @@ def create_signed_cert_for_collaborator(col, data_path): os.remove(f) # Remove request archive os.remove(f'col_{col}_to_agg_cert_request.zip') - - -def start_aggregator_container(workspace_image_name, aggregator_required_files): - check_call( - 'docker run --rm ' - '--network host ' - f'-v {Path.cwd().resolve()}/{aggregator_required_files}:/certs.tar ' - '-e \"CONTAINER_TYPE=aggregator\" ' - f'{workspace_image_name} ' - 'bash /openfl/openfl-docker/start_actor_in_container.sh', - shell=True) - - -def start_collaborator_container(workspace_image_name, col_name): - check_call( - 'docker run --rm ' - '--network host ' - f'-v {Path.cwd()}/cert_col_{col_name}.tar:/certs.tar ' - '-e \"CONTAINER_TYPE=collaborator\" ' - f'-e \"COL={col_name}\" ' - f'{workspace_image_name} ' - 'bash /openfl/openfl-docker/start_actor_in_container.sh', - shell=True) diff --git a/tests/github/utils.py b/tests/github/utils.py index a22479cfae..b265448111 100644 --- a/tests/github/utils.py +++ b/tests/github/utils.py @@ -117,26 +117,3 @@ def create_signed_cert_for_collaborator(col, data_path): os.remove(f) # Remove request archive os.remove(f'col_{col}_to_agg_cert_request.zip') - - -def start_aggregator_container(workspace_image_name, aggregator_required_files): - check_call( - 'docker run --rm ' - '--network host ' - f'-v {Path.cwd().resolve()}/{aggregator_required_files}:/certs.tar ' - '-e \"CONTAINER_TYPE=aggregator\" ' - f'{workspace_image_name} ' - 'bash /openfl/openfl-docker/start_actor_in_container.sh', - shell=True) - - -def start_collaborator_container(workspace_image_name, col_name): - check_call( - 'docker run --rm ' - '--network host ' - f'-v {Path.cwd()}/cert_col_{col_name}.tar:/certs.tar ' - '-e \"CONTAINER_TYPE=collaborator\" ' - f'-e \"COL={col_name}\" ' - f'{workspace_image_name} ' - 'bash /openfl/openfl-docker/start_actor_in_container.sh', - shell=True)