From 68c8996c10d1b58267e0d72967d7559b9674ce2f Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Mon, 25 Sep 2023 09:49:08 -0400 Subject: [PATCH] docker: add build script to ease cutting new releases --- docker/Dockerfile.cpu | 16 +++++--- docker/Dockerfile.gpu | 15 ++++--- docker/MAINTAINER.md | 45 +++++++++++---------- docker/build.py | 91 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 133 insertions(+), 34 deletions(-) create mode 100755 docker/build.py diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 162a8aa6..d80a4986 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -1,19 +1,21 @@ FROM python:3.9 as base +ARG cnlpt_version + RUN --mount=type=cache,target=/root/.cache \ pip install cython RUN --mount=type=cache,target=/root/.cache \ - pip install cnlp-transformers - -# temporary workaround for transformers/huggingface version issue -RUN pip uninstall -y huggingface-hub -RUN pip install huggingface-hub==0.10.1 + pip install cnlp-transformers==$cnlpt_version WORKDIR /home # this copy is to support the preload of train models in the downstream images COPY .. 
/home ENTRYPOINT ["/bin/bash"] +FROM base as current +run python -c "import sys;sys.path.append('/home/docker');import model_download; model_download.current()" +ENTRYPOINT ["cnlpt_current_rest", "-p", "8000"] + FROM base as dtr run python -c "import sys;sys.path.append('/home/docker');import model_download; model_download.dtr()" ENTRYPOINT ["cnlpt_dtr_rest", "-p", "8000"] @@ -28,7 +30,9 @@ ENTRYPOINT ["cnlpt_negation_rest", "-p", "8000"] FROM base as termexists run python -c "import sys;sys.path.append('/home/docker');import model_download; model_download.termexists()" -ENTRYPOINT ["cnlpt_termexists", "-p", "8000"] +# Temporary fix, remove once the released pip package has the new model +run sed -i 's/sharpseed-termexists/termexists_pubmedbert_ssm/g' /usr/local/lib/python3.9/site-packages/cnlpt/api/termexists_rest.py +ENTRYPOINT ["cnlpt_termexists_rest", "-p", "8000"] FROM base as temporal run python -c "import sys;sys.path.append('/home/docker');import model_download; model_download.temporal()" diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu index cd43c1b1..4e8a8085 100644 --- a/docker/Dockerfile.gpu +++ b/docker/Dockerfile.gpu @@ -1,13 +1,10 @@ -FROM nvidia/cuda:10.2-runtime-ubi8 as base +FROM nvidia/cuda:12.2.0-runtime-ubi8 as base -RUN yum -y install python39 python39-pip -RUN pip3.9 install cython torch -RUN pip3.9 install cnlp-transformers - -# temporary workaround for transformers/huggingface version issue -RUN pip3.9 uninstall -y huggingface-hub -RUN pip3.9 install huggingface-hub==0.11.0 +ARG cnlpt_version +RUN yum -y install python39 python39-pip +RUN pip3.9 install cython torch +RUN pip3.9 install cnlp-transformers==$cnlpt_version WORKDIR /opt/cnlp/ @@ -33,6 +30,8 @@ ENTRYPOINT ["cnlpt_negation_rest", "-p", "8000"] FROM base as termexists run /usr/bin/python3.9 -c "import sys;sys.path.append('/home/docker');import model_download; model_download.termexists()" +# Temporary fix, remove once the released pip package has the new model +run sed -i 
's/sharpseed-termexists/termexists_pubmedbert_ssm/g' /usr/local/lib/python3.9/site-packages/cnlpt/api/termexists_rest.py ENTRYPOINT ["cnlpt_termexists_rest", "-p", "8000"] FROM base as temporal diff --git a/docker/MAINTAINER.md b/docker/MAINTAINER.md index 08af2691..48bf2a2a 100644 --- a/docker/MAINTAINER.md +++ b/docker/MAINTAINER.md @@ -1,25 +1,30 @@ -To deploy images to dockerhub, first auth with docker with an account that -has access to the smartonfhir organization. Then, the following commands -should build and publish images (in the CPU case, for multiple architectures). +# Creating cNLPT Docker Images +## Setup +- First authenticate with Docker with an account that has access to the + [smartonfhir](https://hub.docker.com/u/smartonfhir/) organization. +- Make sure you have a local docker buildx setup that supports both amd64 and arm64. + - Run `docker buildx ls` to see your current setup. + - If you don't have a multi-platform instance already, you can create a new default one with: + `docker buildx create --driver docker-container --name cross-builder --platform linux/amd64,linux/arm64 --use` -MODEL should be one of: [base, dtr, event, negation, temporal, timex] -PROCESSOR should be one of: [cpu, gpu] -PLATFORMS should be linux/amd64 for GPU, and linux/amd64,linux/arm64 for CPU +## Building +Use the `./build.py` script to build new images. +Pass `--help` to see all your options. 
+
+### Local Testing
+Use the `./build.py` script to build the image you care about,
+and then run something like the following, depending on your model:
+```shell
+docker run --rm -p 8000:8000 smartonfhir/cnlp-transformers:termexists-latest-cpu
 ```
-export MAJOR=0
-export MINOR=4
-export PATCH=0
-export MODEL=negation
-export PROCESSOR=cpu
-export PLATFORMS=linux/amd64,linux/arm64
-docker buildx build \
---push --platform $PLATFORMS \
---tag smartonfhir/cnlp-transformers:$MODEL-latest-$PROCESSOR \
---tag smartonfhir/cnlp-transformers:$MODEL-$MAJOR-$PROCESSOR \
---tag smartonfhir/cnlp-transformers:$MODEL-$MAJOR.$MINOR-$PROCESSOR \
---tag smartonfhir/cnlp-transformers:$MODEL-$MAJOR.$MINOR.$PATCH-$PROCESSOR \
--f Dockerfile.$PROCESSOR \
---target $MODEL .
+With that specific example of the `termexists` model, you could smoke test it like so:
+```shell
+curl http://localhost:8000/termexists/process -H "Content-Type: application/json" -d '{"doc_text": "Patient has no cough", "entities": [[0, 6], [15, 19]]}'; echo
 ```
+Which should print `{"statuses":[1,-1]}` (the word `cough` was negated, but `Patient` was not).
+
+### Publishing to Docker Hub
+Run the same `./build.py` command you tested with, but add the `--push` flag.
+The built images will be pushed to Docker Hub.
diff --git a/docker/build.py b/docker/build.py
new file mode 100755
index 00000000..637a5940
--- /dev/null
+++ b/docker/build.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import subprocess
+
+# At time of writing this comment, the cnn and hier models seem to be works in progress, so aren't included here.
# Dockerfile build targets we know how to build and publish.
MODELS = [
    "current",
    "dtr",
    "event",
    "negation",
    "temporal",
    "termexists",
    "timex",
]


def _parse_args() -> argparse.Namespace:
    """Define and parse the command line arguments for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        action="append",
        choices=["all"] + MODELS,
        help="model image to build (can be given multiple times; defaults to all)",
    )
    parser.add_argument(
        "--processor",
        choices=["all", "cpu", "gpu"],
        default="all",
        help="processor flavor to build (defaults to all)",
    )
    parser.add_argument(
        "--push",
        action="store_true",
        default=False,
        help="push the built images to Docker Hub (defaults to a local --load)",
    )
    return parser.parse_args()


def parse_latest_version(pip_output: str) -> str:
    """
    Extract the newest release number from `pip index versions` output.

    :param pip_output: the full stdout of a `pip index versions PKG` call
    :returns: the version string from the "LATEST:" line
    :raises SystemExit: if no "LATEST:" line is present in the output
    """
    # Scan every line rather than assuming LATEST is the very last one, so that
    # trailing warnings or blank lines in pip's output don't break the parse.
    for line in pip_output.strip().splitlines():
        if "LATEST:" in line:
            return line.split()[-1]
    raise SystemExit("Did not understand 'pip index versions' output")


def get_latest_pip_version(package: str) -> str:
    """Query pip for the latest release of a software package"""
    process = subprocess.run(
        # Use a python version that matches the Dockerfiles
        ["pip", "index", "--python-version=3.9", "versions", package],
        capture_output=True,
        check=True,
        text=True,  # have subprocess decode stdout for us
    )
    return parse_latest_version(process.stdout)


def build_one(model: str, processor: str, *, version: str, push: bool = False) -> None:
    """
    Builds a single docker image.

    :param model: the Dockerfile build target, one of MODELS
    :param processor: "cpu" or "gpu" (selects which Dockerfile is used)
    :param version: the cnlp-transformers release to install, in x.y.z form
    :param push: if True, push the result to Docker Hub; else load it into the local daemon
    :raises SystemExit: if the version is not in x.y.z form
    :raises subprocess.CalledProcessError: if the docker build fails
    """
    print(f"Building model {model} for processor {processor}:")

    pwd = os.path.dirname(__file__)

    # Fail with a clear message (instead of an IndexError) on odd version strings,
    # since all four tags below depend on the x.y.z structure.
    try:
        major, minor, patch = version.split(".")
    except ValueError:
        raise SystemExit(f"Did not understand version '{version}' (expected x.y.z)") from None

    platforms = "linux/amd64"
    # Only build extra platforms on push, because --load can't do multi-platforms
    if processor == "cpu" and push:
        platforms += ",linux/arm64"

    build_args = [
        # to make sure that we don't have a version mismatch, we pin cnlpt
        f"--build-arg=cnlpt_version={version}",
        f"--file={pwd}/Dockerfile.{processor}",
        f"--platform={platforms}",
        f"--tag=smartonfhir/cnlp-transformers:{model}-latest-{processor}",
        f"--tag=smartonfhir/cnlp-transformers:{model}-{major}-{processor}",
        f"--tag=smartonfhir/cnlp-transformers:{model}-{major}.{minor}-{processor}",
        f"--tag=smartonfhir/cnlp-transformers:{model}-{major}.{minor}.{patch}-{processor}",
        f"--target={model}",
        pwd,
    ]
    if push:
        build_args.append("--push")  # to push to docker hub
    else:
        build_args.append("--load")  # to load into docker locally

    subprocess.run(["docker", "buildx", "build", *build_args], check=True)


if __name__ == "__main__":
    # Parse arguments here (not at import time) so the module stays importable for testing.
    args = _parse_args()

    processors = ["cpu", "gpu"] if args.processor == "all" else [args.processor]

    models = args.model
    if not args.model or "all" in args.model:
        models = MODELS

    # Check version of cnlpt available via pip.
    # Our Dockerfiles pull directly from pip, so we want to be setting the same version as we'll install.
    # We don't want to pull the version from our sibling code in this repo, because it might not be released yet,
    # but we still want to be able to push new builds of the existing releases.
    version = get_latest_pip_version("cnlp-transformers")

    for model in models:
        for processor in processors:
            build_one(model, processor, version=version, push=args.push)