Skip to content

Commit

Permalink
remove mlem, fix dependencies, import predict-drug-target and add it …
Browse files Browse the repository at this point in the history
…to TRAPI, update deployment dockerfile
  • Loading branch information
vemonet committed Apr 10, 2024
1 parent da8214f commit 91d60fc
Show file tree
Hide file tree
Showing 12 changed files with 366 additions and 277 deletions.
3 changes: 0 additions & 3 deletions .mlem.yaml

This file was deleted.

57 changes: 24 additions & 33 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8
ARG BASE_IMAGE=tiangolo/uvicorn-gunicorn-fastapi:python3.10
FROM ${BASE_IMAGE}
# Gunicorn image 3.4G: https://github.com/tiangolo/uvicorn-gunicorn-docker/tree/master/docker-images


Expand All @@ -8,53 +9,43 @@ LABEL org.opencontainers.image.source="https://github.com/MaastrichtU-IDS/transl
USER root
WORKDIR /app

# Java 11 required for Spark to work
RUN echo 'deb http://ftp.fr.debian.org/debian bullseye main' >> /etc/apt/sources.list.d/bullseye.list && \
apt-get update && \
apt-get install -y build-essential wget curl vim openjdk-11-jdk && \
RUN apt-get update && \
apt-get install -y build-essential wget curl vim && \
pip install --upgrade pip

# RUN curl -sSf https://rye-up.com/get | RYE_INSTALL_OPTION="--yes" bash

# TODO: remove? Install Spark for standalone context in /opt
ENV APACHE_SPARK_VERSION=3.2.0
ENV HADOOP_VERSION=3.2
ENV SPARK_HOME=/opt/spark
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info"
ENV PATH="${PATH}:${SPARK_HOME}/bin"
RUN wget -q -O spark.tgz https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
tar xzf spark.tgz -C /opt && \
rm "spark.tgz" && \
ln -s "/opt/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" $SPARK_HOME
RUN echo "log4j.rootCategory=ERROR, console" > $SPARK_HOME/conf/log4j.properties
# RUN chown -R 1000:1000 /opt/spark

# Define some environment variables for pyspark and gunicorn config
ENV PYSPARK_PYTHON=/usr/local/bin/python3
ENV PYSPARK_DRIVER_PYTHON=/usr/local/bin/python3



ENV PORT=8808
ENV GUNICORN_CMD_ARGS="--preload"
ENV PORT=8808 \
GUNICORN_CMD_ARGS="--preload" \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
ACCESS_LOG="-" \
ERROR_LOG="-" \
OPENTELEMETRY_ENABLED=false

# Use requirements.txt to install some dependencies only when needed
COPY requirements.txt .
RUN pip install -r requirements.txt
# COPY requirements.txt .
# RUN pip install -r requirements.txt

## Copy the source code (in the same folder as the Dockerfile)
COPY . .

ENV MODULE_NAME=trapi.main
ENV VARIABLE_NAME=app
ENV MODULE_NAME=trapi.main \
VARIABLE_NAME=app

# WORKDIR /app/trapi-openpredict

# RUN pip install -e /app/predict-drug-target /app/trapi-predict-kit
RUN pip install -e .

RUN pip install -e ".[train,test]"
# RUN pip install -e ./trapi-predict-kit
# RUN pip install -e . /app/predict-drug-target /app/trapi-predict-kit
# RUN pip install -e /app/trapi-predict-kit

RUN dvc pull -f

EXPOSE 8808

# ENTRYPOINT [ "gunicorn", "-w", "8", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8808", "trapi.main:app"]
# ENTRYPOINT [ "gunicorn", "-w", "8", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8808", "src.trapi_openpredict.main:app"]

# Build entrypoint script to pull latest dvc changes before startup
RUN echo "#!/bin/bash" > /entrypoint.sh && \
Expand Down
23 changes: 12 additions & 11 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ services:

# OPENPREDICT_APIKEY: ${OPENPREDICT_APIKEY}

jaeger-otel-agent.sri:
image: jaegertracing/all-in-one:latest
# ports:
# - "16686:16686"
# - "4318:4318"
# - "6831:6831/udp"
environment:
LOG_LEVEL: debug
VIRTUAL_HOST: jaeger.137.120.31.102.nip.io
LETSENCRYPT_HOST: jaeger.137.120.31.102.nip.io
VIRTUAL_PORT: 16686
# NOTE: not required, just for testing
# jaeger-otel-agent.sri:
# image: jaegertracing/all-in-one:latest
# # ports:
# # - "16686:16686"
# # - "4318:4318"
# # - "6831:6831/udp"
# environment:
# LOG_LEVEL: debug
# VIRTUAL_HOST: jaeger.137.120.31.102.nip.io
# LETSENCRYPT_HOST: jaeger.137.120.31.102.nip.io
# VIRTUAL_PORT: 16686
49 changes: 49 additions & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
version: "3"
services:

  # Training container for the original OpenPredict drug-disease model.
  # It is built from src/openpredict_model/Dockerfile and only idles
  # (`sleep infinity`), so training is started manually inside it,
  # e.g. via `docker compose exec train ...`.
  train:
    build:
      context: .
      dockerfile: src/openpredict_model/Dockerfile
    ports:
      - 8808:8808
    volumes:
      # Mount the repository over /app so local changes are visible in the container
      - ./:/app
    environment:
      # Show print() in logs:
      PYTHONUNBUFFERED: '1'
      LOG_LEVEL: 'INFO'
      NO_JAEGER: "true"
    entrypoint: sleep infinity


  # Container used for testing and running scripts
  tests:
    build: .
    volumes:
      - ./:/app
      - ~/.nanopub-docker:/root/.nanopub
    environment:
      PYTHONUNBUFFERED: '1'
      LOG_LEVEL: 'INFO'
      NO_JAEGER: "true"
    # Default: run the integration test suite with coverage of src
    entrypoint: pytest --cov=src tests/integration
    # entrypoint: pytest tests/integration/test_train_model.py -s
    # entrypoint: pytest tests/integration/test_openpredict_api.py::test_post_trapi -s
    # entrypoint: pytest tests/package/test_decorator.py -s


  # Container to deploy a JupyterLab/VSCode workspace for development
  # workspace:
  #   image: ghcr.io/maastrichtu-ids/jupyterlab
  #   ports:
  #     - 8888:8888
  #   volumes:
  #     - ./:/home/jovyan/work
  #   user: root
  #   environment:
  #     - GRANT_SUDO=yes
  #     - LOG_LEVEL=INFO
  #     ## With password:
  #     # - JUPYTER_TOKEN=password
33 changes: 1 addition & 32 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,4 @@ services:
PYTHONUNBUFFERED: '1'
LOG_LEVEL: 'INFO'
NO_JAEGER: "true"
entrypoint: uvicorn trapi.main:app --host 0.0.0.0 --port 8808 --debug --reload


# Container used for testing and running scripts
tests:
build: .
volumes:
- ./:/app
- ~/.nanopub-docker:/root/.nanopub
environment:
PYTHONUNBUFFERED: '1'
LOG_LEVEL: 'INFO'
NO_JAEGER: "true"
entrypoint: pytest --cov=src tests/integration
# entrypoint: pytest tests/integration/test_train_model.py -s
# entrypoint: pytest tests/integration/test_openpredict_api.py::test_post_trapi -s
# entrypoint: pytest tests/package/test_decorator.py -s


# Container to deploy a JupyterLab/VSCode workspace for development
# workspace:
# image: ghcr.io/maastrichtu-ids/jupyterlab
# ports:
# - 8888:8888
# volumes:
# - ./:/home/jovyan/work
# user: root
# environment:
# - GRANT_SUDO=yes
# - LOG_LEVEL=INFO
# ## With password:
# # - JUPYTER_TOKEN=password
entrypoint: uvicorn trapi.main:app --host 0.0.0.0 --port 8808 --reload
30 changes: 13 additions & 17 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,28 @@ classifiers = [
dynamic = ["version"]

dependencies = [
"requests >=2.23.0",
# "trapi-predict-kit[opentelemetry] >=0.2.3",
"trapi-predict-kit @ git+https://github.com/MaastrichtU-IDS/trapi-predict-kit.git",
# "trapi-predict-kit @ {root:uri}/trapi-predict-kit",
"numpy",
"pandas",
"scikit-learn",
"gensim", # For word2vec
"scipy==1.10.1", # https://stackoverflow.com/questions/78279136/importerror-cannot-import-name-triu-from-scipy-linalg-gensim

"pydantic >=1.9",
"fastapi >=0.68.1",
"requests >=2.23.0",
"rdflib >=6.1.1",
"SPARQLWrapper >=2.0.0,<3.0.0",
# "mlem",
"mlem >=0.4.0",
"reasoner-pydantic >=4.1.4",
# "trapi-predict-kit[web,opentelemetry] >=0.2.3",
"trapi-predict-kit[web,opentelemetry] @ git+https://github.com/MaastrichtU-IDS/trapi-predict-kit.git",
# "trapi-predict-kit @ {root:uri}/trapi-predict-kit",
"predict-drug-target @ git+https://github.com/MaastrichtU-IDS/predict-drug-target.git",
# "predict-drug-target", # Load from workspace

"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-exporter-jaeger",
"opentelemetry-instrumentation-fastapi",
"opentelemetry-instrumentation-httpx",
"opentelemetry-instrumentation-requests",
# "fairworkflows",
# "fairworkflows@git+https://github.com/vemonet/fairworkflows.git",
# "yatiml >=0.10.0",
# "pydantic >=1.9",
]

[project.optional-dependencies]
train = [
# Dependencies to train the original OpenPredict drug-disease model
"numpy ==1.16.1", # requires python 3.8
"pandas ==1.1.1",
"torch ==1.13.1",
Expand Down
67 changes: 67 additions & 0 deletions src/openpredict_model/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8
# The gunicorn base image is large (about 3.4G); image sources:
# https://github.com/tiangolo/uvicorn-gunicorn-docker/tree/master/docker-images

# NOTE: this Dockerfile builds the image used to train the original OpenPredict drug-disease model

LABEL org.opencontainers.image.source="https://github.com/MaastrichtU-IDS/translator-openpredict"

# Run the following build steps as root, with /app as the working directory
USER root
WORKDIR /app

# Java 11 is required for Spark to work. The Debian bullseye repository is
# added before installing (presumably because openjdk-11 is not available in
# the base image's default repositories -- confirm).
# The apt package lists are removed in the same layer and pip runs without its
# download cache, so neither ends up in the image.
RUN echo 'deb http://ftp.fr.debian.org/debian bullseye main' >> /etc/apt/sources.list.d/bullseye.list && \
    apt-get update && \
    apt-get install -y build-essential wget curl vim openjdk-11-jdk && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --no-cache-dir --upgrade pip


# TODO: remove? Standalone Spark distribution unpacked under /opt.
# Version and location settings are independent of each other, so they share one ENV.
ENV APACHE_SPARK_VERSION=3.2.0 \
    HADOOP_VERSION=3.2 \
    SPARK_HOME=/opt/spark \
    SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info"
# PATH refers to SPARK_HOME, so it must be set in a later ENV instruction than SPARK_HOME.
ENV PATH="${PATH}:${SPARK_HOME}/bin"
# Download the release archive, unpack it into /opt, drop the archive, link the
# versioned directory to SPARK_HOME, and restrict Spark's root logger to ERROR.
RUN wget -q -O spark.tgz "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    tar xzf spark.tgz -C /opt && \
    rm "spark.tgz" && \
    ln -s "/opt/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}" && \
    echo "log4j.rootCategory=ERROR, console" > "${SPARK_HOME}/conf/log4j.properties"
# RUN chown -R 1000:1000 /opt/spark

# Environment for PySpark (Python interpreter used by workers and driver)
# and for the gunicorn server (listening port and extra command-line arguments)
ENV PYSPARK_PYTHON=/usr/local/bin/python3 \
    PYSPARK_DRIVER_PYTHON=/usr/local/bin/python3 \
    PORT=8808 \
    GUNICORN_CMD_ARGS="--preload"

# Install the dependencies from requirements.txt in their own layer, copied
# before the rest of the source so this layer is rebuilt only when
# requirements.txt changes. --no-cache-dir keeps pip's download cache out of
# the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

## Copy the whole build context into the working directory
COPY . .

# Python module and variable name of the application object
# (presumably read by the base image's start script -- confirm)
ENV MODULE_NAME=trapi.main \
    VARIABLE_NAME=app

# Editable install of the project with its "train" and "test" extras;
# --no-cache-dir keeps pip's download cache out of the image
RUN pip install --no-cache-dir -e ".[train,test]"
# RUN pip install -e ./trapi-predict-kit

# Fetch the DVC-tracked files at build time (-f: force, do not prompt)
RUN dvc pull -f

# Documents the port the server listens on (same value as PORT)
EXPOSE 8808

# ENTRYPOINT [ "gunicorn", "-w", "8", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8808", "trapi.main:app"]

# Build the entrypoint script that pulls the latest dvc changes before startup.
# The script has no `set -e`, so a failing `dvc pull` does not prevent the
# server from starting. `exec` replaces the wrapper shell with /start.sh so
# the server process receives stop signals (e.g. SIGTERM from `docker stop`)
# directly instead of sitting behind an idle bash process.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'dvc pull' \
        'exec /start.sh' \
        > /entrypoint.sh && \
    chmod +x /entrypoint.sh


CMD [ "/entrypoint.sh" ]
Loading

0 comments on commit 91d60fc

Please sign in to comment.