Skip to content

Commit

Permalink
feat: Add build logic for Dagster container images
Browse files Browse the repository at this point in the history
As part of the updated build/deployment we want to have separate images for the Dagit
and dagster-daemon processes, which are also separate from the user pipeline code so
that they can all be built, deployed, and scaled independently. For the user pipelines
we also want to ensure that the dbt project is available in the runtime
environment. This commit does the following:
- Copies all files related to the dbt project into user pipeline images by default
- Creates a multi-stage build for Dagit/dagster-daemon to avoid duplicate logic
- Moves the Dagster-specific workspace and Dagster YAML files into the `ol_orchestrate` directory
- Moves the dbt project files to the proper directory level in the repo
- Adds the initial work to package up collections of Dagster pipelines, using the 'repository' as the entry point for the Python distribution
  • Loading branch information
blarghmatey committed Jun 8, 2022
1 parent 3579bc3 commit 863294a
Show file tree
Hide file tree
Showing 23 changed files with 115 additions and 36 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand Down Expand Up @@ -32,7 +32,7 @@ repos:
- id: yamllint
args: [--format, parsable, -d, relaxed]
- repo: https://github.com/asottile/pyupgrade
rev: v2.32.1
rev: v2.34.0
hooks:
- id: pyupgrade
args:
Expand Down Expand Up @@ -78,7 +78,7 @@ repos:
- --extend-ignore=D1
- --diff
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.950
rev: v0.961
hooks:
- id: mypy
additional_dependencies:
Expand All @@ -87,7 +87,7 @@ repos:
- types-pytz
- types-pymysql
- repo: https://github.com/sqlfluff/sqlfluff
rev: 0.13.1
rev: 0.13.2
hooks:
- id: sqlfluff-fix
# Arbitrary arguments to show an example
Expand Down
3 changes: 0 additions & 3 deletions dagster.yaml

This file was deleted.

24 changes: 22 additions & 2 deletions dockerfiles/BUILD
Original file line number Diff line number Diff line change
@@ -1,9 +1,29 @@
docker_image(
name="docker",
name="dagit",
source="Dockerfile.dagit",
dependencies=[
"src/ol_orchestrate:dagit",
"src/ol_orchestrate:dagster-daemon",
"src/ol_orchestrate:project-config",
],
target_stage="dagit"
)

docker_image(
name="docker0",
name="dagster-daemon",
source="Dockerfile.dagit",
dependencies=[
"src/ol_orchestrate:dagster-daemon",
"src/ol_orchestrate:project-config",
],
target_stage="dagster-daemon"
)

# Image target "pipeline": built from Dockerfile.user_pipeline, with the
# open-edx target and the dbt project files declared as dependencies.
docker_image(
    name="pipeline",
    source="Dockerfile.user_pipeline",
    dependencies=[
        "src/ol_orchestrate:open-edx",
        "src/ol_dbt:dbt_project",
    ],
)
18 changes: 10 additions & 8 deletions dockerfiles/Dockerfile.dagit
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
FROM python:3.9-slim AS dagster-base
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app
RUN useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster
RUN chown -R dagster: /opt/dagster/
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app && \
useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster &&\
chown -R dagster: /opt/dagster/
USER dagster
ENV DAGSTER_HOME=/opt/dagster/dagster_home/

# Copy your code and workspace to /opt/dagster/app
COPY workspace.yaml /opt/dagster/app/
COPY --chown=dagster:dagster src/ol_orchestrate/workspace.yaml /opt/dagster/app/

# Copy dagster instance YAML to $DAGSTER_HOME
COPY dagster.yaml /opt/dagster/dagster_home/
COPY --chown=dagster:dagster src/ol_orchestrate/dagster.yaml /opt/dagster/dagster_home/

WORKDIR /opt/dagster/app

EXPOSE 3000

FROM dagster-base AS dagit
ENTRYPOINT ["dagit", "-h", "0.0.0.0", "-p", "3000"]

FROM dagster-base AS dagster-daemon
COPY --chown=dagster:dagster src.ol_orchestrate/dagster-daemon.pex /usr/local/bin/dagster-daemon
ENTRYPOINT ["dagster-daemon", "run"]

FROM dagster-base AS dagit
COPY --chown=dagster:dagster src.ol_orchestrate/dagit.pex /usr/local/bin/dagit
ENTRYPOINT ["dagit", "-h", "0.0.0.0", "-p", "3000"]
8 changes: 7 additions & 1 deletion dockerfiles/Dockerfile.user_pipeline
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
FROM python:3.9-slim

CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000", ]
# Create the Dagster directories and an unprivileged `dagster` user whose home
# is the Dagster home directory. Ownership of /opt/dagster/ is handed to that
# user so the process started by CMD (which runs as `dagster`) can write
# there; this matches the chown performed in Dockerfile.dagit.
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app /tmp/packages && \
    useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster && \
    chown -R dagster: /opt/dagster/

# Install every wheel found at the root of the build context, then remove the
# staging directory so the wheels do not remain in the final filesystem.
COPY *.whl /tmp/packages/
RUN pip install --no-cache-dir /tmp/packages/* && rm -r /tmp/packages/

# Ship the dbt project alongside the pipeline code, owned by the runtime user.
COPY --chown=dagster:dagster src/ol_dbt/ /opt/dbt/

# Drop root privileges before starting the gRPC server.
USER dagster

# Serve the user code over gRPC on all interfaces, port 4000.
CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000"]
21 changes: 20 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ httpx = "^0.22.0"
pyarrow = "^8.0.0"
pyathena = "^2.8.0"
pymysql = "^1.0.0"
hvac = "^0.11.2"

[tool.poetry.dev-dependencies]
black = "*"
Expand All @@ -58,7 +59,7 @@ requires = ["poetry_core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.sqlfluff.core]
templater = "dbt"
templater = "jinja"
dialect = "hive"
sql_file_exts = ".sql,.sql.j2,.dml,.ddl"

Expand All @@ -67,3 +68,6 @@ unwrap_wrapped_queries = true

[tool.sqlfluff.templater.jinja]
apply_dbt_builtins = true

[tool.sqlfluff.templater.dbt]
project_dir = "src/ol_dbt/"
File renamed without changes.
4 changes: 4 additions & 0 deletions src/ol_dbt/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Loose-file target "dbt_project": every YAML, JSON and SQL file found at any
# depth below this directory.
files(
    name="dbt_project",
    sources=[
        "**/*.yml",
        "**/*.json",
        "**/*.sql",
        "**/*.yaml",
    ],
)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
19 changes: 3 additions & 16 deletions src/ol_orchestrate/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,9 @@ pex_binary(
entry_point="dagster.daemon.cli:main"
)

docker_image(
name="ol-dagit",
description="Dagster web service container iamge",
dependencies=[
":dagit"
],
source="dockerfiles/Dockerfile.dagster"
)

docker_image(
name="ol-dagster-daemon",
description="Dagster daemon container image for scheduling and run triggering",
dependencies=[
":dagster-daemon"
],
source="dockerfiles/Dockerfile.dagster"
# Loose-file target "project-config": the Dagster instance and workspace
# configuration files that live next to this BUILD file.
files(
    name="project-config",
    sources=[
        "dagster.yaml",
        "workspace.yaml",
    ],
)

python_distribution(
Expand Down
40 changes: 40 additions & 0 deletions src/ol_orchestrate/dagster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# Dagster instance configuration: telemetry, scheduler, compute-log storage and
# the Postgres-backed run / event-log / schedule storages.

# Shared Postgres connection settings, reused by the three storages below.
# The anchor is placed on the inner mapping (the one that holds `postgres_db`)
# so that `config: {<<: *postgres_config}` expands to
# `config: {postgres_db: ...}` rather than `config: {config: {...}}`.
# NOTE(review): confirm that Dagster accepts this extra top-level holder key
# when validating the instance config.
postgres_config:
  config: &postgres_config
    postgres_db:
      username:
        env: DAGSTER_PG_USERNAME
      password:
        env: DAGSTER_PG_PASSWORD
      hostname:
        env: DAGSTER_PG_HOST
      # Literal database name. This was previously `env: dagster`, which
      # reads the name from an environment variable called "dagster" instead
      # of using "dagster" as the database name.
      db_name: dagster
      port: 5432

telemetry:
  enabled: true

scheduler:
  module: dagster.core.scheduler
  class: DagsterDaemonScheduler

compute_logs:
  module: dagster_aws.s3.compute_log_manager
  class: S3ComputeLogManager
  config:
    # NOTE(review): `{{ environment }}` looks like a template placeholder;
    # presumably it is rendered before deployment -- confirm, otherwise the
    # bucket name is taken literally.
    bucket: dagster-{{ environment }}
    prefix: compute-logs/

run_storage:
  module: dagster_postgres.run_storage
  class: PostgresRunStorage
  config:
    <<: *postgres_config

event_log_storage:
  module: dagster_postgres.event_log
  class: PostgresEventLogStorage
  config:
    <<: *postgres_config

schedule_storage:
  module: dagster_postgres.schedule_storage
  class: PostgresScheduleStorage
  config:
    <<: *postgres_config
File renamed without changes.

0 comments on commit 863294a

Please sign in to comment.