From 863294a733e48575d18e9174b325e4fa48361ffe Mon Sep 17 00:00:00 2001 From: Tobias Macey Date: Wed, 8 Jun 2022 16:03:40 -0400 Subject: [PATCH] feat: Add build logic for Dagster container images As part of the updated build/deployment we want to have separate images for the Dagit and dagster-daemon processes, which are also separate from the user pipeline code so that they can all be built, deployed, and scaled independently. For the user pipelines we also want to ensure that the dbt project is available in the runtime environment. This does the following: - Copy all files related to the dbt project into user pipeline images by default - Create a multi-stage build for Dagit/dagster-daemon to avoid duplicate logic - Moves the Dagster-specific workspace and Dagster yaml files into the `ol_orchestrate` directory - Moves the dbt project files to the proper directory level in the repo - Adds the initial work to package up collections of Dagster pipelines based on the 'repository' as the entry-point for the Python distribution --- .pre-commit-config.yaml | 8 ++-- dagster.yaml | 3 -- dockerfiles/BUILD | 24 ++++++++++- dockerfiles/Dockerfile.dagit | 18 +++++---- dockerfiles/Dockerfile.user_pipeline | 8 +++- poetry.lock | 21 +++++++++- pyproject.toml | 6 ++- src/ol_dbt/{ol_data => }/.gitignore | 0 src/ol_dbt/BUILD | 4 ++ src/ol_dbt/{ol_data => }/README.md | 0 src/ol_dbt/{ol_data => }/analyses/.gitkeep | 0 src/ol_dbt/{ol_data => }/dbt_project.yml | 0 src/ol_dbt/{ol_data => }/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 0 .../models/example/my_second_dbt_model.sql | 0 .../{ol_data => }/models/example/schema.yml | 0 src/ol_dbt/{ol_data => }/packages.yml | 0 src/ol_dbt/{ol_data => }/seeds/.gitkeep | 0 src/ol_dbt/{ol_data => }/snapshots/.gitkeep | 0 src/ol_dbt/{ol_data => }/tests/.gitkeep | 0 src/ol_orchestrate/BUILD | 19 ++------- src/ol_orchestrate/dagster.yaml | 40 +++++++++++++++++++ .../ol_orchestrate/workspace.yaml | 0 23 files changed, 115 
insertions(+), 36 deletions(-) delete mode 100644 dagster.yaml rename src/ol_dbt/{ol_data => }/.gitignore (100%) create mode 100644 src/ol_dbt/BUILD rename src/ol_dbt/{ol_data => }/README.md (100%) rename src/ol_dbt/{ol_data => }/analyses/.gitkeep (100%) rename src/ol_dbt/{ol_data => }/dbt_project.yml (100%) rename src/ol_dbt/{ol_data => }/macros/.gitkeep (100%) rename src/ol_dbt/{ol_data => }/models/example/my_first_dbt_model.sql (100%) rename src/ol_dbt/{ol_data => }/models/example/my_second_dbt_model.sql (100%) rename src/ol_dbt/{ol_data => }/models/example/schema.yml (100%) rename src/ol_dbt/{ol_data => }/packages.yml (100%) rename src/ol_dbt/{ol_data => }/seeds/.gitkeep (100%) rename src/ol_dbt/{ol_data => }/snapshots/.gitkeep (100%) rename src/ol_dbt/{ol_data => }/tests/.gitkeep (100%) create mode 100644 src/ol_orchestrate/dagster.yaml rename workspace.yaml => src/ol_orchestrate/workspace.yaml (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 13f2864d3..6e9313042 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v4.3.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -32,7 +32,7 @@ repos: - id: yamllint args: [--format, parsable, -d, relaxed] - repo: https://github.com/asottile/pyupgrade - rev: v2.32.1 + rev: v2.34.0 hooks: - id: pyupgrade args: @@ -78,7 +78,7 @@ repos: - --extend-ignore=D1 - --diff - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.950 + rev: v0.961 hooks: - id: mypy additional_dependencies: @@ -87,7 +87,7 @@ repos: - types-pytz - types-pymysql - repo: https://github.com/sqlfluff/sqlfluff - rev: 0.13.1 + rev: 0.13.2 hooks: - id: sqlfluff-fix # Arbitrary arguments to show an example diff --git a/dagster.yaml b/dagster.yaml deleted file mode 100644 index ee0d8f73d..000000000 --- a/dagster.yaml +++ /dev/null @@ -1,3 +0,0 @@ ---- 
-telemetry: - enabled: true diff --git a/dockerfiles/BUILD b/dockerfiles/BUILD index 6de4a60cb..c81d4c0b7 100644 --- a/dockerfiles/BUILD +++ b/dockerfiles/BUILD @@ -1,9 +1,29 @@ docker_image( - name="docker", + name="dagit", source="Dockerfile.dagit", + dependencies=[ + "src/ol_orchestrate:dagit", + "src/ol_orchestrate:dagster-daemon", + "src/ol_orchestrate:project-config", + ], + target_stage="dagit" ) docker_image( - name="docker0", + name="dagster-daemon", + source="Dockerfile.dagit", + dependencies=[ + "src/ol_orchestrate:dagster-daemon", + "src/ol_orchestrate:project-config", + ], + target_stage="dagster-daemon" +) + +docker_image( + name="pipeline", + dependencies=[ + "src/ol_orchestrate:open-edx", + "src/ol_dbt:dbt_project" + ], source="Dockerfile.user_pipeline", ) diff --git a/dockerfiles/Dockerfile.dagit b/dockerfiles/Dockerfile.dagit index d20bc1541..f1ea03b40 100644 --- a/dockerfiles/Dockerfile.dagit +++ b/dockerfiles/Dockerfile.dagit @@ -1,22 +1,24 @@ FROM python:3.9-slim AS dagster-base -RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app -RUN useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster -RUN chown -R dagster: /opt/dagster/ +RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app && \ + useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster &&\ + chown -R dagster: /opt/dagster/ USER dagster ENV DAGSTER_HOME=/opt/dagster/dagster_home/ # Copy your code and workspace to /opt/dagster/app -COPY workspace.yaml /opt/dagster/app/ +COPY --chown=dagster:dagster src/ol_orchestrate/workspace.yaml /opt/dagster/app/ # Copy dagster instance YAML to $DAGSTER_HOME -COPY dagster.yaml /opt/dagster/dagster_home/ +COPY --chown=dagster:dagster src/ol_orchestrate/dagster.yaml /opt/dagster/dagster_home/ WORKDIR /opt/dagster/app EXPOSE 3000 -FROM dagster-base AS dagit -ENTRYPOINT ["dagit", "-h", "0.0.0.0", "-p", "3000"] - FROM dagster-base AS dagster-daemon +COPY --chown=dagster:dagster src.ol_orchestrate/dagster-daemon.pex /usr/local/bin/dagster-daemon 
ENTRYPOINT ["dagster-daemon", "run"] + +FROM dagster-base AS dagit +COPY --chown=dagster:dagster src.ol_orchestrate/dagit.pex /usr/local/bin/dagit +ENTRYPOINT ["dagit", "-h", "0.0.0.0", "-p", "3000"] diff --git a/dockerfiles/Dockerfile.user_pipeline b/dockerfiles/Dockerfile.user_pipeline index 1c96c03f6..7370e1718 100644 --- a/dockerfiles/Dockerfile.user_pipeline +++ b/dockerfiles/Dockerfile.user_pipeline @@ -1,3 +1,9 @@ FROM python:3.9-slim -CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000", ] +RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app /tmp/packages && \ + useradd -s /bin/bash -d /opt/dagster/dagster_home/ dagster +COPY *.whl /tmp/packages/ +RUN pip install --no-cache-dir /tmp/packages/* && rm -r /tmp/packages/ +COPY --chown=dagster:dagster src/ol_dbt/ /opt/dbt/ +USER dagster +CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000"] diff --git a/poetry.lock b/poetry.lock index 759ac1484..9462ae6dc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1516,6 +1516,21 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [package.dependencies] pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} +[[package]] +name = "hvac" +version = "0.11.2" +description = "HashiCorp Vault API client" +category = "main" +optional = false +python-versions = ">=2.7" + +[package.dependencies] +requests = ">=2.21.0" +six = ">=1.5.0" + +[package.extras] +parser = ["pyhcl (>=0.3.10)"] + [[package]] name = "identify" version = "2.5.1" @@ -3422,7 +3437,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "8c6636e66c5b3f1f687afda228c0ee508acf22eacdfa991f98e6ad967ec15297" +content-hash = "060b2acef1a834c46314f6e41b5fd17999fd21db1f9653099d411dd06da66f7d" [metadata.files] agate = [ @@ -4173,6 +4188,10 @@ humanfriendly = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = 
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, ] +hvac = [ + {file = "hvac-0.11.2-py2.py3-none-any.whl", hash = "sha256:3e8a34804b1e20954a2b4991cc13ed9c09b32e50dadd9d3438224481150f6568"}, + {file = "hvac-0.11.2.tar.gz", hash = "sha256:f905c59d32d88d3f67571fe5a8a78de4659e04798ad809de439f667247d13626"}, +] identify = [ {file = "identify-2.5.1-py2.py3-none-any.whl", hash = "sha256:0dca2ea3e4381c435ef9c33ba100a78a9b40c0bab11189c7cf121f75815efeaa"}, {file = "identify-2.5.1.tar.gz", hash = "sha256:3d11b16f3fe19f52039fb7e39c9c884b21cb1b586988114fbe42671f03de3e82"}, diff --git a/pyproject.toml b/pyproject.toml index 9b8e7cb63..baaf7e244 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ httpx = "^0.22.0" pyarrow = "^8.0.0" pyathena = "^2.8.0" pymysql = "^1.0.0" +hvac = "^0.11.2" [tool.poetry.dev-dependencies] black = "*" @@ -58,7 +59,7 @@ requires = ["poetry_core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.sqlfluff.core] -templater = "dbt" +templater = "jinja" dialect = "hive" sql_file_exts = ".sql,.sql.j2,.dml,.ddl" @@ -67,3 +68,6 @@ unwrap_wrapped_queries = true [tool.sqlfluff.templater.jinja] apply_dbt_builtins = true + +[tool.sqlfluff.templater.dbt] +project_dir = "src/ol_dbt/" diff --git a/src/ol_dbt/ol_data/.gitignore b/src/ol_dbt/.gitignore similarity index 100% rename from src/ol_dbt/ol_data/.gitignore rename to src/ol_dbt/.gitignore diff --git a/src/ol_dbt/BUILD b/src/ol_dbt/BUILD new file mode 100644 index 000000000..54dfad211 --- /dev/null +++ b/src/ol_dbt/BUILD @@ -0,0 +1,4 @@ +files( + name="dbt_project", + sources=["**/*.yml", "**/*.json", "**/*.sql", "**/*.yaml"], +) diff --git a/src/ol_dbt/ol_data/README.md b/src/ol_dbt/README.md similarity index 100% rename from src/ol_dbt/ol_data/README.md rename to src/ol_dbt/README.md diff --git a/src/ol_dbt/ol_data/analyses/.gitkeep 
b/src/ol_dbt/analyses/.gitkeep similarity index 100% rename from src/ol_dbt/ol_data/analyses/.gitkeep rename to src/ol_dbt/analyses/.gitkeep diff --git a/src/ol_dbt/ol_data/dbt_project.yml b/src/ol_dbt/dbt_project.yml similarity index 100% rename from src/ol_dbt/ol_data/dbt_project.yml rename to src/ol_dbt/dbt_project.yml diff --git a/src/ol_dbt/ol_data/macros/.gitkeep b/src/ol_dbt/macros/.gitkeep similarity index 100% rename from src/ol_dbt/ol_data/macros/.gitkeep rename to src/ol_dbt/macros/.gitkeep diff --git a/src/ol_dbt/ol_data/models/example/my_first_dbt_model.sql b/src/ol_dbt/models/example/my_first_dbt_model.sql similarity index 100% rename from src/ol_dbt/ol_data/models/example/my_first_dbt_model.sql rename to src/ol_dbt/models/example/my_first_dbt_model.sql diff --git a/src/ol_dbt/ol_data/models/example/my_second_dbt_model.sql b/src/ol_dbt/models/example/my_second_dbt_model.sql similarity index 100% rename from src/ol_dbt/ol_data/models/example/my_second_dbt_model.sql rename to src/ol_dbt/models/example/my_second_dbt_model.sql diff --git a/src/ol_dbt/ol_data/models/example/schema.yml b/src/ol_dbt/models/example/schema.yml similarity index 100% rename from src/ol_dbt/ol_data/models/example/schema.yml rename to src/ol_dbt/models/example/schema.yml diff --git a/src/ol_dbt/ol_data/packages.yml b/src/ol_dbt/packages.yml similarity index 100% rename from src/ol_dbt/ol_data/packages.yml rename to src/ol_dbt/packages.yml diff --git a/src/ol_dbt/ol_data/seeds/.gitkeep b/src/ol_dbt/seeds/.gitkeep similarity index 100% rename from src/ol_dbt/ol_data/seeds/.gitkeep rename to src/ol_dbt/seeds/.gitkeep diff --git a/src/ol_dbt/ol_data/snapshots/.gitkeep b/src/ol_dbt/snapshots/.gitkeep similarity index 100% rename from src/ol_dbt/ol_data/snapshots/.gitkeep rename to src/ol_dbt/snapshots/.gitkeep diff --git a/src/ol_dbt/ol_data/tests/.gitkeep b/src/ol_dbt/tests/.gitkeep similarity index 100% rename from src/ol_dbt/ol_data/tests/.gitkeep rename to src/ol_dbt/tests/.gitkeep 
diff --git a/src/ol_orchestrate/BUILD b/src/ol_orchestrate/BUILD index 2cf09458c..9c55cd446 100644 --- a/src/ol_orchestrate/BUILD +++ b/src/ol_orchestrate/BUILD @@ -20,22 +20,9 @@ pex_binary( entry_point="dagster.daemon.cli:main" ) -docker_image( - name="ol-dagit", - description="Dagster web service container iamge", - dependencies=[ - ":dagit" - ], - source="dockerfiles/Dockerfile.dagster" -) - -docker_image( - name="ol-dagster-daemon", - description="Dagster daemon container image for scheduling and run triggering", - dependencies=[ - ":dagster-daemon" - ], - source="dockerfiles/Dockerfile.dagster" +files( + name="project-config", + sources=["dagster.yaml", "workspace.yaml"] ) python_distribution( diff --git a/src/ol_orchestrate/dagster.yaml b/src/ol_orchestrate/dagster.yaml new file mode 100644 index 000000000..50ee42432 --- /dev/null +++ b/src/ol_orchestrate/dagster.yaml @@ -0,0 +1,40 @@ +--- +postgres_config: &postgres_config + config: + postgres_db: + username: + env: DAGSTER_PG_USERNAME + password: + env: DAGSTER_PG_PASSWORD + hostname: + env: DAGSTER_PG_HOST + db_name: + env: dagster + port: 5432 + +telemetry: + enabled: true +scheduler: + module: dagster.core.scheduler + class: DagsterDaemonScheduler +compute_logs: + module: dagster_aws.s3.compute_log_manager + class: S3ComputeLogManager + config: + bucket: dagster-{{ environment }} + prefix: compute-logs/ +run_storage: + module: dagster_postgres.run_storage + class: PostgresRunStorage + config: + <<: *postgres_config +event_log_storage: + module: dagster_postgres.event_log + class: PostgresEventLogStorage + config: + <<: *postgres_config +schedule_storage: + module: dagster_postgres.schedule_storage + class: PostgresScheduleStorage + config: + <<: *postgres_config diff --git a/workspace.yaml b/src/ol_orchestrate/workspace.yaml similarity index 100% rename from workspace.yaml rename to src/ol_orchestrate/workspace.yaml