From ba08821060d3c18982270ba0ecc7d73d8ebff498 Mon Sep 17 00:00:00 2001 From: Vitali Yanushchyk Date: Mon, 7 Oct 2024 16:51:07 -0300 Subject: [PATCH] chg ! dedup engine --- .dockerignore | 3 +++ .gitignore | 4 ++-- README.md | 4 ++++ compose.yml | 10 ++++++++++ docker/Dockerfile | 19 +++++++++++++++++++ .../hde/deduplication_description.md | 1 + docs/components/hde/development.md | 1 + docs/components/hde/did/workflow.md | 5 +++++ docs/components/hde/index.md | 3 ++- docs/components/hde/setup.md | 7 ++++++- docs/components/hde/troubleshooting.md | 3 ++- docs/glossary/terms/process.md | 2 ++ 12 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 .dockerignore create mode 100644 compose.yml create mode 100644 docker/Dockerfile create mode 100644 docs/components/hde/deduplication_description.md diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e682ecc --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +* +!pyproject.toml +!pdm.lock diff --git a/.gitignore b/.gitignore index 1da7788..456f7ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ .* ~* +__pycache__ !.github - -__pycache__ +!.dockerignore diff --git a/README.md b/README.md index 02a58c9..927afc7 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,7 @@ add in your .envrc #### Start $ mkdocs build $ mkdocs serve + +#### Using Docker compose + + $ docker compose up diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..7388ec5 --- /dev/null +++ b/compose.yml @@ -0,0 +1,10 @@ +services: + + mkdocs: + build: + context: . + dockerfile: docker/Dockerfile + ports: + - "8000:8000" + volumes: + - .:/app diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..05808a8 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + git \ + libcairo2-dev libfreetype6-dev libffi-dev libjpeg-dev libpng-dev libz-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY pyproject.toml pdm.lock /app/ + +RUN pip install -U pip pdm \ + && pdm venv create --name for-dev 3.12 \ + && pdm sync --venv for-dev \ + && rm -rf ~/.cache/pip ~/.cache/pdm + +CMD ["pdm", "run", "mkdocs", "serve", "-a", "0.0.0.0:8000", "--no-strict"] diff --git a/docs/components/hde/deduplication_description.md b/docs/components/hde/deduplication_description.md new file mode 100644 index 0000000..3753902 --- /dev/null +++ b/docs/components/hde/deduplication_description.md @@ -0,0 +1 @@ +It provides users with powerful capabilities to identify and remove duplicate records within the system, ensuring that data remains clean, consistent, and reliable. diff --git a/docs/components/hde/development.md b/docs/components/hde/development.md index c59539d..56e1b35 100644 --- a/docs/components/hde/development.md +++ b/docs/components/hde/development.md @@ -3,6 +3,7 @@ To develop the service locally, you can utilize the provided `compose.yml` file. This configuration file defines all the necessary services, including the primary application and its dependencies, to create a consistent development environment. By using **Docker Compose**, you can effortlessly spin up the entire application stack, ensuring that all components work seamlessly together. To build and start the service, along with its dependencies, run the following command: + docker compose up --build diff --git a/docs/components/hde/did/workflow.md b/docs/components/hde/did/workflow.md index ffc1ab9..1bb83e5 100644 --- a/docs/components/hde/did/workflow.md +++ b/docs/components/hde/did/workflow.md @@ -1,3 +1,8 @@ +--- +tags: + - Deduplication +--- + The Image Processing and Duplicate Detection workflow is designed to provide reliable face detection, recognition, and duplicate detection by leveraging a pre-trained deep learning model. ## Inference Mode Operation diff --git a/docs/components/hde/index.md b/docs/components/hde/index.md index 1a5043a..09b366b 100644 --- a/docs/components/hde/index.md +++ b/docs/components/hde/index.md @@ -1,7 +1,8 @@ # Deduplication -Deduplication Engine component of the HOPE ecosystem. It provides users with powerful capabilities to identify and remove duplicate records within the system, ensuring that data remains clean, consistent, and reliable. +Deduplication Engine component of the HOPE ecosystem. +--8<-- "components/hde/deduplication_description.md" ## Repository diff --git a/docs/components/hde/setup.md b/docs/components/hde/setup.md index 6b93434..d90f7fb 100644 --- a/docs/components/hde/setup.md +++ b/docs/components/hde/setup.md @@ -1,3 +1,8 @@ +--- +tags: + - Deduplication +--- + ## Prerequisites This project utilizes [PDM](https://pdm-project.org/) as the package manager for managing Python dependencies and environments. @@ -78,7 +83,7 @@ This backend is used for storing locally downloaded DNN model files and encoded ##### FILE_STORAGE_DNN This backend is dedicated to storing DNN model files. Ensure that the following two files are present in this storage: -1. *deploy.prototxt*: Defines the model architecture. +1. *deploy.prototxt.txt*: Defines the model architecture. 2. *res10_300x300_ssd_iter_140000.caffemodel*: Contains the pre-trained model weights. The current process involves downloading files from a [GitHub repository](https://github.com/sr6033/face-detection-with-OpenCV-and-DNN) and saving them to this specific Azure Blob Storage using command `django-admin upgrade --with-dnn-setup`, or the specialized`django-admin dnnsetup` command . diff --git a/docs/components/hde/troubleshooting.md b/docs/components/hde/troubleshooting.md index 181130f..d4be2ab 100644 --- a/docs/components/hde/troubleshooting.md +++ b/docs/components/hde/troubleshooting.md @@ -2,4 +2,5 @@ If you encounter issues while running the service, the **admin panel** can be a To efficiently track and monitor errors within the application, **Sentry** is integrated as the primary tool for error logging and alerting. -For Sentry to work correctly, ensure that the **SENTRY_DSN** environment variable is set. +!!! warning "Sentry environment" + For Sentry to work correctly, ensure that the **SENTRY_DSN** environment variable is set. diff --git a/docs/glossary/terms/process.md b/docs/glossary/terms/process.md index 23e0b75..a9f612d 100644 --- a/docs/glossary/terms/process.md +++ b/docs/glossary/terms/process.md @@ -26,4 +26,6 @@ Sometimes used as a term pre-intervention to talk about who we are targeting.