From 2a7355d14ce533c9604b8c19ed9b58aed36fce96 Mon Sep 17 00:00:00 2001 From: Vitali Yanushchyk Date: Fri, 4 Oct 2024 05:13:45 -0300 Subject: [PATCH] add ! dedup engine --- docs/components/hde/.pages | 9 +- docs/components/hde/API.md | 17 ++ docs/components/hde/demo.md | 76 ++++++++ docs/components/hde/development.md | 17 ++ docs/components/hde/did/.pages | 5 + docs/components/hde/did/config.md | 68 +++++++ docs/components/hde/did/index.md | 1 + docs/components/hde/did/workflow.md | 63 ++++++ docs/components/hde/index.md | 17 +- docs/components/hde/setup.md | 122 +++++++++++- docs/components/hde/tmp.md | 259 ------------------------- docs/components/hde/tmp2.md | 79 -------- docs/components/hde/troubleshooting.md | 5 + mkdocs.yml | 3 +- pdm.lock | 17 +- pyproject.toml | 1 + 16 files changed, 414 insertions(+), 345 deletions(-) create mode 100644 docs/components/hde/API.md create mode 100644 docs/components/hde/demo.md create mode 100644 docs/components/hde/development.md create mode 100644 docs/components/hde/did/.pages create mode 100644 docs/components/hde/did/config.md create mode 100644 docs/components/hde/did/index.md create mode 100644 docs/components/hde/did/workflow.md delete mode 100644 docs/components/hde/tmp.md delete mode 100644 docs/components/hde/tmp2.md create mode 100644 docs/components/hde/troubleshooting.md diff --git a/docs/components/hde/.pages b/docs/components/hde/.pages index a069e49..8617882 100644 --- a/docs/components/hde/.pages +++ b/docs/components/hde/.pages @@ -1,5 +1,8 @@ nav: - index.md - - setup.md - - tmp.md - - tmp2.md + - Setup: setup.md + - REST API: API.md + - Demo Application: demo.md + - Duplicated Image Detection: did + - Troubleshooting: troubleshooting.md + - Development: development.md diff --git a/docs/components/hde/API.md b/docs/components/hde/API.md new file mode 100644 index 0000000..d6172d1 --- /dev/null +++ b/docs/components/hde/API.md @@ -0,0 +1,17 @@ +The application provides comprehensive API documentation to facilitate ease of use and integration. API documentation is available via two main interfaces: + +#### Swagger UI +An interactive interface that allows users to explore and test the API endpoints. It provides detailed information about the available endpoints, their parameters, and response formats. Users can input data and execute requests directly from the interface. + +URL: `http://localhost:8000/api/rest/swagger/` + +#### Redoc +A static, beautifully rendered documentation interface that offers a more structured and user-friendly presentation of the API. It includes comprehensive details about each endpoint, including descriptions, parameters, and example requests and responses. + +URL: `http://localhost:8000/api/rest/redoc/` + + +These interfaces ensure that developers have all the necessary information to effectively utilize the API, enabling seamless integration and interaction with the application’s features. + +!!! warning "Environment-Specific URLs" + The URLs will vary depending on the server where it is hosted. If the server is hosted elsewhere except for the local machine, replace **http://localhost:8000** with the server's domain URL. diff --git a/docs/components/hde/demo.md b/docs/components/hde/demo.md new file mode 100644 index 0000000..2443b9f --- /dev/null +++ b/docs/components/hde/demo.md @@ -0,0 +1,76 @@ +To help you explore the functionality of this project, a demo server can be run locally using the provided sample data. 
This demo server includes pre-configured settings and sample records to allow for a comprehensive overview of the application's features without needing to configure everything from scratch. + + +## Running the Demo Server Locally + +To set up and start the demo server locally, use the following command: + + docker compose -f tests/extras/demoapp/compose.yml up --build + +This command will build and launch all necessary containers for the demo environment, allowing you to see how different components of the system interact. Once everything is running, you can access the demo server's admin panel to manage and configure various settings within the application. + +## Accessing the Admin Panel + +The admin panel is accessible via the following URL in your browser, using the credentials below: + +- URL: **http://localhost:8000/admin** +- Username: **adm@hde.org** +- Password: **123** + + +## API Interaction + +To further understand how the API works and how different endpoints can be used, there are scripts available for API interaction. These scripts are located in the `tests/extras/demoapp/scripts` directory. + +### Prerequisites + +To use these scripts, ensure that the following tools are installed: + +- [httpie](https://httpie.io/): A command-line HTTP client, used for making API requests in a more readable format compared to traditional curl. +- [jq](https://jqlang.github.io/jq/) : A lightweight and flexible command-line JSON processor that allows you to parse and manipulate JSON responses from API endpoints. + +### Scripts Overview + +#### Configuration Scripts + +Configuration scripts are used to set up the environment for the API interactions. These scripts hold internal settings and functions that are shared across multiple API interaction scripts, making it easier to reuse common functionality and standardize configuration. + +| Name | Arguments | Description | +|-----------------------|-----------|-------------------------------------------------| +| .vars | - | Contains configuration variables | +| .common | - | Contains common functions used by other scripts | + + +#### Public Scripts + +These scripts help manage specific parameters for API interactions, allowing for easy setup and modification of variables that will be used in other commands. + +| Name | Arguments | Description | +|-----------------------|----------------------|---------------------------| +| use_base_url | base url | Sets base url | +| use_auth_token | auth token | Sets authentication token | +| use_deduplication_set | deduplication set id | Sets deduplication set id | + + +#### API Interaction Scripts + +These scripts are used to interact directly with the API endpoints, performing various operations like creating deduplication sets, uploading images, starting the deduplication process, and retrieving results. 
| Name                       | Arguments                                | Description                                  |
|----------------------------|------------------------------------------|----------------------------------------------|
| create_deduplication_set   | reference_pk                             | Creates new deduplication set                |
| create_image               | filename                                 | Creates image in deduplication set           |
| ignore                     | first reference pk, second reference pk  | Makes API ignore specific reference pk pair  |
| process_deduplication_set  | -                                        | Starts deduplication process                 |
| show_deduplication_set     | -                                        | Shows deduplication set data                 |
| show_duplicates            | -                                        | Shows duplicates found in deduplication set  |


#### Test Case Scripts

Test case scripts are designed to automate end-to-end testing scenarios, making it easy to validate the deduplication functionality.

| Name             | Arguments    | Description                                                                                                                     |
|------------------|--------------|---------------------------------------------------------------------------------------------------------------------------------|
| base_case        | reference pk | Creates deduplication set, adds images to it and runs deduplication process                                                      |
| all_ignored_case | reference pk | Creates deduplication set, adds images to it, adds all possible reference pk pairs to ignored pairs and shows duplicates found   | \ No newline at end of file diff --git a/docs/components/hde/development.md b/docs/components/hde/development.md new file mode 100644 index 0000000..c59539d --- /dev/null +++ b/docs/components/hde/development.md @@ -0,0 +1,17 @@ +## Local Development

To develop the service locally, you can utilize the provided `compose.yml` file. This configuration file defines all the necessary services, including the primary application and its dependencies, to create a consistent development environment. By using **Docker Compose**, you can effortlessly spin up the entire application stack, ensuring that all components work seamlessly together.

To build and start the service, along with its dependencies, run the following command:

    docker compose up --build


## Running Tests

To ensure that the service is working correctly, a comprehensive suite of tests is available. You can run these tests by executing the following command:

    docker compose run --rm backend pytest tests -v --create-db


## Viewing Coverage Report

After running the tests, a coverage report will be generated. This report helps in assessing how much of the code is covered by the tests, highlighting any areas that may need additional testing. You can find the coverage report in the `~build/coverage` directory. diff --git a/docs/components/hde/did/.pages b/docs/components/hde/did/.pages new file mode 100644 index 0000000..3287baa --- /dev/null +++ b/docs/components/hde/did/.pages @@ -0,0 +1,5 @@ +nav:
  - Image Processing and Duplicate Detection: index.md
  - Configuration: config.md
  - workflow.md
 \ No newline at end of file diff --git a/docs/components/hde/did/config.md b/docs/components/hde/did/config.md new file mode 100644 index 0000000..9a721dc --- /dev/null +++ b/docs/components/hde/did/config.md @@ -0,0 +1,68 @@ +The configuration can be managed directly through the **admin panel**, which provides a simple way to modify settings without changing the codebase. Navigate to:

    Home › Constance › Config

Here, you will find all the configurable settings that affect the behavior of the system, allowing for quick adjustments and better control over application behavior. 
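As a quick illustration of how these settings are typically consumed in code, the admin path above suggests they are exposed through **django-constance**. The snippet below is a minimal sketch under that assumption: the setting names are the ones documented on this page, while the helper function itself is hypothetical. Reading values at call time (rather than at import time) is what lets changes made in the admin panel take effect without a restart.

```python
# Minimal sketch, assuming the standard django-constance API exposes the
# settings documented on this page; run inside a configured Django project.
from constance import config


def face_detection_params() -> dict:
    """Hypothetical helper: read thresholds at call time so that changes
    made in Home › Constance › Config apply without restarting workers."""
    return {
        "confidence": config.FACE_DETECTION_CONFIDENCE,
        "nms_threshold": config.NMS_THRESHOLD,
    }
```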

## Deep Neural Networks (DNN)

The deep learning component of the system is crucial for performing advanced inference tasks, including **face detection**, **face recognition**, and **finding duplicate images** using a pre-trained model. These tasks are fundamental to ensuring the accuracy and efficiency of the system in identifying and managing images.

This component relies on **Convolutional Neural Networks (CNNs)**, a type of deep learning model particularly well-suited for processing visual data. CNNs are used to automatically extract relevant features from images, such as facial landmarks and distinctive patterns, without the need for manual feature engineering.

### DNN_BACKEND

Specifies the computation backend to be used by the [OpenCV](https://github.com/opencv/opencv) library for deep learning inference.

### DNN_TARGET

Specifies the target device on which the [OpenCV](https://github.com/opencv/opencv) library will perform the deep learning computations.


## Face Detection

This component is responsible for locating and identifying faces in images. It uses advanced deep learning algorithms to scan images and detect the regions that contain human faces. This section outlines the key configuration parameters that influence how the face detection model processes input images and optimizes detection results.

### BLOB_FROM_IMAGE_SCALE_FACTOR

Specifies the scaling factor applied to all pixel values when converting an image to a blob. It is typically 1.0 (no scaling) or 1.0/255.0, which normalizes pixel values to the [0, 1] range.

Remember that the scaling factor is also applied to the mean values. Both the scaling factor and the mean values must match those used during training for inference to produce correct results.

### BLOB_FROM_IMAGE_MEAN_VALUES

Specifies the mean BGR values used in image preprocessing to normalize pixel values by subtracting the mean values of the training dataset. This helps in reducing model bias and improving accuracy.

The specified mean values are subtracted from each channel (Blue, Green, Red) of the input image.

Remember that the scaling factor is also applied to the mean values. Both the scaling factor and the mean values must match those used during training for inference to produce correct results.

### FACE_DETECTION_CONFIDENCE

Specifies the minimum confidence score required for a detected face to be considered valid. Detections with confidence scores below this threshold are discarded as likely false positives.

### NMS_THRESHOLD

Specifies the Intersection over Union (IoU) threshold used in Non-Maximum Suppression (NMS) to filter out overlapping bounding boxes. If the IoU between two boxes exceeds this threshold, the box with the lower confidence score is suppressed. Lower values result in fewer, more distinct boxes; higher values allow more overlapping boxes to remain.

## Face Recognition

This component builds on face detection to identify and differentiate between individual faces. This involves generating face encodings, which are numerical representations of the unique facial features used for recognition. These encodings can then be compared to determine if two images contain the same person or to find matches in a database of known faces.

### FACE_ENCODINGS_NUM_JITTERS

Specifies the number of times to re-sample the face when calculating the encoding. Higher values increase accuracy but are computationally more expensive and slower. For example, setting 'num_jitters' to 100 makes the process 100 times slower. 
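For orientation, the encoding settings in this section mirror the `num_jitters` and `model` parameters of the widely used `face_recognition` library. The snippet below is a hedged sketch of how such parameters are typically applied; it is not the application's own code, and the image file name is purely illustrative.

```python
# Illustrative only: assumes the `face_recognition` package, whose parameters
# these settings appear to mirror; "person.jpg" is a placeholder image.
import face_recognition

image = face_recognition.load_image_file("person.jpg")

# Higher num_jitters re-samples each face more times for a more stable
# encoding, at a proportional cost (100 jitters is roughly 100x slower).
encodings = face_recognition.face_encodings(
    image,
    num_jitters=1,   # corresponds to FACE_ENCODINGS_NUM_JITTERS
    model="small",   # corresponds to FACE_ENCODINGS_MODEL (described next)
)
```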

### FACE_ENCODINGS_MODEL

Specifies the model type used for encoding face landmarks. It can be either 'small', which is faster and identifies only 5 key facial landmarks, or 'large', which is more precise and identifies 68 key facial landmarks but requires more computational resources.


## Duplicate Finder

This component is responsible for identifying duplicate images in the system by comparing face embeddings. These embeddings are numerical representations of facial features generated during the face recognition process. By calculating the distance between the embeddings of different images, the system can determine whether two images contain the same person, helping in the identification and removal of duplicates or grouping similar faces together.

### FACE_DISTANCE_THRESHOLD

Specifies the maximum allowable distance between two face embeddings for them to be considered a match. It helps determine if two faces belong to the same person by setting a threshold for similarity. Lower values result in stricter matching, while higher values allow for more lenient matches.

 diff --git a/docs/components/hde/did/index.md b/docs/components/hde/did/index.md new file mode 100644 index 0000000..d11c340 --- /dev/null +++ b/docs/components/hde/did/index.md @@ -0,0 +1 @@ +This feature consists of several interconnected components that work together to process images, detect and recognize faces, and find duplicate images using deep learning techniques. \ No newline at end of file diff --git a/docs/components/hde/did/workflow.md b/docs/components/hde/did/workflow.md new file mode 100644 index 0000000..ffc1ab9 --- /dev/null +++ b/docs/components/hde/did/workflow.md @@ -0,0 +1,63 @@ +The Image Processing and Duplicate Detection workflow is designed to provide reliable face detection, recognition, and duplicate detection by leveraging a pre-trained deep learning model.

## Inference Mode Operation

This application operates strictly in inference mode, which means that it does not perform training but instead relies on a pre-trained model for face recognition tasks. This mode ensures that the application can rapidly deploy face recognition capabilities without the computational cost or time required for training models from scratch.

### Pre-Trained Model Usage

The pre-trained model is stored in Azure Blob Storage and is automatically downloaded by the application when it starts. This process ensures that the latest version of the model is always available for inference.

### Manual Model Update

In addition to automatic loading, administrators have the option to manually update the model through the admin panel. This feature provides flexibility for applying updates or new models when improvements or changes are required without modifying the underlying code.

## Model Details

The face recognition capabilities are powered by the [OpenCV](https://github.com/opencv/opencv) library. Currently, the application utilizes an open-source, pre-trained model specifically designed for face detection.

### Model Components

- **deploy.prototxt**: This file defines the model architecture, including the network layers and the specific parameters used for each layer. It serves as a blueprint that guides how the model processes input data.
- **res10_300x300_ssd_iter_140000.caffemodel**: This file contains the trained weights of the model. It was trained using the **Caffe** deep learning framework, with a total of 140,000 iterations, ensuring robustness in face detection tasks. 
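To make the role of these two files concrete, the sketch below shows how OpenCV's DNN module typically loads and runs this prototxt/caffemodel pair. It is an illustration, not the application's actual loading code: the file paths, sample image, mean values, and confidence threshold are assumptions chosen to match common usage of this model and the options described on the Configuration page.

```python
# Sketch only: paths, sample image, mean values and threshold are assumptions.
import cv2
import numpy as np

# Load the architecture definition and the trained weights.
net = cv2.dnn.readNetFromCaffe(
    "deploy.prototxt",
    "res10_300x300_ssd_iter_140000.caffemodel",
)

image = cv2.imread("photo.jpg")
h, w = image.shape[:2]

# Preprocess to the model's fixed 300x300 input; the scale factor and BGR mean
# values play the role of BLOB_FROM_IMAGE_SCALE_FACTOR / BLOB_FROM_IMAGE_MEAN_VALUES.
blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
net.setInput(blob)
detections = net.forward()  # shape (1, 1, N, 7): one row of 7 values per candidate face

# Keep candidates whose confidence exceeds the detection threshold
# (FACE_DETECTION_CONFIDENCE); 0.7 here is just an example value.
for i in range(detections.shape[2]):
    confidence = float(detections[0, 0, i, 2])
    if confidence > 0.7:
        x1, y1, x2, y2 = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(int)
        print(f"face at ({x1}, {y1})-({x2}, {y2}) with confidence {confidence:.2f}")
```

In the service itself, these files are fetched from the dedicated DNN storage at startup (see Setup) rather than read from local paths as in this sketch.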

### Model Architecture

- The model follows the **Res10** architecture, which is known for its efficiency in detecting faces. Res10 is a lightweight model that balances speed and accuracy, making it suitable for real-time applications.
- The model operates with a fixed input resolution of **300x300**, optimizing detection for faces within that scale. This resolution offers a compromise between detail and processing efficiency, allowing the model to quickly identify facial features without excessive computational load.
- The model utilizes the **Single Shot MultiBox Detector (SSD)** methodology, which is a popular approach for object detection. SSD is designed to predict both the bounding boxes and the confidence scores for each object in a single forward pass through the network. By leveraging the SSD approach, the model can efficiently detect multiple faces in a single image, making it suitable for batch processing and applications where rapid detection is required.


## Workflow Diagram

The workflow diagram illustrates the overall process of Image Processing and Duplicate Detection within the system, showcasing how different components interact to achieve **face detection**, **recognition**, and **duplicate identification**.

```mermaid
flowchart LR
    subgraph DNNManager[DNN Manager]
    direction TB
    load_model[Load Model] -- computation backend\ntarget device --> set_preferences[Set Preferences]
    end

    subgraph ImageProcessing[Image Processing]
    direction LR

        subgraph FaceDetection[Face Detection]
        direction TB
        load_image[Load Image] -- decoded image as 3D numpy array\n(height, width, channels of BlueGreenRed color space) --> prepare_image[Prepare Image] -- blob 4D tensor\n(normalized size, use scale factor and means) --> run_model[Run Model] -- shape (1, 1, N, 7),\n1 image\nN is the number of detected faces\neach face is described by the 7 detection values --> filter_results[Filter Results] -- confidence is above the minimum threshold,\nNMS to suppress overlapping bounding boxes --> return_detections[Return Detections]
        end

        subgraph FaceRecognition[Face Recognition]
        direction TB
        load_image_[Load Image] --> detect_faces[Detect Faces] -- detected face regions\nnumber of times to re-sample the face\nkey facial landmarks --> generate_encodings[Generate Encodings] -- numerical representations of the facial features\n(face's geometry and appearance) --> save_encodings[Save Encodings]
        end
    end

    subgraph DuplicateFinder[Duplicate Finder]
    direction TB
    load_encodings[Load Encodings] --> compare_encodings[Compare Encodings] -- face distance less than threshold --> return_duplicates[Return Duplicates]
    end

    DNNManager --> ImageProcessing --> DuplicateFinder
    FaceDetection --> FaceRecognition

```
 diff --git a/docs/components/hde/index.md b/docs/components/hde/index.md index 8950193..1a5043a 100644 --- a/docs/components/hde/index.md +++ b/docs/components/hde/index.md @@ -1,5 +1,20 @@ -# Deduplication Engine +# Deduplication + + +The Deduplication Engine is a component of the HOPE ecosystem. It provides users with powerful capabilities to identify and remove duplicate records within the system, ensuring that data remains clean, consistent, and reliable. + ## Repository + + +## Features + +- [Duplicated Image Detection](did/index.md) + + +## Help + +**Got a question?** We've got answers. 
+ +File a GitHub [issue](https://github.com/unicef/hope-dedup-engine/issues) diff --git a/docs/components/hde/setup.md b/docs/components/hde/setup.md index feae8cb..6b93434 100644 --- a/docs/components/hde/setup.md +++ b/docs/components/hde/setup.md @@ -1 +1,121 @@ -# Setup +## Prerequisites + +This project utilizes [PDM](https://pdm-project.org/) as the package manager for managing Python dependencies and environments. + +To successfully set up and run this project, ensure that you have the following components in place: + +- **Postgres Database (v14+)**: A PostgreSQL database instance is required to store application data. Ensure that version 14 or newer is available and accessible. +- **Redis Server**: Redis is used for caching and managing task queues. Ensure you have a running Redis server. +- **Celery Worker(s)**: Celery is used for handling asynchronous tasks in the application. One or more workers are needed to process these tasks. +- **Celery Beat**: Celery Beat is used for scheduling periodic tasks. Ensure that Celery Beat is configured and running. +- **Azure Blob Storage Account(s)**: Azure Blob Storage is utilized for storing application files and media. Make sure you have access to one or more Azure Blob Storage accounts for file management. + +The code for this project is encapsulated within a Docker image, which provides an isolated and consistent environment for running the application. This Docker image is hosted on [Docker Hub](https://hub.docker.com/r/unicef/hope-dedupe-engine/), allowing easy access and deployment. + +## Environment Configuration + +Essential steps for verifying and configuring the environment settings required to run the project are provided. Instructions include displaying the current configuration, checking for missing variables, and ensuring all required settings are properly defined. Detailed descriptions of each variable are also available. + +### Display the Current Configuration + + $ docker run -it -t unicef/hope-dedupe-engine:release-0.1 django-admin env + +### Mandatory Environment Variables +Check Environment Variables + + $ docker run -it -t unicef/hope-dedupe-engine:release-0.1 django-admin env --check + +Ensure the following environment variables are properly configured: + + DATABASE_URL + SECRET_KEY + CACHE_URL + CELERY_BROKER_URL + MEDIA_ROOT + STATIC_ROOT + DEFAULT_ROOT + FILE_STORAGE_DNN + FILE_STORAGE_HOPE + FILE_STORAGE_STATIC + FILE_STORAGE_MEDIA + +### Variables Breakdown + +Detailed information about the required environment variables is provided for clarity and proper configuration. + +#### Operational + +##### DATABASE_URL +The URL for the database connection. *Example:* `postgres://hde:password@db:5432/hope_dedupe_engine` + +##### SECRET_KEY +A secret key for the Django installation. *Example:* `django-insecure-pretty-strong` + +##### CACHE_URL +The URL for the cache server. *Example:* `redis://redis:6379/1` + +##### CELERY_BROKER_URL +The URL for the Celery broker. *Example:* `redis://redis:6379/9` + +#### Root directories + +##### DEFAULT_ROOT +The root directory for locally stored files. *Example:* `/var/hope_dedupe_engine/default` + +##### MEDIA_ROOT +The root directory for media files. *Example:* `/var/hope_dedupe_engine/media` + +##### STATIC_ROOT +The root directory for static files. *Example:* `/var/hope_dedupe_engine/static` + +#### Storages + +##### FILE_STORAGE_DEFAULT +This backend is used for storing locally downloaded DNN model files and encoded data. 
+ ``` + FILE_STORAGE_DEFAULT=django.core.files.storage.FileSystemStorage + ``` +##### FILE_STORAGE_DNN +This backend is dedicated to storing DNN model files. Ensure that the following two files are present in this storage: + +1. *deploy.prototxt*: Defines the model architecture. +2. *res10_300x300_ssd_iter_140000.caffemodel*: Contains the pre-trained model weights. + +The current process involves downloading files from a [GitHub repository](https://github.com/sr6033/face-detection-with-OpenCV-and-DNN) and saving them to this specific Azure Blob Storage using command `django-admin upgrade --with-dnn-setup`, or the specialized`django-admin dnnsetup` command . +In the future, an automated pipeline related to model training could handle file updates. + +The storage configuration is as follows: +``` +FILE_STORAGE_DNN="storages.backends.azure_storage.AzureStorage?account_name=&account_key=&overwrite_files=true&azure_container=dnn" +``` + +##### FILE_STORAGE_HOPE +This backend is used for storing HOPE dataset images. It should be configured as read-only for the service. + ``` + FILE_STORAGE_HOPE="storages.backends.azure_storage.AzureStorage?account_name=&account_key=&azure_container=hope" + ``` +##### FILE_STORAGE_MEDIA +This backend is used for storing media files. + +##### FILE_STORAGE_STATIC +This backend is used for storing static files, such as CSS, JavaScript, and images. + +## Running the Application + +To get the application up and running, follow the steps outlined below. The first command will set up the initial configuration, while the subsequent commands will start the server and related support services, including worker processes and task scheduling. + +### Initial Setup + +Before starting the application, perform the initial setup using the following command. This will configure the necessary environment settings and prepare the application for runtime: + + docker run -d -t unicef/hope-dedupe-engine:release-0.1 setup + +### Starting the Server and Services + +Once the initial setup is complete, run the commands below to start the server and the required background services: + + docker run -d -t unicef/hope-dedupe-engine:release-0.1 run + docker run -d -t unicef/hope-dedupe-engine:release-0.1 worker + docker run -d -t unicef/hope-dedupe-engine:release-0.1 beat + +These commands will ensure that the application server, worker processes, and task scheduler are all running in the background, allowing the full functionality of the application to be available. diff --git a/docs/components/hde/tmp.md b/docs/components/hde/tmp.md deleted file mode 100644 index f8ff698..0000000 --- a/docs/components/hde/tmp.md +++ /dev/null @@ -1,259 +0,0 @@ -Deduplication - - - - - - - -RDI - -Fuzzy - -Threshold by BA - - - - - - - -Programme based deduplication. - - - - - - - -Check on documents happen after merge. - - - - - - - - - - -CHANGE REQUEST 168410 - - - - -Customizable deduplication - - - - - - - -Real cases: - -Need adjudication: same "Bank Statement" number - - - - - - - -Duplicate - -Not Duplicate - -Withdrawn - - - - - - - -Not Withdrawn - - - - - - - - - - -Deduplication - -what is the flag "Postpone deduplication”? 
- -status pending - -Document type flags - -is_identity_document - -valid_for_deduplication (change the signature ==> 2 valid document with same ID) - -document number - - - - - - - - - - -Batch - - - - -Golden Records - - - - -Threashold -> need adjudication - - - - -deduplication_batch_results - -deduplication_golden_record_results - - - - - - - -Questions - - - - -A) what is the flag "Postpone deduplication" - -what would it stop? - -fuzzy match - -bank account - -document number - - - - -Disable ES - - - - - - - -B) document type - valid_for_deduplication flag has it used? - - - - - - - -ok so deduplication works on all documents with: - -status pending - -flag is_identity_document set to true - - - - - - - -flag valid_for_deduplication only change the signature ==> 2 valid document with same ID - - - - - - - - - - -DEDUPLICATION - -Name, gender, date of birth - - - - - - -Flexible de-duplications checks (decide from user side which fields should be used to deduplicate) - -No, depends on filters, index - -Requires redesign of flex fields - -Redesign deduplication (?) - - - - - - - -Instructions: - -List: subset (aka sessions) - - - - - - - - - - - - - -is identity document - -if this is set to true, we use this document to deduplicate and create ticket - - - - -unique for individual - -you cannot create more than one document of this type for individual - - - - -valid for deduplication - -ignores the type of the document during deduplication and deduplicate between different types with this flag set to true - - - - - - -There are two different rules for documents uniqueness: - -"unique for individual" flag indicates whether we should validate uniqueness of document type per individual - so that Individual can only have 1 VALID document of this document_type+country. - -document data uniqueness inside a Program - so that there cannot be more than 1 VALID document with the same set of values: document_number+type+country in a Program - - \ No newline at end of file diff --git a/docs/components/hde/tmp2.md b/docs/components/hde/tmp2.md deleted file mode 100644 index 9a646f8..0000000 --- a/docs/components/hde/tmp2.md +++ /dev/null @@ -1,79 +0,0 @@ - - -Create Deduplication Set: deduplication_set (POST) - - - - -deduplication_set//images (POST) - -deduplication_set//image_bulk (POST) - - - - -deduplication_set//process (POST) - - - ------- - - - - - -Deduplication Engine App - - - -Celery - - - -Neural Network Model (DNN) - - - - - -Blob Stob Storages - - - -1) HOPEAzureStorage: Read only storage for hope pictures - - - -2) HDEAzureStorage - -static-dde: - -enconding-dde: Storage for encoding? Numpy vectors - -models-dde: - -Coffee model - -Prototext file - - - - - -Service use 3 azure containers: - -AZURE_CONTAINER_HDE - writable container for encodings data - -AZURE_CONTAINER_HOPE - read-only container for images from HOPE - -AZURE_CONTAINER_DNN - read-only container for DNN files (deploy.prototxt and res10_300x300_ssd_iter_140000.caffemodel) - -Depending on constance.config.DNN_FILES_SOURCE, the service fetches DNN files from GitHub or AZURE_CONTAINER_DNN using celery task. - -At startup, the absence of local DNN files triggers an automatic download. - -For manual interventions, access the admin panel at: Home › Faces › DNN files. - -We drop downloaded files into the settings.CV2DNN_DIR folder.CV2DNN_DIR. This folder must be reached by our backend and Celery workers. 
- -At the future the files within the AZURE_CONTAINER_DNN can be automatically updated with new versions of our trained model via a dedicated pipeline. \ No newline at end of file diff --git a/docs/components/hde/troubleshooting.md b/docs/components/hde/troubleshooting.md new file mode 100644 index 0000000..181130f --- /dev/null +++ b/docs/components/hde/troubleshooting.md @@ -0,0 +1,5 @@ +If you encounter issues while running the service, the **admin panel** can be a useful tool for diagnosing and resolving problems. The admin panel provides access to various configurations, logs, and status indicators that can help identify potential causes of issues. + +To efficiently track and monitor errors within the application, **Sentry** is integrated as the primary tool for error logging and alerting. + +For Sentry to work correctly, ensure that the **SENTRY_DSN** environment variable is set. diff --git a/mkdocs.yml b/mkdocs.yml index 342cb93..f39dfa9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -161,7 +161,8 @@ plugins: # draft: false - tags: tags_file: tags.md - + - panzoom: + full_screen: true hooks: - docs/_hooks/hooks.py diff --git a/pdm.lock b/pdm.lock index 007e3f1..bcbeef5 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:0c6bc87d8aea7c4268f88eae9dd9d8a7c34dc82da9c58a0dfc88cd6fe0e6ff6f" +content_hash = "sha256:bf28c96d1837be12cf827b76c9b1d9a4d7623667e8e849fd0711b60ac759f69d" [[metadata.targets]] requires_python = "==3.12.*" @@ -801,6 +801,21 @@ files = [ {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, ] +[[package]] +name = "mkdocs-panzoom-plugin" +version = "0.1.1" +requires_python = ">=3.7" +summary = "MkDocs Plugin to enable pan & zoom on images and mermaid diagrams" +groups = ["default"] +dependencies = [ + "beautifulsoup4>=4.9.0", + "mkdocs>=1.0.4", +] +files = [ + {file = "mkdocs_panzoom_plugin-0.1.1-py3-none-any.whl", hash = "sha256:28d6777b509703b023e3f2cf3fdbc63eb3d746a088aac78d16f3ba0f54ba189a"}, + {file = "mkdocs_panzoom_plugin-0.1.1.tar.gz", hash = "sha256:68c7b04041a9b413c3ffea5ebf9278fed96142fed741270222fa1a5429e32fdb"}, +] + [[package]] name = "mkdocs-pdf-export-plugin" version = "0.5.10" diff --git a/pyproject.toml b/pyproject.toml index 5d6f21b..928a8ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "mike>=2.1.3", "mkdocs-gitsnippet-plugin>=1.2.0", "mkdocs-macros-plugin>=1.2.0", + "mkdocs-panzoom-plugin>=0.1.1", ] requires-python = "==3.12.*" readme = "README.md"