From b2e997635fef2bd1855c7fc136b0393660f0e4d3 Mon Sep 17 00:00:00 2001 From: Roman Isecke <136338424+rbiseck3@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:39:33 -0400 Subject: [PATCH] roman/es ingest test fixes (#1610) ### Description update elasticsearch docker setup to use docker-compose Would close out https://github.com/Unstructured-IO/unstructured/issues/1609 --- .github/workflows/ci.yml | 4 ++ .../ingest-test-fixtures-update-pr.yml | 6 ++- CHANGELOG.md | 2 +- .../create-and-check-es.sh | 39 ++++--------------- .../create_and_fill_es.py | 2 + .../docker-compose.yaml | 15 +++++++ .../test-ingest-elasticsearch.sh | 6 +-- unstructured/__version__.py | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) mode change 100644 => 100755 scripts/elasticsearch-test-helpers/create_and_fill_es.py create mode 100644 scripts/elasticsearch-test-helpers/docker-compose.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d4f066fec..e475ec9f02 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -255,6 +255,10 @@ jobs: source .venv/bin/activate mkdir "$NLTK_DATA" make install-ci + - name: Setup docker-compose + uses: KengoTODA/actions-setup-docker-compose@v1 + with: + version: '2.22.0' - name: Test Ingest (unit) run: | source .venv/bin/activate diff --git a/.github/workflows/ingest-test-fixtures-update-pr.yml b/.github/workflows/ingest-test-fixtures-update-pr.yml index 724a893128..499a1f7593 100644 --- a/.github/workflows/ingest-test-fixtures-update-pr.yml +++ b/.github/workflows/ingest-test-fixtures-update-pr.yml @@ -9,7 +9,7 @@ env: jobs: setup: - runs-on: ubuntu-latest + runs-on: ubuntu-latest-m if: | github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && contains(github.event.head_commit.message, 'ingest-test-fixtures-update')) @@ -56,6 +56,10 @@ jobs: source .venv/bin/activate mkdir "$NLTK_DATA" make install-ci + - name: Setup docker-compose + uses: KengoTODA/actions-setup-docker-compose@v1 + with: + version: '2.22.0' - name: Update test fixtures env: AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d54f444db..7324722691 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.10.19-dev5 +## 0.10.19-dev6 ### Enhancements diff --git a/scripts/elasticsearch-test-helpers/create-and-check-es.sh b/scripts/elasticsearch-test-helpers/create-and-check-es.sh index 44fca2f7d3..dc06c21a16 100755 --- a/scripts/elasticsearch-test-helpers/create-and-check-es.sh +++ b/scripts/elasticsearch-test-helpers/create-and-check-es.sh @@ -1,37 +1,14 @@ #!/usr/bin/env bash -SCRIPT_DIR=$(dirname "$(realpath "$0")") - -# Create the Elasticsearch cluster and get the container id -docker run -d --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -e "discovery.type=single-node" --name es-test docker.elastic.co/elasticsearch/elasticsearch:8.7.0 +set -e -# Wait for Elasticsearch container to start -echo "Waiting for Elasticsearch container to start..." -sleep 1 - -url="http://localhost:9200/_cluster/health?wait_for_status=green&timeout=50s" -status_code=0 -retry_count=0 -max_retries=6 +SCRIPT_DIR=$(dirname "$(realpath "$0")") -# Check the cluster status repeatedly until it becomes live or maximum retries are reached -while [ "$status_code" -ne 200 ] && [ "$retry_count" -lt "$max_retries" ]; do - # Send a GET request to the cluster health API - response=$(curl -s -o /dev/null -w "%{http_code}" "$url") - status_code="$response" +# Create the Elasticsearch cluster +docker compose version +docker compose -f "$SCRIPT_DIR"/docker-compose.yaml up --wait +docker compose -f "$SCRIPT_DIR"/docker-compose.yaml ps - # Process the files only when the Elasticsearch cluster is live - if [ "$status_code" -eq 200 ]; then - echo "Cluster is live." - python "$SCRIPT_DIR/create_and_fill_es.py" - else - ((retry_count++)) - echo "Cluster is not available. Retrying in 5 seconds... (Attempt $retry_count)" - sleep 5 - fi -done -# If the cluster has not become live, exit after a certain number of tries -if [ "$status_code" -ne 200 ]; then - echo "Cluster took an unusually long time to create (>25 seconds). Expected time is around 10 seconds. Exiting." -fi +echo "Cluster is live." +"$SCRIPT_DIR"/create_and_fill_es.py diff --git a/scripts/elasticsearch-test-helpers/create_and_fill_es.py b/scripts/elasticsearch-test-helpers/create_and_fill_es.py old mode 100644 new mode 100755 index 796e2187a8..a761255741 --- a/scripts/elasticsearch-test-helpers/create_and_fill_es.py +++ b/scripts/elasticsearch-test-helpers/create_and_fill_es.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import pandas as pd from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk diff --git a/scripts/elasticsearch-test-helpers/docker-compose.yaml b/scripts/elasticsearch-test-helpers/docker-compose.yaml new file mode 100644 index 0000000000..47cb93ae1f --- /dev/null +++ b/scripts/elasticsearch-test-helpers/docker-compose.yaml @@ -0,0 +1,15 @@ +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0 + container_name: es-test + ports: + - 9200:9200 + - 9300:9300 + environment: + - xpack.security.enabled=false + - discovery.type=single-node + healthcheck: + test: ["CMD-SHELL", "curl --silent --fail localhost:9200/_cluster/health || exit 1"] + interval: 30s + timeout: 30s + retries: 3 diff --git a/test_unstructured_ingest/test-ingest-elasticsearch.sh b/test_unstructured_ingest/test-ingest-elasticsearch.sh index 530ddf1bed..7b181f90ba 100755 --- a/test_unstructured_ingest/test-ingest-elasticsearch.sh +++ b/test_unstructured_ingest/test-ingest-elasticsearch.sh @@ -16,10 +16,8 @@ source "$SCRIPT_DIR"/cleanup.sh function cleanup() { # Kill the container so the script can be repeatedly run using the same ports - if docker ps --filter "name=es-test"; then - echo "Stopping Elasticsearch Docker container" - docker stop es-test - fi + echo "Stopping Elasticsearch Docker container" + docker-compose -f scripts/elasticsearch-test-helpers/docker-compose.yaml down --remove-orphans -v cleanup_dir "$OUTPUT_DIR" if [ "$CI" == "true" ]; then diff --git a/unstructured/__version__.py b/unstructured/__version__.py index ea97a53bc8..5f8fd628c9 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.19-dev5" # pragma: no cover +__version__ = "0.10.19-dev6" # pragma: no cover