diff --git a/.github/actions/base-ingest-cache/action.yml b/.github/actions/base-ingest-cache/action.yml index 9ebd2ab59a..dc9d5105a2 100644 --- a/.github/actions/base-ingest-cache/action.yml +++ b/.github/actions/base-ingest-cache/action.yml @@ -18,7 +18,7 @@ runs: path: | .venv nltk_data - key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}-${{ hashFiles('requirements/*.txt') }} + key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt', 'requirements/*.txt') }} lookup-only: ${{ inputs.check-only }} - name: Set up Python ${{ inputs.python-version }} if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true' @@ -40,6 +40,7 @@ runs: fi make install-ci make install-nltk-models + make install-all-docs make install-ingest - name: Save Ingest Cache if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true' @@ -49,5 +50,5 @@ runs: path: | .venv nltk_data - key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}-${{ hashFiles('requirements/*.txt') }} + key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt', 'requirements/*.txt') }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a675465b50..88fe84680b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,8 +79,6 @@ jobs: strategy: matrix: python-version: ["3.9","3.10","3.11"] - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data runs-on: ubuntu-latest needs: [setup, changelog] steps: @@ -186,8 +184,6 @@ jobs: python-version: ["3.10"] extra: ["csv", "docx", "odt", "markdown", "pypandoc", "pdf-image", "pptx", "xlsx"] runs-on: ubuntu-latest - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data needs: [setup, lint, test_unit_no_extras] steps: - uses: actions/checkout@v4 @@ -229,8 +225,6 @@ jobs: matrix: python-version: [ "3.9","3.10" ] runs-on: ubuntu-latest - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data needs: [setup] steps: - uses: actions/checkout@v4 @@ -309,7 +303,6 @@ jobs: MXBAI_API_KEY: ${{secrets.MXBAI_API_KEY}} OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract" CI: "true" - NLTK_DATA: ${{ github.workspace }}/nltk_data PYTHON: python${{ matrix.python-version }} run: | source .venv/bin/activate @@ -333,8 +326,6 @@ jobs: # NOTE(yuming): Unstructured API only use Python 3.10 python-version: ["3.10"] runs-on: ubuntu-latest - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data needs: [setup, lint] steps: - uses: actions/checkout@v4