# Source: GitHub Actions run "Scrape a release with typesense #30" (workflow file for this run).
# NOTE: GitHub flagged this file as containing bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the
# file in an editor that reveals hidden Unicode characters.
# Manually triggered workflow that scrapes one published documentation
# snapshot into a Typesense search index.
name: Scrape a release with typesense

on:
  workflow_dispatch:
    inputs:
      # Directory (relative to the repository root) that is copied into the
      # web server's document root and then scraped.
      relative_path:
        type: string
        default: "snapshots/master/docs/slint"
        description: 'relative path to site root to scrape (like snapshots/master/docs/slint)'
      # Substituted for $TYPESENSE_INDEX_NAME in the scraper configuration.
      version:
        type: string
        default: "master"
        description: 'Slint version (used as typesense index name, for example "master")'
jobs:
  # Serves the selected snapshot from a local nginx container, runs the
  # Typesense docsearch scraper against it, re-imports the scraped documents,
  # and finally points the alias named after the version at the new collection.
  scrape:
    runs-on: ubuntu-latest
    # Least privilege: the job only reads repository contents.
    permissions:
      contents: read
    steps:
      - name: Repository Checkout
        uses: actions/checkout@v4
        with:
          # Only the scraper configuration and the snapshots are needed.
          sparse-checkout: |
            config
            snapshots

      # Starts a detached nginx container publishing port 80 on the runner.
      - name: Run
        uses: tj-actions/docker-run@v2
        id: docker-run
        with:
          name: nginx
          image: nginx:latest
          options: "-d -p 80:80"

      - name: Populate web server
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the input value cannot inject shell commands.
          RELATIVE_PATH: ${{ inputs.relative_path }}
        run: |
          # Replace nginx's default document root with the selected snapshot.
          docker exec nginx rm -rf /usr/share/nginx/html
          docker cp "$RELATIVE_PATH" nginx:/usr/share/nginx/html

      - name: test web server
        run: |
          # --fail turns an HTTP error (e.g. 404) into a failing step instead
          # of silently saving the error page.
          curl --fail http://localhost:80/index.html > test.html
          cat test.html

      # NOTE(review): nothing in the steps below reads the `slint` checkout
      # directly; presumably it is kept for the docs sources - confirm.
      - name: Clone slint directory
        uses: actions/checkout@v4
        with:
          repository: slint-ui/slint
          ref: master
          path: slint
          sparse-checkout: |
            docs
          persist-credentials: false

      - name: Prepare config
        env:
          INDEX_NAME: ${{ inputs.version }}
        run: |
          # Substitute the literal $TYPESENSE_INDEX_NAME placeholder in place.
          sed -i "s/\$TYPESENSE_INDEX_NAME/${INDEX_NAME}/g" config/typesense-scraper-config.json

      - name: run scraper
        env:
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY }}
          TYPESENSE_HOST: 062ykax5pgwon3q7p-1.a1.typesense.net
          TYPESENSE_PORT: "443"
          TYPESENSE_PROTOCOL: https
          INDEX_NAME: ${{ inputs.version }}
        run: |
          # Without pipefail the exit status of `tee` would hide a scraper failure.
          set -o pipefail

          # `-e NAME` without a value forwards NAME from this step's environment,
          # which keeps the API key out of the command text.
          # NOTE(review): host.docker.internal is presumably how the scraper
          # config reaches the nginx container on the runner - confirm against
          # config/typesense-scraper-config.json.
          docker run -i \
            --add-host=host.docker.internal:host-gateway \
            -e TYPESENSE_API_KEY \
            -e TYPESENSE_HOST \
            -e TYPESENSE_PORT \
            -e TYPESENSE_PROTOCOL \
            -e CONFIG="$(jq -r tostring config/typesense-scraper-config.json)" \
            typesense/docsearch-scraper:0.10.0 2>&1 | tee scraper_output.txt

          typesense_url="${TYPESENSE_PROTOCOL}://${TYPESENSE_HOST}"

          # Retrieve the collection name: first "<index>_<digits>" collection
          # that appears in a POST /collections/... line of the scraper output.
          collection_name=$(sed -n "s|.*POST /collections/\(${INDEX_NAME}_[0-9]*\)/.*|\1|p" scraper_output.txt | sed -n '1p')
          echo "COLLECTION_NAME: $collection_name"
          if [ -z "$collection_name" ]; then
            echo "::error::No ${INDEX_NAME}_<digits> collection found in the scraper output" >&2
            exit 1
          fi

          # Retrieve documents from typesense server
          curl --fail -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
            "${typesense_url}/collections/${collection_name}/documents/export" > temp_docs.jsonl

          # Update documents in typesense server
          curl --fail -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
            -X POST \
            -T temp_docs.jsonl \
            "${typesense_url}/collections/${collection_name}/documents/import?action=update"

          # Set alias to the collection (the alias is named after the index).
          curl --fail "${typesense_url}/aliases/${INDEX_NAME}" -X PUT \
            -H "Content-Type: application/json" \
            -H "X-TYPESENSE-API-KEY: ${TYPESENSE_API_KEY}" \
            -d "{\"collection_name\": \"${collection_name}\"}"