# Scrape a release with typesense (workflow run #33)
# Workflow file for this run.
# Note: this file contains bidirectional Unicode text that may be interpreted
# or compiled differently than what appears below. To review, open the file in
# an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Manually triggered workflow that indexes one documentation version in
# Typesense.
name: Scrape a release with typesense

on:
  workflow_dispatch:
    inputs:
      # The value doubles as the Typesense index name used by the scrape job.
      version:
        type: string
        default: "master"
        description: 'Slint version (used as typesense index name, for example "master")'
jobs:
  # Serves the checked-out documentation from a local nginx container, runs the
  # Typesense docsearch scraper against it, rewrites the indexed URLs to the
  # public site, and finally points the alias named after the requested
  # version at the freshly created collection.
  scrape:
    runs-on: ubuntu-latest
    env:
      # Passed through the environment instead of being interpolated into the
      # shell scripts, so the input value is never parsed as shell code.
      SLINT_VERSION: ${{ inputs.version }}
      TYPESENSE_HOST: "062ykax5pgwon3q7p-1.a1.typesense.net"
    steps:
      # "master" documentation is copied from snapshots/ by the populate step.
      - name: Repository Checkout
        uses: actions/checkout@v4
        if: ${{ inputs.version == 'master' }}
        with:
          sparse-checkout: |
            config
            snapshots

      # Any other version is copied from releases/<version>/ by the populate
      # step, so that directory (not snapshots/) must be in the working tree.
      - name: Repository Checkout
        uses: actions/checkout@v4
        if: ${{ inputs.version != 'master' }}
        with:
          sparse-checkout: |
            config
            releases/${{ inputs.version }}

      # NOTE(review): consider pinning third-party actions to a commit SHA.
      - name: Run
        uses: tj-actions/docker-run@v2
        id: docker-run
        with:
          name: nginx
          image: nginx:latest
          options: "-d -p 80:80"

      - name: Populate web server
        run: |
          # Replace the default nginx content with the docs of the version.
          docker exec nginx rm -rf /usr/share/nginx/html
          if [ "$SLINT_VERSION" = "master" ]; then
            path="snapshots/master/docs/slint"
          else
            path="releases/$SLINT_VERSION/docs/slint"
          fi
          docker cp "$path" nginx:/usr/share/nginx/html

      - name: test web server
        run: |
          # --fail turns an HTTP error (e.g. 404) into a failed step instead of
          # silently printing the error page.
          curl --fail http://localhost:80/index.html > test.html
          cat test.html

      - name: Clone slint directory
        uses: actions/checkout@v4
        with:
          repository: slint-ui/slint
          ref: master
          path: slint
          sparse-checkout: |
            docs
          persist-credentials: false

      - name: Prepare config
        run: |
          # Substitute the literal placeholder $TYPESENSE_INDEX_NAME in the
          # scraper configuration with the requested version.
          sed -i "s/\$TYPESENSE_INDEX_NAME/$SLINT_VERSION/g" config/typesense-scraper-config.json

      - name: run scraper
        env:
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY }}
        run: |
          # Without pipefail the exit status of `tee` would hide a scraper
          # failure.
          set -o pipefail

          # `-e NAME` without a value forwards the variable from this step's
          # environment, keeping the API key out of the command line.
          docker run -i \
            --add-host=host.docker.internal:host-gateway \
            -e TYPESENSE_API_KEY \
            -e TYPESENSE_HOST \
            -e TYPESENSE_PORT="443" \
            -e TYPESENSE_PROTOCOL="https" \
            -e CONFIG="$(jq -r tostring config/typesense-scraper-config.json)" \
            typesense/docsearch-scraper:0.10.0 2>&1 | tee scraper_output.txt

          # Retrieve the collection name (<version>_<digits>) from the first
          # matching "POST /collections/..." line of the scraper output.
          collection_name=$(sed -n "s|.*POST /collections/\(${SLINT_VERSION}_[0-9]*\)/.*|\1|p" scraper_output.txt | sed -n '1p')
          echo "COLLECTION_NAME: $collection_name"
          if [ -z "$collection_name" ]; then
            echo "Could not determine the collection name from the scraper output" >&2
            exit 1
          fi

          # Retrieve documents from typesense server
          curl --fail -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            "https://$TYPESENSE_HOST/collections/$collection_name/documents/export" > temp_docs.jsonl

          # Replace 'http://host.docker.internal' in temp_docs.jsonl
          sed -i "s|http://host.docker.internal|https://releases.slint.dev/$SLINT_VERSION/docs/slint|g" temp_docs.jsonl

          # Update documents in typesense server
          curl --fail -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            -X POST \
            -T temp_docs.jsonl \
            "https://$TYPESENSE_HOST/collections/$collection_name/documents/import?action=update"

          # Set alias to the collection; the alias is named after the version.
          curl --fail "https://$TYPESENSE_HOST/aliases/$SLINT_VERSION" -X PUT \
            -H "Content-Type: application/json" \
            -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            -d "{\"collection_name\": \"$collection_name\"}"