-
Notifications
You must be signed in to change notification settings - Fork 1
95 lines (87 loc) · 3.57 KB
/
typesense-scrape.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Manually dispatched workflow that scrapes one version of the docs with typesense.
name: Scrape a release with typesense
on:
  workflow_dispatch:
    inputs:
      # Selects what gets scraped; the same value is used as the typesense index name.
      version:
        description: 'Slint version (used as typesense index name, for example "master")'
        type: string
        default: master
jobs:
  # Serves the requested docs tree from a local nginx container, crawls it with
  # the typesense docsearch scraper, rewrites the crawled URLs to the public
  # site and finally points the alias at the freshly created collection.
  scrape:
    runs-on: ubuntu-latest
    env:
      # Handed to the scripts through the environment instead of being spliced
      # into the shell source, so an unusual value cannot alter the commands.
      SLINT_VERSION: ${{ inputs.version }}
      TYPESENSE_HOST: "062ykax5pgwon3q7p-1.a1.typesense.net"
    steps:
      # "master" docs are read from snapshots/ (see "Populate web server").
      - name: Repository Checkout
        uses: actions/checkout@v4
        if: ${{ inputs.version == 'master' }}
        with:
          sparse-checkout: |
            config
            snapshots
      # Any other version is read from releases/<version>/, so that directory
      # (not snapshots/) has to be part of the sparse checkout.
      - name: Repository Checkout
        uses: actions/checkout@v4
        if: ${{ inputs.version != 'master' }}
        with:
          sparse-checkout: |
            config
            releases/${{ inputs.version }}
      # Starts a detached nginx container named "nginx" published on port 80.
      # NOTE(review): third-party action referenced by a mutable tag; consider
      # pinning it to a commit SHA.
      - name: Run
        uses: tj-actions/docker-run@v2
        id: docker-run
        with:
          name: nginx
          image: nginx:latest
          options: "-d -p 80:80"
      # Replaces the default nginx document root with the selected docs tree.
      - name: Populate web server
        run: |
          docker exec nginx rm -rf /usr/share/nginx/html
          if [ "$SLINT_VERSION" = "master" ]; then
            docs_path="snapshots/master/docs/slint"
          else
            docs_path="releases/$SLINT_VERSION/docs/slint"
          fi
          docker cp "$docs_path" nginx:/usr/share/nginx/html
      # --fail turns an HTTP error (e.g. a 404 for a missing docs tree) into a
      # failed step instead of printing the error page and carrying on.
      - name: test web server
        run: |
          curl --fail --silent --show-error http://localhost:80/index.html > test.html
          cat test.html
      # Sparse checkout of the docs directory of slint-ui/slint into ./slint.
      - name: Clone slint directory
        uses: actions/checkout@v4
        with:
          repository: slint-ui/slint
          ref: master
          path: slint
          sparse-checkout: |
            docs
          persist-credentials: false
      # Substitutes the literal $TYPESENSE_INDEX_NAME placeholder in the
      # scraper config with the requested version.
      - name: Prepare config
        run: |
          sed -i "s|\$TYPESENSE_INDEX_NAME|$SLINT_VERSION|g" config/typesense-scraper-config.json
      - name: run scraper
        env:
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY }}
        run: |
          # Without an explicit shell GitHub runs bash with -e only; pipefail is
          # needed so a failing scraper is not hidden by the trailing `tee`.
          set -o pipefail
          api="https://$TYPESENSE_HOST"
          # `-e NAME` without a value forwards NAME from this step's environment,
          # keeping the API key off the command line.
          docker run -i \
            --add-host=host.docker.internal:host-gateway \
            -e TYPESENSE_API_KEY \
            -e TYPESENSE_HOST \
            -e TYPESENSE_PORT="443" \
            -e TYPESENSE_PROTOCOL="https" \
            -e CONFIG="$(jq -r tostring config/typesense-scraper-config.json)" \
            typesense/docsearch-scraper:0.10.0 2>&1 | tee scraper_output.txt
          # Retrieve the collection name (<version>_<digits>) from the first
          # matching POST line of the scraper log; regex metacharacters in the
          # version (such as the dots in a release number) are escaped first.
          version_re=$(printf '%s' "$SLINT_VERSION" | sed 's|[][\.*^$/]|\\&|g')
          collection_name=$(sed -n "s/.*POST \/collections\/\(${version_re}_[0-9]*\)\/.*/\1/p" scraper_output.txt | sed -n '1p')
          if [ -z "$collection_name" ]; then
            echo "Could not determine the collection name from the scraper output" >&2
            exit 1
          fi
          echo "COLLECTION_NAME: $collection_name"
          # Retrieve documents from typesense server
          curl --fail --silent --show-error \
            -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            "$api/collections/$collection_name/documents/export" > temp_docs.jsonl
          # Replace 'http://host.docker.internal' in temp_docs.jsonl
          # NOTE(review): "master" is rewritten to releases.slint.dev as well;
          # confirm that this is the intended public host for snapshot docs.
          sed -i "s|http://host.docker.internal|https://releases.slint.dev/$SLINT_VERSION/docs/slint|g" temp_docs.jsonl
          # Update documents in typesense server
          curl --fail --silent --show-error \
            -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            -X POST \
            -T temp_docs.jsonl \
            "$api/collections/$collection_name/documents/import?action=update"
          # Set alias to the collection; the alias is named after the version
          # (the index name substituted into the scraper config above).
          curl --fail --silent --show-error "$api/aliases/$SLINT_VERSION" -X PUT \
            -H "Content-Type: application/json" \
            -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            -d "{\"collection_name\": \"$collection_name\"}"