Skip to content

Commit

Permalink
gh actions gleanerio#8
Browse files Browse the repository at this point in the history
  • Loading branch information
adplincinst committed May 29, 2024
1 parent 85a0208 commit a24c9b9
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 9 deletions.
68 changes: 68 additions & 0 deletions configs/iow/pids-geoconnex-dev-gleanerconfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Gleaner configuration for harvesting the pids.geoconnex.dev sitemaps.
# Each entry under `sources` is harvested by name (see scripts/iow/start-gleaner.sh,
# which extracts the `name:` values from this file).

# Object store connection settings.
# NOTE(review): the keys below look like local-development placeholders --
# confirm, and never commit real credentials here.
minio:
  address: localhost
  port: 9000
  accessKey: amazingaccesskey
  secretKey: amazingsecretkey
  ssl: false
  bucket: iow
gleaner:
  runid: iow # this will be the bucket the output is placed in...
  summon: true # do we want to visit the web sites and pull down the files
  mill: false
context:
  cache: true
# Both the https and http schema.org prefixes map to the same local context file.
contextmaps:
  - prefix: "https://schema.org/"
    file: "configs/jsonldcontext.jsonld" # wget http://schema.org/docs/jsonldcontext.jsonld
  - prefix: "http://schema.org/"
    file: "configs/jsonldcontext.jsonld" # wget http://schema.org/docs/jsonldcontext.jsonld
summoner:
  after: "" # "21 May 20 10:00 UTC"
  mode: full # full || diff: If diff compare what we have currently in gleaner to sitemap, get only new, delete missing
  threads: 5
  delay: # milliseconds (1000 = 1 second) to delay between calls (will FORCE threads to 1)
  headless: http://localhost:9222 # URL for headless see docs/headless
millers:
  graph: true
# Sitemap sources; `name` is the identifier passed to `gleaner -source`.
sources:
  - active: 'true'
    domain: https://pids.geoconnex.dev
    headless: 'false'
    name: refgages0
    pid: https://gleaner.io/genid/geoconnex
    propername: refgages0
    sourcetype: sitemap
    url: https://pids.geoconnex.dev/sitemap/ref/gages/gages__0.xml
  - active: 'true'
    domain: https://pids.geoconnex.dev
    headless: 'false'
    name: refmainstems
    pid: https://gleaner.io/genid/geoconnex
    propername: refmainstems
    sourcetype: sitemap
    url: https://pids.geoconnex.dev/sitemap/ref/mainstems/mainstems__0.xml
  - active: 'true'
    domain: https://pids.geoconnex.dev
    headless: 'false'
    name: dams0
    pid: https://gleaner.io/genid/geoconnex
    propername: dams0
    sourcetype: sitemap
    url: https://pids.geoconnex.dev/sitemap/ref/dams/dams__0.xml
  - active: 'true'
    domain: https://pids.geoconnex.dev
    headless: 'false'
    name: cdss0
    pid: https://gleaner.io/genid/geoconnex
    propername: cdss0
    sourcetype: sitemap
    url: https://pids.geoconnex.dev/sitemap/cdss/co_gages__0.xml
  - active: 'true'
    domain: https://pids.geoconnex.dev
    headless: 'false'
    name: nmwdist0
    pid: https://gleaner.io/genid/geoconnex
    propername: nmwdist0
    sourcetype: sitemap
    url: https://pids.geoconnex.dev/sitemap/nmwdi/st/nmwdi-st__0.xml

11 changes: 2 additions & 9 deletions scripts/iow/start-gleaner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,13 @@



# Harvest every source listed in a Gleaner configuration file, running gleaner
# once per source. Gleaner's output goes to this script's own stdout/stderr.
#
# Environment:
#   GLEANER_CONFIG  optional path to the gleaner config. Defaults to
#                   configs/iow/pids-geoconnex-dev-gleanerconfig.yaml, resolved
#                   against the current working directory.
#
# Exit status: non-zero when the config file cannot be read, 0 otherwise
# (a failing source is reported on stderr but does not stop the run).

DEFAULT_GLEANER_CONFIG="configs/iow/pids-geoconnex-dev-gleanerconfig.yaml"

# Print the source names found in the config file $1, one per line.
# A name is the second field of any line where "name:" follows a non-word
# character (e.g. indentation), so "propername:" lines are not matched.
list_sources() {
  grep '\Wname:' -- "$1" | awk '{print $2}'
}

# Run gleaner for each source in the config, using the SAME file both to
# enumerate the sources and as gleaner's -cfg argument.
main() {
  harvest_cfg="${GLEANER_CONFIG:-$DEFAULT_GLEANER_CONFIG}"

  if [ ! -r "$harvest_cfg" ]; then
    echo "error: cannot read gleaner config '$harvest_cfg'" >&2
    return 1
  fi

  # Word splitting of the command substitution is intentional: each source
  # name is a single whitespace-free token. A for-loop (rather than a
  # `while read` pipeline) keeps gleaner's stdin untouched.
  # shellcheck disable=SC2013,SC2046
  for src in $(list_sources "$harvest_cfg"); do
    echo "harvesting source '$src'..."
    # To trace system calls while debugging, prefix the command below with:
    #   strace -f -o "strace-$src.out"
    if ! gleaner -log debug -cfg "$harvest_cfg" -source "$src" -rude; then
      # Best effort: report the failure and carry on with the next source.
      echo "warning: gleaner failed for source '$src'" >&2
    fi
  done
  echo "complete!"
}

main "$@"

0 comments on commit a24c9b9

Please sign in to comment.