-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add portal harvest task for mn-ucsb-1 (#256)
This is the current taskList.csv, which includes add'l entries to mn-ucsb-1
- Loading branch information
Showing
1 changed file
with
40 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,49 @@ | ||
task-type,task-name,task-group,cron-schedule,params | ||
# task type, task name, task group, cron schedule, "formatId filter (regex); suite id; node id; D1 node base url; harvest begin date; harvest increment (days);requestCount" | ||
# - task type: currently 'quality' and 'score' tasks are supported. | ||
# - task name: any unique string, i.e. 'quality-knb' | ||
# - task group: currently only 'metadig' is used | ||
# - nodeId | ||
# task type, job name, job group, cron schedule, "formatId filter (regex); suite id; node id; D1 node base url; harvest begin date; harvest increment (days);requestCount" | ||
# - task type: | ||
# - job name: | ||
# - job group: | ||
# - cron schedule: | ||
# - seconds, minutes, hours, day of month, month, day of week, year | ||
# - params | ||
# - formatId filter (regex): a list of wildcard patterns, delimited by '|', that match the formatIds of the records to harvest | ||
# - suite id: the metadig suite id | ||
# - node id: a DataONE node URN - data will be filtered using this (DataONE sysmeta "datasource") | ||
# - D1 node base url: the base service URL for an MN or CN that will be used to query for pids to be processed | ||
# - harvest begin date: the first date to use for the DataONE 'listObjects' service | ||
# - harvest increment (days): the time span for each search | ||
# - harvest begin date: the first date to use for the DataONE 'listObjects' service | ||
# - harvest increment (days): the time span for each search | ||
# - requestCount: the number of items to request from DataONE listObjects | ||
score,score-DataONE-fair,metadig,35 0/1 * * * ?,".*portal.*;FAIR.suite.1;urn:node:CN;2019-12-01T00:00:00.00Z;1;100;refresh" | ||
quality,quality-arctic,metadig,20 0/1 * * * ?,"^eml.*|^http.*eml.*;arctic.data.center.suite.1;urn:node:ARCTIC;1;100" | ||
filestore,ingest,metadig,0 0/1 * * * ?,"stage;;*.*;README.txt;filestore-ingest.log" | ||
|
||
# - requestType: for score tasks, determines the type of portal processing ("portal" or "node") | ||
# | ||
# Dataset quality scoring tasks | ||
quality,quality-knb,metadig,0 0/1 * * * ?,"^eml.*|^http.*eml.*;knb.suite.1;urn:node:KNB;2020-08-28T14:05:48.764Z;1;1000" | ||
quality,quality-arctic,metadig,5 0/1 * * * ?,"^eml.*|^http.*eml.*;arctic.data.center.suite.1;urn:node:ARCTIC;2020-08-27T00:00:00.000Z;1;1000" | ||
quality,quality-dataone-fair,metadig,10 0/1 * * * ?,"^eml.*|^http.*eml.*|.*www.isotc211.org.*;FAIR-suite-0.3.1;urn:node:CN;2020-08-28T00:00:00.000Z;1;1000" | ||
quality,quality-ess-dive,metadig,15 0/1 * * * ?,"^eml.*|^http.*eml.*;ess-dive.data.center.suite.1;urn:node:ESS_DIVE;2020-08-27T20:38:19.953Z;1;1000;" | ||
# | ||
# Portal scoring tasks | ||
score,portal-KNB-FAIR,metadig,5 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:KNB;2020-08-28T00:00:00.00Z;1;100;portal" | ||
score,portal-ARCTIC-FAIR,metadig,10 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:ARCTIC;2020-08-28T00:00:00.00Z;1;100;portal" | ||
score,portal-mnUCSB1-FAIR,metadig,15 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:mnUCSB1;2020-08-28T00:00:00.00Z;1;100;portal" | ||
# | ||
# Note: Portal harvesting for DataONE portals created on search.dataone.org will be performed on mnUCSB1, as MetacatUI sends create and | ||
# update requests performed on search.dataone.org to this host. We want to harvest them as soon as they are created, and not have to wait for mnUCSB1 to | ||
# sync to the CN and then for the CN to index it, so the following entry is obsolete and no longer used. | ||
# # score,portal-CN-FAIR,metadig,35 0/1 * * * ?,"*portals*;FAIR.suite-0.3.1;urn:node:CN;2020-08-24T00:00:00.00Z;1;100;portal" | ||
# | ||
# Task for creating member node metadata assessment graphs | ||
score,mn-portal-ARCTIC-FAIR,metadig,0 0 2 * * ?,";FAIR-suite-0.3.1;urn:node:ARCTIC;2020-08-28T00:00:00.00Z;1;1000;node" | ||
score,mn-portal-KNB-FAIR,metadig,0 1 2 * * ?,";FAIR-suite-0.3.1;urn:node:KNB;2020-08-28T00:00:00.00Z;1;1000;node" | ||
score,mn-portal-ESS-DIVE-FAIR,metadig,0 2 2 * * ?,";FAIR-suite-0.3.1;urn:node:ESS_DIVE;2020-08-28T00:00:00.00Z;1;1000;node" | ||
score,mn-portal-CA_OPC-FAIR,metadig,0 3 2 * * ?,";FAIR-suite-0.3.1;urn:node:CA_OPC;2020-08-28T00:00:00.00Z;1;1000;node" | ||
score,mn-portal-DataONE-FAIR,metadig,0 4 2 * * ?,";FAIR-suite-0.3.1;urn:node:CN;2020-08-28T00:00:00.00Z;1;1000;node" | ||
# | ||
# Task for ingesting files into the file store from /data/metadig/store/stage/{code,data,graph,metadata} | ||
# filestore,ingest,metadig,0 0/1 * * * ?,"stage;;*.*;README.txt;filestore-ingest.log" | ||
# | ||
# Admin NOTE: it appears that DataONE HttpMultipartRestClient can't handle two clients being created at the same time, even if they are by different threads. This needs to be | ||
# investigated further and potentially a bug needs to be logged in redmine for this. Until then, an easy workaround is to ensure that no two tasks are started | ||
# at the same time, so adjust the cron schedule accordingly. | ||
# | ||
# Node list from DataONE | ||
nodelist,MN-NODE-LIST,metadig,0 0 0/1 * * ?,"urn:node:CN" |