Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
gothub committed Feb 25, 2022
2 parents d6692aa + 6d40d1e commit a7e064a
Show file tree
Hide file tree
Showing 80 changed files with 1,539 additions and 2,049 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,5 @@ src/main/resources/schemas/META-INF
.idea/**
test/**
**/metadig-engine.jar
Kubernetes/metadig-worker/**/*.xml
metadig-engine.iml
bin/**
Kubernetes/metadig-scorer/solr
Kubernetes/Admin/python
Kubernetes/Admin/jython
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ COPY metadig-engine.jar metadig-engine.jar
#CMD [ "./run.sh" ]
# Set classpath to include /opt/local/metadig/log4j.properties, if it exists, so that logging can be changed without
# having to rebuild the container. Note that on k8s, this dir is mapped to the persistent volume, so will be /data/metadig/log4j.properties
CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar: edu.ucsb.nceas.mdqengine.scheduler.JobScheduler
CMD java -Dlog4j2.formatMsgNoLookups=true -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar: edu.ucsb.nceas.mdqengine.scheduler.JobScheduler
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Use an OpenJDK runtime as a parent image - This is a Debian distro
FROM openjdk:8-jre-stretch
# Use an OpenJDK runtime as a parent image
FROM openjdk:8-jdk

MAINTAINER [email protected]

Expand All @@ -9,14 +9,12 @@ WORKDIR /var/lib/metadig
# This file was created from the https://github.com/NCEAS/metadig-r repo
# and contains R functions that assist in writing R based quality checks.
COPY metadig_0.2.0.tar.gz metadig.tar.gz
#COPY log4j.properties .
# The most recently built jar file is copied from the maven build directory to this dir by maven, so that
# it can be copied to the image.
COPY metadig-engine.jar metadig-engine.jar
# For some reason, the DataONE indexer software can't find these files unless they are put in
# directory and included in the CLASSPATH (see java CMD). They are in the source tree under 'main/resources',
# but maybe further action needs to be taken for java/dataone to be able to find them without having to
# do this extra step.
# The DataONE indexer software can't find these files unless they are put in the ./solr directory and included in the
# CLASSPATH (see java CMD). They are in the source tree under 'main/resources', but maybe further action needs to be
# taken for java/dataone to be able to find them without having to do this extra step.
COPY solr solr

# DataONE indexer prints copious error msgs if these files don't exist
Expand All @@ -25,15 +23,12 @@ RUN mkdir -p /etc/dataone/index && touch /etc/dataone/index/d1client.properties
# Add R runtime and install packages required by the quality suites
RUN apt update
RUN apt -y install vim bash
RUN apt -y install r-base r-cran-httr r-cran-xml2 r-cran-tidyr r-cran-scales r-cran-lubridate r-cran-ggplot2 r-cran-magrittr
# Debian stretch doesn't have a pre-cooked package for readr, so install now.
RUN Rscript --vanilla -e 'install.packages("readr", repos=c(CRAN = "http://cran.rstudio.com"))'
RUN apt -y install r-base r-cran-httr r-cran-xml2 r-cran-tidyr r-cran-scales r-cran-lubridate r-cran-ggplot2 r-cran-magrittr r-cran-readr
# Install the metadig-engine distribution
RUN Rscript --vanilla -e 'install.packages("metadig.tar.gz", repos=NULL)'

# Run the Scorer process
# Note: docker --build-arg only allows one argument (one token only; multiple tokens inside quotes don't work), so we
# have to specify the java options directly on the command line.
# Set classpath to include /opt/local/metadig/log4j.properties, if it exists, so that logging can be changed without
# having to rebuild the container. Note that on k8s, this dir is mapped to the persistent volume, so will be /data/metadig/log4j.properties
CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scorer.Scorer
CMD java -Dlog4j2.formatMsgNoLookups=true -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scorer.Scorer

Binary file added Docker/metadig-scorer/metadig_0.2.0.tar.gz
Binary file not shown.
207 changes: 207 additions & 0 deletions Docker/metadig-scorer/solr/application-context-mdq.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">

<bean id="mdqSubprocessor" class="org.dataone.cn.indexer.parser.ScienceMetadataDocumentSubprocessor">

<!-- match MDQ documents -->
<property name="matchDocuments">
<list>
<value>https://nceas.ucsb.edu/mdqe/v1</value>
</list>
</property>
<property name="fieldList">
<list>
<ref bean="mdq.runId"/>
<ref bean="mdq.suiteId"/>
<ref bean="mdq.timestamp"/>
<ref bean="mdq.datasource"/>
<ref bean="mdq.metadata.formatId"/>
<ref bean="mdq.dateUploaded"/>
<ref bean="mdq.obsoletes"/>
<ref bean="mdq.obsoletedBy"/>
<ref bean="mdq.sequenceId"/>
<ref bean="mdq.seriesId"/>
<ref bean="mdq.funder"/>
<ref bean="mdq.funder.lookup"/>
<ref bean="mdq.rightsHolder"/>
<ref bean="mdq.group"/>
<ref bean="mdq.checks.passed"/>
<ref bean="mdq.checks.warned"/>
<ref bean="mdq.checks.failed"/>
<ref bean="mdq.checks.info"/>
<ref bean="mdq.checks.errored"/>
<ref bean="mdq.check.count"/>
<ref bean="mdq.score.overall"/>
</list>
</property>
</bean>

<!-- Solr field 'runId': the id child of the root element whose local name is 'run'. Single-valued. -->
<bean id="mdq.runId" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="runId"/>
    <constructor-arg name="xpath" value="/*[local-name() = 'run']/id"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'sequenceId': the sequenceId child of the root element whose local name is 'run'.
     Single-valued. -->
<bean id="mdq.sequenceId" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="sequenceId"/>
    <constructor-arg name="xpath" value="/*[local-name() = 'run']/sequenceId"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'suiteId': the suiteId child of the root element whose local name is 'run'.
     Single-valued. -->
<bean id="mdq.suiteId" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="suiteId"/>
    <constructor-arg name="xpath" value="/*[local-name() = 'run']/suiteId"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'timestamp': the timestamp child of the root element whose local name is 'run'.
     Single-valued; the value is passed through the dateConverter bean, which is not defined in
     this file. -->
<bean id="mdq.timestamp" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="timestamp"/>
    <constructor-arg name="xpath" value="/*[local-name() = 'run']/timestamp"/>
    <property name="multivalue" value="false"/>
    <property name="converter" ref="dateConverter"/>
</bean>

<!-- Solr field 'metadataFormatId': whitespace-normalized string value of sysmeta/formatId under
     the root element. Single-valued. -->
<bean id="mdq.metadata.formatId" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="metadataFormatId"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/formatId)"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'obsoletes': whitespace-normalized string value of sysmeta/obsoletes under the
     root element. Single-valued. -->
<bean id="mdq.obsoletes" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="obsoletes"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/obsoletes)"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'obsoletedBy': whitespace-normalized string value of sysmeta/obsoletedBy under
     the root element. Single-valued. -->
<bean id="mdq.obsoletedBy" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="obsoletedBy"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/obsoletedBy)"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'seriesId': whitespace-normalized string value of sysmeta/seriesId under the
     root element. Single-valued. -->
<bean id="mdq.seriesId" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="seriesId"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/seriesId)"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'datasource': whitespace-normalized string value of sysmeta/originMemberNode
     under the root element. Single-valued. -->
<bean id="mdq.datasource" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="datasource"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/originMemberNode)"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'dateUploaded': whitespace-normalized string value of sysmeta/dateUploaded under
     the root element. Single-valued, with the dateConverter bean configured as converter. -->
<bean id="mdq.dateUploaded" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="dateUploaded"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/dateUploaded)"/>
    <property name="multivalue" value="false"/>
    <!-- TODO: the dateConverter reportedly does not work for this field; the cause has not been
         determined. NOTE(review): unlike mdq.timestamp, this XPath yields a normalize-space()
         string rather than a node, which is possibly related; confirm. -->
    <property name="converter" ref="dateConverter"/>
</bean>

<!-- Solr field 'funder': output text of every result whose check id contains
     'check.echo.funder.'. Multi-valued, with dedupe enabled. -->
<bean id="mdq.funder" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="funder"/>
    <!-- 'contains' (XPath 1.0, which java supports) is used so that the check name matches
         regardless of the version number that follows it. -->
    <constructor-arg name="xpath" value="//result[check/id[contains(text(),'check.echo.funder.')]]/output/text()"/>
    <property name="multivalue" value="true"/>
    <property name="dedupe" value="true"/>
</bean>

<!-- Solr field 'funderInfo': output text of every result whose check id contains
     'check.lookup.award.'. Multi-valued, with dedupe enabled. -->
<bean id="mdq.funder.lookup" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="funderInfo"/>
    <constructor-arg name="xpath" value="//result[check/id[contains(text(),'check.lookup.award.')]]/output/text()"/>
    <property name="multivalue" value="true"/>
    <property name="dedupe" value="true"/>
</bean>

<!-- Solr field 'rightsHolder': whitespace-normalized string value of sysmeta/rightsHolder under
     the root element. Single-valued. -->
<bean id="mdq.rightsHolder" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="rightsHolder"/>
    <constructor-arg name="xpath" value="normalize-space(/*/sysmeta/rightsHolder)"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- Solr field 'group': text of each sysmeta/groups/group element under the root element.
     Multi-valued, with dedupe enabled. -->
<bean id="mdq.group" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="group"/>
    <constructor-arg name="xpath" value="/*/sysmeta/groups/group/text()"/>
    <property name="multivalue" value="true"/>
    <property name="dedupe" value="true"/>
</bean>

<!-- Scoring by result status. -->
<!-- passed: status = SUCCESS on a check whose level is neither INFO nor METADATA.
     Solr field 'checksPassed', single-valued. -->
<bean id="mdq.checks.passed" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="checksPassed"/>
    <constructor-arg name="xpath" value="count(//result[check/level[text() != 'INFO' and text() != 'METADATA']]/status[text() = 'SUCCESS'])"/>
    <property name="multivalue" value="false"/>
</bean>
<!-- warned: status = FAILURE on a check whose level = OPTIONAL.
     Solr field 'checksWarned', single-valued. -->
<bean id="mdq.checks.warned" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="checksWarned"/>
    <constructor-arg name="xpath" value="count(//result[check/level[text() = 'OPTIONAL']]/status[text() = 'FAILURE'])"/>
    <property name="multivalue" value="false"/>
</bean>
<!-- failed: status = FAILURE on a check whose level = REQUIRED.
     Solr field 'checksFailed', single-valued. -->
<bean id="mdq.checks.failed" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="checksFailed"/>
    <constructor-arg name="xpath" value="count(//result[check/level[text() = 'REQUIRED']]/status[text() = 'FAILURE'])"/>
    <property name="multivalue" value="false"/>
</bean>
<!-- errored: status = ERROR, regardless of the check level.
     Solr field 'checksErrored', single-valued. -->
<bean id="mdq.checks.errored" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="checksErrored"/>
    <constructor-arg name="xpath" value="count(//result/status[text() = 'ERROR'])"/>
    <property name="multivalue" value="false"/>
</bean>
<!-- info: results whose check level = INFO or whose status = SKIP.
     Solr field 'checksInfo', single-valued.
     The count is taken over result elements with an 'or' predicate, so a result that is both
     INFO-level and SKIP is counted once. A union of status nodes and result nodes would count
     such a result twice, because the two operands select different nodes. -->
<bean id="mdq.checks.info" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="checksInfo"/>
    <constructor-arg name="xpath" value="count(//result[status[text() = 'SKIP'] or check/level[text() = 'INFO']])"/>
    <property name="multivalue" value="false"/>
</bean>
<!-- checkCount: all results except those whose check level = METADATA.
     (Not used for the overall score.) Solr field 'checkCount', single-valued. -->
<bean id="mdq.check.count" class="org.dataone.cn.indexer.parser.SolrField">
    <constructor-arg name="name" value="checkCount"/>
    <constructor-arg name="xpath" value="count(//result) - count(//result[check/level[text() = 'METADATA']])"/>
    <property name="multivalue" value="false"/>
</bean>

<!-- The composite score. Solr field 'scoreOverall', single-valued. -->
<!-- overallScore: count(pass) div (count(pass) + count(required-fail)), where
     pass          = status SUCCESS on a check whose level is neither INFO nor METADATA
     required-fail = status ERROR or FAILURE on a check whose level is REQUIRED
     Failures on checks that are not REQUIRED are not part of the denominator.
     NOTE(review): if both counts are 0 the expression is 0 div 0, which is NaN in XPath 1.0;
     confirm that the indexer handles that value. -->
<bean id="mdq.score.overall" class="org.dataone.cn.indexer.parser.SolrField">
<constructor-arg name="name" value="scoreOverall"/>
<constructor-arg name="xpath"
value="(count(//result[check/level[text() != 'INFO' and text() != 'METADATA']]/status[text() = 'SUCCESS'])) div
(count(//result[check/level[text() != 'INFO' and text() != 'METADATA']]/status[text() = 'SUCCESS']) +
count(//result[check/level[text() = 'REQUIRED']]/status[text() = 'ERROR'] | //result[check/level[text() = 'REQUIRED']]/status[text() = 'FAILURE']))"/>
<property name="multivalue" value="false"/>
</bean>
<!-- Second subprocessor registered for the same MDQ format id, implemented by
     QualityReportSubprocessor. NOTE(review): presumably calculates the scores per check 'type'
     and adds them as dynamic fields; confirm against the QualityReportSubprocessor source. -->
<bean id="mdqAddDynamicFieldsSubprocessor" class="edu.ucsb.nceas.mdqengine.solr.QualityReportSubprocessor">
    <property name="matchDocuments">
        <list>
            <value>https://nceas.ucsb.edu/mdqe/v1</value>
        </list>
    </property>
</bean>
</beans>
22 changes: 22 additions & 0 deletions Docker/metadig-scorer/solr/application-context-systemmeta-200.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd">

<!-- ArrayList of SolrField definitions read from a d200:systemMetadata root element.
     NOTE(review): the d200 prefix is not declared in this file; presumably it is bound by the
     indexer's namespace context; confirm. -->
<bean id="xpath_system_metadata_200" class="java.util.ArrayList">
    <constructor-arg>
        <list>
            <!-- 'metadataId': text of the identifier element -->
            <bean class="org.dataone.cn.indexer.parser.SolrField">
                <constructor-arg name="name" value="metadataId"/>
                <constructor-arg name="xpath" value="/d200:systemMetadata/identifier/text()"/>
            </bean>
            <!-- 'formatId': text of the formatId element -->
            <bean class="org.dataone.cn.indexer.parser.SolrField">
                <constructor-arg name="name" value="formatId"/>
                <constructor-arg name="xpath" value="/d200:systemMetadata/formatId/text()"/>
            </bean>
        </list>
    </constructor-arg>
</bean>
</beans>
Loading

0 comments on commit a7e064a

Please sign in to comment.