From 5bfd7a780b380b3feb6ddac493448e2f4aa64d29 Mon Sep 17 00:00:00 2001
From: gothub
Date: Thu, 20 Aug 2020 11:17:40 -0700
Subject: [PATCH] Improve javadocs; code cleanup
---
.../mdqengine/scheduler/JobScheduler.java | 8 ++-
.../mdqengine/scheduler/RequestReportJob.java | 57 +++++++++++++++++--
.../mdqengine/scheduler/RequestScorerJob.java | 35 ++++++++----
.../ucsb/nceas/mdqengine/scorer/Scorer.java | 14 ++---
4 files changed, 89 insertions(+), 25 deletions(-)
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java
index 3f9612a3..dd72f43b 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java
@@ -241,12 +241,18 @@ public static void main(String[] argv) throws Exception {
public JobScheduler () {
}
+ /**
+ * Read a single parameter from the quality engine parameter file
+ * @param paramName the parameter to read from the config file
+ * @throws ConfigurationException if there is an exception while reading the config file
+ * @throws IOException if there is an exception while reading the config file
+ */
public String readConfig (String paramName) throws ConfigurationException, IOException {
String paramValue = null;
try {
MDQconfig cfg = new MDQconfig();
paramValue = cfg.getString(paramName);
- } catch (Exception e) {
+ } catch (ConfigurationException | IOException e) {
log.error("Could not read configuration for param: " + paramName + ": " + e.getMessage());
throw e;
}
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java
index 22540674..27a7458b 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java
@@ -320,8 +320,25 @@ public void execute(JobExecutionContext context)
store.shutdown();
}
+ /**
+ * Query a DataONE CN or MN to obtain a list of persistent identifiers (pids) for metadata objects that have been
+ * added to the system during a specific time period.
+ * @param cnNode a DataONE CN connection client object
+ * @param mnNode a DataONE MN connection client object
+ * @param isCN a logical indicating whether a CN or MN object is being used
+ * @param session a DataONE authentication session
+ * @param suiteId the quality suite to check (if this pid has already been processed)
+ * @param pidFilter the DataONE format identifiers to filter for
+ * @param startHarvestDatetimeStr the starting date to harvest pids from
+ * @param endHarvestDatetimeStr the ending date to harvest pids from
+ * @param startCount the start count for paging results from DataONE, for large results
+ * @param countRequested the number of items to get from DataONE on each request
+ * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid
+ * @throws Exception if there is an exception while executing the job.
+ * @return a ListResult object containing the matching pids
+ */
public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session,
- String suiteId, String nodeId, String pidFilter, String startHarvestDatetimeStr,
+ String suiteId, String pidFilter, String startHarvestDatetimeStr,
String endHarvestDatetimeStr, int startCount,
int countRequested, DateTime lastDateModifiedDT) throws Exception {
@@ -331,7 +348,6 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
ObjectFormatIdentifier formatId = null;
NodeReference nodeRef = null;
- //nodeRef.setValue(nodeId);
Identifier identifier = null;
Boolean replicaStatus = false;
@@ -356,7 +372,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
}
//log.info("Got " + objList.getCount() + " pids for format: " + formatId.getValue() + " pids.");
} catch (Exception e) {
- log.error("Error retrieving pids for node " + nodeId + ": " + e.getMessage());
+ log.error("Error retrieving pids: " + e.getMessage());
throw e;
}
@@ -416,7 +432,24 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
return result;
}
- public boolean runExists(String pid, String suiteId, MDQStore store) throws MetadigStoreException {
+
+ /**
+ * Check if the specified quality suite has already been run for a pid.
+ *
+ * An additional check is made to see if the system metadata in the
+ * run is older than the passed in date. Because the quality engine
+ * uses fields from sysmeta (obsoletes, obsoletedBy), a run may need
+ * to be performed on an existing run in order to update the sysmeta, as
+ * the system metadata is stored in the run object, and this run object is
+ * parsed when the run is inserted into the Solr index.
+ *
+ * @param pid the pid to check
+ * @param suiteId the suite identifier to check (e.g. "FAIR-suite-0.3.1")
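+ * @param store the DataStore object to send the check request to.
+ * @param dateSystemMetadataModified the date that the system metadata stored in the run is compared against
+ * @throws MetadigStoreException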
+ */
+ public boolean runExists(String pid, String suiteId, MDQStore store, Date dateSystemMetadataModified) throws MetadigStoreException {
boolean found = false;
Date runDateSystemMetadataModified = null;
@@ -440,6 +473,22 @@ public boolean runExists(String pid, String suiteId, MDQStore store) throws Meta
return found;
}
+ /**
+ * Submit a request to the metadig controller to run a quality suite for the specified pid.
+ *
+ * The system metadata for a pid is also obtained and sent with the request
+ *
+ *
+ * @param cnNode a DataONE CN connection client object
+ * @param mnNode a DataONE MN connection client object
+ * @param isCN a logical indicating whether a CN or MN object is being used
+ * @param session a DataONE authentication session
+ * @param qualityServiceUrl the URL of the MetaDIG quality service
+ * @param pidStr the pid to submit the request for
+ * @param suiteId the suite identifier to submit the request for
+ *
+ * @throws Exception
+ */
public void submitReportRequest(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String qualityServiceUrl, String pidStr, String suiteId) throws Exception {
SystemMetadata sysmeta = null;
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
index 31dcea61..1abb1dce 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
@@ -330,22 +330,21 @@ public void execute(JobExecutionContext context)
/**
* Query a DataONE CN or MN object store for a list of object that match the time range and formatId filters provided.
*
- * //@param cnNode
- * //@param mnNode
- * //@param isCN
- * @param session
- * @param pidFilter
- * @param startHarvestDatetimeStr
- * @param endHarvestDatetimeStr
- * @param startCount
- * @param countRequested
+ * @param d1Node a DataONE CN or MN connection client object
+ * @param session a DataONE authentication session
+ * @param pidFilter the DataONE format identifiers to filter for
+ * @param startHarvestDatetimeStr the starting date to harvest pids from
+ * @param endHarvestDatetimeStr the ending date to harvest pids from
+ * @param startCount the start count for paging results from DataONE, for large results
+ * @param countRequested the number of items to get from DataONE on each request
+ * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid
+ * @throws Exception if there is an exception while executing the job.
* @return a ListResult object containing the matching pids
* @throws Exception
*/
- //public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session,
public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr,
- int startCount, int countRequested) throws Exception {
+ int startCount, int countRequested, DateTime lastDateModifiedDT) throws Exception {
MetadigProcessException metadigException = null;
@@ -450,6 +449,18 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
return result;
}
+ /**
+ * Submit a request to the metadig controller to get quality score info and create a graph for the specified collection.
+ *
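+ * @param qualityServiceUrl the URL of the MetaDIG quality service
+ * @param collectionId the identifier of the collection to get score info and create a graph for
+ * @param suiteId the quality suite identifier to submit the request for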
+ * @param nodeId
+ * @param formatFamily
+ *
+ * @throws Exception
+ *
+ */
public void submitScorerRequest(String qualityServiceUrl, String collectionId, String suiteId, String nodeId, String formatFamily) throws Exception {
InputStream runResultIS = null;
@@ -475,7 +486,7 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S
post.addHeader("Accept", "application/xml");
// send to service
- log.debug("submitting scores request : " + scorerServiceUrl);
+ log.trace("submitting scores request : " + scorerServiceUrl);
CloseableHttpClient client = HttpClients.createDefault();
CloseableHttpResponse response = client.execute(post);
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
index fede5a0f..df56654d 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
@@ -129,6 +129,7 @@ public static void main(String[] argv) throws Exception {
* A set of quality scores are retrieved from the Quality Solr Server and a quality graph and csv file are created from
* them. For DataONE collections, the 'collectionQuery' is retrieved from Solr to determine the set of pids to be
* included.
+ *
*
*/
final Consumer consumer = new DefaultConsumer(inProcessChannel) {
@@ -330,6 +331,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp
}
};
+ // Initialize the RabbitMQ queue for scorer requests sent by the controller
inProcessChannel.basicConsume(SCORER_QUEUE_NAME, false, consumer);
}
@@ -337,18 +339,16 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp
* Retrieve pids associated with a DataONE collection.
*
* First the 'collectionQuery' field is retrieved from DataONE Solr for the collection
- * Next, a query is issued with the query from collectionQuery field, to retrieve all Solr docs for the collection ids./p>
+ * <p>Next, a query is issued with the query from the collectionQuery field, to retrieve all Solr docs for the collection ids.</p>
*
* <p>Note that in the current design, the collection query is always obtained by querying the node specified in the taskList.csv file,
* which is usually an MN, but the collectionQuery is always evaluated on the CN
*
* @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc
- * @param d1Node
- * @param session
+ * @param d1Node the DataONE connection object for a node
+ * @param session the DataONE authentication session
* @return a List of quality scores fetched from Solr
*/
- //private ScorerResult getCollectionPids(String collectionId, MultipartCNode cnNode, MultipartMNode mnNode,
- // Boolean isCN, Session session) throws MetadigProcessException {
private ScorerResult getCollectionPids(String collectionId, MultipartD1Node d1Node, Session session) throws MetadigProcessException {
Document xmldoc = null;
@@ -363,11 +363,9 @@ which will be used to query DataONE Solr for all the pids associated with that p
*/
ArrayList pids = new ArrayList<>();
queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND";
- //queryStr = "?q=seriesId:" + encodeValue(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND";
- //queryStr = "?q=seriesId:" + collectionId + "+-obsoletedBy:*&fl=collectionQuery,label,rightsHolder&q.op=AND";
startPos = 0;
- // Just getting 1 row
+ // Only 1 row is needed (for the collectionQuery field); NOTE(review): countRequested is set to 10 below - confirm which is intended
countRequested = 10;
// Get the collectionQuery from Solr