From 5bfd7a780b380b3feb6ddac493448e2f4aa64d29 Mon Sep 17 00:00:00 2001 From: gothub Date: Thu, 20 Aug 2020 11:17:40 -0700 Subject: [PATCH] Improve javadocs; code cleanup --- .../mdqengine/scheduler/JobScheduler.java | 8 ++- .../mdqengine/scheduler/RequestReportJob.java | 57 +++++++++++++++++-- .../mdqengine/scheduler/RequestScorerJob.java | 35 ++++++++---- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 14 ++--- 4 files changed, 89 insertions(+), 25 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java index 3f9612a3..dd72f43b 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java @@ -241,12 +241,18 @@ public static void main(String[] argv) throws Exception { public JobScheduler () { } + /** + * Read a single parameter from the quality engine parameter file + * @param paramName the parameter to read from the config file + * @throws ConfigurationException if there is an exception while reading the config file + * @throws IOException if there is an exception while reading the config file + */ public String readConfig (String paramName) throws ConfigurationException, IOException { String paramValue = null; try { MDQconfig cfg = new MDQconfig(); paramValue = cfg.getString(paramName); - } catch (Exception e) { + } catch (ConfigurationException | IOException e) { log.error("Could not read configuration for param: " + paramName + ": " + e.getMessage()); throw e; } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 22540674..27a7458b 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -320,8 +320,25 @@ public void execute(JobExecutionContext context) 
store.shutdown(); } + /** + * Query a DataONE CN or MN to obtain a list of persistent identifiers (pids) for metadata objects that have been + * added to the system during a specific time period. + * @param cnNode a DataONE CN connection client object + * @param mnNode a DataONE MN connection client object + * @param isCN a logical indicating whether a CN or MN object is being used + * @param session a DataONE authentication session + * @param suiteId the quality suite to check (if this pid has already been processed) + * @param pidFilter the DataONE format identifiers to filter for + * @param startHarvestDatetimeStr the starting date to harvest pids from + * @param endHarvestDatetimeStr the ending date to harvest pids from + * @param startCount the start count for paging results from DataONE, for large results + * @param countRequested the number of items to get from DataONE on each request + * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid + * @throws Exception if there is an exception while executing the job. 
+ * @return a ListResult object containing the matching pids + */ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, - String suiteId, String nodeId, String pidFilter, String startHarvestDatetimeStr, + String suiteId, String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr, int startCount, int countRequested, DateTime lastDateModifiedDT) throws Exception { @@ -331,7 +348,6 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, ObjectFormatIdentifier formatId = null; NodeReference nodeRef = null; - //nodeRef.setValue(nodeId); Identifier identifier = null; Boolean replicaStatus = false; @@ -356,7 +372,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, } //log.info("Got " + objList.getCount() + " pids for format: " + formatId.getValue() + " pids."); } catch (Exception e) { - log.error("Error retrieving pids for node " + nodeId + ": " + e.getMessage()); + log.error("Error retrieving pids: " + e.getMessage()); throw e; } @@ -416,7 +432,24 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, return result; } - public boolean runExists(String pid, String suiteId, MDQStore store) throws MetadigStoreException { + + /** + * Check if the specified quality suite has already been run for a pid. + *

+ * An additional check is made to see if the system metadata in the + * run is older than the passed in date. Because the quality engine + * uses fields from sysmeta (obsoletes, obsoletedBy), a run may need + * to be performed on an existing run in order to update the sysmeta, as + * the system metadata is stored in the run object, and this run object is + * parsed when the run is inserted into the Solr index. + *

+ * @param pid the pid to check + * @param suiteId the suite identifier to check (e.g. "FAIR-suite-0.3.1") + * @param store the DataStore object to send the check request to. + * @throws MetadigStoreException + * + */ + public boolean runExists(String pid, String suiteId, MDQStore store, Date dateSystemMetadataModified) throws MetadigStoreException { boolean found = false; Date runDateSystemMetadataModified = null; @@ -440,6 +473,22 @@ public boolean runExists(String pid, String suiteId, MDQStore store) throws Meta return found; } + /** + * Submit a request to the metadig controller to run a quality suite for the specified pid. + *

+ * The system metadata for a pid is also obtained and sent with the request + *

+ * + * @param cnNode a DataONE CN connection client object + * @param mnNode a DataONE MN connection client object + * @param isCN a logical indicating whether a CN or MN object is being used + * @param session a DataONE authentication session + * @param qualityServiceUrl the URL of the MetaDIG quality service + * @param pidStr the pid to submit the request for + * @param suiteId the suite identifier to submit the request for + * + * @throws Exception + */ public void submitReportRequest(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String qualityServiceUrl, String pidStr, String suiteId) throws Exception { SystemMetadata sysmeta = null; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index 31dcea61..1abb1dce 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java @@ -330,22 +330,21 @@ public void execute(JobExecutionContext context) /** * Query a DataONE CN or MN object store for a list of object that match the time range and formatId filters provided. 
* - * //@param cnNode - * //@param mnNode - * //@param isCN - * @param session - * @param pidFilter - * @param startHarvestDatetimeStr - * @param endHarvestDatetimeStr - * @param startCount - * @param countRequested + * @param d1Node a DataONE CN or MN connection client object + * @param session a DataONE authentication session + * @param pidFilter the DataONE format identifiers to filter for + * @param startHarvestDatetimeStr the starting date to harvest pids from + * @param endHarvestDatetimeStr the ending date to harvest pids from + * @param startCount the start count for paging results from DataONE, for large results + * @param countRequested the number of items to get from DataONE on each request + * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid + * @throws Exception if there is an exception while executing the job. * @return a ListResult object containing the matching pids * @throws Exception */ - //public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr, - int startCount, int countRequested) throws Exception { + int startCount, int countRequested, DateTime lastDateModifiedDT) throws Exception { MetadigProcessException metadigException = null; @@ -450,6 +449,18 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, return result; } + /** + * Submit a request to the metadig controller to get quality score info and create a graph for the specified collection. 
+ * + * @param qualityServiceUrl + * @param collectionId + * @param suiteId + * @param nodeId + * @param formatFamily + * + * @throws Exception + * + */ public void submitScorerRequest(String qualityServiceUrl, String collectionId, String suiteId, String nodeId, String formatFamily) throws Exception { InputStream runResultIS = null; @@ -475,7 +486,7 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S post.addHeader("Accept", "application/xml"); // send to service - log.debug("submitting scores request : " + scorerServiceUrl); + log.trace("submitting scores request : " + scorerServiceUrl); CloseableHttpClient client = HttpClients.createDefault(); CloseableHttpResponse response = client.execute(post); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index fede5a0f..df56654d 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -129,6 +129,7 @@ public static void main(String[] argv) throws Exception { * A set of quality scores are retrieved from the Quality Solr Server and a quality graph and csv file are created from * them. For DataONE collections, the 'collectionQuery' is retrieved from Solr to determine the set of pids to be * included. + *

* */ final Consumer consumer = new DefaultConsumer(inProcessChannel) { @@ -330,6 +331,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp } }; + // Initialize the RabbitMQ queue for scorer requests sent by the controller inProcessChannel.basicConsume(SCORER_QUEUE_NAME, false, consumer); } @@ -337,18 +339,16 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp * Retrieve pids associated with a DataONE collection. * *

First the 'collectionQuery' field is retrieved from DataONE Solr for the collection

- *

Next, a query is issued with the query from collectionQuery field, to retrieve all Solr docs for the collection ids./p> + *

Next, a query is issued with the query from the collectionQuery field, to retrieve all Solr docs for the collection ids.</p> * *

Note that in the current design, the collection query is always obtained by querying the node specified in the taskList.csv file, * which is usually an MN, but the collectionQuery is always evaluated on the CN

* * @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc - * @param d1Node - * @param session + * @param d1Node the DataONE connection object for a node + * @param session the DataONE authentication session * @return a List of quality scores fetched from Solr */ - //private ScorerResult getCollectionPids(String collectionId, MultipartCNode cnNode, MultipartMNode mnNode, - // Boolean isCN, Session session) throws MetadigProcessException { private ScorerResult getCollectionPids(String collectionId, MultipartD1Node d1Node, Session session) throws MetadigProcessException { Document xmldoc = null; @@ -363,11 +363,9 @@ which will be used to query DataONE Solr for all the pids associated with that p */ ArrayList pids = new ArrayList<>(); queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; - //queryStr = "?q=seriesId:" + encodeValue(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; - //queryStr = "?q=seriesId:" + collectionId + "+-obsoletedBy:*&fl=collectionQuery,label,rightsHolder&q.op=AND"; startPos = 0; - // Just getting 1 row + // Just getting 1 row (for the collectionQuery field) countRequested = 10; // Get the collectionQuery from Solr