From a15a93c6c03c62838109b6b3d6bcca1c007028f4 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 7 Apr 2020 18:54:34 -0700 Subject: [PATCH 01/47] Remove obsolete method --- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index 27df0a7e..b135bf08 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -1041,36 +1041,6 @@ private SubjectInfo getSubjectInfo(Subject rightsHolder, String serviceUrl, Stri return subjectInfo; } -// /** -// * Get a DataONE authenticated session -// *
-//     * If no subject or authentication token are provided, a public session is returned
-//     *
-// * @param authToken the authentication token -// * @return the DataONE session -// */ -// Session getSession(String subjectId, String authToken) { -// -// Session session; -// -// // query Solr - either the member node or cn, for the project 'solrquery' field -// if (authToken == null || authToken.isEmpty()) { -// log.debug("Creating public session"); -// session = new Session(); -// } else { -// log.debug("Creating authentication session"); -// session = new AuthTokenSession(authToken); -// } -// -// if (subjectId != null && !subjectId.isEmpty()) { -// Subject subject = new Subject(); -// subject.setValue(subjectId); -// session.setSubject(subject); -// } -// -// return session; -// } - /** * Get a DataONE MultipartCNode object, which will be used to communication with a CN * From 2874d892d60450cd08bb3662151bf92cd0fe6c4c Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 7 Apr 2020 18:55:24 -0700 Subject: [PATCH 02/47] Get rightsholder from solr, not getSystemetadata --- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index b135bf08..49a65415 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -427,6 +427,19 @@ which will be used to query DataONE Solr for all the pids associated with that p node = xpathResult.item(0); label = node.getTextContent(); } + + // Extract the portal 'rightsHolder' + fieldXpath = xpath.compile("//result/doc/str[@name='rightsHolder']/text()"); + xpathResult = (org.w3c.dom.NodeList) fieldXpath.evaluate(xmldoc, XPathConstants.NODESET); + if(xpathResult.getLength() == 0) { + log.debug("RightsHolder not found for collection id: " + collectionId); + ScorerResult result = new ScorerResult(); + result.setResult(pids); + return result; + } else { + node = xpathResult.item(0); + rightsHolder = node.getTextContent(); + } } catch (XPathExpressionException xpe) { log.error("Error extracting collectinQuery from solr result doc: " + xpe.getMessage()); metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage()); @@ -451,13 +464,6 @@ which will be used to query DataONE Solr for all the pids associated with that p // from the CN. Then add those groups into the query. 
Each group will be included in the filter query in this format: // "(readPermission:"http://orcid.org/0000-0002-2192-403X") // OR (rightsHolder:"http://orcid.org/0000-0002-2192-403X")" - SystemMetadata sysmeta = null; - try { - sysmeta = getSystemMetadata(collectionId, serviceUrl, subjectId, authToken); - } catch (MetadigProcessException mpe) { - log.error("Unable to get system metadata for collection: " + collectionId); - throw(mpe); - } Subject rightsHolder = sysmeta.getRightsHolder(); // The subject info can only be obtained from a CN, so use the CN auth info for the current DataONE environment, From f7e88840bf8044ff76c98ccd9936aa41cd085ed2 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 7 Apr 2020 18:55:58 -0700 Subject: [PATCH 03/47] Addl changes for metadig properties file cleanup --- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 132 ++++++++++-------- 1 file changed, 77 insertions(+), 55 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index 49a65415..ac8cfbd1 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -20,7 +20,6 @@ import org.apache.solr.client.solrj.beans.BindingException; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; -import org.dataone.client.auth.AuthTokenSession; import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; @@ -35,7 +34,6 @@ import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; -import org.quartz.JobExecutionException; import org.w3c.dom.Document; import org.xml.sax.InputSource; @@ -78,9 +76,9 @@ public class Scorer { private static String CNauthToken = null; private static String CNsubjectId = null; private static String CNserviceUrl = null; + private static String CNnodeId="urn:node:CN"; private static SolrClient client = null; private static String solrLocation = null; - private static String filestoreBase = null; private static final String SOLR_COLLECTION = "quality"; private static long startTimeProcessing; @@ -127,7 +125,6 @@ public static void main(String[] argv) throws Exception { RabbitMQhost = cfg.getString("RabbitMQ.host"); RabbitMQport = cfg.getInt("RabbitMQ.port"); solrLocation = cfg.getString("solr.location"); - filestoreBase = cfg.getString("metadig.store.directory"); CNauthToken = cfg.getString("CN.authToken"); CNserviceUrl = cfg.getString("CN.serviceUrl"); CNsubjectId = cfg.getString("CN.subjectId"); @@ -161,6 +158,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp MetadigException metadigException = null; String subjectId = null; String authToken = null; + String nodeServiceUrl = null; String label = null; String title = null; @@ -178,21 +176,13 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp } // The components of the graph queue request - String collectionId = qEntry.getProjectId(); - //String projectName = qEntry.getProjectName(); - //String authTokenName = qEntry.getAuthTokenName(); - //String subjectIdName = qEntry.getSubjectIdName(); + String collectionId = qEntry.getCollectionId(); // Select quality scores based on the nodeId String nodeId = qEntry.getNodeId(); - //String serviceUrl = qEntry.getServiceUrl(); String formatFamily = 
qEntry.getFormatFamily(); String suiteId = qEntry.getQualitySuiteId(); - String serviceUrl = null; - Scorer scorer = new Scorer(); long difference; - log.debug("read score query entry"); - if(formatFamily == null) { formatFamily = ""; } @@ -208,16 +198,26 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp } log.debug("collectionId: " + collectionId); + // A nodeId is not specified, then the CN will be used if(nodeId == null) { - nodeId = ""; + nodeId=CNnodeId; } + log.debug("nodeId: " + nodeId); label: try { MDQconfig cfg = new MDQconfig(); // Pids associated with a collection, based on query results using 'collectionQuery' field in solr. ArrayList collectionPids = null; - //String title = "Project " + projectName; + // The harvesting and evaluation of the collectionQuery is based on the nodeId that is passed in, i.e. + // If an MN is specified, then the collection (portal) Solr entry will be obtained from the MN, and the + // collectionQuery string will also be evaluated on that node. + String nodeAbbr = nodeId.replace("urn:node:", ""); + authToken = cfg.getString(nodeAbbr + ".authToken"); + subjectId = cfg.getString(nodeAbbr + ".subjectId"); + // TODO: Cache the node values from the CN listNode service + nodeServiceUrl = cfg.getString(nodeAbbr + ".serviceUrl"); + HashMap variables = new HashMap<>(); // Create the graph. // Two types of graphs are currently supported: @@ -228,16 +228,19 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp //Scorer gfr = new Scorer(); // If creating a graph for a collection, get the set of pids associated with the collection. // Only scores for these pids will be included in the graph. + if (collectionId != null && !collectionId.isEmpty()) { - // The collection query is evaluated on the CN - authToken = CNauthToken; - subjectId = CNsubjectId; - serviceUrl = CNserviceUrl; - log.info("* Getting pids for collection " + collectionId); + // If the nodeId is specified, use if to determine the values for authTokenName and subjectIdName, + // if those values are not defined + log.debug("collectionId is not null: " + collectionId); + String id = nodeId.replace("urn:node:", "").toUpperCase().trim(); + + // The collection query is obtained from the MN and evaluated on the CN + log.info("Getting pids for collection " + collectionId); // Always use the CN subject id and authentication token from the configuration file, as // requests that this method uses need CN subject privs ScorerResult result = null; - result = gfr.getCollectionPids(collectionId, nodeId, serviceUrl, subjectId, authToken); + result = gfr.getCollectionPids(collectionId, nodeServiceUrl, subjectId, authToken); collectionPids = result.getResult(); label = result.getLabel(); // Don't continue if no pids (and thus scores) were found for this collection @@ -250,9 +253,10 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp } } + log.debug("Getting quality scores..."); // Quality scores will now be obtained from the MetaDIG quality Solr index, using the list of pids obtained // for the collection. 
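Aside: getQualityScores() reads these scores back from the engine's own Solr index (the "quality" collection named by SOLR_COLLECTION, at the configured solr.location). A minimal, self-contained sketch of that style of lookup with SolrJ follows, before the call below; the Solr URL and suite id are placeholder assumptions, and the filter query stands in for the suiteId term that the method folds into its query string.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

public class QualityScoreQueryExample {
    public static void main(String[] args) throws Exception {
        // Placeholder location; the engine reads this from the 'solr.location' property
        SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build();
        SolrQuery query = new SolrQuery("metadataId:*");
        // Placeholder suite id; equivalent to the 'AND suiteId:"..."' term built by getQualityScores()
        query.addFilterQuery("suiteId:\"FAIR.suite.1\"");
        query.setStart(0);
        query.setRows(1000);
        QueryResponse response = client.query("quality", query);
        for (SolrDocument doc : response.getResults()) {
            System.out.println(doc.getFieldValue("metadataId"));
        }
        client.close();
    }
}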
- List scores = gfr.getQualityScores(collectionId, suiteId, nodeId, formatFamily, collectionPids); + List scores = gfr.getQualityScores(collectionId, suiteId, formatFamily, collectionPids); // Don't continue if no quality scores were found for this collection if(scores.size() == 0) { @@ -279,13 +283,12 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp //String filePath = graph.create(GraphType.CUMULATIVE, title, scoreFile.getPath()); String filePath = graph.create(GraphType.MONTHLY, title, scoreFile.getPath()); // Now save the graphics file to permanent storage - //String outfile = projectName + "-" + suiteId + ".png"; String outfile; DateTime createDateTime = DateTime.now(); mdFile.setCreationDatetime(createDateTime); - mdFile.setCollectionId(collectionId); + mdFile.setPid(collectionId); mdFile.setSuiteId(suiteId); mdFile.setNodeId(nodeId); mdFile.setStorageType(StorageType.GRAPH.toString()); @@ -301,7 +304,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // for fileid, storagetype, extension mdFile = new MetadigFile(); mdFile.setCreationDatetime(createDateTime); - mdFile.setCollectionId(collectionId); + mdFile.setPid(collectionId); mdFile.setSuiteId(suiteId); mdFile.setNodeId(nodeId); mdFile.setStorageType(StorageType.DATA.toString()); @@ -344,12 +347,16 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp *
 * First the 'collectionQuery' field is retrieved from DataONE Solr for the collection
 * <p>
 * Next, a query is issued with the query from collectionQuery field, to retrieve all Solr docs for the collection ids.</p>
 *
+ * <p>
+ * Note that in the current design, the collection query is always obtained by querying the node specified in the taskList.csv file,
+ * which is usually an MN, but the collectionQuery is always evaluated on the CN</p>
+ * * @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc - * @param nodeId a DataONE node identifier, e.g. "urn:node:KNB" - * @param + * @param serviceUrl the DataONE service URL to obtain the collectionQuery string from + * @param subjectId the DataONE subjectId to use for the query, associated with the authentication token + * @param authToken the DataONE authentication token * @return a List of quality scores fetched from Solr */ - private ScorerResult getCollectionPids(String collectionId, String nodeId, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { + private ScorerResult getCollectionPids(String collectionId, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { Document xmldoc = null; String queryStr = null; @@ -362,12 +369,14 @@ private ScorerResult getCollectionPids(String collectionId, String nodeId, Strin which will be used to query DataONE Solr for all the pids associated with that project (that's 2 queries!) */ ArrayList pids = new ArrayList<>(); - queryStr = "?q=id:" + escapeSpecialChars(collectionId) + "&fl=collectionQuery,label&q.op=AND"; + queryStr = "?q=id:" + escapeSpecialChars(collectionId) + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; startPos = 0; countRequested = 10000; + // Get the collectionQuery from Solr try { + log.debug("Getting collectionQuery with query: " + queryStr); xmldoc = queryD1Solr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); } catch (MetadigProcessException mpe) { log.error("Unable to query Solr for collectionQuery field for collection id: " + collectionId); @@ -385,13 +394,13 @@ which will be used to query DataONE Solr for all the pids associated with that p XPath xpath = null; org.w3c.dom.Node node = null; String label = null; + String rightsHolder = null; try { log.debug("Getting collectionQuery for id: " + collectionId); // Extract the collection query from the Solr result XML XPathFactory xPathfactory = XPathFactory.newInstance(); xpath = xPathfactory.newXPath(); - // TODO: replace this test query with the live one fieldXpath = xpath.compile("//result/doc/str[@name='collectionQuery']/text()"); // extract the 'collectionQuery' field from the Solr result @@ -400,7 +409,6 @@ which will be used to query DataONE Solr for all the pids associated with that p log.debug("collectionQuery not found for collection id: " + collectionId); ScorerResult result = new ScorerResult(); result.setResult(pids); - result.setLabel(""); return result; } else { node = xpathResult.item(0); @@ -414,14 +422,13 @@ which will be used to query DataONE Solr for all the pids associated with that p log.debug("got collectionQuery: " + collectionQuery); } - // Extract the portal 'label' (title) + // Extract the portal 'label' fieldXpath = xpath.compile("//result/doc/str[@name='label']/text()"); xpathResult = (org.w3c.dom.NodeList) fieldXpath.evaluate(xmldoc, XPathConstants.NODESET); if(xpathResult.getLength() == 0) { - log.debug("label not found for collection id: " + collectionId); + log.debug("Title (label) not found for collection id: " + collectionId); ScorerResult result = new ScorerResult(); result.setResult(pids); - result.setLabel(""); return result; } else { node = xpathResult.item(0); @@ -457,7 +464,7 @@ which will be used to query DataONE Solr for all the pids associated with that p collectionQuery = collectionQuery.replaceAll("\\s*AND\\s*\\(-obsoletedBy:\\*\\s*AND\\s*formatType:METADATA\\)", ""); 
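Aside: the replaceAll() above is easiest to see on a concrete value. A standalone illustration (the sample collectionQuery string is invented; the pattern is the one used above, which drops the clause that would otherwise exclude obsoleted revisions from the score harvest):

public class CollectionQueryEditExample {
    public static void main(String[] args) {
        String collectionQuery =
                "(isPartOf:\"urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc\") AND (-obsoletedBy:* AND formatType:METADATA)";
        // Same pattern as above: remove the obsoletedBy/formatType restriction
        String edited = collectionQuery.replaceAll(
                "\\s*AND\\s*\\(-obsoletedBy:\\*\\s*AND\\s*formatType:METADATA\\)", "");
        System.out.println(edited);
        // Prints: (isPartOf:"urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc")
    }
}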
log.debug("Edited collectionQuery: " + collectionQuery); - // Get account information for the collection owner. The account info will be used when the 'collectionQuery' + // Get account information for the collection rightsHolder (owner). The account info will be used when the 'collectionQuery' // query is made, which will use the owner's identity and group memberships, so that the pids that are returned // from the query are the ones that the user would see when viewing their portal page. // First get the sysmeta from the collection pid, in order to determine the owner. Next, get the account info @@ -465,19 +472,21 @@ which will be used to query DataONE Solr for all the pids associated with that p // "(readPermission:"http://orcid.org/0000-0002-2192-403X") // OR (rightsHolder:"http://orcid.org/0000-0002-2192-403X")" - Subject rightsHolder = sysmeta.getRightsHolder(); + // Use the rightsHolder obtained from the Solr query + Subject subject = new Subject(); + subject.setValue(rightsHolder); // The subject info can only be obtained from a CN, so use the CN auth info for the current DataONE environment, // which should be configured in the metadig.properties file - SubjectInfo subjectInfo = getSubjectInfo(rightsHolder, CNserviceUrl, CNsubjectId, CNauthToken); + SubjectInfo subjectInfo = getSubjectInfo(subject, CNserviceUrl, CNsubjectId, CNauthToken); String groupStr = null; - groupStr = "(readPermission:" + "\"" + rightsHolder.getValue() - + "\")" + " OR (rightsHolder:\"" + rightsHolder.getValue() + "\"" + ")" + groupStr = "(readPermission:" + "\"" + rightsHolder + + "\")" + " OR (rightsHolder:\"" + rightsHolder + "\"" + ")" + " OR (readPermission:\"public\")"; - // Assemble the + // Assemble the query string that selects pids based on permissions from the rightsHolder for(Group group : subjectInfo.getGroupList()) { - log.debug("Adding group to query: " + group.getSubject().getValue()); + log.trace("Adding group to query: " + group.getSubject().getValue()); if(groupStr == null) { groupStr = "(readPermission:" + "\"" + group.getSubject().getValue() + "\")" + " OR (rightsHolder:\"" + group.getSubject().getValue() + "\"" + ")"; @@ -490,6 +499,9 @@ which will be used to query DataONE Solr for all the pids associated with that p //groupStr = "+AND+" + "(" + groupStr + ")"; //groupStr = "&fq=" + encodeValue("rightsHolder:\"CN=PASTA-GMN,O=LTER,ST=New Mexico,C=US\""); groupStr = "&fq=" + encodeValue(groupStr); + log.trace("groupStr: " + groupStr); + + // Now evaluate the collectionQuery // Send the collectionQuery string to Solr to get the pids associated with the collection // The 'collectionQuery' Solr field may have backslashes that are used to escape special characters (i.e. ":") that are not @@ -500,7 +512,8 @@ which will be used to query DataONE Solr for all the pids associated with that p int resultCount = 0; startPos = 0; countRequested = 1000; - // Now get the pids associated with the collection + // Now get the pids associated with the collection by sending the collectionQuery to the DataONE CN + // The collectionQuery is always evaluated on the CN, as portals should have all DataONE data available to them. // One query can return many documents, so use the paging mechanism to make sure we retrieve them all. // Keep paging through query results until all pids have been fetched. The last 'page' of query // results is indicated by the number of items returned being less than the number requested. 
@@ -517,11 +530,23 @@ which will be used to query DataONE Solr for all the pids associated with that p
         // Loop through the Solr result. As the result may be large, page through the results, accumulating
         // the pids returned
-        log.debug("query string: " + queryStr);
-        log.debug("Sending collectionQuery to Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl);
+        // Determine where the collectionQuery should be evaluated. When the DataONE quota service is ready, query it
+        // for this collection to determine if the collectionQuery should be sent to the CN. Since this service is
+        // not ready, send the query to the same serviceUrl, subjectId, authToken which was used to harvest the
+        // collection document and obtain the collectionQuery string
+
+        // When the service is available, use the DataONE quota service to set these variables conditionally
+        String evalServiceUrl = serviceUrl;
+        String evalSubjectId = subjectId;
+        String evalAuthToken = authToken;
+
+        log.debug("Sending collectionQuery to Solr using subjectId: " + evalSubjectId + ", serviceUrl: " + evalServiceUrl);
+        log.trace("query string: " + queryStr);
+
         do {
             //TODO: check that a result was returned
-            xmldoc = queryD1Solr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken);
+            // Note: the collectionQuery is always evaluated on the CN, so that the entire DataONE network is queried.
+            xmldoc = queryD1Solr(queryStr, evalServiceUrl, startPos, countRequested, evalSubjectId, evalAuthToken);
             if(xmldoc == null) {
                 log.info("no values returned from query");
                 break;
             }
@@ -560,12 +585,11 @@ which will be used to query DataONE Solr for all the pids associated with that p
      *
      * @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc
      * @param suiteId a MetaDIG quality suite id, e.g. "FAIR.suite.1"
-     * @param nodeId a DataONE node identifier, e.g. "urn:node:KNB"
      * @param formatFamily list of MetaDIG metadata format "families", e.g. "iso19115,eml"
-     * @param
+     * @param collectionPids the list of pids to get scores for
      * @return a List of quality scores fetched from Solr
      */
-    private List<QualityScore> getQualityScores(String collectionId, String suiteId, String nodeId, String formatFamily, ArrayList<String> collectionPids) throws Exception {
+    private List<QualityScore> getQualityScores(String collectionId, String suiteId, String formatFamily, ArrayList<String> collectionPids) throws Exception {
        // Now that we have all the pids, query the Quality Solr server for the scores for each pid associate with the project.
        // These scores will be written out to a file that will be used by the graphing routine to create a plot of the aggregated statistics.
// If a project wasn't specified, then we are not building a special query for a list of pids, so try to get the max amount @@ -596,7 +620,7 @@ private List getQualityScores(String collectionId, String suiteId, } formatFamilySearchTerm = "(" + formatFamilySearchTerm + ")"; } - log.debug("FormatFamily query term: " + formatFamilySearchTerm); + log.trace("FormatFamily query term: " + formatFamilySearchTerm); } int startPosInResult = 0; @@ -646,16 +670,14 @@ private List getQualityScores(String collectionId, String suiteId, pidsLeft -= pidCntToRequest; } while (pidsLeft > 0); } else { - log.info("Getting quality scores for suiteId: " + suiteId + ", datasource: " + nodeId + " formats: " + formatFamily); + log.info("Getting quality scores for suiteId: " + suiteId + ", datasource: " + " formats: " + formatFamily); countRequested = 1000; formatFamilySearchTerm = null; queryStr = "metadataId:*"; if(suiteId != null) { queryStr += " AND suiteId:" + "\"" + suiteId + "\""; } - if(nodeId != null) { - queryStr += " AND datasource:" + "\"" + nodeId + "\""; - } + if (formatFamilySearchTerm != null) { queryStr += " AND metadataFormatId:" + "\"" + formatFamilySearchTerm + "\""; } @@ -756,7 +778,7 @@ private void returnGraphStatus(String metadataPid, String suiteId, ScorerQueueEn log.info(" [x] Done"); this.writeCompletedQueue(message); - log.info(" [x] Sent completed report for project id: '" + qEntry.getProjectId() + "'"); + log.info(" [x] Sent completed report for project id: '" + qEntry.getCollectionId() + "'"); } catch (Exception e) { // If we couldn't prepare the message, then there is nothing left to do log.error(" Unable to return report to controller"); @@ -1004,7 +1026,7 @@ protected SystemMetadata getSystemMetadata(String pid, String serviceUrl, String */ private SubjectInfo getSubjectInfo(Subject rightsHolder, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { - log.debug("Getting subject info for: " + rightsHolder); + log.debug("Getting subject info for: " + rightsHolder.getValue()); MultipartCNode cnNode = null; MetadigProcessException metadigException = null; From edf706851728c335aa0de406c7e7686fdef614e3 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 7 Apr 2020 18:56:46 -0700 Subject: [PATCH 04/47] Add add'l debug statements to show graph creation options --- src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java index 9150df8c..3a95fd2d 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java @@ -102,6 +102,7 @@ public String create(GraphType type, String title, String inputFile) throws Exce case CUMULATIVE: mdFile.setMediaType("text/x-rsrc"); mdFile.setAltFilename("graph_" + GraphType.CUMULATIVE.toString().toLowerCase() + "_quality_scores.R"); + log.debug("Creating a " + GraphType.CUMULATIVE.toString().toLowerCase() + " graph with " + mdFile.getAltFilename()); codeFile = fileStore.getFile(mdFile); dispatcherType = "r"; @@ -109,12 +110,15 @@ public String create(GraphType type, String title, String inputFile) throws Exce case MONTHLY: mdFile.setMediaType("text/x-rsrc"); mdFile.setAltFilename("graph_" + GraphType.MONTHLY.toString().toLowerCase() + "_quality_scores.R"); + log.debug("Creating a " + GraphType.MONTHLY.toString().toLowerCase() + " graph with " + mdFile.getAltFilename()); codeFile = 
fileStore.getFile(mdFile); dispatcherType = "r"; break; } + log.debug("Graph program length: " + codeFile.length()); + // The the graph program the title of the graph // Currently we aren't putting titles on the graphs //variables.put("title", title); @@ -130,7 +134,7 @@ public String create(GraphType type, String title, String inputFile) throws Exce Result result = null; try { - log.debug("dispatching graph program "); + log.debug("dispatching graph program " + codeFile.toPath()); result = dispatcher.dispatch(variables, code); } catch (ScriptException e) { log.error("Error executing script"); From e391d9e30c4fee47f95c070dc3d4dcdd1fdd8bf7 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 7 Apr 2020 18:59:52 -0700 Subject: [PATCH 05/47] Update assessment graph R script to use most recent scores per month --- .../resources/code/graph_monthly_quality_scores.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/main/resources/code/graph_monthly_quality_scores.R b/src/main/resources/code/graph_monthly_quality_scores.R index 9d31250b..61406d93 100644 --- a/src/main/resources/code/graph_monthly_quality_scores.R +++ b/src/main/resources/code/graph_monthly_quality_scores.R @@ -37,17 +37,12 @@ scores <- mutate(fsr, ym = as.Date(sprintf("%4s-%02d-01", year(dateUploaded), mo mutate(scoreI = scoreInteroperable * 100.0) %>% mutate(scoreR = scoreReusable * 100.0) -# Use this when sequenceId problem has been resolved (github metadig-engine #232) -#most_recent <- scores %>% -# arrange(ym, sequenceId, dateUploaded) %>% -# group_by(ym, sequenceId) %>% -# top_n(1, dateUploaded) -#head(most_recent) - most_recent <- scores %>% - arrange(ym) %>% - group_by(ym) -head(most_recent) + arrange(ym, sequenceId, dateUploaded) %>% + group_by(ym, sequenceId) %>% + top_n(1, dateUploaded) + +#head(most_recent) # calculate cummulative overall score_cumulative <- most_recent %>% From 235f86609184fe9f61d1bf02494af5e3b18cff55 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 21 Apr 2020 13:33:48 -0700 Subject: [PATCH 06/47] Upgrade RabbitMQ --- Kubernetes/Admin/metadig-engine/rabbitmq.yaml | 14 +++++++------- pom.xml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Kubernetes/Admin/metadig-engine/rabbitmq.yaml b/Kubernetes/Admin/metadig-engine/rabbitmq.yaml index a0a609d5..050699b0 100644 --- a/Kubernetes/Admin/metadig-engine/rabbitmq.yaml +++ b/Kubernetes/Admin/metadig-engine/rabbitmq.yaml @@ -1,4 +1,4 @@ -apiVersion: apps/v1beta1 +apiVersion: apps/v1 kind: Deployment metadata: name: rabbitmq @@ -13,16 +13,17 @@ spec: labels: app: rabbitmq spec: - serviceAccountName: metadig-serviceaccount containers: - name: rabbitmq - image: rabbitmq:3.7 + # 3-management loads the RabbitMQ Administrative plugin with the + # most recent 3.x release. 
+ image: rabbitmq:3-management ports: - - name: rabbitmq + - name: rabbitmq containerPort: 5672 - name: rabbitmqadmin containerPort: 15672 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always restartPolicy: Always #volumes: #- name: metadig-ctl-claim0 @@ -47,5 +48,4 @@ spec: - name: rabbitmqadmin port: 15672 protocol: TCP - targetPort: rabbitmqadmin - + targetPort: rabbitmqadmin \ No newline at end of file diff --git a/pom.xml b/pom.xml index 8259da96..02139762 100644 --- a/pom.xml +++ b/pom.xml @@ -120,7 +120,7 @@ com.rabbitmq amqp-client - 5.7.3 + 5.9.0 From c41421b387bf82ca804d5934637c516540515078 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 21 Apr 2020 13:51:36 -0700 Subject: [PATCH 07/47] Add'l simplification of metadig config file --- .changed.txt.swp | Bin 0 -> 12288 bytes .../edu/ucsb/nceas/mdqengine/Controller.java | 49 +++++------------- 2 files changed, 14 insertions(+), 35 deletions(-) create mode 100644 .changed.txt.swp diff --git a/.changed.txt.swp b/.changed.txt.swp new file mode 100644 index 0000000000000000000000000000000000000000..acddfdb187b81286f488f6f674b55eafaa058797 GIT binary patch literal 12288 zcmeI&Jx>BL7zgmez~)PIazmZ3g=1kgguww8f}=^ftH+spfwo6TO!TAp6@o zIQjv6PEHU9B*vI%{!N}VZJ)mVw##);J2~!Do4n4bY%>f}%^2 z=Ts;jHW|C8K!E@RAOHafKmY;|fB*y_009X6qJYt!h-H`vtxTF}PZ?pxk;q1YmcDXq z>P%ej@*1aJHcvjMSL;IgnaDgdSag4X4Uu?1w2YEQ#Qu2pB3>9T)k=%rTgCHZFSc2_ b1RY>`W-CpsPNPWa*|bp_^JyHhk4wD*cr2gS literal 0 HcmV?d00001 diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java index 53ebf9d5..038fb74a 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java @@ -133,25 +133,20 @@ read from the port number (argv[0]) which will are the metadata and } String delims = "[,]"; String[] tokens = request.split(delims); + String nodeId = null; switch(requestType) { case "score": log.debug("Processing score request"); String collectionId = tokens[0]; - String projectName = tokens[1]; - String authTokenName = tokens[2]; - String subjectIdName = tokens[3]; - String memberNode = tokens[4]; - String serviceUrl = tokens[5]; - String formatFamily = tokens[6]; - String qualitySuiteId = tokens[7]; + nodeId = tokens[1]; + String formatFamily = tokens[2]; + String qualitySuiteId = tokens[3]; requestDateTime = new DateTime(); - log.info("Request queuing of: " + tokens[0] + ", " + tokens[1] + ", " + tokens[2] + ", " + tokens[3] + ", " + tokens[4] - + ", " + tokens[5] + "," + tokens[6]); + log.info("Request queuing of: " + tokens[0] + ", " + tokens[1] + ", " + tokens[2] + ", " + tokens[3]); - metadigCtrl.processScorerRequest(collectionId, projectName, authTokenName, subjectIdName, memberNode, serviceUrl, - formatFamily, qualitySuiteId, requestDateTime); + metadigCtrl.processScorerRequest(collectionId, nodeId, formatFamily, qualitySuiteId, requestDateTime); break; case "quality": log.debug("Processing quality request"); @@ -165,7 +160,7 @@ read from the port number (argv[0]) which will are the metadata and String suiteId = tokens[3]; requestDateTime = new DateTime(); - String nodeId = tokens[4]; + nodeId = tokens[4]; log.info("Request queuing of: " + tokens[0] + ", " + tokens[3] + ", " + tokens[4]); metadigCtrl.processQualityRequest(nodeId, metadataPid, metadata, suiteId, "/tmp", requestDateTime, sysmeta); break; @@ -375,8 +370,7 @@ public void processQualityRequest(String memberNode, *

* * @param collectionId - * @param projectName - * @param memberNode + * @param nodeId * @param formatFamily * @param qualitySuiteId * @param requestDateTime @@ -385,11 +379,7 @@ public void processQualityRequest(String memberNode, * @throws java.io.IOException */ public void processScorerRequest(String collectionId, - String projectName, - String authTokenName, - String subjectIdName, - String memberNode, - String serviceUrl, + String nodeId, String formatFamily, String qualitySuiteId, DateTime requestDateTime) throws java.io.IOException, MetadigException { @@ -399,18 +389,7 @@ public void processScorerRequest(String collectionId, byte[] message = null; String authToken = null; - if(authTokenName != null) { - try { - authToken = readConfigParam(authTokenName); - } catch (ConfigurationException ce) { - log.error("Error reading configuration for param " + "\"" + authTokenName + "\"" + ": " + ce.getMessage()); - MetadigException metadigException = new MetadigProcessException("Error reading configuration for param " + authTokenName + ": " + ce.getMessage()); - metadigException.initCause(ce); - throw metadigException; - } - } - - qEntry = new ScorerQueueEntry(collectionId, projectName, authTokenName, subjectIdName, qualitySuiteId, memberNode, serviceUrl, formatFamily, requestDateTime); + qEntry = new ScorerQueueEntry(collectionId, qualitySuiteId, nodeId, formatFamily, requestDateTime); ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutput out = new ObjectOutputStream(bos); @@ -418,7 +397,7 @@ public void processScorerRequest(String collectionId, message = bos.toByteArray(); this.writeInProcessChannel(message, SCORER_ROUTING_KEY); - log.info(" [x] Queued Scorer request for collectionld: '" + qEntry.getProjectId() + "'" + " quality suite " + qualitySuiteId); + log.info(" [x] Queued Scorer request for collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); } /** @@ -536,16 +515,16 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp completedChannel.basicAck(envelope.getDeliveryTag(), false); } - log.info(" [x] Controller received notification of completed score for: '" + qEntry.getProjectId() + "'" + ", " + + log.info(" [x] Controller received notification of completed score for: '" + qEntry.getCollectionId() + "'" + ", " + "hostsname: " + qEntry.getHostname()); - log.info("Total processing time for worker " + qEntry.getHostname() + " for PID " + qEntry.getProjectId() + ": " + qEntry.getProcessingElapsedTimeSeconds()); + log.info("Total processing time for worker " + qEntry.getHostname() + " for PID " + qEntry.getCollectionId() + ": " + qEntry.getProcessingElapsedTimeSeconds()); /* An exception caught by the worker will be passed back to the controller via the queue entry * 'exception' field. Check this now and take the appropriate action. 
*/ Exception me = qEntry.getException(); if (me instanceof MetadigException) { - log.error("Error running suite: " + qEntry.getQualitySuiteId() + ", pid: " + qEntry.getProjectId() + ", error msg: "); + log.error("Error running suite: " + qEntry.getQualitySuiteId() + ", pid: " + qEntry.getCollectionId() + ", error msg: "); log.error("\t" + me.getMessage()); Throwable thisCause = me.getCause(); if (thisCause != null) { From 654072ca9bf5ddee9a1c95b8a4cf532f466abc97 Mon Sep 17 00:00:00 2001 From: gothub Date: Wed, 13 May 2020 09:24:20 -0700 Subject: [PATCH 08/47] Update RequestReportJob.java --- .../mdqengine/scheduler/RequestReportJob.java | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index c46fb9d5..9f5d8a6d 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -56,7 +56,13 @@ public class RequestReportJob implements Job { private Log log = LogFactory.getLog(RequestReportJob.class); class ListResult { - Integer resultCount; + // The total result count returned from DataONE + Integer totalResultCount; + // The filtered result count returned from DataONE. + // The DataONE listObjects service returns all new pids for all formatIds + // but we are typically only interested in a subset of those, i.e. EML metadata pids, + // so this is the count of pids from the result that we are actually interested in. + Integer filteredResultCount; ArrayList result = new ArrayList<>(); void setResult(ArrayList result) { @@ -67,12 +73,19 @@ ArrayList getResult() { return this.result; } - void setResultCount(Integer count) { - this.resultCount = count; + void setTotalResultCount(Integer count) { + this.totalResultCount = count; + } + void setFilteredResultCount(Integer count) { + this.filteredResultCount = count; + } + + Integer getTotalResultCount() { + return this.totalResultCount; } - Integer getResultCount() { - return this.resultCount; + Integer getFilteredResultCount() { + return this.filteredResultCount; } } @@ -247,7 +260,8 @@ public void execute(JobExecutionContext context) Integer startCount = new Integer(0); ListResult result = null; - Integer resultCount = null; + Integer totalResultCount = null; + Integer filteredResultCount = null; boolean morePids = true; while(morePids) { @@ -257,14 +271,15 @@ public void execute(JobExecutionContext context) try { result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, nodeId, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); pidsToProcess = result.getResult(); - resultCount = result.getResultCount(); + totalResultCount = result.getTotalResultCount(); + filteredResultCount = result.getFilteredResultCount(); } catch (Exception e) { JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e); jee.setRefireImmediately(false); throw jee; } - log.info("Found " + resultCount + " pids" + " for node: " + nodeId); + log.info("Found " + filteredResultCount + " pids" + " for node: " + nodeId); for (String pidStr : pidsToProcess) { try { log.info("submitting pid: " + pidStr); @@ -295,10 +310,10 @@ public void execute(JobExecutionContext context) } // Check if DataONE returned the max number of results. If so, we have to request more by paging through - // the results. 
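Aside: the offset arithmetic changed in the hunk below is subtle: listObjects() returns objects of every formatId, the job keeps only the pids matching pidFilter, but the page offset must advance by the total count or later pages would re-read objects already seen. A toy, self-contained illustration (all names invented):

import java.util.ArrayList;
import java.util.List;

public class ListObjectsPagingExample {
    // Stand-in for one listObjects() page over ids start..start+count-1;
    // every third object is "metadata" and the rest get dropped by the filter.
    static List<String> page(int start, int count, int total) {
        List<String> batch = new ArrayList<>();
        for (int i = start; i < Math.min(start + count, total); i++) {
            batch.add((i % 3 == 0 ? "eml-" : "data-") + i);
        }
        return batch;
    }

    public static void main(String[] args) {
        int start = 0, requested = 1000, total = 2500;
        List<String> wanted = new ArrayList<>();
        boolean more = true;
        while (more) {
            List<String> batch = page(start, requested, total);
            int totalResultCount = batch.size();          // everything the node returned
            batch.removeIf(id -> !id.startsWith("eml-")); // keep only the filtered pids
            wanted.addAll(batch);                         // filteredResultCount = batch.size()
            if (totalResultCount >= requested) {
                start += totalResultCount;                // advance by the total, not the filtered count
            } else {
                more = false;                             // a short page is the last page
            }
        }
        System.out.println(wanted.size()); // 834 of the 2500 objects pass the filter
    }
}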
- if(resultCount >= countRequested) { + // the results returned pidsToProcess (i.e. DataONE listObjects service). + if(totalResultCount >= countRequested) { morePids = true; - startCount = startCount + resultCount; + startCount = startCount + totalResultCount; log.info("Paging through more results, current start is " + startCount); } else { morePids = false; @@ -355,6 +370,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, for(ObjectInfo oi: objList.getObjectInfoList()) { thisFormatId = oi.getFormatId().getValue(); thisPid = oi.getIdentifier().getValue(); + log.debug("Checking pid: " + thisPid + ", format: " + thisFormatId); // Check all pid filters. There could be multiple wildcard filters, which are separated // by ','. @@ -381,7 +397,11 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, } ListResult result = new ListResult(); - result.setResultCount(pidCount); + // Set the count for the number of desired pids filtered from the total result set + result.setFilteredResultCount(pidCount); + // Set the count for the total number of pids returned from DataONE (all formatIds) for this query + // Set the count for the total number of pids returned from DataONE (all formatIds) for this query + result.setTotalResultCount(objList.getCount()); result.setResult(pids); return result; From c8ff28791cc57568ac25ba1e1b3786fca51339f3 Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 13 Jul 2020 15:36:48 -0700 Subject: [PATCH 09/47] Add DataONE bookkeeper call to check portal status --- .../edu/ucsb/nceas/mdqengine/Controller.java | 79 ++++++-- .../authentication/BookkeeperClient.java | 171 ++++++++++++++++++ .../configuration/metadig.properties | 3 + .../bookkeeper/BookkeeperClientTest.java | 35 ++++ 4 files changed, 275 insertions(+), 13 deletions(-) create mode 100644 src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java create mode 100644 src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java index 038fb74a..4784fb37 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java @@ -1,12 +1,13 @@ package edu.ucsb.nceas.mdqengine; import com.rabbitmq.client.*; -import edu.ucsb.nceas.mdqengine.exception.MetadigProcessException; +import edu.ucsb.nceas.mdqengine.authentication.BookkeeperClient; import edu.ucsb.nceas.mdqengine.scorer.ScorerQueueEntry; import edu.ucsb.nceas.mdqengine.exception.MetadigException; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dataone.bookkeeper.api.Usage; import org.dataone.exceptions.MarshallingException; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.types.v2.TypeFactory; @@ -17,6 +18,8 @@ import java.lang.reflect.InvocationTargetException; import java.net.ServerSocket; import java.net.Socket; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -271,6 +274,52 @@ public void disableTestMode() { this.totalElapsedSeconds = 0; } + /** + * Query DataONE bookkeeper service to determine if a portal is active + * + *
+     * Before generating a metadata assessment graph for a portal, check
+     * if the portal is active. A portal can be marked as inactive by
+     * the portal owner, or by the bookkeeper admin if usage fees are
+     * delinquent.
+     *
+ * @param collectionId The DataONE collection identifier + * @return + * @throws MetadigException + */ + // Check the portal quota with DataONE bookkeaper + public Boolean isPortalActive(String collectionId) throws MetadigException { + // Check the portal quota with DataONE bookkeeper + log.debug("Checking bookkeeper portal Usage for collection: " + collectionId); + String msg = null; + BookkeeperClient bkClient = BookkeeperClient.getInstance(); + List usages = null; + Usage usage = null; + List subjects = new ArrayList(); + try { + if(bkClient.getBookkeeperEnabled()) { + // Set status = null so that any usage will be returned. + String status = null; + usages = bkClient.listUsages(0, collectionId, "portal", status , subjects); + usage = usages.get(0); + log.debug("Usage for portal " + collectionId + " is " + usage.getStatus()); + if(usage.getStatus().compareToIgnoreCase("active") == 0) { + return true; + } else { + return false; + } + } else { + msg = "Metadig config param 'bookkeeper.enabled is blank or missing"; + log.error(msg); + throw(new MetadigException(msg)); + } + } catch (Exception e) { + msg = "Unable to get usage from bookkeeper for collection id: " + collectionId; + log.error(msg); + throw(new MetadigException(msg)); + } + }; + /** * Forward a request to the "InProcess" queue. *

@@ -369,11 +418,11 @@ public void processQualityRequest(String memberNode, * create the graph from them. *

* - * @param collectionId - * @param nodeId - * @param formatFamily - * @param qualitySuiteId - * @param requestDateTime + * @param collectionId the DataONE collection identifier + * @param nodeId the node identifier the collection resides on + * @param formatFamily a string representing the DataONE formats to create score for + * @param qualitySuiteId the quality suite used to create the score graph + * @param requestDateTime the datetime that the request was made * * @return * @throws java.io.IOException @@ -389,15 +438,19 @@ public void processScorerRequest(String collectionId, byte[] message = null; String authToken = null; - qEntry = new ScorerQueueEntry(collectionId, qualitySuiteId, nodeId, formatFamily, requestDateTime); + if(!isPortalActive(collectionId)) { + log.info("[x} Skipping Scorer request for inactive portal, collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); + } else { + qEntry = new ScorerQueueEntry(collectionId, qualitySuiteId, nodeId, formatFamily, requestDateTime); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutput out = new ObjectOutputStream(bos); - out.writeObject(qEntry); - message = bos.toByteArray(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutput out = new ObjectOutputStream(bos); + out.writeObject(qEntry); + message = bos.toByteArray(); - this.writeInProcessChannel(message, SCORER_ROUTING_KEY); - log.info(" [x] Queued Scorer request for collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); + this.writeInProcessChannel(message, SCORER_ROUTING_KEY); + log.info(" [x] Queued Scorer request for collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); + } } /** diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java b/src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java new file mode 100644 index 00000000..c055f807 --- /dev/null +++ b/src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java @@ -0,0 +1,171 @@ +package edu.ucsb.nceas.mdqengine.authentication; + +import edu.ucsb.nceas.mdqengine.MDQconfig; +import edu.ucsb.nceas.mdqengine.exception.MetadigException; +import org.apache.commons.configuration2.ex.ConfigurationException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.http.HttpStatus; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.dataone.bookkeeper.api.Usage; +import org.dataone.bookkeeper.api.UsageList; + +import java.io.*; +import java.io.IOException; +import java.util.List; + +public class BookkeeperClient { + + private static BookkeeperClient instance; + public static Log log = LogFactory.getLog(DataONE.class); + private String bookkeeperURL = null; + private Boolean bookkeeperEnabled = true; + private String bookkeeperAuthToken = null; + + private BookkeeperClient () { + } + + /** + * Get the singleton instance of the BookKeeplerClient class + * @return the instance of the class + */ + public static BookkeeperClient getInstance() throws MetadigException { + if 
(instance == null) { + synchronized (BookkeeperClient.class) { + if (instance == null) { + instance = new BookkeeperClient(); + instance.init(); + } + } + } + return instance; + } + + /** + * Initialize a bookkeeper client and get metadig config parameters needed for interacting with + * DataONE bookkeeper service + * + * @throws MetadigException + */ + protected void init () throws MetadigException { + // Get metadig config parameter for the bookkeeper URL + + try { + bookkeeperURL = MDQconfig.readConfigParam("bookkeeper.url"); + bookkeeperEnabled = new Boolean(MDQconfig.readConfigParam("bookkeeper.enabled")); + bookkeeperAuthToken = MDQconfig.readConfigParam("bookkeeper.authToken"); + } catch (ConfigurationException | IOException e) { + throw new MetadigException("Unable to initialize DataONE bookkeeper client: " + e.getMessage()); + } + } + + /** + * Get the value that indicates whether bookkeeper quota/usage checking is enabled. + */ + public Boolean getBookkeeperEnabled() { + return(this.bookkeeperEnabled); + } + + /** + * Retrieve a bookkeeper quota usage usage + * @param id the usage database sequence identifier + * @param instanceId the usage instance identifier + * @param quotaType the usage quota type ("portal" | "storage" | ...) + * @param status the usage status ("active" | "inactive") + * @return + * @throws MetadigException + */ + public List listUsages(int id, String instanceId, String quotaType, String status, List subjects) throws MetadigException { + // Check the portal quota with DataONE bookkeeper + String serviceURL = this.bookkeeperURL; + ObjectMapper objectMapper = new ObjectMapper(); + CloseableHttpClient httpClient = HttpClients.createDefault(); + String idStr = String.valueOf(id); + + if (id > 0) { + log.debug("Getting bookkeeper portal Usage for id: " + idStr); + serviceURL += "/usages?id=" + idStr; + } else { + log.debug("Getting bookkeeper portal Usage for quotaType, instanceId, status: " + + quotaType + ", " + + instanceId + ", " + + status); + if(status != null) { + serviceURL += "/usages?quotaType=" + quotaType + "&instanceId=" + String.valueOf(instanceId) + "&status=" + status; + } else { + serviceURL += "/usages?quotaType=" + quotaType + "&instanceId=" + String.valueOf(instanceId); + } + } + + // Is bookkeeper authentication/checking enabled? + log.debug("bookkeeper checking is enabled."); + log.debug("Using serviceURL: " + serviceURL); + HttpGet httpGet = new HttpGet(serviceURL); + + String msg = null; + // Send a request to the bookkeeper service for the quota related to this portal + try { + httpGet.addHeader("Authorization", "Bearer " + bookkeeperAuthToken); + // Ask for JSON reponse + httpGet.addHeader("Accept", "application/json"); + + log.debug("Submitting request to DataONE bookkeeper: " + serviceURL); + // send the request to bookkeeper + CloseableHttpResponse httpResponse = httpClient.execute(httpGet); + // Delete the token + + // Read the response from bookkeeper + StringBuffer response = new StringBuffer(); + int statusCode = httpResponse.getStatusLine().getStatusCode(); + + // If the HTTP request returned without an error, convert the result to a JSON string, + // then deserialize to a Java object so that we can easily inspect it. 
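Aside: a standalone sketch of the deserialization step described above. The JSON shape here is an assumption for illustration (only the two fields this client actually reads), not bookkeeper's documented schema, and the nested POJOs stand in for the bookkeeper-client Usage/UsageList beans:

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;

public class UsageParseExample {
    public static class Usage {
        public String instanceId;
        public String status;
    }
    public static class UsageList {
        public List<Usage> usages;
    }

    public static void main(String[] args) throws Exception {
        // Hypothetical response body
        String json = "{\"usages\":[{\"instanceId\":\"urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc\","
                + "\"status\":\"active\"}]}";
        UsageList list = new ObjectMapper().readValue(json, UsageList.class);
        System.out.println(list.usages.get(0).status); // "active"
    }
}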
+ if(statusCode == HttpStatus.SC_OK) { + BufferedReader reader = new BufferedReader(new InputStreamReader(httpResponse.getEntity().getContent())); + String inputLine; + response = new StringBuffer(); + + while ((inputLine = reader.readLine()) != null) { + response.append(inputLine); + } + + UsageList usageList = objectMapper.readValue(response.toString(), UsageList.class); + List usages = usageList.getUsages(); + if (usages.size() == 0) { + msg = "No usages returned."; + log.error(msg); + throw(new MetadigException(msg)); + } + log.debug("Bookkeeper Usage status found for portal " + idStr + ": " + usages.get(0).getStatus()); + return(usages); + } else { + log.debug("Getting bookkeeper portal Usage for quotaType, instanceId, status: " + + quotaType + ", " + + instanceId + ", " + + status); + msg = "HTTP error status getting bookkeeper usage for id, quotaType, instanceId, status: " + idStr + ": " + + "," + quotaType + + "," + instanceId + + "," + status + + httpResponse.getStatusLine().getReasonPhrase(); + log.error(msg); + throw(new MetadigException(msg)); + } + } catch (IOException ioe) { + msg = "Error getting bookkeeper usage: " + ioe.getMessage(); + log.error(msg); + throw(new MetadigException(msg)); + } finally { + try { + httpClient.close(); + } catch (IOException e) { + log.warn("Error closing connection to bookkeeper client: " + e.getMessage()); + } + } + } +} diff --git a/src/main/resources/configuration/metadig.properties b/src/main/resources/configuration/metadig.properties index e5199e89..7a05c3e7 100644 --- a/src/main/resources/configuration/metadig.properties +++ b/src/main/resources/configuration/metadig.properties @@ -14,3 +14,6 @@ metadig.base.directory = /opt/local/metadig metadig.store.directory = /opt/local/metadig/store index.latest = false metadig.data.dir = /opt/local/metadig/data +bookkeeper.enabled = true +bookkeeper.url = https://api.dataone.org:30443/bookkeeper/v1 +bookkeeper.authToken = diff --git a/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java b/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java new file mode 100644 index 00000000..0c727c49 --- /dev/null +++ b/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java @@ -0,0 +1,35 @@ +package edu.ucsb.nceas.mdqengine.bookkeeper; + +import edu.ucsb.nceas.mdqengine.authentication.BookkeeperClient; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.bookkeeper.api.Usage; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.fail; + +public class BookkeeperClientTest { + private String instanceId = "urn:uuid3b6827b9-4641-40c5-bae8-ccb23159b300"; + protected Log log = LogFactory.getLog(this.getClass()); + + @Test + @Ignore + public void testGetUsage() { + log.debug("Checking bookkeeper portal Usage for collection: " + instanceId); + String msg = null; + try { + BookkeeperClient bkClient = BookkeeperClient.getInstance(); + List usages = null; + List subjects = new ArrayList<>(); + usages = bkClient.listUsages(0, instanceId, "portal", null, subjects); + assert(usages.get(0).getStatus().compareToIgnoreCase("active") == 0); + } catch (Exception e) { + msg = "Bookkeeper client test failed: " + e.getMessage(); + fail(msg); + } + } +} From e3221e6c7b904d971711090d7a50c263b0f11e8f Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 13 Jul 2020 15:44:14 -0700 Subject: [PATCH 10/47] Close db connections when no longer 
needed (#251) --- .../nceas/mdqengine/filestore/MetadigFileStore.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/MetadigFileStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/MetadigFileStore.java index a1b90abd..9dcb739a 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/MetadigFileStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/MetadigFileStore.java @@ -48,13 +48,16 @@ public File getFile(MetadigFile mdFile) throws MetadigFilestoreException { // First query the database to find a match based on the data in the MetadigFile entry. In this version of // the filestore, only one file should match. MetadigFile resultFile = null; - FilestoreDB fsdb = new FilestoreDB(); + FilestoreDB fsdb = null; try { + fsdb = new FilestoreDB(); resultFile = fsdb.getFileEntry(mdFile); } catch (MetadigFilestoreException mse) { log.error("Unable to get file: " + mse.getMessage()); throw mse; + } finally { + fsdb.shutdown(); } path = this.getFilePath(resultFile); @@ -70,7 +73,6 @@ public File getFile(MetadigFile mdFile) throws MetadigFilestoreException { throw metadigFilestoreException; } - fsdb.shutdown(); return storeFile; } @@ -139,16 +141,17 @@ public String saveFile(MetadigFile mdFile, FileInputStream fis, Boolean replace) log.debug("Wrote file to path: " + path); } catch (IOException ioe) { log.error("Error writing to path: " + path); + } finally { + fsdb.shutdown(); } - fsdb.shutdown(); return path; } public boolean deleteFile(MetadigFile mdFile) throws MetadigFilestoreException { String path = null; - FilestoreDB fsdb; + FilestoreDB fsdb = null; try { fsdb = new FilestoreDB(); @@ -156,6 +159,8 @@ public boolean deleteFile(MetadigFile mdFile) throws MetadigFilestoreException { } catch (MetadigFilestoreException mse) { log.error("Unable to connect to filestore database"); throw (mse); + } finally { + fsdb.shutdown(); } File fileToDelete = FileUtils.getFile(getFilePath(mdFile)); From 231022eaa0d9c7647b10b0183aacf0a5c59f41e4 Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 13 Jul 2020 16:05:16 -0700 Subject: [PATCH 11/47] Use bookkeeper-client.jar, not bookkeeper.jar (#247) --- pom.xml | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 02139762..28cab7ca 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ edu.ucsb.nceas metadig-engine - 2.2.0 + 2.3.0 jar metadig-engine @@ -12,6 +12,8 @@ https://github.com/NCEAS/metadig-engine + 0.1.0-SNAPSHOT + 2.9.8 UTF-8 2.4.0-SNAPSHOT 0.8.2242 @@ -26,8 +28,8 @@ 3.1.4.RELEASE metadig - 2.2.0 - + 2.3.0dev + **/*Test.java **/LTERSuiteTest.java @@ -39,6 +41,16 @@ http://nceas.ucsb.edu + + org.dataone + bookkeeper-client + ${bookkeeper.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + org.renjin renjin-script-engine @@ -351,17 +363,15 @@ org.apache.maven.plugins maven-surefire-plugin - 2.22.1 + 3.0.0-M3 ${modules.test.excludes} **/*IT.java - From da04b951d00a12efb1b83498349dfa419d4fe20b Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 13 Jul 2020 19:22:33 -0700 Subject: [PATCH 12/47] Add convenience function to read config parameters --- .../java/edu/ucsb/nceas/mdqengine/MDQconfig.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java b/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java index d3606828..c2840e01 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java +++ 
b/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java @@ -66,4 +66,16 @@ public String getString (String paramName) throws ConfigurationException { public int getInt(String paramName) throws ConfigurationException { return(config.getInt(paramName)); } + + public static String readConfigParam (String paramName) throws ConfigurationException, IOException { + String paramValue = null; + try { + MDQconfig cfg = new MDQconfig(); + paramValue = cfg.getString(paramName); + } catch (Exception e) { + log.error("Could not read configuration for param: " + paramName + ": " + e.getMessage()); + throw e; + } + return paramValue; + } } From 4a129b78b6c2d57ff3dc342a9b014882e3194c7c Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:21:10 -0700 Subject: [PATCH 13/47] Complete DataONE bookkeeper checks --- .../edu/ucsb/nceas/mdqengine/Controller.java | 54 +++++++++++-------- .../mdqengine/authentication/DataONE.java | 1 + .../BookkeeperClient.java | 14 +---- 3 files changed, 36 insertions(+), 33 deletions(-) rename src/main/java/edu/ucsb/nceas/mdqengine/{authentication => authorization}/BookkeeperClient.java (92%) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java index 4784fb37..d38ec9bf 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java @@ -1,7 +1,7 @@ package edu.ucsb.nceas.mdqengine; import com.rabbitmq.client.*; -import edu.ucsb.nceas.mdqengine.authentication.BookkeeperClient; +import edu.ucsb.nceas.mdqengine.authorization.BookkeeperClient; import edu.ucsb.nceas.mdqengine.scorer.ScorerQueueEntry; import edu.ucsb.nceas.mdqengine.exception.MetadigException; import org.apache.commons.configuration2.ex.ConfigurationException; @@ -56,6 +56,7 @@ public class Controller { // where metadig-controller and the RabbitMQ server are running in containers that belong // to the same Pod. These defaults will be used if the properties file cannot be read. // These values are read from a config file, see class 'MDQconfig' + private static Boolean bookkeeperEnabled = false; private static String RabbitMQhost = null; private static int RabbitMQport = 0; private static String RabbitMQpassword = null; @@ -246,6 +247,7 @@ public void readConfig () throws ConfigurationException, IOException { RabbitMQusername = cfg.getString("RabbitMQ.username"); RabbitMQhost = cfg.getString("RabbitMQ.host"); RabbitMQport = cfg.getInt("RabbitMQ.port"); + bookkeeperEnabled = new Boolean(cfg.getString("bookkeeper.enabled")); } @@ -297,25 +299,18 @@ public Boolean isPortalActive(String collectionId) throws MetadigException { Usage usage = null; List subjects = new ArrayList(); try { - if(bkClient.getBookkeeperEnabled()) { - // Set status = null so that any usage will be returned. - String status = null; - usages = bkClient.listUsages(0, collectionId, "portal", status , subjects); - usage = usages.get(0); - log.debug("Usage for portal " + collectionId + " is " + usage.getStatus()); - if(usage.getStatus().compareToIgnoreCase("active") == 0) { - return true; - } else { - return false; - } + // Set status = null so that any usage will be returned. 
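Aside: a usage sketch for the readConfigParam() helper added in PATCH 12 above; the property name is one that appears in metadig.properties, and the error handling mirrors the checked exceptions the method declares:

import edu.ucsb.nceas.mdqengine.MDQconfig;
import org.apache.commons.configuration2.ex.ConfigurationException;
import java.io.IOException;

public class ConfigParamExample {
    public static void main(String[] args) {
        try {
            // One-line lookup instead of constructing an MDQconfig instance by hand
            String bookkeeperUrl = MDQconfig.readConfigParam("bookkeeper.url");
            System.out.println("bookkeeper.url = " + bookkeeperUrl);
        } catch (ConfigurationException | IOException e) {
            // readConfigParam logs and rethrows, so callers still decide how to fail
            System.err.println("Could not read config: " + e.getMessage());
        }
    }
}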
+ String status = null; + usages = bkClient.listUsages(0, collectionId, "portal", status , subjects); + usage = usages.get(0); + log.debug("Usage for portal " + collectionId + " is " + usage.getStatus()); + if(usage.getStatus().compareToIgnoreCase("active") == 0) { + return true; } else { - msg = "Metadig config param 'bookkeeper.enabled is blank or missing"; - log.error(msg); - throw(new MetadigException(msg)); + return false; } } catch (Exception e) { msg = "Unable to get usage from bookkeeper for collection id: " + collectionId; - log.error(msg); throw(new MetadigException(msg)); } }; @@ -431,16 +426,29 @@ public void processScorerRequest(String collectionId, String nodeId, String formatFamily, String qualitySuiteId, - DateTime requestDateTime) throws java.io.IOException, MetadigException { + DateTime requestDateTime) throws java.io.IOException { log.info("Processing scorer request, collection: " + collectionId + ", suite: " + qualitySuiteId); ScorerQueueEntry qEntry = null; byte[] message = null; - String authToken = null; - if(!isPortalActive(collectionId)) { - log.info("[x} Skipping Scorer request for inactive portal, collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); - } else { + /** + * Bookkeeper checking can be disabled via a metadig-engine configuration parameter. The primary use case for + * doing this is for testing purposes, otherwise checking should always be enabled. + */ + if (bookkeeperEnabled) { + try { + if (!isPortalActive(collectionId)) { + log.info("Skipping Scorer request for inactive portal with pid: '" + collectionId + "'" + ", quality suite " + qualitySuiteId); + return; + } + } catch (MetadigException me) { + log.error("Unable to contact DataONE bookkeeper: " + me.getMessage() + + "\nSkipping Scorer request for portal with pid: '" + collectionId + + "'" + ", quality suite " + qualitySuiteId); + return; + } + qEntry = new ScorerQueueEntry(collectionId, qualitySuiteId, nodeId, formatFamily, requestDateTime); ByteArrayOutputStream bos = new ByteArrayOutputStream(); @@ -450,6 +458,10 @@ public void processScorerRequest(String collectionId, this.writeInProcessChannel(message, SCORER_ROUTING_KEY); log.info(" [x] Queued Scorer request for collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); + } else { + log.info("Skipping Scorer request for portal, collectionld: '" + collectionId + + "'" + ", quality suite " + qualitySuiteId + + "\n as DataONE bookkeeper service is disabled via metadig-engine configuration."); } } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java index 2cd1af4d..d5e8b73a 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java @@ -35,6 +35,7 @@ public static Session getSession(String subjectId, String authToken) { Subject subject = new Subject(); subject.setValue(subjectId); session.setSubject(subject); + log.debug("Set session subjectId to: " + session.getSubject().getValue()); } return session; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java b/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java similarity index 92% rename from src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java 
rename to src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java index c055f807..d0475163 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/BookkeeperClient.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java @@ -1,6 +1,7 @@ -package edu.ucsb.nceas.mdqengine.authentication; +package edu.ucsb.nceas.mdqengine.authorization; import edu.ucsb.nceas.mdqengine.MDQconfig; +import edu.ucsb.nceas.mdqengine.authentication.DataONE; import edu.ucsb.nceas.mdqengine.exception.MetadigException; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.logging.Log; @@ -24,7 +25,6 @@ public class BookkeeperClient { private static BookkeeperClient instance; public static Log log = LogFactory.getLog(DataONE.class); private String bookkeeperURL = null; - private Boolean bookkeeperEnabled = true; private String bookkeeperAuthToken = null; private BookkeeperClient () { @@ -57,20 +57,12 @@ protected void init () throws MetadigException { try { bookkeeperURL = MDQconfig.readConfigParam("bookkeeper.url"); - bookkeeperEnabled = new Boolean(MDQconfig.readConfigParam("bookkeeper.enabled")); bookkeeperAuthToken = MDQconfig.readConfigParam("bookkeeper.authToken"); } catch (ConfigurationException | IOException e) { throw new MetadigException("Unable to initialize DataONE bookkeeper client: " + e.getMessage()); } } - /** - * Get the value that indicates whether bookkeeper quota/usage checking is enabled. - */ - public Boolean getBookkeeperEnabled() { - return(this.bookkeeperEnabled); - } - /** * Retrieve a bookkeeper quota usage usage * @param id the usage database sequence identifier @@ -102,8 +94,6 @@ public List listUsages(int id, String instanceId, String quotaType, Strin } } - // Is bookkeeper authentication/checking enabled? - log.debug("bookkeeper checking is enabled."); log.debug("Using serviceURL: " + serviceURL); HttpGet httpGet = new HttpGet(serviceURL); From d13bce52b9344ba07f254fbdc99a68925c701a36 Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:23:34 -0700 Subject: [PATCH 14/47] Retrieve graphs, csv based on pid, storage type, media only --- .../java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java index 5a5d17cd..1ea8d7ed 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java @@ -106,8 +106,7 @@ public MetadigFile getFileEntry(MetadigFile mdFile) throws MetadigFilestoreExcep stmt.setString(1, storageType); stmt.setString(2, altFilename); } else { - sql = "select * from filestore where pid = ? and suite_id = ?" + - " and node_id = ? and format_filter = ? and storage_type = ? and media_type = ?"; + sql = "select * from filestore where pid = ? and storage_type = ? 
and media_type = ?"; stmt = conn.prepareStatement(sql); stmt.setString(1, pid); stmt.setString(2, suiteId); From 73b008cb6311f07b60bd68f820e329c3ec2c560b Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:27:08 -0700 Subject: [PATCH 15/47] Code cleanup --- .../java/edu/ucsb/nceas/mdqengine/Worker.java | 1 - .../ucsb/nceas/mdqengine/scorer/Scorer.java | 33 +++++-------------- .../nceas/mdqengine/store/InMemoryStore.java | 1 - .../ucsb/nceas/mdqengine/store/MDQStore.java | 1 - .../ucsb/nceas/mdqengine/store/MNStore.java | 1 - .../bookkeeper/BookkeeperClientTest.java | 2 +- .../mdqengine/filestore/FilestoreTestIT.java | 2 +- 7 files changed, 11 insertions(+), 30 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java b/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java index 5e267dce..7cd516bb 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java @@ -242,7 +242,6 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp /* Once the quality report has been created and saved to persistent storage, it can be added to the Solr index */ if(!failFast) { - MDQStore dbstore = null; log.debug("Indexing report"); try { startTimeIndexing = System.currentTimeMillis(); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index ac8cfbd1..e160635f 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -512,6 +512,7 @@ which will be used to query DataONE Solr for all the pids associated with that p int resultCount = 0; startPos = 0; countRequested = 1000; + // Now get the pids associated with the collection by sending the collectionQuery to the DataONE CN // The collectionQuery is always evaluated on the CN, as portals should have all DataONE data available to them. // One query can return many documents, so use the paging mechanism to make sure we retrieve them all. @@ -530,23 +531,17 @@ which will be used to query DataONE Solr for all the pids associated with that p // Loop through the Solr result. As the result may be large, page through the results, accumulating // the pids returned - // Determine where the collectionQuery should be evaluated. When the DataONE quata service is ready, query it - // for this collection to determine if the collectionQuery should be sent to the CN. Since this service is - // not ready, send the query to the same serviceUrl, subjectId, authToken which was used to harvest the - // collection document and obtain the collectionQuery string - - // When the service is available, use the DataONE quota service to set these variable conditionally - String evalServiceUrl = serviceUrl; - String evalSubjectId = subjectId; - String evalAuthToken = authToken; + /** The collectionQuery is evaluated on the same node that the portal document was harvested from (via the + * DataONE listObjects service. This node could either be an MN or CN. + */ - log.debug("Sending collectionQuery to Solr using subjectId: " + evalSubjectId + ", servicerUrl: " + evalServiceUrl); - log.trace("query string: " + queryStr); + log.debug("Sending collectionQuery to Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl); + log.debug("query string: " + queryStr); do { //TODO: check that a result was returned // Note: the collectionQuery is always evaluated on the CN, so that the entire DataONE network is queried. 
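Stepping back, this do-loop condenses to the following paging pattern (a sketch, not the verbatim code; the identifiers come from the surrounding method, the page size of 1000 is the countRequested value set earlier, and the real loop exits when a page comes back empty):

    // Condensed sketch of the collectionQuery paging loop.
    int startPos = 0;
    final int countRequested = 1000;          // page size set earlier in this method
    while (true) {
        Document xmldoc = queryD1Solr(queryStr, serviceUrl, startPos,
                countRequested, subjectId, authToken);
        if (xmldoc == null) {
            break;                            // no (more) results returned
        }
        // ... extract the pids from xmldoc and accumulate them ...
        startPos += countRequested;           // advance to the next page
    }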
- xmldoc = queryD1Solr(queryStr, evalServiceUrl, startPos, countRequested, evalSubjectId, evalAuthToken); + xmldoc = queryD1Solr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); if(xmldoc == null) { log.info("no values returned from query"); break; @@ -802,12 +797,6 @@ private Document queryD1Solr(String queryStr, String serviceUrl, int startPos, i MultipartRestClient mrc = null; // Polymorphism doesn't work with D1 node classes, so have to use the derived classes MultipartD1Node d1Node = null; -// -// Subject subject = new Subject(); -// if(subjectId != null && !subjectId.isEmpty()) { -// subject.setValue(subjectId); -// } - Session session = DataONE.getSession(subjectId, authToken); // Add the start and count, if pagination is being used @@ -818,6 +807,7 @@ private Document queryD1Solr(String queryStr, String serviceUrl, int startPos, i try { d1Node = getMultipartD1Node(session, serviceUrl); + log.debug("Created MultipartD1Node: " + d1Node.toString()); } catch (Exception ex) { log.error("Unable to create MultipartD1Node for Solr query"); metadigException = new MetadigProcessException("Unable to create multipart node client to query DataONE solr: " + ex.getMessage()); @@ -1031,11 +1021,6 @@ private SubjectInfo getSubjectInfo(Subject rightsHolder, String serviceUrl, Stri MetadigProcessException metadigException = null; SubjectInfo subjectInfo = null; - //Subject requestingSubject = new Subject(); -// if(subjectId != null && ! subjectId.isEmpty()) { -// requestingSubject.setValue(subjectId); -// } - Session session = DataONE.getSession(subjectId, authToken); // Identity node as either a CN or MN based on the serviceUrl @@ -1100,7 +1085,7 @@ MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws Me log.debug("creating cn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); d1Node = new MultipartCNode(mrc, serviceUrl, session); } else { - log.debug("creating mn MultipartMNode" + " , subjectId: " + session.getSubject().getValue()); + log.debug("creating mn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); d1Node = new MultipartMNode(mrc, serviceUrl, session); } return d1Node; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java index cf3c0ffa..44bb386c 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java @@ -1,6 +1,5 @@ package edu.ucsb.nceas.mdqengine.store; -import com.sun.javafx.scene.control.skin.TableCellSkin; import edu.ucsb.nceas.mdqengine.MDQconfig; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.*; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java index 013b1a77..fbef0bc3 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java @@ -1,6 +1,5 @@ package edu.ucsb.nceas.mdqengine.store; -import com.sun.javafx.scene.control.skin.TableCellSkin; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.*; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java index 34c0efef..ec7a2772 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java +++ 
b/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java @@ -1,6 +1,5 @@ package edu.ucsb.nceas.mdqengine.store; -import com.sun.javafx.scene.control.skin.TableCellSkin; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.*; import edu.ucsb.nceas.mdqengine.serialize.XmlMarshaller; diff --git a/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java b/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java index 0c727c49..7e6c4a88 100644 --- a/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java +++ b/src/test/java/edu/ucsb/nceas/mdqengine/bookkeeper/BookkeeperClientTest.java @@ -1,6 +1,6 @@ package edu.ucsb.nceas.mdqengine.bookkeeper; -import edu.ucsb.nceas.mdqengine.authentication.BookkeeperClient; +import edu.ucsb.nceas.mdqengine.authorization.BookkeeperClient; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.bookkeeper.api.Usage; diff --git a/src/test/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreTestIT.java b/src/test/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreTestIT.java index bac92f10..0d4c8b05 100644 --- a/src/test/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreTestIT.java +++ b/src/test/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreTestIT.java @@ -42,7 +42,7 @@ public void saveFile() throws IOException, MetadigFilestoreException { MetadigFile mdFile = new MetadigFile(); mdFile.setCreationDatetime(DateTime.now()); - mdFile.setCollectionId("1234"); + mdFile.setPid("1234"); mdFile.setSuiteId("FAIR.suite.1"); mdFile.setNodeId("urn:node:KNB"); mdFile.setStorageType(StorageType.TMP.toString()); From b961bdf906c8eb898c1ae8f6eb29dfb7ce8b26ef Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:27:41 -0700 Subject: [PATCH 16/47] Update sql init scripts --- src/main/resources/sql/quality-v2.3.0.sql | 3 ++- src/main/resources/sql/update_to_v2.3.0.sql | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/resources/sql/quality-v2.3.0.sql b/src/main/resources/sql/quality-v2.3.0.sql index 964af51d..3c4e7dfb 100644 --- a/src/main/resources/sql/quality-v2.3.0.sql +++ b/src/main/resources/sql/quality-v2.3.0.sql @@ -56,7 +56,8 @@ create TABLE filestore ( media_type TEXT not NULL, alt_filename TEXT not NULL, CONSTRAINT file_id_pk PRIMARY KEY (file_id), - CONSTRAINT all_properties_fk UNIQUE (pid, suite_id, node_id, format_filter, storage_type, media_type, alt_filename) + -- CONSTRAINT all_properties_fk UNIQUE (pid, suite_id, node_id, format_filter, storage_type, media_type, alt_filename) + CONSTRAINT all_properties_fk UNIQUE (pid, storage_type, media_type, alt_filename) ); alter table filestore owner to metadig; diff --git a/src/main/resources/sql/update_to_v2.3.0.sql b/src/main/resources/sql/update_to_v2.3.0.sql index 5f6c4de1..90fe8da3 100644 --- a/src/main/resources/sql/update_to_v2.3.0.sql +++ b/src/main/resources/sql/update_to_v2.3.0.sql @@ -1,5 +1,6 @@ -ALTER TABLE filestore DROP CONSTRAINT all_properties_fk; +ALTER TABLE filestore DROP CONSTRAINT IF EXISTS all_properties_fk; ALTER TABLE filestore RENAME COLUMN collection_id to pid; ALTER TABLE filestore DROP column metadata_id; -ALTER TABLE filestore ADD CONSTRAINT all_properties_fk UNIQUE (pid, suite_id, node_id, format_filter, storage_type, media_type, alt_filename); +-- ALTER TABLE filestore ADD CONSTRAINT all_properties_fk UNIQUE (pid, suite_id, node_id, format_filter, storage_type, media_type, alt_filename); +ALTER TABLE 
filestore ADD CONSTRAINT all_properties_fk UNIQUE (pid, storage_type, media_type, alt_filename); From 0720f498ffbef6c8f203cf4900f2e193371126fb Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:29:01 -0700 Subject: [PATCH 17/47] Update debug logging conf --- pom.xml | 25 ++++----- src/main/resources/commons-logging.properties | 5 ++ src/main/resources/log4j.properties | 52 +++++++++---------- 3 files changed, 41 insertions(+), 41 deletions(-) create mode 100644 src/main/resources/commons-logging.properties diff --git a/pom.xml b/pom.xml index 28cab7ca..53f787f0 100644 --- a/pom.xml +++ b/pom.xml @@ -117,17 +117,6 @@ ${d1_libclient_java.version} jar - - org.slf4j - slf4j-simple - 1.7.25 - - - - org.slf4j - slf4j-api - 1.7.25 - com.rabbitmq @@ -144,6 +133,16 @@ org.apache.solr solr-solrj 7.3.0 + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + org.apache.solr @@ -170,10 +169,6 @@ ${d1_cn_index_processor_version} jar - - org.slf4j - slf4j-log4j12 - com.hp.hpl.jena jena diff --git a/src/main/resources/commons-logging.properties b/src/main/resources/commons-logging.properties new file mode 100644 index 00000000..a13fe14c --- /dev/null +++ b/src/main/resources/commons-logging.properties @@ -0,0 +1,5 @@ +org.apache.commons.logging.LogFactory=org.apache.commons.logging.impl.LogFactoryImpl +org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger +log4j.configuration=log4j.properties + +#org.apache.commons.logging.Log=org.apache.commons.logging.impl.SimpleLog diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties index b9d51737..4050b437 100755 --- a/src/main/resources/log4j.properties +++ b/src/main/resources/log4j.properties @@ -1,31 +1,31 @@ +#log4j.rootLogger=ALL, stdout, warnStdout +log4j.rootLogger=ALL, stdout -# set the log level to WARN and the log should be printed to stdout. -log4j.rootLogger=WARN, stderr -#log4j.threshold=FATAL, ERROR, WARN, INFO +# configure stdout +# set the conversion pattern of stdout +# Print the date in ISO 8601 format -### LOGGING TO CONSOLE ######################################################### -log4j.appender.stderr=org.apache.log4j.ConsoleAppender -log4j.appender.stderr.layout=org.apache.log4j.PatternLayout +#This will be used to print WARN level or higher messages to console +#log4j.appender.warnStdout=org.apache.log4j.ConsoleAppender +#log4j.appender.warnStdout.layout=org.apache.log4j.PatternLayout +#log4j.appender.warnStdout.Threshold=WARN -# define the pattern to be used in the logs... -log4j.appender.stderr.layout.ConversionPattern=%d{yyyyMMdd-HH:mm:ss}: [%p]: %m [%c]%n +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Threshold = DEBUG +#log4j.appender.stdout.Target = System.out +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +#log4j.appender.stdout.layout.ConversionPattern = %-5p %d [%t][%F:%L] : %m%n +log4j.appender.stdout.layout.ConversionPattern = %-5p %d [%F:%L] : %m%n +#log4j.appender.stdout.filter.filter1=org.apache.log4j.varia.LevelRangeFilter +#log4j.appender.stdout.filter.filter1.levelMin=INFO +#log4j.appender.stdout.filter.filter1.levelMax=WARN -# %p -> priority level of the event - (e.g. WARN) -# %m -> message to be printed -# %c -> category name ... in this case name of the class -# %d -> Used to output the date of the logging event. example, %d{HH:mm:ss,SSS} or %d{dd MMM yyyy HH:mm:ss,SSS}. Default format is ISO8601 format -# %M -> print the method name where the event was generated ... 
can be extremely slow. -# %L -> print the line number of the event generated ... can be extremely slow. -# %t -> Used to output the name of the thread that generated the log event -# %n -> carriage return - -################################################################################ -# EXAMPLE: Print only messages of level WARN or above in the package com.foo: -log4j.logger.edu.ucsb.nceas=INFO -#log4j.logger.edu.ucsb.nceas=DEBUG -#log4j.logger.com.hp.hpl.jena=WARN -log4j.logger.org.dataone.ore=ERROR -log4j.logger.org.dataone.client=ERROR -#log4j.logger.org.apache.http=DEBUG -org.dataone.client.auth=ERROR +# Classes in the my.project package will accept messages of INFO level or higher +# and send those messages to the console and to the log file +log4j.logger.org.edu.ucsb.nceas=DEBUG, stdout +log4j.logger.org.apache=WARN, stdout +log4j.logger.org.dataone=WARN, stdout +# Need to set additivity to false or else both the my.project and root loggers +# will accept messages from classes in package my.project +#log4j.additivity.org.edu.ucsb.nceas \ No newline at end of file From 370f4762efb0e03b4b65bbe54da90a872636c373 Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:29:38 -0700 Subject: [PATCH 18/47] Exclude unneeded jars from build --- pom.xml | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 53f787f0..c71130a1 100644 --- a/pom.xml +++ b/pom.xml @@ -22,8 +22,7 @@ 2.6.3 2.4.0-SNAPSHOT 2.4.0-SNAPSHOT - + 3.1.4.RELEASE @@ -41,10 +40,84 @@ http://nceas.ucsb.edu + + + commons-logging + commons-logging + 1.2 + + + + org.apache.logging.log4j + log4j-core + 2.13.3 + + + + org.apache.logging.log4j + log4j-api + 2.13.3 + + + + + + + + + + + org.dataone bookkeeper-client ${bookkeeper.version} + + + io.dropwizard + dropwizard-core + + + io.dropwizard + dropwizard-json-logging + + + io.dropwizard + dropwizard-testing + + + io.dropwizard + dropwizard-jdbi3 + + + io.dropwizard + dropwizard-auth + + + org.postgresql + postgresql + + + com.opentable.components + otj-pg-embedded + + + com.opentable.components + otj-pg-embedded + + + org.flywaydb + flyway-maven-plugin + + + org.mockito + mockito-core + + + org.dataone + d1_libclient_java + + com.fasterxml.jackson.core From ff6524a85e0496c1aa0a4a5e0251a725a1f24831 Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 17 Jul 2020 14:30:38 -0700 Subject: [PATCH 19/47] remove unneeded, obsolete files --- src/test/resources/log4j.properties | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100755 src/test/resources/log4j.properties diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties deleted file mode 100755 index 61ead9c3..00000000 --- a/src/test/resources/log4j.properties +++ /dev/null @@ -1,28 +0,0 @@ - -# set the log level to WARN and the log should be printed to stdout. -log4j.rootLogger=WARN, stderr -#log4j.threshold=FATAL, ERROR, WARN, INFO - - -### LOGGING TO CONSOLE ######################################################### -log4j.appender.stderr=org.apache.log4j.ConsoleAppender -log4j.appender.stderr.layout=org.apache.log4j.PatternLayout - -# define the pattern to be used in the logs... -log4j.appender.stderr.layout.ConversionPattern=%d{yyyyMMdd-HH:mm:ss}: [%p]: %m [%c]%n - -# %p -> priority level of the event - (e.g. WARN) -# %m -> message to be printed -# %c -> category name ... in this case name of the class -# %d -> Used to output the date of the logging event. 
example, %d{HH:mm:ss,SSS} or %d{dd MMM yyyy HH:mm:ss,SSS}. Default format is ISO8601 format -# %M -> print the method name where the event was generated ... can be extremely slow. -# %L -> print the line number of the event generated ... can be extremely slow. -# %t -> Used to output the name of the thread that generated the log event -# %n -> carriage return - -################################################################################ -# EXAMPLE: Print only messages of level WARN or above in the package com.foo: -log4j.logger.edu.ucsb.nceas=DEBUG -#log4j.logger.com.hp.hpl.jena=WARN -#log4j.logger.org.dataone.ore=INFO -#log4j.logger.org.apache.http=DEBUG From 01a01546d1e44c19b771267e260a1b2bad542973 Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 24 Jul 2020 10:06:09 -0700 Subject: [PATCH 20/47] Use bookkeeper `/usages/status` vs `/usages` (getStatus() vs listUsages()) (#247) --- .../edu/ucsb/nceas/mdqengine/Controller.java | 19 ++--- .../authorization/BookkeeperClient.java | 75 +++++++++++++++++++ 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java index d38ec9bf..aaad57f6 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java @@ -8,6 +8,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.bookkeeper.api.Usage; +import org.dataone.bookkeeper.api.UsageStatus; import org.dataone.exceptions.MarshallingException; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.types.v2.TypeFactory; @@ -18,7 +19,6 @@ import java.lang.reflect.InvocationTargetException; import java.net.ServerSocket; import java.net.Socket; -import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -72,6 +72,8 @@ public class Controller { public static void main(String[] argv) throws Exception { + //System.setProperty("lo4j2.debug", "true"); + //System.setProperty("log4j.configurationFile", "log4j2.xml"); Controller metadigCtrl = Controller.getInstance(); metadigCtrl.start(); if (metadigCtrl.getIsStarted()) { @@ -292,25 +294,24 @@ public void disableTestMode() { // Check the portal quota with DataONE bookkeaper public Boolean isPortalActive(String collectionId) throws MetadigException { // Check the portal quota with DataONE bookkeeper - log.debug("Checking bookkeeper portal Usage for collection: " + collectionId); + log.info("Checking bookkeeper portal Usage for collection: " + collectionId); String msg = null; BookkeeperClient bkClient = BookkeeperClient.getInstance(); List usages = null; - Usage usage = null; - List subjects = new ArrayList(); + UsageStatus usageStatus = null; try { // Set status = null so that any usage will be returned. 
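Taken together, the new flow in isPortalActive condenses to the sketch below; the seriesId value is illustrative, and getUsageStatus is the wrapper added to BookkeeperClient later in this patch:

    import edu.ucsb.nceas.mdqengine.authorization.BookkeeperClient;
    import edu.ucsb.nceas.mdqengine.exception.MetadigException;
    import org.dataone.bookkeeper.api.UsageStatus;

    public class PortalStatusExample {
        public static void main(String[] args) throws MetadigException {
            // Illustrative portal seriesId; any instanceId known to bookkeeper works.
            String seriesId = "urn:uuid:00000000-0000-0000-0000-000000000000";
            UsageStatus status = BookkeeperClient.getInstance()
                    .getUsageStatus(seriesId, "portal");
            boolean active = status.getStatus().compareToIgnoreCase("active") == 0;
            System.out.println("portal active: " + active);
        }
    }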
String status = null; - usages = bkClient.listUsages(0, collectionId, "portal", status , subjects); - usage = usages.get(0); - log.debug("Usage for portal " + collectionId + " is " + usage.getStatus()); - if(usage.getStatus().compareToIgnoreCase("active") == 0) { + //usages = bkClient.listUsages(0, collectionId, "portal", status , subjects); + usageStatus = bkClient.getUsageStatus(collectionId, "portal"); + log.info("Usage status for portal " + collectionId + " is " + usageStatus.getStatus()); + if(usageStatus.getStatus().compareToIgnoreCase("active") == 0) { return true; } else { return false; } } catch (Exception e) { - msg = "Unable to get usage from bookkeeper for collection id: " + collectionId; + msg = "Unable to get usage status from bookkeeper for collection id: " + collectionId; throw(new MetadigException(msg)); } }; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java b/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java index d0475163..9dd246a8 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java @@ -15,6 +15,7 @@ import org.apache.http.impl.client.HttpClients; import org.dataone.bookkeeper.api.Usage; import org.dataone.bookkeeper.api.UsageList; +import org.dataone.bookkeeper.api.UsageStatus; import java.io.*; import java.io.IOException; @@ -158,4 +159,78 @@ public List listUsages(int id, String instanceId, String quotaType, Strin } } } + /** + * Retrieve a bookkeeper quota usage usage + * @param instanceId the usage instance identifier + * @param quotaType the usage quota type ("portal" | "storage" | ...) + * @return + * @throws MetadigException + */ + public UsageStatus getUsageStatus(String instanceId, String quotaType) throws MetadigException { + // Check the portal quota with DataONE bookkeeper + String serviceURL = this.bookkeeperURL; + ObjectMapper objectMapper = new ObjectMapper(); + CloseableHttpClient httpClient = HttpClients.createDefault(); + + log.debug("Getting bookkeeper portal Usage for quotaType, instanceId: " + + quotaType + ", " + instanceId); + serviceURL += "/usages/status?quotaType=" + quotaType + "&instanceId=" + String.valueOf(instanceId); + + log.debug("Using serviceURL: " + serviceURL); + HttpGet httpGet = new HttpGet(serviceURL); + + String msg = null; + // Send a request to the bookkeeper service for the quota related to this portal + try { + httpGet.addHeader("Accept", "application/json"); + + log.debug("Submitting request to DataONE bookkeeper: " + serviceURL); + // send the request to bookkeeper + CloseableHttpResponse httpResponse = httpClient.execute(httpGet); + // Delete the token + + // Read the response from bookkeeper + StringBuffer response = new StringBuffer(); + int statusCode = httpResponse.getStatusLine().getStatusCode(); + + // If the HTTP request returned without an error, convert the result to a JSON string, + // then deserialize to a Java object so that we can easily inspect it. 
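The comment above describes a plain Jackson bind. In isolation, and assuming a response body shaped the way the surrounding code expects (only the "status" field is inspected; the body shown is illustrative), the step is just:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.dataone.bookkeeper.api.UsageStatus;

    public class UsageStatusBindExample {
        public static void main(String[] args) throws Exception {
            // Assumed/illustrative response body from /usages/status.
            String json = "{\"status\":\"active\"}";
            UsageStatus status = new ObjectMapper().readValue(json, UsageStatus.class);
            System.out.println(status.getStatus());  // -> active
        }
    }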
+ if(statusCode == HttpStatus.SC_OK) { + BufferedReader reader = new BufferedReader(new InputStreamReader(httpResponse.getEntity().getContent())); + String inputLine; + response = new StringBuffer(); + + while ((inputLine = reader.readLine()) != null) { + response.append(inputLine); + } + + UsageStatus usageStatus = objectMapper.readValue(response.toString(), UsageStatus.class); + if (usageStatus == null) { + msg = "No usage status returned."; + log.error(msg); + throw(new MetadigException(msg)); + } + log.debug("Bookkeeper Usage status found for portal " + instanceId + usageStatus.getStatus()); + return(usageStatus); + } else { + log.debug("Getting bookkeeper portal usage status for quotaType, instanceId, status: " + + quotaType + ", " + instanceId); + msg = "HTTP error status getting bookkeeper usage status for quotaType, instanceId:" + + quotaType + "," + instanceId; + httpResponse.getStatusLine().getReasonPhrase(); + log.error(msg); + throw(new MetadigException(msg)); + } + } catch (IOException ioe) { + msg = "Error getting bookkeeper usage status: " + ioe.getMessage(); + log.error(msg); + throw(new MetadigException(msg)); + } finally { + try { + httpClient.close(); + } catch (IOException e) { + log.warn("Error closing connection to bookkeeper client: " + e.getMessage()); + } + } + } } From 7adb869b64020a5c171760361365791d5273798c Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 24 Jul 2020 10:07:06 -0700 Subject: [PATCH 21/47] Update debug logging --- src/main/resources/commons-logging.properties | 6 +-- src/main/resources/log4j.properties | 52 +++++++++---------- 2 files changed, 29 insertions(+), 29 deletions(-) mode change 100755 => 100644 src/main/resources/log4j.properties diff --git a/src/main/resources/commons-logging.properties b/src/main/resources/commons-logging.properties index a13fe14c..b2b80118 100644 --- a/src/main/resources/commons-logging.properties +++ b/src/main/resources/commons-logging.properties @@ -1,5 +1,5 @@ -org.apache.commons.logging.LogFactory=org.apache.commons.logging.impl.LogFactoryImpl +# Explicitly set the Apache Commons Logging (JCL) implementation to log4j. +# JCL will attempt to discover an implementation if one is not specified, so +# make sure log4j is used. org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger log4j.configuration=log4j.properties - -#org.apache.commons.logging.Log=org.apache.commons.logging.impl.SimpleLog diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties old mode 100755 new mode 100644 index 4050b437..1485317d --- a/src/main/resources/log4j.properties +++ b/src/main/resources/log4j.properties @@ -1,31 +1,31 @@ -#log4j.rootLogger=ALL, stdout, warnStdout -log4j.rootLogger=ALL, stdout +# set the log level to WARN and the log should be printed to stdout. +log4j.rootLogger=DEBUG, stderr +#log4j.threshold=FATAL, ERROR, WARN, INFO -# configure stdout -# set the conversion pattern of stdout -# Print the date in ISO 8601 format +### LOGGING TO CONSOLE ######################################################### +log4j.appender.stderr=org.apache.log4j.ConsoleAppender +log4j.appender.stderr.layout=org.apache.log4j.PatternLayout -#This will be used to print WARN level or higher messages to console -#log4j.appender.warnStdout=org.apache.log4j.ConsoleAppender -#log4j.appender.warnStdout.layout=org.apache.log4j.PatternLayout -#log4j.appender.warnStdout.Threshold=WARN +# define the pattern to be used in the logs... 
+log4j.appender.stderr.layout.ConversionPattern=%d{yyyyMMdd-HH:mm:ss}: [%p]: %m [%c]%n -log4j.appender.stdout = org.apache.log4j.ConsoleAppender -log4j.appender.stdout.Threshold = DEBUG -#log4j.appender.stdout.Target = System.out -log4j.appender.stdout.layout = org.apache.log4j.PatternLayout -#log4j.appender.stdout.layout.ConversionPattern = %-5p %d [%t][%F:%L] : %m%n -log4j.appender.stdout.layout.ConversionPattern = %-5p %d [%F:%L] : %m%n -#log4j.appender.stdout.filter.filter1=org.apache.log4j.varia.LevelRangeFilter -#log4j.appender.stdout.filter.filter1.levelMin=INFO -#log4j.appender.stdout.filter.filter1.levelMax=WARN +# %p -> priority level of the event - (e.g. WARN) +# %m -> message to be printed +# %c -> category name ... in this case name of the class +# %d -> Used to output the date of the logging event. example, %d{HH:mm:ss,SSS} or %d{dd MMM yyyy HH:mm:ss,SSS}. Default format is ISO8601 format +# %M -> print the method name where the event was generated ... can be extremely slow. +# %L -> print the line number of the event generated ... can be extremely slow. +# %t -> Used to output the name of the thread that generated the log event +# %n -> carriage return -# Classes in the my.project package will accept messages of INFO level or higher -# and send those messages to the console and to the log file -log4j.logger.org.edu.ucsb.nceas=DEBUG, stdout -log4j.logger.org.apache=WARN, stdout -log4j.logger.org.dataone=WARN, stdout -# Need to set additivity to false or else both the my.project and root loggers -# will accept messages from classes in package my.project -#log4j.additivity.org.edu.ucsb.nceas \ No newline at end of file +################################################################################ +# EXAMPLE: Print only messages of level WARN or above in the package com.foo: +log4j.logger.edu.ucsb.nceas.mdqengine=DEBUG +#log4j.logger.com.hp.hpl.jena=WARN +log4j.logger.org.dataone.ore=ERROR +log4j.logger.org.dataone.client=ERROR +#log4j.logger.org.apache.http=DEBUG +log4j.logger.org.dataone.client.auth=ERROR +log4j.logger.org.apache.commons.beanutils=WARN +log4j.logger.org.apache.http=WARN From 5b9282462860cd95c0fa31f3e9c53c543ce34e46 Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 24 Jul 2020 10:18:42 -0700 Subject: [PATCH 22/47] Enable TLS client authentication (#258) --- .../Admin/Authentication/update-LE-cert.sh | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Kubernetes/Admin/Authentication/update-LE-cert.sh b/Kubernetes/Admin/Authentication/update-LE-cert.sh index 18f00010..6196572f 100644 --- a/Kubernetes/Admin/Authentication/update-LE-cert.sh +++ b/Kubernetes/Admin/Authentication/update-LE-cert.sh @@ -8,13 +8,19 @@ debug=1 # The user managing k8s user=metadig # k8s namespace that we are managing -ns=metadig +#k8sns=metadig +k8sns=nginx-ingress # Save current LE cert modified time so we can see if certbot delivers # new certs -host=`hostname -f` -CA_DIR=/etc/letsencrypt/live/${host} -certFilename=${CA_DIR}/cert.pem +domain=`hostname -f` +damainDir=$domain +domain=api.test.dataone.org,${domain} +CA_DIR=/etc/letsencrypt/live/${domainDir} +# Use fullchain.pem, which includes the intermediate certificate, that will allow TLS +# client authentication, for those clients that don't know about LE certs +#certFilename=${CA_DIR}/cert.pem +certFilename=${CA_DIR}/fullchain.pem privkeyFilename=${CA_DIR}/privkey.pem certModTime=`stat -c %Y ${certFilename}` @@ -28,7 +34,8 @@ certModTime=`stat -c %Y ${certFilename}` # the IP that the certbot 
request will come from. ufw allow 80 #sudo ufw allow from ${certbotIP} to any port 80 -/usr/bin/certbot renew > /var/log/letsencrypt/letsencrypt-renew.log 2>&1 +#/usr/bin/certbot renew -d ${domain} > /var/log/letsencrypt/letsencrypt-renew.log 2>&1 +/usr/bin/certbot renew -d ${domain} > /var/log/letsencrypt/letsencrypt-renew.log 2>&1 # Close the port as soon as certbot is done ufw delete allow 80 #sudo ufw delete allow from ${certbotIP} to any port 80 @@ -55,7 +62,8 @@ if (( $certModTimeNew > $certModTime )); then su ${user} -c "kubectl get secret ${k8sns}-tls-cert --namespace ${k8sns}" su ${user} -c "kubectl delete secret ${k8sns}-tls-cert --namespace ${k8sns}" #sudo kubectl create secret tls ${k8sns}-tls-cert --key ${CA_DIR}/privkey.pem --cert ${CA_DIR}/cert.pem --namespace ${k8sns} - su ${user} -c "kubectl create secret tls ${k8sns}-tls-cert --key ~${user}/tmp/privkey.pem --cert ~${user}/tmp/cert.pem --namespace ${k8sns}" + #su ${user} -c "kubectl create secret tls ${k8sns}-tls-cert --key ~${user}/tmp/privkey.pem --cert ~${user}/tmp/cert.pem --namespace ${k8sns}" + su ${user} -c "kubectl create secret tls ${k8sns}-tls-cert --key ~${user}/tmp/privkey.pem --cert ~${user}/tmp/chain.pem --namespace ${k8sns}" #su metadig -c "kubectl get secret metadig-tls-cert --namespace metadig" rm -f ~${user}/tmp/privkey.pem ~${user}/tmp/cert.pem @@ -65,4 +73,4 @@ else if (( $debug )); then echo "Let's Encrypt cert not updated by certbot, Not updating k8s with new certfile " fi -fi +fi \ No newline at end of file From e95601df90aa9ab7babf47c44ba48ae79d9b0bd7 Mon Sep 17 00:00:00 2001 From: gothub Date: Fri, 24 Jul 2020 10:21:17 -0700 Subject: [PATCH 23/47] Add Solr LE cert update script --- .../Admin/Solr/renew-LE-cert-for-solr.sh | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 Kubernetes/Admin/Solr/renew-LE-cert-for-solr.sh diff --git a/Kubernetes/Admin/Solr/renew-LE-cert-for-solr.sh b/Kubernetes/Admin/Solr/renew-LE-cert-for-solr.sh new file mode 100644 index 00000000..8304f22d --- /dev/null +++ b/Kubernetes/Admin/Solr/renew-LE-cert-for-solr.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Follow these steps in order to enable SSL for solr standalone server. +# From SO: https://stackoverflow.com/questions/41592427/letsencypt-solr-ssl-jvm +# As i have a key for the Domain already, and Solr responds on mydomain.com:8983 all that is needed is to create a Java Key Store (jks) from the existing keys on the system + +# Note: Use the password "metadig" when prompted by openssl +sudo openssl pkcs12 -export -in /etc/letsencrypt/live/docker-ucsb-4.dataone.org/fullchain.pem -inkey /etc/letsencrypt/live/docker-ucsb-4.dataone.org/privkey.pem -out pkcs.p12 -name metadig + +# specifing the location of the Lets-Encrypt Cert (on my system /etc/letsencrypt/live/mydomain.com/) +# Then convert the PKCS12 key to a jks, replacing password where needed. 
+ +# keytool -importkeystore -deststorepass PASSWORD_STORE -destkeypass PASSWORD_KEYPASS -destkeystore keystore.jks -srckeystore pkcs.p12 -srcstoretype PKCS12 -srcstorepass STORE_PASS -alias NAME + +sudo keytool -importkeystore -deststorepass metadig -destkeypass metadig -destkeystore keystore.jks -srckeystore pkcs.p12 -srcstoretype PKCS12 -srcstorepass metadig -alias metadig +sudo cp keystore.jks /opt/solr/server/etc/solr-ssl-letsencrypt.keystore.jks +sudo chown solr /opt/solr/server/etc/solr-ssl-letsencrypt.keystore.jks +sudo chgrp solr /opt/solr/server/etc/solr-ssl-letsencrypt.keystore.jks + +rm -f keystore.jks + +# Now that the keystore has been created, Solr must be told where it is: + +#* on docker-ucsb-4, the ’service solr start’ (/etc/init.d/solr) reads from /etc/default/solr.in.sh +# * these values are currently used +# * SOLR_SSL_ENABLED=true +# * # Uncomment to set SSL-related system properties +# * # Be sure to update the paths to the correct keystore for your environment +# * SOLR_SSL_KEY_STORE=/opt/solr/server/etc/solr-ssl-letsencrypt.keystore.jks +# * SOLR_SSL_KEY_STORE_PASSWORD=metadig +# * SOLR_SSL_KEY_STORE_TYPE=JKS +# * SOLR_SSL_TRUST_STORE=/opt/solr/server/etc/solr-ssl-letsencrypt.keystore.jks +# * SOLR_SSL_TRUST_STORE_PASSWORD=metadig +# * SOLR_SSL_TRUST_STORE_TYPE=JKS +# * #SOLR_SSL_NEED_CLIENT_AUTH=false +# * SOLR_SSL_WANT_CLIENT_AUTH=false + + +# Now restart Solr +sudo service solr restart From e0a7aab674f79e6b006a5d53d538083452611839 Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 27 Jul 2020 06:00:13 -0700 Subject: [PATCH 24/47] Enable/disable bookkeeper check with config parameter (#247) --- .../edu/ucsb/nceas/mdqengine/Controller.java | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java index aaad57f6..be83cd6e 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java @@ -414,7 +414,7 @@ public void processQualityRequest(String memberNode, * create the graph from them. *
<p>
* - * @param collectionId the DataONE collection identifier + * @param collectionId the DataONE collection identifier (the portal seriesId) * @param nodeId the node identifier the collection resides on * @param formatFamily a string representing the DataONE formats to create score for * @param qualitySuiteId the quality suite used to create the score graph @@ -429,7 +429,8 @@ public void processScorerRequest(String collectionId, String qualitySuiteId, DateTime requestDateTime) throws java.io.IOException { - log.info("Processing scorer request, collection: " + collectionId + ", suite: " + qualitySuiteId); + log.info("Processing scorer request, collection: " + collectionId + ", suite: " + qualitySuiteId + + "nodeId: " + nodeId + ", formatFamily: " + formatFamily); ScorerQueueEntry qEntry = null; byte[] message = null; @@ -439,31 +440,34 @@ public void processScorerRequest(String collectionId, */ if (bookkeeperEnabled) { try { + // Bookkeeper creates a portal usage with the portal sid as the 'instanceId', however if (!isPortalActive(collectionId)) { log.info("Skipping Scorer request for inactive portal with pid: '" + collectionId + "'" + ", quality suite " + qualitySuiteId); return; + } else { + log.info("Bookkeeper check indicates portal for pid: " + collectionId + " is active."); + log.info("Processing with Scorer request for inactive portal with pid: '" + collectionId + "'" + ", quality suite " + qualitySuiteId); } } catch (MetadigException me) { - log.error("Unable to contact DataONE bookkeeper: " + me.getMessage() + log.error("Unable to contact DataONE bookkeeper: " + me.getMessage() + "\nSkipping Scorer request for portal with pid: '" + collectionId + "'" + ", quality suite " + qualitySuiteId); return; } + } else { + log.info("Bookkeeper quota checking is disabled, proceeding with Scorer request for portal, collectionld: '" + collectionId + + "'" + ", quality suite " + qualitySuiteId); + } - qEntry = new ScorerQueueEntry(collectionId, qualitySuiteId, nodeId, formatFamily, requestDateTime); + qEntry = new ScorerQueueEntry(collectionId, qualitySuiteId, nodeId, formatFamily, requestDateTime); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutput out = new ObjectOutputStream(bos); - out.writeObject(qEntry); - message = bos.toByteArray(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutput out = new ObjectOutputStream(bos); + out.writeObject(qEntry); + message = bos.toByteArray(); - this.writeInProcessChannel(message, SCORER_ROUTING_KEY); - log.info(" [x] Queued Scorer request for collectionld: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); - } else { - log.info("Skipping Scorer request for portal, collectionld: '" + collectionId - + "'" + ", quality suite " + qualitySuiteId - + "\n as DataONE bookkeeper service is disabled via metadig-engine configuration."); - } + this.writeInProcessChannel(message, SCORER_ROUTING_KEY); + log.info(" [x] Queued Scorer request for pid: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily); } /** From 097b7f477c28d15f341b752437a59c99395b4524 Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 27 Jul 2020 06:01:42 -0700 Subject: [PATCH 25/47] Associate portal series id with portal assessment graphs --- .../mdqengine/scheduler/RequestScorerJob.java | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git 
a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index 9be3d2cc..daeaee34 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java @@ -2,7 +2,7 @@ import edu.ucsb.nceas.mdqengine.Controller; import edu.ucsb.nceas.mdqengine.MDQconfig; -import edu.ucsb.nceas.mdqengine.authentication.DataONE; +import edu.ucsb.nceas.mdqengine.DataONE; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.Task; import edu.ucsb.nceas.mdqengine.store.DatabaseStore; @@ -15,11 +15,11 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.dataone.client.auth.AuthTokenSession; import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; import org.dataone.client.v2.impl.MultipartMNode; +import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.types.v1.*; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; @@ -346,18 +346,18 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, try { // Even though MultipartMNode and MultipartCNode have the same parent class, their interfaces are differnt, so polymorphism // isn't happening here. + log.debug("session: " + session.getSubject().getValue()); + log.debug("startDate: " + startDate); + log.debug("endDate: " + endDate); + log.debug("formatId: " + formatId); + log.debug("Identifier: " + identifier); + log.debug("startCount: " + startCount); + log.debug("countRequested: " + countRequested); if(isCN) { log.debug("cnNode: " + cnNode); - log.debug("Listing objects for CN"); - log.debug("session: " + session.getSubject().getValue()); - log.debug("startDate: " + startDate); - log.debug("endDate: " + endDate); - log.debug("formatId: " + formatId); - log.debug("Identifier: " + identifier); - log.debug("startCount: " + startCount); - log.debug("countRequested: " + countRequested); objList = cnNode.listObjects(session, startDate, endDate, formatId, nodeRef, identifier, startCount, countRequested); } else { + log.debug("mnNode: " + mnNode); objList = mnNode.listObjects(session, startDate, endDate, formatId, identifier, replicaStatus, startCount, countRequested); } log.debug("Retrieved " + objList.getCount() + " pids"); @@ -391,10 +391,27 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, // been updated (i.e. obsoletedBy, access) and the quality report and index contain // sysmeta fields. if(found) { + // The DataONE listObjects service retuns the pid for each object, but does not return the seriesId, + // so this has to be retrieved now, as Bookkeeper service and MetacatUI (when the graph is requested for + // this portal) uses the sid, not the pid, so create and store the graph based on the sid. 
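The comment above is the crux of this commit: listObjects only hands back pids, while bookkeeper and MetacatUI key off the sid, so each pid has to be upgraded via its system metadata. Condensed into one helper (a sketch; the types and getSystemMetadata calls are those used in this hunk, with a null guard added since not every object carries a seriesId):

    // Sketch: resolve a pid to its seriesId via getSystemMetadata.
    static String resolveSeriesId(MultipartCNode cnNode, MultipartMNode mnNode,
            boolean isCN, Session session, String pid) throws Exception {
        Identifier id = new Identifier();
        id.setValue(pid);
        org.dataone.service.types.v2.SystemMetadata sysmeta =
                isCN ? cnNode.getSystemMetadata(session, id)
                     : mnNode.getSystemMetadata(session, id);
        // Added guard: fall back to the pid when no seriesId is assigned.
        return (sysmeta.getSeriesId() != null) ? sysmeta.getSeriesId().getValue() : pid;
    }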
// if (!runExists(thisPid, suiteId, store)) { + + Identifier thisId = new Identifier(); + thisId.setValue(thisPid); + + org.dataone.service.types.v2.SystemMetadata sysmeta = null; + + if(isCN) { + sysmeta = cnNode.getSystemMetadata(session, thisId); + } else { + sysmeta = mnNode.getSystemMetadata(session, thisId); + } + + String thisSeriesId = sysmeta.getSeriesId().getValue(); + pidCount = pidCount++; - pids.add(thisPid); - log.info("adding pid to process: " + thisPid + ", formatId: " + thisFormatId); + pids.add(thisSeriesId); + log.info("adding seriesId to process: " + thisSeriesId + ", formatId: " + thisFormatId); // } } } From 6a656950fcd69601d37289d61abb664b9470b9ed Mon Sep 17 00:00:00 2001 From: gothub Date: Mon, 27 Jul 2020 06:03:09 -0700 Subject: [PATCH 26/47] Refactor DataONE related methods into a new package --- .../edu/ucsb/nceas/mdqengine/DataONE.java | 234 +++++++++++++++++ .../mdqengine/authentication/DataONE.java | 43 ---- .../authorization/BookkeeperClient.java | 3 +- .../mdqengine/filestore/FilestoreDB.java | 1 - .../mdqengine/scheduler/RequestReportJob.java | 3 +- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 236 +----------------- 6 files changed, 243 insertions(+), 277 deletions(-) create mode 100644 src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java delete mode 100644 src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java new file mode 100644 index 00000000..7a2781a1 --- /dev/null +++ b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java @@ -0,0 +1,234 @@ +package edu.ucsb.nceas.mdqengine; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import edu.ucsb.nceas.mdqengine.exception.MetadigProcessException; +import org.dataone.client.auth.AuthTokenSession; +import org.dataone.client.rest.MultipartRestClient; +import org.dataone.client.v2.impl.MultipartD1Node; +import org.dataone.service.types.v1.Identifier; +import org.dataone.service.types.v1.Session; +import org.dataone.service.types.v1.SystemMetadata; +import edu.ucsb.nceas.mdqengine.exception.MetadigException; +import org.dataone.client.rest.DefaultHttpMultipartRestClient; +import org.dataone.client.v2.impl.MultipartCNode; +import org.dataone.client.v2.impl.MultipartMNode; +import org.dataone.service.types.v1.Subject; +import org.dataone.service.types.v1.SubjectInfo; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import java.io.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class DataONE { + + + private static Log log = LogFactory.getLog(DataONE.class); + + /** + * Get a DataONE subject information object + * @param serviceUrl the service URL of the DataONE node to request the subject info from + * @param authToken the authorization token to use for the request + * @return a DataONE subject information object + * @throws MetadigProcessException + */ + public static SubjectInfo getSubjectInfo(Subject rightsHolder, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { + + log.debug("Getting subject info for: " + rightsHolder.getValue()); + MultipartCNode cnNode = null; + MetadigProcessException metadigException = null; + + SubjectInfo subjectInfo = null; + Session session = DataONE.getSession(subjectId, authToken); + + // Identity node as either a CN or MN based on the 
serviceUrl + String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org"; + Pattern r = Pattern.compile(pattern); + Matcher m = r.matcher(serviceUrl); + if (!m.find()) { + log.error("Must call a CN to get subject information"); + metadigException = new MetadigProcessException("Must call a CN to get subject information."); + throw metadigException; + } + + // Only CNs can call the 'subjectInfo' service (aka accounts), so we have to use + // a MultipartCNode instance here. + try { + cnNode = (MultipartCNode) getMultipartD1Node(session, serviceUrl); + } catch (Exception ex) { + metadigException = new MetadigProcessException("Unable to create multipart D1 node: " + subjectId + ": " + ex.getMessage()); + metadigException.initCause(ex); + throw metadigException; + } + + try { + subjectInfo = cnNode.getSubjectInfo(session, rightsHolder); + } catch (Exception ex) { + metadigException = new MetadigProcessException("Unable to get subject information." + ex.getMessage()); + metadigException.initCause(ex); + throw metadigException; + } + + return subjectInfo; + } + + /** + * Get a DataONE MultipartCNode object, which will be used to communication with a CN + * + * @param session a DataONE authentication session + * @param serviceUrl the service URL for the node we are connecting to + * @return a DataONE MultipartCNode object + * @throws MetadigException + */ + public static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws MetadigException { + + MultipartRestClient mrc = null; + MultipartD1Node d1Node = null; + MetadigProcessException metadigException = null; + + // First create an HTTP client + try { + mrc = new DefaultHttpMultipartRestClient(); + } catch (Exception ex) { + log.error("Error creating rest client: " + ex.getMessage()); + metadigException = new MetadigProcessException("Unable to get collection pids"); + metadigException.initCause(ex); + throw metadigException; + } + + Boolean isCN = isCN(serviceUrl); + + // Now create a DataONE object that uses the rest client + if (isCN) { + log.debug("creating cn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); + d1Node = new MultipartCNode(mrc, serviceUrl, session); + } else { + log.debug("creating mn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); + d1Node = new MultipartMNode(mrc, serviceUrl, session); + } + return d1Node; + } + + /** + * Send a query to the DataONE Query Service , using the DataONE CN or MN API + * + * @param queryStr the query string to pass to the Solr server + * @param serviceUrl the service URL for the DataONE CN or MN + * @param startPos the start of the query result to return, if query pagination is being used + * @param countRequested the number of results to return + * @return an XML document containing the query result + * @throws Exception + */ + public static Document querySolr(String queryStr, String serviceUrl, int startPos, int countRequested, String subjectId, String authToken) throws MetadigProcessException { + + MultipartRestClient mrc = null; + // Polymorphism doesn't work with D1 node classes, so have to use the derived classes + MultipartD1Node d1Node = null; + Session session = DataONE.getSession(subjectId, authToken); + + // Add the start and count, if pagination is being used + queryStr = queryStr + "&start=" + startPos + "&rows=" + countRequested; + // Query the MN or CN Solr engine to get the query associated with this project that will return all project related pids. 
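(Once this helper is in place, a minimal hypothetical call would look like the following; the query string, service URL, and public session via null credentials are all illustrative, Document is org.w3c.dom.Document, and the method appends the &start= and &rows= paging parameters itself.)

    Document page = DataONE.querySolr(
            "q=formatType:METADATA&fl=id,seriesId",  // Solr query to evaluate
            "https://cn.dataone.org/cn/v2",          // CN (or MN) query endpoint
            0, 1000,                                 // startPos, countRequested
            null, null);                             // subjectId, authToken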
+ InputStream qis = null; + MetadigProcessException metadigException = null; + + try { + d1Node = getMultipartD1Node(session, serviceUrl); + log.debug("Created MultipartD1Node: " + d1Node.toString()); + } catch (Exception ex) { + log.error("Unable to create MultipartD1Node for Solr query"); + metadigException = new MetadigProcessException("Unable to create multipart node client to query DataONE solr: " + ex.getMessage()); + metadigException.initCause(ex); + throw metadigException; + } + + // Send a query to a CN or MN + try { + qis = d1Node.query(session, "solr", queryStr); + } catch (Exception e) { + log.error("Error retrieving pids: " + e.getMessage()); + metadigException = new MetadigProcessException("Unable to query dataone node: " + e.getMessage()); + metadigException.initCause(e); + throw metadigException; + } + + Document xmldoc = null; + DocumentBuilder builder = null; + + try { + // If results were returned, create an XML document from them + if (qis.available() == 1) { + try { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + builder = factory.newDocumentBuilder(); + xmldoc = builder.parse(new InputSource(qis)); + } catch (Exception e) { + log.error("Unable to create w3c Document from input stream", e); + e.printStackTrace(); + } finally { + qis.close(); + } + } else { + log.info("No results returned from D1 Solr query"); + qis.close(); + } + } catch (IOException ioe) { + metadigException = new MetadigProcessException("Unable prepare query result xml document: " + ioe.getMessage()); + metadigException.initCause(ioe); + throw metadigException; + } + + return xmldoc; + } + /** + * Get a DataONE authenticated session + *

+ * If no subject or authentication token are provided, a public session is returned + *

+     * @param subjectId the DataONE subject to associate with the session
+     * @param authToken the authentication token
+     * @return the DataONE session
+     */
+    public static Session getSession(String subjectId, String authToken) {
+
+        Session session;
+
+        // If no authentication token was provided, create a public (unauthenticated) session
+        if (authToken == null || authToken.isEmpty()) {
+            log.debug("Creating public session");
+            session = new Session();
+        } else {
+            log.debug("Creating authentication session for subjectId: " + subjectId + ", token: " + authToken.substring(0, 5) + "...");
+            session = new AuthTokenSession(authToken);
+        }
+
+        if (subjectId != null && !subjectId.isEmpty()) {
+            Subject subject = new Subject();
+            subject.setValue(subjectId);
+            session.setSubject(subject);
+            log.debug("Set session subjectId to: " + session.getSubject().getValue());
+        }
+
+        return session;
+    }
+
+    protected static Boolean isCN(String serviceUrl) {
+
+        Boolean isCN = false;
+        // Identify the node as either a CN or MN based on the serviceUrl
+        String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org";
+        Pattern r = Pattern.compile(pattern);
+        Matcher m = r.matcher(serviceUrl);
+        if (m.find()) {
+            isCN = true;
+            log.debug("service URL is for a CN: " + serviceUrl);
+        } else {
+            log.debug("service URL is not for a CN: " + serviceUrl);
+            isCN = false;
+        }
+        return isCN;
+    }
+}
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java
deleted file mode 100644
index d5e8b73a..00000000
--- a/src/main/java/edu/ucsb/nceas/mdqengine/authentication/DataONE.java
+++ /dev/null
@@ -1,43 +0,0 @@
-package edu.ucsb.nceas.mdqengine.authentication;
-
-import org.dataone.client.auth.AuthTokenSession;
-import org.dataone.service.types.v1.Session;
-import org.dataone.service.types.v1.Subject;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-public class DataONE {
-
-public static Log log = LogFactory.getLog(DataONE.class);
-
-    /**
-     * Get a DataONE authenticated session
-     *

- * If no subject or authentication token are provided, a public session is returned - *

- * @param authToken the authentication token - * @return the DataONE session - */ - public static Session getSession(String subjectId, String authToken) { - - Session session; - - // query Solr - either the member node or cn, for the project 'solrquery' field - if (authToken == null || authToken.isEmpty()) { - log.debug("Creating public sessioni"); - session = new Session(); - } else { - log.debug("Creating authentication session for subjectId: " + subjectId + ", token: " + authToken.substring(0, 5) + "..."); - session = new AuthTokenSession(authToken); - } - - if (subjectId != null && !subjectId.isEmpty()) { - Subject subject = new Subject(); - subject.setValue(subjectId); - session.setSubject(subject); - log.debug("Set session subjectId to: " + session.getSubject().getValue()); - } - - return session; - } -} diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java b/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java index 9dd246a8..f9eac335 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/authorization/BookkeeperClient.java @@ -1,7 +1,6 @@ package edu.ucsb.nceas.mdqengine.authorization; import edu.ucsb.nceas.mdqengine.MDQconfig; -import edu.ucsb.nceas.mdqengine.authentication.DataONE; import edu.ucsb.nceas.mdqengine.exception.MetadigException; import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.logging.Log; @@ -24,7 +23,7 @@ public class BookkeeperClient { private static BookkeeperClient instance; - public static Log log = LogFactory.getLog(DataONE.class); + public static Log log = LogFactory.getLog(BookkeeperClient.class); private String bookkeeperURL = null; private String bookkeeperAuthToken = null; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java index 1ea8d7ed..ff67bac6 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java @@ -9,7 +9,6 @@ import edu.ucsb.nceas.mdqengine.model.*; import org.joda.time.DateTime; -import sun.tools.tree.NewArrayExpression; import java.io.IOException; import java.sql.*; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 9f5d8a6d..b59b0224 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -1,7 +1,7 @@ package edu.ucsb.nceas.mdqengine.scheduler; import edu.ucsb.nceas.mdqengine.MDQconfig; -import edu.ucsb.nceas.mdqengine.authentication.DataONE; +import edu.ucsb.nceas.mdqengine.DataONE; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.Run; import edu.ucsb.nceas.mdqengine.model.Task; @@ -400,7 +400,6 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, // Set the count for the number of desired pids filtered from the total result set result.setFilteredResultCount(pidCount); // Set the count for the total number of pids returned from DataONE (all formatIds) for this query - // Set the count for the total number of pids returned from DataONE (all formatIds) for this query result.setTotalResultCount(objList.getCount()); result.setResult(pids); diff --git 
a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index e160635f..6414f852 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -2,7 +2,7 @@ import com.rabbitmq.client.*; import edu.ucsb.nceas.mdqengine.MDQconfig; -import edu.ucsb.nceas.mdqengine.authentication.DataONE; +import edu.ucsb.nceas.mdqengine.DataONE; import edu.ucsb.nceas.mdqengine.exception.MetadigException; import edu.ucsb.nceas.mdqengine.exception.MetadigProcessException; import edu.ucsb.nceas.mdqengine.filestore.MetadigFile; @@ -20,17 +20,12 @@ import org.apache.solr.client.solrj.beans.BindingException; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; -import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; -import org.dataone.client.v2.impl.MultipartCNode; -import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.client.v2.impl.MultipartD1Node; // Don't include org.dataone.client.rest.MultipartD1Node (this is what IDEA selects) import org.dataone.service.types.v1.Session; import org.dataone.service.types.v1.Subject; import org.dataone.service.types.v1.Group; -import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.SubjectInfo; -import org.dataone.service.types.v1.SystemMetadata; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; @@ -369,7 +364,7 @@ private ScorerResult getCollectionPids(String collectionId, String serviceUrl, S which will be used to query DataONE Solr for all the pids associated with that project (that's 2 queries!) 
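       Schematically (the identifier is hypothetical):
         query 1: ?q=seriesId:urn:uuid:1234&fl=collectionQuery,label,rightsHolder  -> returns the portal's stored collectionQuery
         query 2: the returned collectionQuery itself                              -> returns the pids that belong to the portal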
*/ ArrayList pids = new ArrayList<>(); - queryStr = "?q=id:" + escapeSpecialChars(collectionId) + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; + queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; startPos = 0; countRequested = 10000; @@ -377,7 +372,7 @@ which will be used to query DataONE Solr for all the pids associated with that p // Get the collectionQuery from Solr try { log.debug("Getting collectionQuery with query: " + queryStr); - xmldoc = queryD1Solr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); + xmldoc = DataONE.querySolr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); } catch (MetadigProcessException mpe) { log.error("Unable to query Solr for collectionQuery field for collection id: " + collectionId); throw new MetadigProcessException("Unable to query Solr for collectionQuery field for collection id: " + collectionId); @@ -477,7 +472,7 @@ which will be used to query DataONE Solr for all the pids associated with that p subject.setValue(rightsHolder); // The subject info can only be obtained from a CN, so use the CN auth info for the current DataONE environment, // which should be configured in the metadig.properties file - SubjectInfo subjectInfo = getSubjectInfo(subject, CNserviceUrl, CNsubjectId, CNauthToken); + SubjectInfo subjectInfo = DataONE.getSubjectInfo(subject, CNserviceUrl, CNsubjectId, CNauthToken); String groupStr = null; groupStr = "(readPermission:" + "\"" + rightsHolder @@ -541,7 +536,7 @@ which will be used to query DataONE Solr for all the pids associated with that p do { //TODO: check that a result was returned // Note: the collectionQuery is always evaluated on the CN, so that the entire DataONE network is queried. - xmldoc = queryD1Solr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); + xmldoc = DataONE.querySolr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); if(xmldoc == null) { log.info("no values returned from query"); break; @@ -621,6 +616,7 @@ private List getQualityScores(String collectionId, String suiteId, int startPosInResult = 0; int startPosInQuery = 0; // this will always be zero - we are listing the pids to retrieve, so will always want to start at the first result + log.trace("Getting scores from Solr for " + collectionPids.size() + " pids."); // Now accumulate the Quality Solr document results for the list of pids for the project. if (collectionId != null && ! 
collectionId.isEmpty()) { log.info("Getting quality scores for collection: " + collectionId); @@ -652,7 +648,7 @@ private List getQualityScores(String collectionId, String suiteId, if (suiteId != null) { queryStr += " AND suiteId:" + suiteId; } - log.trace("query to quality Solr server: " + queryStr); + log.debug("query to quality Solr server: " + queryStr); // Send query to Quality Solr Server // Get all the pids in this pid string resultList = queryQualitySolr(queryStr, startPosInQuery, pidCntToRequest); @@ -782,78 +778,6 @@ private void returnGraphStatus(String metadataPid, String suiteId, ScorerQueueEn } } - /** - * Send a query to the DataONE Query Service , using the DataONE CN or MN API - * - * @param queryStr the query string to pass to the Solr server - * @param serviceUrl the service URL for the DataONE CN or MN - * @param startPos the start of the query result to return, if query pagination is being used - * @param countRequested the number of results to return - * @return an XML document containing the query result - * @throws Exception - */ - private Document queryD1Solr(String queryStr, String serviceUrl, int startPos, int countRequested, String subjectId, String authToken) throws MetadigProcessException { - - MultipartRestClient mrc = null; - // Polymorphism doesn't work with D1 node classes, so have to use the derived classes - MultipartD1Node d1Node = null; - Session session = DataONE.getSession(subjectId, authToken); - - // Add the start and count, if pagination is being used - queryStr = queryStr + "&start=" + startPos + "&rows=" + countRequested; - // Query the MN or CN Solr engine to get the query associated with this project that will return all project related pids. - InputStream qis = null; - MetadigProcessException metadigException = null; - - try { - d1Node = getMultipartD1Node(session, serviceUrl); - log.debug("Created MultipartD1Node: " + d1Node.toString()); - } catch (Exception ex) { - log.error("Unable to create MultipartD1Node for Solr query"); - metadigException = new MetadigProcessException("Unable to create multipart node client to query DataONE solr: " + ex.getMessage()); - metadigException.initCause(ex); - throw metadigException; - } - - // Send a query to a CN or MN - try { - qis = d1Node.query(session, "solr", queryStr); - } catch (Exception e) { - log.error("Error retrieving pids: " + e.getMessage()); - metadigException = new MetadigProcessException("Unable to query dataone node: " + e.getMessage()); - metadigException.initCause(e); - throw metadigException; - } - - Document xmldoc = null; - DocumentBuilder builder = null; - - try { - // If results were returned, create an XML document from them - if (qis.available() == 1) { - try { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - builder = factory.newDocumentBuilder(); - xmldoc = builder.parse(new InputSource(qis)); - } catch (Exception e) { - log.error("Unable to create w3c Document from input stream", e); - e.printStackTrace(); - } finally { - qis.close(); - } - } else { - log.info("No results returned from D1 Solr query"); - qis.close(); - } - } catch (IOException ioe) { - metadigException = new MetadigProcessException("Unable prepare query result xml document: " + ioe.getMessage()); - metadigException.initCause(ioe); - throw metadigException; - } - - return xmldoc; - } - /** * Send a query to the Quality Solr Server. 
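     * For example (pids and field names are illustrative), a query of the form
     *   metadataId:("pid1" OR "pid2") AND suiteId:FAIR.suite.1
     * returns one quality score document per pid for the requested suite.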
* @param queryStr the query to send to Solr @@ -959,137 +883,6 @@ public void writeCompletedQueue (byte[] message) throws IOException { completedChannel.basicPublish(EXCHANGE_NAME, COMPLETED_ROUTING_KEY, basicProperties, message); } - /** - * Get a DataONE system metadata object - * @param pid the pid to get the system metadata for - * @param serviceUrl the service URL of the DataONE node to request the sysmeta - * @param authToken the authorization token to use for the request - * @return a DataONE system metadata object - * @throws MetadigProcessException - */ - protected SystemMetadata getSystemMetadata(String pid, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { - - SystemMetadata sysmeta = null; - MultipartRestClient mrc = null; - MultipartD1Node d1Node = null; - MetadigProcessException metadigException = null; - - log.debug("serviceUrl: " + serviceUrl); - log.debug("subjectId: " + subjectId); - -// Subject subject = new Subject(); -// if(subjectId != null && ! subjectId.isEmpty()) { -// subject.setValue(subjectId); -// } - - Session session = DataONE.getSession(subjectId, authToken); - Identifier identifier = new Identifier(); - identifier.setValue(pid); - - try { - d1Node = getMultipartD1Node(session, serviceUrl); - } catch (Exception ex) { - metadigException = new MetadigProcessException("Unable to get multipartD1Node for serviceUrl: " + serviceUrl); - metadigException.initCause(ex); - throw metadigException; - } - - try { - sysmeta = d1Node.getSystemMetadata(session, identifier); - log.debug("retrieved sysmeta for pid: " + sysmeta.getIdentifier().getValue()); - } catch (Exception ex) { - log.error("Unable to retrieve sysmeta for pid: " + pid); - metadigException = new MetadigProcessException("Unable to get sysmeta for pid: " + pid); - metadigException.initCause(ex); - throw metadigException; - } - - return sysmeta; - } - - /** - * Get a DataONE subject information object - * @param serviceUrl the service URL of the DataONE node to request the subject info from - * @param authToken the authorization token to use for the request - * @return a DataONE subject information object - * @throws MetadigProcessException - */ - private SubjectInfo getSubjectInfo(Subject rightsHolder, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { - - log.debug("Getting subject info for: " + rightsHolder.getValue()); - MultipartCNode cnNode = null; - MetadigProcessException metadigException = null; - - SubjectInfo subjectInfo = null; - Session session = DataONE.getSession(subjectId, authToken); - - // Identity node as either a CN or MN based on the serviceUrl - String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org"; - Pattern r = Pattern.compile(pattern); - Matcher m = r.matcher(serviceUrl); - if (!m.find()) { - log.error("Must call a CN to get subject information"); - metadigException = new MetadigProcessException("Must call a CN to get subject information."); - throw metadigException; - } - - // Only CNs can call the 'subjectInfo' service (aka accounts), so we have to use - // a MultipartCNode instance here. 
- try { - cnNode = (MultipartCNode) getMultipartD1Node(session, serviceUrl); - } catch (Exception ex) { - metadigException = new MetadigProcessException("Unable to create multipart D1 node: " + subjectId + ": " + ex.getMessage()); - metadigException.initCause(ex); - throw metadigException; - } - - try { - subjectInfo = cnNode.getSubjectInfo(session, rightsHolder); - } catch (Exception ex) { - metadigException = new MetadigProcessException("Unable to get subject information." + ex.getMessage()); - metadigException.initCause(ex); - throw metadigException; - } - - return subjectInfo; - } - - /** - * Get a DataONE MultipartCNode object, which will be used to communication with a CN - * - * @param session a DataONE authentication session - * @param serviceUrl the service URL for the node we are connecting to - * @return a DataONE MultipartCNode object - * @throws MetadigException - */ - MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws MetadigException { - - MultipartRestClient mrc = null; - MultipartD1Node d1Node = null; - MetadigProcessException metadigException = null; - - // First create an HTTP client - try { - mrc = new DefaultHttpMultipartRestClient(); - } catch (Exception ex) { - log.error("Error creating rest client: " + ex.getMessage()); - metadigException = new MetadigProcessException("Unable to get collection pids"); - metadigException.initCause(ex); - throw metadigException; - } - - Boolean isCN = isCN(serviceUrl); - - // Now create a DataONE object that uses the rest client - if (isCN) { - log.debug("creating cn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); - d1Node = new MultipartCNode(mrc, serviceUrl, session); - } else { - log.debug("creating mn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); - d1Node = new MultipartMNode(mrc, serviceUrl, session); - } - return d1Node; - } /** * Read a file from a Java resources folder. 
* @@ -1163,21 +956,6 @@ private static String encodeValue(String value) { } } - private Boolean isCN(String serviceUrl) { - Boolean isCN = false; - // Identity node as either a CN or MN based on the serviceUrl - String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org"; - Pattern r = Pattern.compile(pattern); - Matcher m = r.matcher(serviceUrl); - if (m.find()) { - isCN = true; - log.debug("service URL is for a CN: " + serviceUrl); - } else { - log.debug("service URL is not for a CN: " + serviceUrl); - isCN = false; - } - return isCN; - } } From 581ad284e6fc272d98bd31bad08cc72fabd0cad9 Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 2 Aug 2020 12:49:47 -0700 Subject: [PATCH 27/47] Get list of new portal ids from Solr (#110) --- .../edu/ucsb/nceas/mdqengine/DataONE.java | 61 +++-- .../mdqengine/scheduler/RequestScorerJob.java | 258 ++++++++---------- 2 files changed, 150 insertions(+), 169 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java index 7a2781a1..82e4552d 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java @@ -6,9 +6,7 @@ import org.dataone.client.auth.AuthTokenSession; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartD1Node; -import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.Session; -import org.dataone.service.types.v1.SystemMetadata; import edu.ucsb.nceas.mdqengine.exception.MetadigException; import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; @@ -117,18 +115,20 @@ public static MultipartD1Node getMultipartD1Node(Session session, String service * Send a query to the DataONE Query Service , using the DataONE CN or MN API * * @param queryStr the query string to pass to the Solr server - * @param serviceUrl the service URL for the DataONE CN or MN * @param startPos the start of the query result to return, if query pagination is being used * @param countRequested the number of results to return * @return an XML document containing the query result * @throws Exception */ - public static Document querySolr(String queryStr, String serviceUrl, int startPos, int countRequested, String subjectId, String authToken) throws MetadigProcessException { + //public static Document querySolr(String queryStr, int startPos, int countRequested, MultipartCNode cnNode, + // MultipartMNode mnNode, Boolean isCN, + // Session session) throws MetadigProcessException { + public static Document querySolr(String queryStr, int startPos, int countRequested, MultipartD1Node d1Node, + Session session) throws MetadigProcessException { - MultipartRestClient mrc = null; - // Polymorphism doesn't work with D1 node classes, so have to use the derived classes - MultipartD1Node d1Node = null; - Session session = DataONE.getSession(subjectId, authToken); +// // Polymorphism doesn't work with D1 node classes, so have to use the derived classes +// MultipartD1Node d1Node = null; +// Session session = DataONE.getSession(subjectId, authToken); // Add the start and count, if pagination is being used queryStr = queryStr + "&start=" + startPos + "&rows=" + countRequested; @@ -136,19 +136,34 @@ public static Document querySolr(String queryStr, String serviceUrl, int startPo InputStream qis = null; MetadigProcessException metadigException = null; - try { - d1Node = getMultipartD1Node(session, serviceUrl); - 
log.debug("Created MultipartD1Node: " + d1Node.toString()); - } catch (Exception ex) { - log.error("Unable to create MultipartD1Node for Solr query"); - metadigException = new MetadigProcessException("Unable to create multipart node client to query DataONE solr: " + ex.getMessage()); - metadigException.initCause(ex); - throw metadigException; - } - +// try { +// d1Node = getMultipartD1Node(session, serviceUrl); +// log.debug("Created MultipartD1Node, nodeId: " + d1Node.getNodeId().getValue()); +// } catch (Exception ex) { +// log.error("Unable to create MultipartD1Node for Solr query"); +// metadigException = new MetadigProcessException("Unable to create multipart node client to query DataONE solr: " + ex.getMessage()); +// metadigException.initCause(ex); +// throw metadigException; +// } + + log.debug("Sending query: " + queryStr); // Send a query to a CN or MN +// try { +// if(isCN) { +// qis = cnNode.query(session, "solr", queryStr); +// } else { +// qis = mnNode.query(session, "solr", queryStr); +// } +// log.debug("Sent query"); +// } catch (Exception e) { +// log.error("Error retrieving pids: " + e.getMessage()); +// metadigException = new MetadigProcessException("Unable to query dataone node: " + e.getMessage()); +// metadigException.initCause(e); +// throw metadigException; +// } try { qis = d1Node.query(session, "solr", queryStr); + log.debug("Sent query"); } catch (Exception e) { log.error("Error retrieving pids: " + e.getMessage()); metadigException = new MetadigProcessException("Unable to query dataone node: " + e.getMessage()); @@ -156,16 +171,19 @@ public static Document querySolr(String queryStr, String serviceUrl, int startPo throw metadigException; } + log.debug("Creating xml doc with results"); Document xmldoc = null; DocumentBuilder builder = null; try { // If results were returned, create an XML document from them - if (qis.available() == 1) { + log.debug("qis available: " + qis.available()); + if (qis.available() > 0) { try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); builder = factory.newDocumentBuilder(); xmldoc = builder.parse(new InputSource(qis)); + log.debug("Created xml doc: " + xmldoc.toString()); } catch (Exception e) { log.error("Unable to create w3c Document from input stream", e); e.printStackTrace(); @@ -177,11 +195,14 @@ public static Document querySolr(String queryStr, String serviceUrl, int startPo qis.close(); } } catch (IOException ioe) { + log.debug("IO exception: " + ioe.getMessage()); metadigException = new MetadigProcessException("Unable prepare query result xml document: " + ioe.getMessage()); metadigException.initCause(ioe); throw metadigException; } + log.debug("Created results xml doc"); + return xmldoc; } /** @@ -215,7 +236,7 @@ public static Session getSession(String subjectId, String authToken) { return session; } - protected static Boolean isCN(String serviceUrl) { + public static Boolean isCN(String serviceUrl) { Boolean isCN = false; // Identity node as either a CN or MN based on the serviceUrl diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index daeaee34..29352235 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java @@ -3,6 +3,7 @@ import edu.ucsb.nceas.mdqengine.Controller; import edu.ucsb.nceas.mdqengine.MDQconfig; import edu.ucsb.nceas.mdqengine.DataONE; +import 
edu.ucsb.nceas.mdqengine.exception.MetadigProcessException; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.Task; import edu.ucsb.nceas.mdqengine.store.DatabaseStore; @@ -18,20 +19,20 @@ import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; +import org.dataone.client.v2.impl.MultipartD1Node; import org.dataone.client.v2.impl.MultipartMNode; -import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.types.v1.*; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.quartz.*; +import org.w3c.dom.Document; +import javax.xml.xpath.*; import java.io.IOException; import java.io.InputStream; -import java.time.ZonedDateTime; import java.util.ArrayList; -import java.util.Date; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -124,6 +125,7 @@ public void execute(JobExecutionContext context) String nodeId = dataMap.getString("nodeId"); String startHarvestDatetimeStr = dataMap.getString("startHarvestDatetime"); int harvestDatetimeInc = dataMap.getInt("harvestDatetimeInc"); + // Number of pids to get each query (this number of pids will be fetched each query until all pids are obtained) int countRequested = dataMap.getInt("countRequested"); // TODO: add formatFamily to scheduler request String formatFamily = null; @@ -135,6 +137,8 @@ public void execute(JobExecutionContext context) String subjectId = null; String nodeServiceUrl = null; + log.info("Executing task: " + taskName + ", taskType: " + taskType); + try { cfg = new MDQconfig(); qualityServiceUrl = cfg.getString("quality.serviceUrl"); @@ -151,8 +155,6 @@ public void execute(JobExecutionContext context) throw jee; } - log.info("Executing task: " + taskName + ", taskType: " + taskType); - try { mrc = new DefaultHttpMultipartRestClient(); } catch (Exception e) { @@ -165,12 +167,16 @@ public void execute(JobExecutionContext context) Session session = DataONE.getSession(subjectId, authToken); // Don't know node type yet from the id, so have to manually check if it's a CN - Boolean isCN = isCN(nodeServiceUrl); + Boolean isCN = DataONE.isCN(nodeServiceUrl); + + MultipartD1Node d1Node = null; if(isCN) { - cnNode = new MultipartCNode(mrc, nodeServiceUrl, session); + //cnNode = new MultipartCNode(mrc, nodeServiceUrl, session); + d1Node = new MultipartCNode(mrc, nodeServiceUrl, session); log.debug("Created cnNode for serviceUrl: " + nodeServiceUrl); } else { - mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); + //mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); + d1Node = new MultipartMNode(mrc, nodeServiceUrl, session); log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl); } @@ -203,7 +209,7 @@ public void execute(JobExecutionContext context) String lastHarvestDateStr = null; Task task; - task = store.getTask(taskName); + task = store.getTask(taskName, taskType); // If a 'task' entry has not been saved for this task name yet, then a 'lastHarvested' // DataTime will not be available, in which case the 'startHarvestDataTime' from the @@ -245,16 +251,20 @@ public void execute(JobExecutionContext context) String startDTRstr = dtfOut.print(startDTR); String endDTRstr = dtfOut.print(endDTR); - Integer startCount = new Integer(0); + int startCount = 0; RequestScorerJob.ListResult result = null; Integer resultCount = null; 
+ log.debug("Getting portal pids to process..."); boolean morePids = true; while(morePids) { ArrayList pidsToProcess = null; + log.debug("startCount: " + startCount); + log.debug("countRequested:" + countRequested); try { - result = getPidsToProcess(cnNode, mnNode, isCN, session, nodeId, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); + //result = getPidsToProcess(cnNode, mnNode, isCN, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); + result = getPidsToProcess(d1Node, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); pidsToProcess = result.getResult(); resultCount = result.getResultCount(); } catch (Exception e) { @@ -263,32 +273,17 @@ public void execute(JobExecutionContext context) throw jee; } - log.info("Found " + resultCount + " pids" + " for servierUrl: " + nodeServiceUrl); + log.info("Found " + resultCount + " seriesIds" + " for date: " + startDTRstr + " at servierUrl: " + nodeServiceUrl); for (String pidStr : pidsToProcess) { try { - log.info("submitting pid: " + pidStr); submitScorerRequest(qualityServiceUrl, pidStr, suiteId, nodeId, formatFamily); - } catch (Exception e) { - JobExecutionException jee = new JobExecutionException("Unable to submit request to create new quality reports", e); + JobExecutionException jee = new JobExecutionException("Unable to submit request to create new score graph/data file", e); jee.setRefireImmediately(false); throw jee; } } - task.setLastHarvestDatetime(endDTRstr); - log.debug("taskName: " + task.getTaskName()); - log.debug("taskType: " + task.getTaskType()); - log.debug("lastharvestdate: " + task.getLastHarvestDatetime()); - - try { - store.saveTask(task); - } catch (MetadigStoreException mse) { - log.error("Error saving task: " + task.getTaskName()); - JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); - jee.setRefireImmediately(false); - throw jee; - } // Check if DataONE returned the max number of results. If so, we have to request more by paging through // the results. if(resultCount >= countRequested) { @@ -297,6 +292,21 @@ public void execute(JobExecutionContext context) log.info("Paging through more results, current start is " + startCount); } else { morePids = false; + + // Record the new "last harvested" date + task.setLastHarvestDatetime(endDTRstr); + log.debug("taskName: " + task.getTaskName()); + log.debug("taskType: " + task.getTaskType()); + log.debug("lastharvestdate: " + task.getLastHarvestDatetime()); + + try { + store.saveTask(task); + } catch (MetadigStoreException mse) { + log.error("Error saving task: " + task.getTaskName()); + JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); + jee.setRefireImmediately(false); + throw jee; + } } } store.shutdown(); @@ -305,11 +315,10 @@ public void execute(JobExecutionContext context) /** * Query a DataONE CN or MN object store for a list of object that match the time range and formatId filters provided. 
* - * @param cnNode the CN to query - * @param mnNode the MN to query - * @param isCN was a CN or MN specified - * @param session the authentication session to use - * @param nodeId the DataONE nodeId of the node to query + * //@param cnNode + * //@param mnNode + * //@param isCN + * @param session * @param pidFilter * @param startHarvestDatetimeStr * @param endHarvestDatetimeStr @@ -318,113 +327,85 @@ public void execute(JobExecutionContext context) * @return a ListResult object containing the matching pids * @throws Exception */ - public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String nodeId, + //public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, + public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr, int startCount, int countRequested) throws Exception { - ArrayList pids = new ArrayList(); - InputStream qis = null; - ObjectList objList = null; - - ObjectFormatIdentifier formatId = null; - NodeReference nodeRef = null; - //nodeRef.setValue(nodeId); - Identifier identifier = null; - Boolean replicaStatus = false; - - // Do some back-flips to convert the start and end date to the ancient Java 'Date' type that is - // used by DataONE 'listObjects()'. - ZonedDateTime zdt = ZonedDateTime.parse(startHarvestDatetimeStr); - // start date milliseconds since the epoch date "midnight, January 1, 1970 UTC" - long msSinceEpoch = zdt.toInstant().toEpochMilli(); - Date startDate = new Date(msSinceEpoch); - - zdt = ZonedDateTime.parse(endHarvestDatetimeStr); - msSinceEpoch = zdt.toInstant().toEpochMilli(); - Date endDate = new Date(msSinceEpoch); - - try { - // Even though MultipartMNode and MultipartCNode have the same parent class, their interfaces are differnt, so polymorphism - // isn't happening here. - log.debug("session: " + session.getSubject().getValue()); - log.debug("startDate: " + startDate); - log.debug("endDate: " + endDate); - log.debug("formatId: " + formatId); - log.debug("Identifier: " + identifier); - log.debug("startCount: " + startCount); - log.debug("countRequested: " + countRequested); - if(isCN) { - log.debug("cnNode: " + cnNode); - objList = cnNode.listObjects(session, startDate, endDate, formatId, nodeRef, identifier, startCount, countRequested); - } else { - log.debug("mnNode: " + mnNode); - objList = mnNode.listObjects(session, startDate, endDate, formatId, identifier, replicaStatus, startCount, countRequested); - } - log.debug("Retrieved " + objList.getCount() + " pids"); - } catch (Exception e) { - log.error("Error retrieving pids for node: " + e.getMessage()); - throw e; - } + MetadigProcessException metadigException = null; - String thisFormatId = null; - String thisPid = null; - int pidCount = 0; - - log.info("Checking retrieved pids for matches with pid filter"); - if (objList.getCount() > 0) { - for(ObjectInfo oi: objList.getObjectInfoList()) { - thisFormatId = oi.getFormatId().getValue(); - thisPid = oi.getIdentifier().getValue(); - - // Check all pid filters. There could be multiple wildcard filters, which are separated - // by ','. 
- String [] filters = pidFilter.split("\\|"); - Boolean found = false; - for(String thisFilter:filters) { - if(thisFormatId.matches(thisFilter)) { - found = true; - continue; - } - } + org.w3c.dom.NodeList xpathResult = null; + XPathExpression fieldXpath = null; + XPath xpath = null; + org.w3c.dom.Node node = null; + ArrayList pids = new ArrayList(); + Document xmldoc = null; - // Always re-create a report, even if it exists for a pid, as the sysmeta could have - // been updated (i.e. obsoletedBy, access) and the quality report and index contain - // sysmeta fields. - if(found) { - // The DataONE listObjects service retuns the pid for each object, but does not return the seriesId, - // so this has to be retrieved now, as Bookkeeper service and MetacatUI (when the graph is requested for - // this portal) uses the sid, not the pid, so create and store the graph based on the sid. - // if (!runExists(thisPid, suiteId, store)) { + String queryStr = "?q=formatId:" + pidFilter + "+-obsoletedBy:*" + "+dateUploaded:[" + startHarvestDatetimeStr + "%20TO%20" + + endHarvestDatetimeStr + "]" + + "&fl=seriesId&q.op=AND"; + log.debug("query: " + queryStr); - Identifier thisId = new Identifier(); - thisId.setValue(thisPid); + // Send the query to DataONE Solr to retrieve portal seriesIds for a given time frame - org.dataone.service.types.v2.SystemMetadata sysmeta = null; + // One query can return many documents, so use the paging mechanism to make sure we retrieve them all. + // Keep paging through query results until all pids have been fetched. The last 'page' of query + // results is indicated by the number of items returned being less than the number requested. + int thisResultLength; + // Now setup the xpath to retrieve the ids returned from the collection query. + try { + log.debug("Compiling xpath for seriesId"); + // Extract the collection query from the Solr result XML + XPathFactory xPathfactory = XPathFactory.newInstance(); + xpath = xPathfactory.newXPath(); + fieldXpath = xpath.compile("//result/doc/str[@name='seriesId']/text()"); + } catch (XPathExpressionException xpe) { + log.error("Error extracting id from solr result doc: " + xpe.getMessage()); + metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage()); + metadigException.initCause(xpe); + throw metadigException; + } - if(isCN) { - sysmeta = cnNode.getSystemMetadata(session, thisId); - } else { - sysmeta = mnNode.getSystemMetadata(session, thisId); - } + // Loop through the Solr result. As the result may be large, page through the results, accumulating + // the pids returned into a ListResult object. 
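+        // Each pass of the loop below requests up to 'countRequested' seriesIds starting at 'startPos';
+        // paging stops when a query returns no further results.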
- String thisSeriesId = sysmeta.getSeriesId().getValue(); + //log.debug("Getting portal seriesIds from Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl); + log.debug("Getting portal seriesIds from Solr " ); + int startPos = startCount; - pidCount = pidCount++; - pids.add(thisSeriesId); - log.info("adding seriesId to process: " + thisSeriesId + ", formatId: " + thisFormatId); - // } - } + do { + //xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, cnNode, mnNode, isCN, session); + xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, d1Node, session); + if(xmldoc == null) { + log.info("no values returned from query"); + break; + } + try { + log.debug("processing xpathresult..."); + xpathResult = (org.w3c.dom.NodeList) fieldXpath.evaluate(xmldoc, XPathConstants.NODESET); + log.debug("processed xpathResult"); + } catch (XPathExpressionException xpe) { + log.error("Error extracting seriesId from solr result doc: " + xpe.getMessage()); + metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage()); + metadigException.initCause(xpe); + throw metadigException; + } + String currentPid = null; + thisResultLength = xpathResult.getLength(); + log.debug("Got " + thisResultLength + " pids this query"); + if(thisResultLength == 0) break; + for (int index = 0; index < xpathResult.getLength(); index++) { + node = xpathResult.item(index); + currentPid = node.getTextContent(); + pids.add(currentPid); + log.debug("adding pid: " + currentPid); } - } - if(pids.size() == 0) { - log.info("No matching pids found"); - } else { - log.info(pids.size() + " matching pids found."); - } + startPos += thisResultLength; + } while (thisResultLength > 0); RequestScorerJob.ListResult result = new RequestScorerJob.ListResult(); - result.setResultCount(pidCount); + result.setResultCount(pids.size()); result.setResult(pids); return result; @@ -437,7 +418,7 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S String scorerServiceUrl = qualityServiceUrl + "/scores" + "?suite=" + suiteId; if(collectionId != null && ! collectionId.isEmpty()) { - scorerServiceUrl += "&collection=" + collectionId; + scorerServiceUrl += "&id=" + collectionId; } if(nodeId != null && ! 
nodeId.isEmpty()) {
@@ -455,7 +436,7 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S
            post.addHeader("Accept", "application/xml");

            // send to service
-            log.trace("submitting scores request : " + scorerServiceUrl);
+            log.debug("submitting scores request: " + scorerServiceUrl);
            //post.setEntity((HttpEntity) entity);
            CloseableHttpClient client = HttpClients.createDefault();
            CloseableHttpResponse response = client.execute(post);
@@ -469,26 +450,5 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S
            throw(e);
        }
    }
-
-    private Boolean isCN(String serviceUrl) {
-
-        Boolean isCN = false;
-        // Identity node as either a CN or MN based on the serviceUrl
-        String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org";
-        Pattern r = Pattern.compile(pattern);
-        Matcher m = r.matcher(serviceUrl);
-        if (m.find()) {
-            isCN = true;
-            log.debug("service URL is for a CN: " + serviceUrl);
-        } else {
-            log.debug("service URL is not for a CN: " + serviceUrl);
-            isCN = false;
-        }
-
-        return isCN;
-    }
-
-
}

From 66d0f9b195cfec991e6de51385cc3c8da8fd51fd Mon Sep 17 00:00:00 2001
From: gothub
Date: Sun, 2 Aug 2020 12:50:54 -0700
Subject: [PATCH 28/47] Evaluate portal 'collectionQuery' on CN (#110)

---
 .../edu/ucsb/nceas/mdqengine/Controller.java  |   8 +-
 .../ucsb/nceas/mdqengine/scorer/Scorer.java   | 121 ++++++++++++++----
 2 files changed, 103 insertions(+), 26 deletions(-)

diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java
index be83cd6e..f0958ff4 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/Controller.java
@@ -416,7 +416,7 @@ public void processQualityRequest(String memberNode,
     *
     * @param collectionId the DataONE collection identifier (the portal seriesId)
     * @param nodeId the node identifier the collection resides on
-     * @param formatFamily a string representing the DataONE formats to create score for
+     * @param formatFamily a string representing the DataONE formats to create scores for ("eml", "iso"), optional
     * @param qualitySuiteId the quality suite used to create the score graph
     * @param requestDateTime the datetime that the request was made
     *
@@ -425,12 +425,12 @@ public void processQualityRequest(String memberNode,
    public void processScorerRequest(String collectionId,
                                     String nodeId,
-                                     String formatFamily,
+                                     String formatFamily, // Optional format filter, if creating a graph for a subset of metadata formats ("eml", "iso")
                                     String qualitySuiteId,
                                     DateTime requestDateTime) throws java.io.IOException {

        log.info("Processing scorer request, collection: " + collectionId + ", suite: " + qualitySuiteId
-                + "nodeId: " + nodeId + ", formatFamily: " + formatFamily);
+                + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily);

        ScorerQueueEntry qEntry = null;
        byte[] message = null;
@@ -467,7 +467,7 @@ public void processScorerRequest(String collectionId,
        message = bos.toByteArray();

        this.writeInProcessChannel(message, SCORER_ROUTING_KEY);
-        log.info(" [x] Queued Scorer request for pid: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily);
+        log.info(" [x] Queued Scorer request for id: '" + qEntry.getCollectionId() + "'" + ", quality suite " + qualitySuiteId + ", nodeId: " + nodeId + ", formatFamily: " + formatFamily);
    }

    /**
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index 6414f852..b8cfb205 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -20,8 +20,11 @@ import org.apache.solr.client.solrj.beans.BindingException; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; +import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; +import org.dataone.client.v2.impl.MultipartCNode; import org.dataone.client.v2.impl.MultipartD1Node; // Don't include org.dataone.client.rest.MultipartD1Node (this is what IDEA selects) +import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.service.types.v1.Session; import org.dataone.service.types.v1.Subject; import org.dataone.service.types.v1.Group; @@ -29,6 +32,7 @@ import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; +import org.quartz.JobExecutionException; import org.w3c.dom.Document; import org.xml.sax.InputSource; @@ -156,6 +160,9 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp String nodeServiceUrl = null; String label = null; String title = null; + MultipartRestClient mrc = null; + MultipartMNode mnNode = null; + MultipartCNode cnNode = null; //long startTime = System.nanoTime(); startTimeProcessing = System.currentTimeMillis(); @@ -199,6 +206,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp } log.debug("nodeId: " + nodeId); + label: try { MDQconfig cfg = new MDQconfig(); // Pids associated with a collection, based on query results using 'collectionQuery' field in solr. @@ -224,6 +232,43 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // If creating a graph for a collection, get the set of pids associated with the collection. // Only scores for these pids will be included in the graph. 
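+                // The client type must match the node type: a MultipartCNode for a coordinating node
+                // (e.g., hypothetically, https://cn.dataone.org/cn), a MultipartMNode for a member node.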
+ try { + mrc = new DefaultHttpMultipartRestClient(); + } catch (Exception e) { + log.error("Error creating rest client: " + e.getMessage()); + JobExecutionException jee = new JobExecutionException(e); + jee.setRefireImmediately(false); + throw jee; + } + + Session session = DataONE.getSession(subjectId, authToken); + + // Don't know node type yet from the id, so have to manually check if it's a CN + Boolean isCN = DataONE.isCN(nodeServiceUrl); + + MultipartD1Node d1Node = null; + if(isCN) { + //cnNode = new MultipartCNode(mrc, nodeServiceUrl, session); + d1Node = new MultipartCNode(mrc, nodeServiceUrl, session); + log.debug("Created cnNode for serviceUrl: " + nodeServiceUrl); + } else { + //mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); + d1Node = new MultipartMNode(mrc, nodeServiceUrl, session); + log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl); + } +// +// Session session = DataONE.getSession(subjectId, authToken); +// +// // Don't know node type yet from the id, so have to manually check if it's a CN +// Boolean isCN = DataONE.isCN(nodeServiceUrl); +// if(isCN) { +// cnNode = new MultipartCNode(mrc, nodeServiceUrl, session); +// log.debug("Created cnNode for serviceUrl: " + nodeServiceUrl); +// } else { +// mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); +// log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl); +// } + if (collectionId != null && !collectionId.isEmpty()) { // If the nodeId is specified, use if to determine the values for authTokenName and subjectIdName, // if those values are not defined @@ -235,7 +280,8 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // Always use the CN subject id and authentication token from the configuration file, as // requests that this method uses need CN subject privs ScorerResult result = null; - result = gfr.getCollectionPids(collectionId, nodeServiceUrl, subjectId, authToken); + //result = gfr.getCollectionPids(collectionId, cnNode, mnNode, isCN, session); + result = gfr.getCollectionPids(collectionId, d1Node, session); collectionPids = result.getResult(); label = result.getLabel(); // Don't continue if no pids (and thus scores) were found for this collection @@ -346,12 +392,13 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp * which is usually an MN, but the collectionQuery is always evaluated on the CN

* * @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc - * @param serviceUrl the DataONE service URL to obtain the collectionQuery string from - * @param subjectId the DataONE subjectId to use for the query, associated with the authentication token - * @param authToken the DataONE authentication token + * @param d1Node + * @param session * @return a List of quality scores fetched from Solr */ - private ScorerResult getCollectionPids(String collectionId, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { + //private ScorerResult getCollectionPids(String collectionId, MultipartCNode cnNode, MultipartMNode mnNode, + // Boolean isCN, Session session) throws MetadigProcessException { + private ScorerResult getCollectionPids(String collectionId, MultipartD1Node d1Node, Session session) throws MetadigProcessException { Document xmldoc = null; String queryStr = null; @@ -364,7 +411,9 @@ private ScorerResult getCollectionPids(String collectionId, String serviceUrl, S which will be used to query DataONE Solr for all the pids associated with that project (that's 2 queries!) */ ArrayList pids = new ArrayList<>(); - queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; + queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; + //queryStr = "?q=seriesId:" + encodeValue(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; + //queryStr = "?q=seriesId:" + collectionId + "+-obsoletedBy:*&fl=collectionQuery,label,rightsHolder&q.op=AND"; startPos = 0; countRequested = 10000; @@ -372,7 +421,7 @@ which will be used to query DataONE Solr for all the pids associated with that p // Get the collectionQuery from Solr try { log.debug("Getting collectionQuery with query: " + queryStr); - xmldoc = DataONE.querySolr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken); + xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, d1Node, session); } catch (MetadigProcessException mpe) { log.error("Unable to query Solr for collectionQuery field for collection id: " + collectionId); throw new MetadigProcessException("Unable to query Solr for collectionQuery field for collection id: " + collectionId); @@ -530,13 +579,32 @@ which will be used to query DataONE Solr for all the pids associated with that p * DataONE listObjects service. This node could either be an MN or CN. 
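     * Note that the collectionQuery itself is evaluated on the CN, whose Solr index covers the
     * entire DataONE federation; an MN index only contains that node's own holdings.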
     */
-        log.debug("Sending collectionQuery to Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl);
+        //log.debug("Sending collectionQuery to Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl);
+        MultipartRestClient mrc = null;
+        MultipartCNode cnNode = null;
+        log.debug("query string: " + queryStr);
+        try {
+            mrc = new DefaultHttpMultipartRestClient();
+        } catch (Exception e) {
+            log.error("Error creating rest client: " + e.getMessage());
+            JobExecutionException jee = new JobExecutionException(e);
+            jee.setRefireImmediately(false);
+            throw new MetadigProcessException("Unable to create connection to CN");
+        }
+
+        Session CNsession = DataONE.getSession(CNsubjectId, CNauthToken);
+
+        // Don't know node type yet from the id, so have to manually check if it's a CN
+        Boolean isCN = DataONE.isCN(CNserviceUrl);
+
+        cnNode = new MultipartCNode(mrc, CNserviceUrl, CNsession);
+
        do {
            //TODO: check that a result was returned
            // Note: the collectionQuery is always evaluated on the CN, so that the entire DataONE network is queried.
-            xmldoc = DataONE.querySolr(queryStr, serviceUrl, startPos, countRequested, subjectId, authToken);
+            xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, cnNode, CNsession);
            if(xmldoc == null) {
                log.info("no values returned from query");
                break;
            }
@@ -930,20 +998,31 @@ private String URLencodeChars(String value, String target) {
     * @return the escaped value
     */
    private String escapeSpecialChars(String value) {
-        // {
+
+        // These are reserved characters in Solr
+        // + - && || ! ( ) { } [ ] ^ " ~ * ? : \
        value = value.replace("%7B", "\\%7B");
-        // }
        value = value.replace("%7D", "\\%7D");
-        // :
-        //value = value.replace("%3A", "\\%3A");
        value = value.replace(":", "%5C:");
-
-        //value = value.replace("(", "\\(");
-        //value = value.replace(")", "\\)");
-        //value = value.replace("?", "\\?");
-        //value = value.replace("%3F", "\\%3F");
-        //value = value.replace("\"", "\\\"");
-        //value = value.replace("'", "\\'");
+        value = value.replace(",", "%5C,");
+        value = value.replace("+", "%5C+");
+        value = value.replace("-", "%5C-");
+        value = value.replace("&", "%5C&");
+        value = value.replace("|", "%5C|");
+        value = value.replace("!", "%5C!");
+        value = value.replace("(", "%5C(");
+        value = value.replace(")", "%5C)");
+        value = value.replace("{", "%5C{");
+        value = value.replace("}", "%5C}");
+        value = value.replace("[", "%5C[");
+        value = value.replace("]", "%5C]");
+        value = value.replace("^", "%5C^");
+        value = value.replace("\"", "%5C\"");
+        value = value.replace("~", "%5C~");
+        value = value.replace("*", "%5C*");
+        value = value.replace("?", "%5C?");
+        value = value.replace("\\", "%5C\\");
        return value;
    }
@@ -955,7 +1034,5 @@ private static String encodeValue(String value) {
            throw new RuntimeException(ex.getCause());
        }
    }
-
-
}

From 35b77b4fb34066630cf010403da4030d9e6cb949 Mon Sep 17 00:00:00 2001
From: gothub
Date: Sun, 2 Aug 2020 12:52:04 -0700
Subject: [PATCH 29/47] Store task type (used by scheduler)

---
 .../mdqengine/scheduler/RequestReportJob.java |  2 +-
 .../nceas/mdqengine/store/DatabaseStore.java  | 74 +------------------
 .../nceas/mdqengine/store/InMemoryStore.java  |  2 +-
 .../ucsb/nceas/mdqengine/store/MDQStore.java  |  2 +-
 .../ucsb/nceas/mdqengine/store/MNStore.java   |  2 +-
 5 files changed, 7 insertions(+), 75 deletions(-)

diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java
index
b59b0224..6a11c68c 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -217,7 +217,7 @@ public void execute(JobExecutionContext context) //node = store.getNode(nodeId, jobName); Task task; - task = store.getTask(taskName); + task = store.getTask(taskName, taskType); // If a 'task' entry has not been saved for this task name yet, then a 'lastHarvested' // DataTime will not be available, in which case the 'startHarvestDataTime' from the // config file will be used. diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java index ac340bdd..3fcca606 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java @@ -325,75 +325,6 @@ public void shutdown() { } } -// public Node getNode(String nodeId, String jobName) { -// -// //return runs.get(id); -// Result result = new Result(); -// PreparedStatement stmt = null; -// String lastDT = null; -// Node node = new Node(); -// -// // Select records from the 'nodes' table -// try { -// log.debug("preparing statement for query"); -// String sql = "select * from nodes where node_id = ? and job_name = ?"; -// stmt = conn.prepareStatement(sql); -// stmt.setString(1, nodeId); -// stmt.setString(2, jobName); -// -// log.debug("issuing query: " + sql); -// ResultSet rs = stmt.executeQuery(); -// if(rs.next()) { -// node.setNodeId(rs.getString("node_id")); -// node.setJobName(rs.getString("job_name")); -// node.setLastHarvestDatetime(rs.getString("last_harvest_datetime")); -// rs.close(); -// stmt.close(); -// } else { -// log.debug("No results returned from query"); -// } -// } catch ( Exception e ) { -// log.error( e.getClass().getName()+": "+ e.getMessage()); -// } -// -// return(node); -// } - - -// public void saveNode(Node node) throws MetadigStoreException { -// -// PreparedStatement stmt = null; -// -// // Perform an 'upsert' on the 'nodes' table - if a record exists for the 'metadata_id, suite_id' already, -// // then update the record with the incoming data. 
-// try { -// String sql = "INSERT INTO nodes (node_id, job_name, last_harvest_datetime) VALUES (?, ?, ?)" -// + " ON CONFLICT ON CONSTRAINT nodes_id_job_name_pk" -// + " DO UPDATE SET (node_id, job_name, last_harvest_datetime) = (?, ?, ?);"; -// -// stmt = conn.prepareStatement(sql); -// stmt.setString(1, node.getNodeId()); -// stmt.setString(2, node.getJobName()); -// stmt.setString(3, node.getLastHarvestDatetime()); -// stmt.setString(4, node.getNodeId()); -// stmt.setString(5, node.getJobName()); -// stmt.setString(6, node.getLastHarvestDatetime()); -// stmt.executeUpdate(); -// stmt.close(); -// conn.commit(); -// //conn.close(); -// } catch (SQLException e) { -// log.error( e.getClass().getName()+": "+ e.getMessage()); -// MetadigStoreException me = new MetadigStoreException("Unable save last harvest date to the datdabase."); -// me.initCause(e); -// throw(me); -// } -// -// // Next, insert a record into the child table ('runs') -// log.debug("Records created successfully"); -// } - - public void saveTask(Task task) throws MetadigStoreException { PreparedStatement stmt = null; @@ -427,7 +358,7 @@ public void saveTask(Task task) throws MetadigStoreException { log.debug("Records created successfully"); } - public Task getTask(String taskName) { + public Task getTask(String taskName, String taskType) { //return runs.get(id); Result result = new Result(); @@ -438,9 +369,10 @@ public Task getTask(String taskName) { // Select records from the 'nodes' table try { log.debug("preparing statement for query"); - String sql = "select * from tasks where task_name = ?"; + String sql = "select * from tasks where task_name = ? and task_type = ?"; stmt = conn.prepareStatement(sql); stmt.setString(1, taskName); + stmt.setString(2, taskType); log.debug("issuing query: " + sql); ResultSet rs = stmt.executeQuery(); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java index 44bb386c..af7637a0 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java @@ -210,7 +210,7 @@ public void deleteRun(Run run) { // public void saveNode(Node node) throws MetadigStoreException { } @Override - public Task getTask(String taskName) { return new Task(); } + public Task getTask(String taskName, String taskType) { return new Task(); } @Override public void saveTask(Task task) throws MetadigStoreException { } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java index fbef0bc3..c573803d 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java @@ -33,7 +33,7 @@ public interface MDQStore { // public Node getNode(String nodeId, String jobName); // public void saveNode(Node node) throws MetadigStoreException; - public Task getTask(String taskName); + public Task getTask(String taskName, String taskType); public void saveTask(Task task) throws MetadigStoreException; } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java index ec7a2772..4613577e 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java @@ -335,7 +335,7 @@ public void renew() {} // public void saveNode(Node node) throws MetadigStoreException { } @Override - public Task 
getTask(String taskName) { return new Task(); } + public Task getTask(String taskName, String taskType) { return new Task(); } @Override public void saveTask(Task task) throws MetadigStoreException { } From 2c6531123819b732ac7794b2e96af55668c4c86d Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 2 Aug 2020 12:53:11 -0700 Subject: [PATCH 30/47] Minor fix to assessment graph retrieval --- .../edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java index ff67bac6..3f7cc497 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/filestore/FilestoreDB.java @@ -105,14 +105,12 @@ public MetadigFile getFileEntry(MetadigFile mdFile) throws MetadigFilestoreExcep stmt.setString(1, storageType); stmt.setString(2, altFilename); } else { - sql = "select * from filestore where pid = ? and storage_type = ? and media_type = ?"; + sql = "select * from filestore where pid = ? and suite_id = ? and storage_type = ? and media_type = ?"; stmt = conn.prepareStatement(sql); stmt.setString(1, pid); stmt.setString(2, suiteId); - stmt.setString(3, nodeId); - stmt.setString(4, mdFormatFilter); - stmt.setString(5, storageType); - stmt.setString(6, mediaType); + stmt.setString(3, storageType); + stmt.setString(4, mediaType); } log.debug("issuing query: " + sql); From 0709ff21137c09f04258e6a2e40c8cdb767117f7 Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 2 Aug 2020 12:55:39 -0700 Subject: [PATCH 31/47] Read log4j.properties dynamically on container startup --- Kubernetes/metadig-scheduler/Dockerfile | 8 +++++--- Kubernetes/metadig-scorer/Dockerfile | 6 ++++-- Kubernetes/metadig-worker/Dockerfile | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Kubernetes/metadig-scheduler/Dockerfile b/Kubernetes/metadig-scheduler/Dockerfile index 93fb94f8..b193bb70 100644 --- a/Kubernetes/metadig-scheduler/Dockerfile +++ b/Kubernetes/metadig-scheduler/Dockerfile @@ -6,14 +6,14 @@ MAINTAINER slaughter@nceas.ucsb.edu # Set the working directory WORKDIR /var/lib/metadig -COPY log4j.properties . +#COPY log4j.properties . # The most recently built jar file is copied from the maven build directory to this dir by maven, so that # it can be copied to the image. COPY metadig-engine.jar metadig-engine.jar #COPY metadig.properties /etc/metadig/metadig.properties #COPY taskList.csv /etc/metadig/taskList.csv -COPY log4j.properties . +#COPY log4j.properties . #COPY run.sh run.sh # The 'run.sh' script copies config files that should be available from persistent volume to the standard location where the software @@ -23,4 +23,6 @@ COPY log4j.properties . #CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp ./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scheduler.JobScheduler #CMD [ "./run.sh" ] -CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp ./metadig-engine.jar: edu.ucsb.nceas.mdqengine.scheduler.JobScheduler +# Set classpath to include /opt/local/metadig/log4j.properties, if it exists, so that logging can be changed without +# having to rebuild the container. 
Note that on k8s, this dir is mapped to the persistent volume, so will be /data/metadig/log4j.properties +CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar: edu.ucsb.nceas.mdqengine.scheduler.JobScheduler diff --git a/Kubernetes/metadig-scorer/Dockerfile b/Kubernetes/metadig-scorer/Dockerfile index 63e47408..d539ee7b 100644 --- a/Kubernetes/metadig-scorer/Dockerfile +++ b/Kubernetes/metadig-scorer/Dockerfile @@ -9,7 +9,7 @@ WORKDIR /var/lib/metadig # This file was created from the https://github.com/NCEAS/metadig-r repo # and contains R functions that assist in writing R based quality checks. COPY metadig_0.2.0.tar.gz metadig.tar.gz -COPY log4j.properties . +#COPY log4j.properties . # The most recently built jar file is copied from the maven build directory to this dir by maven, so that # it can be copyied to the image. COPY metadig-engine.jar metadig-engine.jar @@ -40,5 +40,7 @@ RUN Rscript --vanilla -e 'install.packages("metadig.tar.gz", repos=NULL)' # Run the Scorer process # Note: docker --build-arg only allows one argument (one token only, multiple tokens inside quotes doesn't work, so have # to specify java options directly on command line. -CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp ./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scorer.Scorer +# Set classpath to include /opt/local/metadig/log4j.properties, if it exists, so that logging can be changed without +# having to rebuild the container. Note that on k8s, this dir is mapped to the persistent volume, so will be /data/metadig/log4j.properties +CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scorer.Scorer diff --git a/Kubernetes/metadig-worker/Dockerfile b/Kubernetes/metadig-worker/Dockerfile index b662b5f6..fff34db1 100644 --- a/Kubernetes/metadig-worker/Dockerfile +++ b/Kubernetes/metadig-worker/Dockerfile @@ -11,7 +11,7 @@ WORKDIR /var/lib/metadig # This file was created from the https://github.com/NCEAS/metadig-r repo # and contains R functions that assist in writing R based quality checks. COPY metadig_0.2.0.tar.gz metadig.tar.gz -COPY log4j.properties . +#COPY log4j.properties . # The most recently built jar file is copied from the maven build directory to this dir by maven, so that # it can be copyied to the image. COPY metadig-engine.jar metadig-engine.jar @@ -34,6 +34,8 @@ RUN Rscript --vanilla r-cmds.txt # Run the Worker process # Note: docker --buile-arg only allows one argument (one token only, multiple tokens inside quotes doesn't work, so have # to specify java options directly on command line. -CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp ./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.Worker +# Set classpath to include /opt/local/metadig/log4j.properties, if it exists, so that logging can be changed without +# having to rebuild the container. 
Note that on k8s, this dir is mapped to the persistent volume, so will be /data/metadig/log4j.properties +CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.Worker #CMD java -Xms128m -Xmx256m -Dlog4j.configuration=log4j.properties -cp ./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.Worker From 639786477bfddbc6515137c8f2364a5ed43c487b Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 2 Aug 2020 12:56:44 -0700 Subject: [PATCH 32/47] Load R packages from cran.rstudio.com (#259) --- Kubernetes/metadig-scorer/Dockerfile | 13 +++---------- Kubernetes/metadig-worker/Dockerfile | 4 +++- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/Kubernetes/metadig-scorer/Dockerfile b/Kubernetes/metadig-scorer/Dockerfile index d539ee7b..e7146b8a 100644 --- a/Kubernetes/metadig-scorer/Dockerfile +++ b/Kubernetes/metadig-scorer/Dockerfile @@ -24,17 +24,10 @@ RUN mkdir -p /etc/dataone/index && touch /etc/dataone/index/d1client.properties # Add R runtime and install packges required by the quality suites RUN apt update -RUN apt -y install vim -RUN apt -y install r-base -RUN apt -y install r-cran-httr -RUN apt -y install r-cran-xml2 -RUN apt -y install r-cran-tidyr -RUN apt -y install r-cran-scales -RUN apt -y install r-cran-lubridate -RUN apt -y install r-cran-ggplot2 -RUN apt -y install r-cran-magrittr +RUN apt -y install vim bash +RUN apt -y install r-base r-cran-httr r-cran-xml2 r-cran-tidyr r-cran-scales r-cran-lubridate r-cran-ggplot2 r-cran-magrittr # Debian stretch doesn't have a pre-cooked package for readr, so install now. -RUN Rscript --vanilla -e 'install.packages("readr", repos="https://cran.mtu.edu/")' +RUN Rscript --vanilla -e 'install.packages("readr", repos=c(CRAN = "http://cran.rstudio.com"))' RUN Rscript --vanilla -e 'install.packages("metadig.tar.gz", repos=NULL)' # Run the Scorer process diff --git a/Kubernetes/metadig-worker/Dockerfile b/Kubernetes/metadig-worker/Dockerfile index fff34db1..09d55309 100644 --- a/Kubernetes/metadig-worker/Dockerfile +++ b/Kubernetes/metadig-worker/Dockerfile @@ -27,8 +27,10 @@ RUN mkdir -p /etc/dataone/index && touch /etc/dataone/index/d1client.properties # Add R runtime and install packges required by the quality suites COPY r-cmds.txt r-cmds.txt RUN apk update +# bash is needed by the openssl install +RUN apk add bash RUN apk add g++ R R-dev R-doc libc-dev openssl-dev libxml2 libxml2-dev -RUN echo 'options(repos = c(CRAN = "https://cran.cnr.berkeley.edu/"))' >> /usr/lib/R/etc/Rprofile.site +RUN echo 'options(repos = c(CRAN = "http://cran.rstudio.com"))' >> /usr/lib/R/etc/Rprofile.site RUN Rscript --vanilla r-cmds.txt # Run the Worker process From 4607036cfa539d7cdc59ed37037a5e3d29d106b1 Mon Sep 17 00:00:00 2001 From: gothub Date: Tue, 4 Aug 2020 18:39:43 -0700 Subject: [PATCH 33/47] minor formatting changes --- .../code/graph_cumulative_quality_scores.R | 57 +++++++++---------- .../code/graph_monthly_quality_scores.R | 37 ++++++------ 2 files changed, 44 insertions(+), 50 deletions(-) diff --git a/src/main/resources/code/graph_cumulative_quality_scores.R b/src/main/resources/code/graph_cumulative_quality_scores.R index 2344bf3b..78b97804 100644 --- a/src/main/resources/code/graph_cumulative_quality_scores.R +++ b/src/main/resources/code/graph_cumulative_quality_scores.R @@ -7,27 +7,22 @@ library(readr) library(magrittr) # Plot cummulative quality scores by month -# This program is dispatched (called) by the MetaDIG 
Grapher class. Several +# This program is dispatched (called) by the MetaDIG Scorer class. Several # variables are injected by metadig-engine Dispatcher # - title: the graph title -# - title: the graph title # - inFile: the CSV file containing quality scores, which has been prepared by Grapher # - outFile: the graphics output file to create # Variables read by metadig-engine Dispatcher after execution -# mdq_result, output, status +# mdq_result, output, status + +# Define these variable ("infile", "outFile" for local testing only +#inFile <- "toolik.csv" +#outFile <- "toolik-cumulative.png" -# Define these variable for local testing only -#inFile <- "dbo.csv" -#outFile <- "dbo.png" -#inFile <- "sasap.csv" -#outFile <- "sasap.png" -#inFile <- "FAIR-scores-eml.csv" -#outFile <- "FAIR-scores-eml.png" axisTextFontSize <- 7 -legendTextFontSize <- 7 +legendTextFontSize <- 8 axisTitleFontSize <- 9 legendTitleFontSize <- 9 - # Load data fsr <- read_csv(inFile) @@ -37,11 +32,10 @@ scores <- mutate(fsr, ym = as.Date(sprintf("%4s-%02d-01", year(dateUploaded), mo mutate(scoreI = scoreInteroperable * 100.0) %>% mutate(scoreR = scoreReusable * 100.0) -# Use this when sequenceId problem has been resolved (github metadig-engine #232) most_recent <- scores %>% - arrange(ym, sequenceId, dateUploaded) %>% - group_by(ym, sequenceId) %>% - top_n(1, dateUploaded) + arrange(ym, sequenceId, dateUploaded) %>% + group_by(ym, sequenceId) %>% + top_n(1, dateUploaded) head(most_recent) # calculate cummulative overall @@ -56,12 +50,16 @@ score_cumulative$metric <- factor(score_cumulative$metric, levels=c("f", "a", "i", "r", "fc", "ac", "ic", "rc"), labels=c("Findable", "Accessible", "Interoperable", "Reusable", "Cum. Findable", "Cum. Accessible", "Cum. Interoperable", "Cum. Reusable")) -score_monthly <- score_cumulative %>% filter(metric %in% c("Findable", "Accessible", "Interoperable", "Reusable")) -# Calculate the overall mean for each FAIR category -mf <- score_cumulative %>% filter(metric %in% c("Findable")) %>% extract2("mean") %>% mean(., na.rm = TRUE) -ma <- score_cumulative %>% filter(metric %in% c("Accessible")) %>% extract2("mean") %>% mean(., na.rm = TRUE) -mi <- score_cumulative %>% filter(metric %in% c("Interoperable")) %>% extract2("mean") %>% mean(., na.rm = TRUE) -mr <- score_cumulative %>% filter(metric %in% c("Reusable")) %>% extract2("mean") %>% mean(., na.rm = TRUE) +score_cumulative_alone <- score_cumulative %>% filter(metric %in% c("Cum. Findable", "Cum. Accessible", "Cum. Interoperable", "Cum. Reusable")) + +# Fetch the last year in the cumulative scores +ymLatest <- with(score_cumulative, max(ym)) +# Fetch last means - these will be used for the legend to show the mean of the latest and hopefully +# best scores for the latest time slot (month) +mfLatest <- score_cumulative_alone %>% filter(ym == ymLatest) %>% filter(metric %in% "Cum. Findable") %>% extract2("mean") +maLatest <- score_cumulative_alone %>% filter(ym == ymLatest) %>% filter(metric %in% "Cum. Accessible") %>% extract2("mean") +miLatest <- score_cumulative_alone %>% filter(ym == ymLatest) %>% filter(metric %in% "Cum. Interoperable") %>% extract2("mean") +mrLatest <- score_cumulative_alone %>% filter(ym == ymLatest) %>% filter(metric %in% "Cum. Reusable") %>% extract2("mean") # See if the 'dateUploaded' dates span multiple years and if not, the x-axis needs to be configured for ggplot so that # it will display. If it is configured for years and only a single year exists, the x-axis will not display. 
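The edits above replace the all-series means with the cumulative means at the latest month — the running average across every record uploaded to date — and the hunk below wires those values into the legend labels. The selection logic can be sketched in Java as follows; ScoreRow is a hypothetical stand-in for the R 'score_cumulative' data frame, illustrative only and not part of the patch.

    import java.time.YearMonth;
    import java.util.*;

    // Hypothetical row type standing in for the R 'score_cumulative' data frame.
    record ScoreRow(YearMonth month, String metric, double mean) {}

    class LatestCumulativeMeans {
        // For each FAIR metric, return the cumulative mean at the most recent month.
        static Map<String, Double> latestMeans(List<ScoreRow> cumulative) {
            YearMonth latest = cumulative.stream()
                    .map(ScoreRow::month)
                    .max(YearMonth::compareTo)
                    .orElseThrow();
            Map<String, Double> byMetric = new HashMap<>();
            for (ScoreRow row : cumulative) {
                if (row.month().equals(latest)) {
                    byMetric.put(row.metric(), row.mean());
                }
            }
            return byMetric;
        }
    }

Because the curves are running means, their value at the last month is also the best single summary of the whole collection, which is why the legend labels are built from mfLatest, maLatest, miLatest and mrLatest rather than from an average of the monthly means.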
@@ -81,7 +79,7 @@ if(minYear == maxYear) { # Plot cummulative overall d1_colors <- c("#ff582d", "#c70a61", "#1a6379", "#60c5e4", "#ff582d", "#c70a61", "#1a6379", "#60c5e4") -p <- ggplot(data=score_monthly, mapping=aes(x=ym, y=mean, color=metric)) + +p <- ggplot(data=score_cumulative_alone, mapping=aes(x=ym, y=mean, color=metric)) + geom_line() + geom_point(size=1) + theme_bw() + @@ -93,19 +91,18 @@ p <- ggplot(data=score_monthly, mapping=aes(x=ym, y=mean, color=metric)) + legend.text = element_text(size = legendTextFontSize), panel.grid.minor = element_blank(), panel.background = element_blank()) + - #scale_color_manual(name = "Metric", labels = c("Findable", "Accessible", "Interoperable", "Reusable"), - # values=d1_colors) + - scale_color_manual(name = "Metric", labels = c(sprintf("Findable (%.0f%%)", mf), - sprintf("Accessible (%.0f%%)", ma), - sprintf("Interoperable (%.0f%%)", mi), - sprintf("Reusable (%.0f%%)", mr)), values=d1_colors) + + + scale_color_manual(name = "Metric", labels = c(sprintf("Findable (%.0f%%)", mfLatest), + sprintf("Accessible (%.0f%%)", maLatest), + sprintf("Interoperable (%.0f%%)", miLatest), + sprintf("Reusable (%.0f%%)", mrLatest)), values=d1_colors) + scale_x_date(date_breaks=dateBreaks, date_minor_breaks=dateMinorBreaks, labels=date_format(dateFormat)) + xlab(xLabel) + scale_y_continuous(limits=c(0,100)) + ylab("Average FAIR Score") + #ggtitle(paste0("DataONE: FAIR scores for ", format(sum(standards$n), big.mark=","), " EML and ISO metadata records")) #scale_fill_discrete(name = "metric", labels = c("Finabl", "Accessibl", "Interoperabl", "Reusabl")) + - ggsave(outFile, width = 8, height = 3) + ggsave(outFile, width = 8.0, height = 3.0) output <- sprintf("Created graphics file %s", outFile) status <- "SUCCESS" diff --git a/src/main/resources/code/graph_monthly_quality_scores.R b/src/main/resources/code/graph_monthly_quality_scores.R index 61406d93..7c06b560 100644 --- a/src/main/resources/code/graph_monthly_quality_scores.R +++ b/src/main/resources/code/graph_monthly_quality_scores.R @@ -6,30 +6,27 @@ library(lubridate) library(readr) library(magrittr) -# Plot cummulative quality scores by month +# Plot mean quality scores by month # This program is dispatched (called) by the MetaDIG Grapher class. 
Several # variables are injected by metadig-engine Dispatcher # - title: the graph title -# - title: the graph title # - inFile: the CSV file containing quality scores, which has been prepared by Grapher # - outFile: the graphics output file to create # Variables read by metadig-engine Dispatcher after execution -# mdq_result, output, status +# - mdq_result, output, status + +# Define these variable ("infile", "outFile" for local testing only +#inFile <- "toolik.csv" +#outFile <- "toolik-monthly.png" -# Define these variable for local testing only -#inFile <- "dbo.csv" -#outFile <- "dbo.png" -#inFile <- "sasap.csv" -#outFile <- "sasap.png" -#inFile <- "FAIR-scores-eml.csv" -#outFile <- "FAIR-scores-eml.png" -axisTextFontSize <- 6 -legendTextFontSize <- 6 -axisTitleFontSize <- 8 -legendTitleFontSize <- 8 +axisTextFontSize <- 7 +legendTextFontSize <- 8 +axisTitleFontSize <- 9 +legendTitleFontSize <- 9 # Load data fsr <- read_csv(inFile) +#fsr <- read_csv(inFile) %>% filter(grepl("*eml*", formatId)) scores <- mutate(fsr, ym = as.Date(sprintf("%4s-%02d-01", year(dateUploaded), month(dateUploaded)))) %>% mutate(scoreF = scoreFindable * 100.0) %>% @@ -65,8 +62,8 @@ mr <- score_cumulative %>% filter(metric %in% c("Reusable")) %>% extract2("mean" # See if the 'dateUploaded' dates span multiple years and if not, the x-axis needs to be configured for ggplot so that # it will display. If it is configured for years and only a single year exists, the x-axis will not display. -minYear <- format(with(scores, min(dateUploaded)), "%Y") -maxYear <- format(with(scores, max(dateUploaded)), "%Y") +minYear <- format(with(score_monthly, min(ym)), "%Y") +maxYear <- format(with(score_monthly, max(ym)), "%Y") if(minYear == maxYear) { xLabel <- "Month" dateBreaks <- "months" @@ -96,16 +93,16 @@ p <- ggplot(data=score_monthly, mapping=aes(x=ym, y=mean, color=metric)) + #scale_color_manual(name = "Metric", labels = c("Findable", "Accessible", "Interoperable", "Reusable"), # values=d1_colors) + scale_color_manual(name = "Metric", labels = c(sprintf("Findable (%.0f%%)", mf), - sprintf("Accessible (%.0f%%)", ma), - sprintf("Interoperable (%.0f%%)", mi), - sprintf("Reusable (%.0f%%)", mr)), values=d1_colors) + + sprintf("Accessible (%.0f%%)", ma), + sprintf("Interoperable (%.0f%%)", mi), + sprintf("Reusable (%.0f%%)", mr)), values=d1_colors) + scale_x_date(date_breaks=dateBreaks, date_minor_breaks=dateMinorBreaks, labels=date_format(dateFormat)) + xlab(xLabel) + scale_y_continuous(limits=c(0,100)) + ylab("Average FAIR Score") + #ggtitle(paste0("DataONE: FAIR scores for ", format(sum(standards$n), big.mark=","), " EML and ISO metadata records")) #scale_fill_discrete(name = "metric", labels = c("Finabl", "Accessibl", "Interoperabl", "Reusabl")) + - ggsave(outFile, width = 7.5, height = 2.5) + ggsave(outFile, width = 8.0, height = 3.0) output <- sprintf("Created graphics file %s", outFile) status <- "SUCCESS" From 7849f7d70995299d2375a3cf274546fe25a7f46a Mon Sep 17 00:00:00 2001 From: gothub Date: Thu, 6 Aug 2020 10:22:25 -0700 Subject: [PATCH 34/47] provide access for MN assessment graphs (#262) --- .../edu/ucsb/nceas/mdqengine/DataONE.java | 28 ----- .../mdqengine/scheduler/JobScheduler.java | 5 + .../mdqengine/scheduler/RequestScorerJob.java | 92 +++++++++------ .../ucsb/nceas/mdqengine/scorer/Graph.java | 1 - .../ucsb/nceas/mdqengine/scorer/Scorer.java | 108 +++++++++--------- 5 files changed, 114 insertions(+), 120 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java 
b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java index 82e4552d..a0e935c8 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java @@ -126,41 +126,13 @@ public static MultipartD1Node getMultipartD1Node(Session session, String service public static Document querySolr(String queryStr, int startPos, int countRequested, MultipartD1Node d1Node, Session session) throws MetadigProcessException { -// // Polymorphism doesn't work with D1 node classes, so have to use the derived classes -// MultipartD1Node d1Node = null; -// Session session = DataONE.getSession(subjectId, authToken); - // Add the start and count, if pagination is being used queryStr = queryStr + "&start=" + startPos + "&rows=" + countRequested; // Query the MN or CN Solr engine to get the query associated with this project that will return all project related pids. InputStream qis = null; MetadigProcessException metadigException = null; -// try { -// d1Node = getMultipartD1Node(session, serviceUrl); -// log.debug("Created MultipartD1Node, nodeId: " + d1Node.getNodeId().getValue()); -// } catch (Exception ex) { -// log.error("Unable to create MultipartD1Node for Solr query"); -// metadigException = new MetadigProcessException("Unable to create multipart node client to query DataONE solr: " + ex.getMessage()); -// metadigException.initCause(ex); -// throw metadigException; -// } - log.debug("Sending query: " + queryStr); - // Send a query to a CN or MN -// try { -// if(isCN) { -// qis = cnNode.query(session, "solr", queryStr); -// } else { -// qis = mnNode.query(session, "solr", queryStr); -// } -// log.debug("Sent query"); -// } catch (Exception e) { -// log.error("Error retrieving pids: " + e.getMessage()); -// metadigException = new MetadigProcessException("Unable to query dataone node: " + e.getMessage()); -// metadigException.initCause(e); -// throw metadigException; -// } try { qis = d1Node.query(session, "solr", queryStr); log.debug("Sent query"); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java index 3f237f03..90efc13f 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java @@ -47,6 +47,7 @@ public static void main(String[] argv) throws Exception { String startHarvestDatetime = null; int countRequested = 1000; int harvestDatetimeInc = 1; + String requestType = null; // Filestore variables String dirIncludeMatch = null; @@ -144,6 +145,8 @@ public static void main(String[] argv) throws Exception { harvestDatetimeInc = Integer.parseInt(splitted[++icnt].trim()); // The number of results to return from the DataONE 'listObjects' service countRequested = Integer.parseInt(splitted[++icnt].trim()); + // Is this scores request for a portal or an entire member node? 
+ requestType = splitted[++icnt].trim(); log.debug("pidFilter: " + pidFilter); log.debug("suiteId: " + suiteId); @@ -151,6 +154,7 @@ public static void main(String[] argv) throws Exception { log.debug("startHarvestDatetime: " + startHarvestDatetime); log.debug("harvestDatetimeInc: " + harvestDatetimeInc); log.debug("countRequested: " + countRequested); + log.debug("requestType: " + requestType); } else if(taskType.equals("filestore")) { // Example taskList.csv entry: // filestore,ingest,metadig,,,0 0/30 * * * ?,"stage;;*.*;README.txt;filestore-ingest.log" @@ -204,6 +208,7 @@ public static void main(String[] argv) throws Exception { .usingJobData("startHarvestDatetime", startHarvestDatetime) .usingJobData("harvestDatetimeInc", harvestDatetimeInc) .usingJobData("countRequested", countRequested) + .usingJobData("requestType", requestType) .build(); } else if (taskType.equalsIgnoreCase("filestore")) { job = newJob(FilestoreIngestJob.class) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index 29352235..11d965fe 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java @@ -127,6 +127,10 @@ public void execute(JobExecutionContext context) int harvestDatetimeInc = dataMap.getInt("harvestDatetimeInc"); // Number of pids to get each query (this number of pids will be fetched each query until all pids are obtained) int countRequested = dataMap.getInt("countRequested"); + String requestType = null; + if (taskType.equalsIgnoreCase("score")) { + requestType = dataMap.getString("requestType"); + } // TODO: add formatFamily to scheduler request String formatFamily = null; MultipartRestClient mrc = null; @@ -255,57 +259,69 @@ public void execute(JobExecutionContext context) RequestScorerJob.ListResult result = null; Integer resultCount = null; - log.debug("Getting portal pids to process..."); - boolean morePids = true; - while(morePids) { - ArrayList pidsToProcess = null; - log.debug("startCount: " + startCount); - log.debug("countRequested:" + countRequested); - + if(requestType != null && requestType.equalsIgnoreCase("node")) { try { - //result = getPidsToProcess(cnNode, mnNode, isCN, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); - result = getPidsToProcess(d1Node, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); - pidsToProcess = result.getResult(); - resultCount = result.getResultCount(); + // For a 'node' scores request, the 'collection' is the entire node, so specify + // the nodeId as the collectionid. 
+ submitScorerRequest(qualityServiceUrl, nodeId, suiteId, nodeId, formatFamily); } catch (Exception e) { - JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e); + JobExecutionException jee = new JobExecutionException("Unable to submit request to create new node (" + + nodeId + ")" + " score graph/data file ", e); jee.setRefireImmediately(false); throw jee; } + } else { + log.debug("Getting portal pids to process..."); + boolean morePids = true; + while (morePids) { + ArrayList pidsToProcess = null; + log.debug("startCount: " + startCount); + log.debug("countRequested:" + countRequested); - log.info("Found " + resultCount + " seriesIds" + " for date: " + startDTRstr + " at servierUrl: " + nodeServiceUrl); - for (String pidStr : pidsToProcess) { try { - submitScorerRequest(qualityServiceUrl, pidStr, suiteId, nodeId, formatFamily); + result = getPidsToProcess(d1Node, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); + pidsToProcess = result.getResult(); + resultCount = result.getResultCount(); } catch (Exception e) { - JobExecutionException jee = new JobExecutionException("Unable to submit request to create new score graph/data file", e); + JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e); jee.setRefireImmediately(false); throw jee; } - } - // Check if DataONE returned the max number of results. If so, we have to request more by paging through - // the results. - if(resultCount >= countRequested) { - morePids = true; - startCount = startCount + resultCount; - log.info("Paging through more results, current start is " + startCount); - } else { - morePids = false; - - // Record the new "last harvested" date - task.setLastHarvestDatetime(endDTRstr); - log.debug("taskName: " + task.getTaskName()); - log.debug("taskType: " + task.getTaskType()); - log.debug("lastharvestdate: " + task.getLastHarvestDatetime()); + log.info("Found " + resultCount + " seriesIds" + " for date: " + startDTRstr + " at servierUrl: " + nodeServiceUrl); + for (String pidStr : pidsToProcess) { + try { + submitScorerRequest(qualityServiceUrl, pidStr, suiteId, nodeId, formatFamily); + } catch (Exception e) { + JobExecutionException jee = new JobExecutionException("Unable to submit request to create new score graph/data file", e); + jee.setRefireImmediately(false); + throw jee; + } + } - try { - store.saveTask(task); - } catch (MetadigStoreException mse) { - log.error("Error saving task: " + task.getTaskName()); - JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); - jee.setRefireImmediately(false); - throw jee; + // Check if DataONE returned the max number of results. If so, we have to request more by paging through + // the results. 
+ if (resultCount >= countRequested) { + morePids = true; + startCount = startCount + resultCount; + log.info("Paging through more results, current start is " + startCount); + } else { + morePids = false; + + // Record the new "last harvested" date + task.setLastHarvestDatetime(endDTRstr); + log.debug("taskName: " + task.getTaskName()); + log.debug("taskType: " + task.getTaskType()); + log.debug("lastharvestdate: " + task.getLastHarvestDatetime()); + + try { + store.saveTask(task); + } catch (MetadigStoreException mse) { + log.error("Error saving task: " + task.getTaskName()); + JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); + jee.setRefireImmediately(false); + throw jee; + } } } } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java index 3a95fd2d..1f477f21 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Graph.java @@ -93,7 +93,6 @@ public String create(GraphType type, String title, String inputFile) throws Exce File codeFile = null; String dispatcherType = null; - MetadigFile mdFile = new MetadigFile(); mdFile.setCreationDatetime(DateTime.now()); mdFile.setStorageType(StorageType.CODE.toString()); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index b8cfb205..d31affb3 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -20,6 +20,7 @@ import org.apache.solr.client.solrj.beans.BindingException; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.util.ClientUtils; import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; @@ -163,6 +164,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp MultipartRestClient mrc = null; MultipartMNode mnNode = null; MultipartCNode cnNode = null; + GraphType graphType = null; //long startTime = System.nanoTime(); startTimeProcessing = System.currentTimeMillis(); @@ -228,7 +230,6 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // - a graph for specified filters: member node, suite id, metadata format MetadigFile mdFile = new MetadigFile(); Graph graph = new Graph(); - //Scorer gfr = new Scorer(); // If creating a graph for a collection, get the set of pids associated with the collection. // Only scores for these pids will be included in the graph. 
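The RequestScorerJob refactor above splits score requests into two paths: a 'node' request submits a single Scorer job covering the whole member node, while a portal request pages through the portal's pids and submits one job per pid. A condensed sketch of that dispatch, assuming the job's own helpers (submitScorerRequest, getPidsToProcess, ListResult) with abbreviated signatures:

    // Illustrative only; fields such as qualityServiceUrl, d1Node, session,
    // pidFilter and the harvest date range strings are the job's own state.
    void dispatchScoreRequests(String requestType, String nodeId, String suiteId,
                               String formatFamily, int countRequested) throws Exception {
        if ("node".equalsIgnoreCase(requestType)) {
            // The whole member node is the collection, so the nodeId
            // doubles as the collection id and no pid paging is needed.
            submitScorerRequest(qualityServiceUrl, nodeId, suiteId, nodeId, formatFamily);
            return;
        }
        int startCount = 0;
        boolean morePids = true;
        while (morePids) {
            ListResult result = getPidsToProcess(d1Node, session, pidFilter,
                    startDTRstr, endDTRstr, startCount, countRequested);
            for (String pid : result.getResult()) {
                submitScorerRequest(qualityServiceUrl, pid, suiteId, nodeId, formatFamily);
            }
            if (result.getResultCount() >= countRequested) {
                startCount += result.getResultCount();  // full page: fetch the next one
            } else {
                morePids = false;  // last page: record the new harvest date and stop
            }
        }
    }

The Scorer.java hunk below handles the same distinction on the consumer side, by treating a collectionId that matches urn:node:* as a member-node request and skipping the collectionQuery evaluation that portals require.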
@@ -256,37 +257,32 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp d1Node = new MultipartMNode(mrc, nodeServiceUrl, session); log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl); } -// -// Session session = DataONE.getSession(subjectId, authToken); -// -// // Don't know node type yet from the id, so have to manually check if it's a CN -// Boolean isCN = DataONE.isCN(nodeServiceUrl); -// if(isCN) { -// cnNode = new MultipartCNode(mrc, nodeServiceUrl, session); -// log.debug("Created cnNode for serviceUrl: " + nodeServiceUrl); -// } else { -// mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); -// log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl); -// } - - if (collectionId != null && !collectionId.isEmpty()) { + + // Check if this is a "node" collection. For "node" collections, all scores for a member node + // are used to create the assessment graph, so we don't need to get the collection pids as is + // done for portals (by evaluating the Solr collectionQuery). Therefor, getCollectionPids doesn't + // need to be called and we can proceed directly to getting the quality scores from the quality + // Solr server. + if (collectionId.matches("^\\s*urn:node:.*")) { + graphType = GraphType.CUMULATIVE; + log.debug("Processing a member node request, skipping step of getting collection pids (not required)."); + } else { + graphType = GraphType.MONTHLY; // If the nodeId is specified, use if to determine the values for authTokenName and subjectIdName, // if those values are not defined - log.debug("collectionId is not null: " + collectionId); - String id = nodeId.replace("urn:node:", "").toUpperCase().trim(); + String id = nodeId.replace("urn:node:", "").toUpperCase().trim(); // The collection query is obtained from the MN and evaluated on the CN log.info("Getting pids for collection " + collectionId); // Always use the CN subject id and authentication token from the configuration file, as // requests that this method uses need CN subject privs ScorerResult result = null; - //result = gfr.getCollectionPids(collectionId, cnNode, mnNode, isCN, session); result = gfr.getCollectionPids(collectionId, d1Node, session); collectionPids = result.getResult(); label = result.getLabel(); // Don't continue if no pids (and thus scores) were found for this collection // TODO: Save a blank image and csv if no collection pids returned - if(collectionPids.size() == 0) { + if (collectionPids.size() == 0) { log.info("No pids returned for this collection."); break label; } else { @@ -322,7 +318,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // Generate a temporary graph file based on the quality scores log.debug("Creating graph for collection id: " + collectionId); //String filePath = graph.create(GraphType.CUMULATIVE, title, scoreFile.getPath()); - String filePath = graph.create(GraphType.MONTHLY, title, scoreFile.getPath()); + String filePath = graph.create(graphType, title, scoreFile.getPath()); // Now save the graphics file to permanent storage String outfile; @@ -416,7 +412,8 @@ which will be used to query DataONE Solr for all the pids associated with that p //queryStr = "?q=seriesId:" + collectionId + "+-obsoletedBy:*&fl=collectionQuery,label,rightsHolder&q.op=AND"; startPos = 0; - countRequested = 10000; + // Just getting 1 row + countRequested = 10; // Get the collectionQuery from Solr try { @@ -430,6 +427,8 @@ which will be used to query DataONE Solr for all the pids associated with that p if(xmldoc == null) 
{ log.error("No document returned from solr with queryStr: " + queryStr); throw new MetadigProcessException("No result returned from Solr query: " + queryStr); + } else { + log.trace("xml: " + xmldoc); } String collectionQuery = null; @@ -441,7 +440,7 @@ which will be used to query DataONE Solr for all the pids associated with that p String rightsHolder = null; try { - log.debug("Getting collectionQuery for id: " + collectionId); + log.debug("Parsing collectionQuery from resultdoc for id: " + collectionId); // Extract the collection query from the Solr result XML XPathFactory xPathfactory = XPathFactory.newInstance(); xpath = xPathfactory.newXPath(); @@ -505,6 +504,7 @@ which will be used to query DataONE Solr for all the pids associated with that p // Here is an example collectionQuery: (((project:"State of Alaska\'s Salmon and People") AND (-obsoletedBy:* AND formatType:METADATA))) // We have to remove the 'AND (-obsoletedBy:* AND formatType:METADATA)' portion + log.debug("Pre-edited collectionQuery: " + collectionQuery); collectionQuery = collectionQuery.replaceAll("\\s*AND\\s*\\(-obsoletedBy:\\*\\s*AND\\s*formatType:METADATA\\)", ""); log.debug("Edited collectionQuery: " + collectionQuery); @@ -575,15 +575,11 @@ which will be used to query DataONE Solr for all the pids associated with that p // Loop through the Solr result. As the result may be large, page through the results, accumulating // the pids returned - /** The collectionQuery is evaluated on the same node that the portal document was harvested from (via the - * DataONE listObjects service. This node could either be an MN or CN. - */ - //log.debug("Sending collectionQuery to Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl); MultipartRestClient mrc = null; MultipartCNode cnNode = null; - log.debug("query string: " + queryStr); + log.debug("collectionQuery query string: " + queryStr); try { mrc = new DefaultHttpMultipartRestClient(); @@ -684,10 +680,38 @@ private List getQualityScores(String collectionId, String suiteId, int startPosInResult = 0; int startPosInQuery = 0; // this will always be zero - we are listing the pids to retrieve, so will always want to start at the first result - log.trace("Getting scores from Solr for " + collectionPids.size() + " pids."); - // Now accumulate the Quality Solr document results for the list of pids for the project. - if (collectionId != null && ! 
collectionId.isEmpty()) { - log.info("Getting quality scores for collection: " + collectionId); + // Now accumulate the Quality Solr document results for all scores for the node + if (collectionId.matches("^\\s*urn:node:.*")) { + log.info("Getting quality scores for member node with suiteId: " + suiteId + ", datasource: " + collectionId + " formats: " + formatFamily); + countRequested = 1000; + formatFamilySearchTerm = null; + queryStr = "metadataId:*"; + if(suiteId != null) { + //queryStr += " AND suiteId:" + "\"" + suiteId + "\""; + queryStr += " AND suiteId:" + ClientUtils.escapeQueryChars(suiteId); + } + + // Add this member nodeId as the datasource + //queryStr += " AND datasource:" + "\"" + collectionId + "\""; + queryStr += " AND datasource:" + ClientUtils.escapeQueryChars(collectionId); + + if (formatFamilySearchTerm != null) { + //queryStr += " AND metadataFormatId:" + "\"" + formatFamilySearchTerm + "\""; + queryStr += " AND metadataFormatId:" + ClientUtils.escapeQueryChars(formatFamilySearchTerm); + } + log.trace("query to quality Solr server: " + queryStr); + do { + resultList = queryQualitySolr(queryStr, startPosInQuery, countRequested); + // If no more results, break + if(resultList.size() == 0) break; + // Add results from this pid range to the accumulator of all results. + allResults.addAll(resultList); + startPosInQuery += resultList.size(); + //startPosInQuery += countRequested; + } while (resultList.size() > 0); + } else { + // Now accumulate the Quality Solr document results for the list of pids for the project. + log.info("Getting quality scores for collection: " + collectionId + ", for " + collectionPids.size() + " pids." ); int pidCntToRequest = 25; int totalPidCnt = collectionPids.size(); int pidsLeft = totalPidCnt; @@ -728,28 +752,6 @@ private List getQualityScores(String collectionId, String suiteId, } pidsLeft -= pidCntToRequest; } while (pidsLeft > 0); - } else { - log.info("Getting quality scores for suiteId: " + suiteId + ", datasource: " + " formats: " + formatFamily); - countRequested = 1000; - formatFamilySearchTerm = null; - queryStr = "metadataId:*"; - if(suiteId != null) { - queryStr += " AND suiteId:" + "\"" + suiteId + "\""; - } - - if (formatFamilySearchTerm != null) { - queryStr += " AND metadataFormatId:" + "\"" + formatFamilySearchTerm + "\""; - } - log.trace("query to quality Solr server: " + queryStr); - do { - resultList = queryQualitySolr(queryStr, startPosInQuery, countRequested); - // If no more results, break - if(resultList.size() == 0) break; - // Add results from this pid range to the accumulator of all results. 
- allResults.addAll(resultList); - //startPosInQuery += resultList.size(); - startPosInQuery += countRequested; - } while (resultList.size() > 0); } log.debug("Got " + allResults.size() + " scores from Quality Solr server"); return allResults; From 4ec7f4c0b9e52979eacbf4455ca73146cb75e8f4 Mon Sep 17 00:00:00 2001 From: gothub Date: Thu, 6 Aug 2020 10:39:16 -0700 Subject: [PATCH 35/47] Update docker tag (2.3.0); update maven dependencies --- pom.xml | 189 ++++++++++++++++++++++++++------------------------------ 1 file changed, 87 insertions(+), 102 deletions(-) diff --git a/pom.xml b/pom.xml index c71130a1..c653250e 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ jar metadig-engine - MetaDig library for running metadata quality tests + MetaDIG library for running metadata quality tests https://github.com/NCEAS/metadig-engine @@ -27,7 +27,7 @@ 3.1.4.RELEASE metadig - 2.3.0dev + 2.3.0 **/*Test.java **/LTERSuiteTest.java @@ -40,86 +40,13 @@ http://nceas.ucsb.edu - - - commons-logging - commons-logging - 1.2 - - - - org.apache.logging.log4j - log4j-core - 2.13.3 - - - - org.apache.logging.log4j - log4j-api - 2.13.3 - - - - - - - - - - - - - org.dataone - bookkeeper-client - ${bookkeeper.version} - - - io.dropwizard - dropwizard-core - - - io.dropwizard - dropwizard-json-logging - - - io.dropwizard - dropwizard-testing - - - io.dropwizard - dropwizard-jdbi3 - - - io.dropwizard - dropwizard-auth - - - org.postgresql - postgresql - - - com.opentable.components - otj-pg-embedded - - - com.opentable.components - otj-pg-embedded - - - org.flywaydb - flyway-maven-plugin - - - org.mockito - mockito-core - - - org.dataone - d1_libclient_java - - - - + + + + + + + com.fasterxml.jackson.core jackson-databind ${jackson.version} @@ -128,6 +55,12 @@ org.renjin renjin-script-engine ${renjin.version} + + + org.apache.httpcomponents + httpclient + + @@ -169,9 +102,15 @@ org.apache.commons commons-configuration2 - 2.3 + 2.7 - + + + log4j + log4j + 1.2.17 + + commons-beanutils commons-beanutils @@ -189,6 +128,12 @@ d1_libclient_java ${d1_libclient_java.version} jar + + + org.apache.httpcomponents + httpclient-cache + + @@ -205,22 +150,12 @@ org.apache.solr solr-solrj - 7.3.0 - - - org.apache.logging.log4j - log4j-api - - - org.apache.logging.log4j - log4j-core - - + 7.5.0 org.apache.solr solr-core - 7.3.0 + 7.5.0 bedatadriven bedatadriven public repo From 8f68c91a027ac3f9001ed3d845fb517986fbfa2e Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 9 Aug 2020 12:06:24 -0700 Subject: [PATCH 36/47] Reuse CN clients when possible (#264) --- .../edu/ucsb/nceas/mdqengine/DataONE.java | 34 ++++------------- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 38 +++++++++---------- 2 files changed, 24 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java index a0e935c8..6973efab 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java @@ -29,42 +29,22 @@ public class DataONE { /** * Get a DataONE subject information object - * @param serviceUrl the service URL of the DataONE node to request the subject info from - * @param authToken the authorization token to use for the request + * @param rightsHolder the DataONE subject to get info for + * @param CNnode the DataONE CN to send the request to + * @param session the DataONE authenticated session * @return a DataONE subject information object * @throws MetadigProcessException */ - public static SubjectInfo 
getSubjectInfo(Subject rightsHolder, String serviceUrl, String subjectId, String authToken) throws MetadigProcessException { + public static SubjectInfo getSubjectInfo(Subject rightsHolder, MultipartCNode CNnode, + Session session) throws MetadigProcessException { log.debug("Getting subject info for: " + rightsHolder.getValue()); - MultipartCNode cnNode = null; + //MultipartCNode cnNode = null; MetadigProcessException metadigException = null; - SubjectInfo subjectInfo = null; - Session session = DataONE.getSession(subjectId, authToken); - - // Identity node as either a CN or MN based on the serviceUrl - String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org"; - Pattern r = Pattern.compile(pattern); - Matcher m = r.matcher(serviceUrl); - if (!m.find()) { - log.error("Must call a CN to get subject information"); - metadigException = new MetadigProcessException("Must call a CN to get subject information."); - throw metadigException; - } - - // Only CNs can call the 'subjectInfo' service (aka accounts), so we have to use - // a MultipartCNode instance here. - try { - cnNode = (MultipartCNode) getMultipartD1Node(session, serviceUrl); - } catch (Exception ex) { - metadigException = new MetadigProcessException("Unable to create multipart D1 node: " + subjectId + ": " + ex.getMessage()); - metadigException.initCause(ex); - throw metadigException; - } try { - subjectInfo = cnNode.getSubjectInfo(session, rightsHolder); + subjectInfo = CNnode.getSubjectInfo(session, rightsHolder); } catch (Exception ex) { metadigException = new MetadigProcessException("Unable to get subject information." + ex.getMessage()); metadigException.initCause(ex); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index d31affb3..30bee0b8 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -35,10 +35,7 @@ import org.joda.time.format.ISODateTimeFormat; import org.quartz.JobExecutionException; import org.w3c.dom.Document; -import org.xml.sax.InputSource; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.*; import java.io.*; import java.net.URLEncoder; @@ -438,8 +435,23 @@ which will be used to query DataONE Solr for all the pids associated with that p org.w3c.dom.Node node = null; String label = null; String rightsHolder = null; + MultipartRestClient mrc = null; + MultipartCNode CNnode = null; + Session CNsession = null; try { + + CNsession = DataONE.getSession(CNsubjectId, CNauthToken); + // // Only CNs can call the 'subjectInfo' service (aka accounts), so we have to use + // a MultipartCNode instance here. 
+ try { + CNnode = (MultipartCNode) DataONE.getMultipartD1Node(CNsession, CNserviceUrl); + } catch (Exception ex) { + metadigException = new MetadigProcessException("Unable to create multipart D1 node: " + ex.getMessage()); + metadigException.initCause(ex); + throw metadigException; + } + log.debug("Parsing collectionQuery from resultdoc for id: " + collectionId); // Extract the collection query from the Solr result XML XPathFactory xPathfactory = XPathFactory.newInstance(); @@ -521,7 +533,7 @@ which will be used to query DataONE Solr for all the pids associated with that p subject.setValue(rightsHolder); // The subject info can only be obtained from a CN, so use the CN auth info for the current DataONE environment, // which should be configured in the metadig.properties file - SubjectInfo subjectInfo = DataONE.getSubjectInfo(subject, CNserviceUrl, CNsubjectId, CNauthToken); + SubjectInfo subjectInfo = DataONE.getSubjectInfo(subject, CNnode, CNsession); String groupStr = null; groupStr = "(readPermission:" + "\"" + rightsHolder @@ -581,26 +593,10 @@ which will be used to query DataONE Solr for all the pids associated with that p log.debug("collectionQuery query string: " + queryStr); - try { - mrc = new DefaultHttpMultipartRestClient(); - } catch (Exception e) { - log.error("Error creating rest client: " + e.getMessage()); - JobExecutionException jee = new JobExecutionException(e); - jee.setRefireImmediately(false); - throw new MetadigProcessException("Unable to create connection to CN "); - } - - Session CNsession = DataONE.getSession(CNsubjectId, CNauthToken); - - // Don't know node type yet from the id, so have to manually check if it's a CN - Boolean isCN = DataONE.isCN(CNserviceUrl); - - cnNode = new MultipartCNode(mrc, CNserviceUrl, CNsession); - do { //TODO: check that a result was returned // Note: the collectionQuery is always evaluated on the CN, so that the entire DataONE network is queried. - xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, cnNode, CNsession); + xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, CNnode, CNsession); if(xmldoc == null) { log.info("no values returned from query"); break; From 088352fb4632ebd9fd961248de481edf25e24165 Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 9 Aug 2020 12:20:05 -0700 Subject: [PATCH 37/47] Detect CN or MN based on service URL (#265) --- .../edu/ucsb/nceas/mdqengine/DataONE.java | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java index 6973efab..561ff9ee 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java @@ -19,8 +19,6 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import java.io.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; public class DataONE { @@ -188,19 +186,33 @@ public static Session getSession(String subjectId, String authToken) { return session; } - public static Boolean isCN(String serviceUrl) { + /* + * Determine if the string represents a DataONE CN. + * @param nodeStr either a DataONE node serviceURL (e.g. https://knb.ecoinformatics.org/knb/d1/mn) + * or a DataONE node identifier (e.g. 
urn:node:CN) + */ + public static Boolean isCN(String nodeStr) { Boolean isCN = false; - // Identity node as either a CN or MN based on the serviceUrl - String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org"; - Pattern r = Pattern.compile(pattern); - Matcher m = r.matcher(serviceUrl); - if (m.find()) { - isCN = true; - log.debug("service URL is for a CN: " + serviceUrl); + + // match node urn, e.g. "https://cn.dataone.org/cn" + if (nodeStr.matches("^\\s*urn:node:.*")) { + if (nodeStr.matches("^\\s*urn:node:CN.*$|^\\s*urn:node:cn.*$")) { + isCN = true; + log.debug("The nodeId is for a CN: " + nodeStr); + } else { + log.debug("The nodeId is not for a CN: " + nodeStr); + isCN = false; + } } else { - log.debug("service URL is not for a CN: " + serviceUrl); - isCN = false; + // match cn service url e.g. "https://cn.dataone.org/cn" + if (nodeStr.matches("^\\s*https*://cn.*?\\.dataone\\.org.*$|https*://cn.*?\\.test\\.dataone\\.org.*$")) { + isCN = true; + log.debug("The service URL is for a CN: " + nodeStr); + } else { + log.debug("The service URL is not for a CN: " + nodeStr); + isCN = false; + } } return isCN; } From 1ce5d3c92830459a172e665f5f6c1e27e27881fc Mon Sep 17 00:00:00 2001 From: gothub Date: Sun, 9 Aug 2020 12:20:56 -0700 Subject: [PATCH 38/47] Properly escape values for solr queries --- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index 30bee0b8..e61bbfcd 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -43,8 +43,6 @@ import java.util.*; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * The Scorer class contains methods that create graphs of aggregated quality scores. 
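The escaping rule this patch applies when assembling quality Solr queries: every configured or user-supplied value goes through solrj's ClientUtils.escapeQueryChars, except the deliberate '*' wildcard used for CN-wide requests, which must stay unescaped to remain active. A small self-contained sketch; the suite and node ids below are placeholders, not values from the diff:

    import org.apache.solr.client.solrj.util.ClientUtils;
    import edu.ucsb.nceas.mdqengine.DataONE;

    class QualityQueryExample {
        public static void main(String[] args) {
            String suiteId = "FAIR-suite-0.3.1";   // placeholder suite id
            String collectionId = "urn:node:KNB";  // placeholder member node id

            // CN-wide request: keep the wildcard active; node request: escape ':' etc.
            String datasource = DataONE.isCN(collectionId)
                    ? "*"
                    : ClientUtils.escapeQueryChars(collectionId);

            String queryStr = "metadataId:*"
                    + " AND suiteId:" + ClientUtils.escapeQueryChars(suiteId)
                    + " AND datasource:" + datasource;

            // -> metadataId:* AND suiteId:FAIR\-suite\-0.3.1 AND datasource:urn\:node\:KNB
            System.out.println(queryStr);
        }
    }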
@@ -588,8 +586,6 @@ which will be used to query DataONE Solr for all the pids associated with that p // the pids returned //log.debug("Sending collectionQuery to Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl); - MultipartRestClient mrc = null; - MultipartCNode cnNode = null; log.debug("collectionQuery query string: " + queryStr); @@ -653,6 +649,7 @@ private List getQualityScores(String collectionId, String suiteId, String listString; ArrayList tmpList; String formatFamilySearchTerm = null; + String datasource = null; // The metadata format family can be specified to filter the quality scores that will be included // in the graph./ @@ -678,32 +675,36 @@ private List getQualityScores(String collectionId, String suiteId, // Now accumulate the Quality Solr document results for all scores for the node if (collectionId.matches("^\\s*urn:node:.*")) { - log.info("Getting quality scores for member node with suiteId: " + suiteId + ", datasource: " + collectionId + " formats: " + formatFamily); - countRequested = 1000; + countRequested = 10000; + if(DataONE.isCN(collectionId)) { + // Don't encode the wildcard, otherwise it will be deactivated in Solr + datasource = "*"; + log.info("Getting quality scores for CN node with suiteId: " + suiteId + ", datasource: " + datasource + " formats: " + formatFamily); + } else { + datasource = ClientUtils.escapeQueryChars(collectionId); + log.info("Getting quality scores for member node with (encoded) suiteId: " + suiteId + ", datasource: " + datasource + " formats: " + formatFamily); + } formatFamilySearchTerm = null; queryStr = "metadataId:*"; if(suiteId != null) { - //queryStr += " AND suiteId:" + "\"" + suiteId + "\""; queryStr += " AND suiteId:" + ClientUtils.escapeQueryChars(suiteId); } // Add this member nodeId as the datasource - //queryStr += " AND datasource:" + "\"" + collectionId + "\""; - queryStr += " AND datasource:" + ClientUtils.escapeQueryChars(collectionId); + queryStr += " AND datasource:" + datasource; if (formatFamilySearchTerm != null) { //queryStr += " AND metadataFormatId:" + "\"" + formatFamilySearchTerm + "\""; queryStr += " AND metadataFormatId:" + ClientUtils.escapeQueryChars(formatFamilySearchTerm); } - log.trace("query to quality Solr server: " + queryStr); do { + log.trace("query to quality Solr server: " + queryStr + ", startPos: " + startPosInQuery + ", countRequested: " + countRequested); resultList = queryQualitySolr(queryStr, startPosInQuery, countRequested); // If no more results, break if(resultList.size() == 0) break; // Add results from this pid range to the accumulator of all results. allResults.addAll(resultList); startPosInQuery += resultList.size(); - //startPosInQuery += countRequested; } while (resultList.size() > 0); } else { // Now accumulate the Quality Solr document results for the list of pids for the project. @@ -736,9 +737,9 @@ private List getQualityScores(String collectionId, String suiteId, if (suiteId != null) { queryStr += " AND suiteId:" + suiteId; } - log.debug("query to quality Solr server: " + queryStr); // Send query to Quality Solr Server // Get all the pids in this pid string + log.trace("query to quality Solr server: " + queryStr + ", startPos: " + startPosInQuery + ", countRequested: " + pidCntToRequest); resultList = queryQualitySolr(queryStr, startPosInQuery, pidCntToRequest); // It's possible that none of the pids from the collection have quality scores // This should not happen but check just in case. 
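One detail of the preceding patch worth calling out: when paging through the quality Solr index, the cursor is advanced by the number of rows actually returned rather than by the requested page size, so a short page cannot cause later documents to be skipped. A minimal sketch of the pattern, with queryQualitySolr standing in for the engine's own query helper and QualityScore as a hypothetical result type:

    // Illustrative only; queryQualitySolr and QualityScore are stand-ins.
    List<QualityScore> fetchAllScores(String queryStr, int countRequested) throws Exception {
        List<QualityScore> allResults = new ArrayList<>();
        int start = 0;
        List<QualityScore> page;
        do {
            page = queryQualitySolr(queryStr, start, countRequested);
            if (page.isEmpty()) break;   // no more matching documents
            allResults.addAll(page);
            start += page.size();        // advance by what actually came back,
                                         // not by the requested page size
        } while (page.size() > 0);
        return allResults;
    }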
From ad04aff57638a675b4f0bc64f1764208e594ff16 Mon Sep 17 00:00:00 2001 From: gothub Date: Wed, 19 Aug 2020 13:46:40 -0700 Subject: [PATCH 39/47] Code cleanup, adjust logging levels --- .../edu/ucsb/nceas/mdqengine/DataONE.java | 42 ++++++++------- .../edu/ucsb/nceas/mdqengine/MDQconfig.java | 18 +------ .../java/edu/ucsb/nceas/mdqengine/Worker.java | 13 ----- .../mdqengine/scheduler/JobScheduler.java | 4 +- .../mdqengine/scheduler/RequestReportJob.java | 20 ++------ .../mdqengine/scheduler/RequestScorerJob.java | 39 +++++--------- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 8 --- .../nceas/mdqengine/store/DatabaseStore.java | 35 ++++++------- .../ucsb/nceas/mdqengine/store/MDQStore.java | 51 +++++++++---------- 9 files changed, 80 insertions(+), 150 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java index 561ff9ee..17fea9e1 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/DataONE.java @@ -4,11 +4,12 @@ import org.apache.commons.logging.LogFactory; import edu.ucsb.nceas.mdqengine.exception.MetadigProcessException; import org.dataone.client.auth.AuthTokenSession; +import org.dataone.client.rest.DefaultHttpMultipartRestClient; +import org.dataone.client.rest.HttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartD1Node; import org.dataone.service.types.v1.Session; import edu.ucsb.nceas.mdqengine.exception.MetadigException; -import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.service.types.v1.Subject; @@ -36,7 +37,7 @@ public class DataONE { public static SubjectInfo getSubjectInfo(Subject rightsHolder, MultipartCNode CNnode, Session session) throws MetadigProcessException { - log.debug("Getting subject info for: " + rightsHolder.getValue()); + log.trace("Getting subject info for: " + rightsHolder.getValue()); //MultipartCNode cnNode = null; MetadigProcessException metadigException = null; SubjectInfo subjectInfo = null; @@ -68,7 +69,7 @@ public static MultipartD1Node getMultipartD1Node(Session session, String service // First create an HTTP client try { - mrc = new DefaultHttpMultipartRestClient(); + mrc = new HttpMultipartRestClient(); } catch (Exception ex) { log.error("Error creating rest client: " + ex.getMessage()); metadigException = new MetadigProcessException("Unable to get collection pids"); @@ -80,10 +81,10 @@ public static MultipartD1Node getMultipartD1Node(Session session, String service // Now create a DataONE object that uses the rest client if (isCN) { - log.debug("creating cn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); + log.debug("creating cn MultipartMNode"); d1Node = new MultipartCNode(mrc, serviceUrl, session); } else { - log.debug("creating mn MultipartMNode" + ", subjectId: " + session.getSubject().getValue()); + log.debug("creating mn MultipartMNode"); d1Node = new MultipartMNode(mrc, serviceUrl, session); } return d1Node; @@ -98,9 +99,6 @@ public static MultipartD1Node getMultipartD1Node(Session session, String service * @return an XML document containing the query result * @throws Exception */ - //public static Document querySolr(String queryStr, int startPos, int countRequested, MultipartCNode cnNode, - // MultipartMNode mnNode, Boolean isCN, - // Session session) throws MetadigProcessException { public 
static Document querySolr(String queryStr, int startPos, int countRequested, MultipartD1Node d1Node, Session session) throws MetadigProcessException {
@@ -110,10 +108,10 @@ public static Document querySolr(String queryStr, int startPos, int countRequest
InputStream qis = null;
MetadigProcessException metadigException = null;
- log.debug("Sending query: " + queryStr);
+ log.trace("Sending query: " + queryStr);
try {
qis = d1Node.query(session, "solr", queryStr);
- log.debug("Sent query");
+ log.trace("Sent query");
} catch (Exception e) {
log.error("Error retrieving pids: " + e.getMessage());
metadigException = new MetadigProcessException("Unable to query dataone node: " + e.getMessage());
@@ -121,19 +119,19 @@ public static Document querySolr(String queryStr, int startPos, int countRequest
throw metadigException;
}
- log.debug("Creating xml doc with results");
+ log.trace("Creating xml doc with results");
Document xmldoc = null;
DocumentBuilder builder = null;
try {
// If results were returned, create an XML document from them
- log.debug("qis available: " + qis.available());
+ log.trace("qis available: " + qis.available());
if (qis.available() > 0) {
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
builder = factory.newDocumentBuilder();
xmldoc = builder.parse(new InputSource(qis));
- log.debug("Created xml doc: " + xmldoc.toString());
+ log.trace("Created xml doc: " + xmldoc.toString());
} catch (Exception e) {
log.error("Unable to create w3c Document from input stream", e);
e.printStackTrace();
@@ -145,13 +143,13 @@ public static Document querySolr(String queryStr, int startPos, int countRequest
qis.close();
}
} catch (IOException ioe) {
- log.debug("IO exception: " + ioe.getMessage());
+ log.trace("IO exception: " + ioe.getMessage());
metadigException = new MetadigProcessException("Unable to prepare query result xml document: " + ioe.getMessage());
metadigException.initCause(ioe);
throw metadigException;
}
- log.debug("Created results xml doc");
+ log.trace("Created results xml doc");
return xmldoc;
}
@@ -169,10 +167,10 @@ public static Session getSession(String subjectId, String authToken) {
// query Solr - either the member node or cn, for the project 'solrquery' field
if (authToken == null || authToken.isEmpty()) {
- log.debug("Creating public sessioni");
+ log.trace("Creating public session");
session = new Session();
} else {
- log.debug("Creating authentication session for subjectId: " + subjectId + ", token: " + authToken.substring(0, 5) + "...");
+ log.trace("Creating authentication session for subjectId: " + subjectId + ", token: " + authToken.substring(0, 5) + "...");
session = new AuthTokenSession(authToken);
}
@@ -180,7 +178,7 @@ public static Session getSession(String subjectId, String authToken) {
Subject subject = new Subject();
subject.setValue(subjectId);
session.setSubject(subject);
- log.debug("Set session subjectId to: " + session.getSubject().getValue());
+ log.trace("Set session subjectId to: " + session.getSubject().getValue());
}
return session;
@@ -199,18 +197,18 @@ public static Boolean isCN(String nodeStr) {
if (nodeStr.matches("^\\s*urn:node:.*")) {
if (nodeStr.matches("^\\s*urn:node:CN.*$|^\\s*urn:node:cn.*$")) {
isCN = true;
- log.debug("The nodeId is for a CN: " + nodeStr);
+ log.trace("The nodeId is for a CN: " + nodeStr);
} else {
- log.debug("The nodeId is not for a CN: " + nodeStr);
+ log.trace("The nodeId is not for a CN: " + nodeStr);
isCN = false;
}
} else {
// match cn service url e.g.
"https://cn.dataone.org/cn" if (nodeStr.matches("^\\s*https*://cn.*?\\.dataone\\.org.*$|https*://cn.*?\\.test\\.dataone\\.org.*$")) { isCN = true; - log.debug("The service URL is for a CN: " + nodeStr); + log.trace("The service URL is for a CN: " + nodeStr); } else { - log.debug("The service URL is not for a CN: " + nodeStr); + log.trace("The service URL is not for a CN: " + nodeStr); isCN = false; } } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java b/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java index c2840e01..b3e7de4a 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/MDQconfig.java @@ -20,19 +20,7 @@ public class MDQconfig { public static Configuration config; public MDQconfig () throws ConfigurationException, IOException { - // Check if we are running in a servlet boolean inServlet = false; - /* - try { - Class servletClass = Class.forName("javax.servlet.http.HttpServlet"); - inServlet = true; - log.debug("Loaded javax.servlet.http.HttpServlet - running in servlet environment."); - //} catch (ClassNotFoundException ex) { - } catch (Exception e) { - log.debug("Unable to load javax.servlet.http.HttpServlet - not running in servlet environment."); - inServlet = false; - } - */ // If running in a servlet, have to get the config info from the webapp context, as we can't // read from external dirs on disk. @@ -41,15 +29,13 @@ public MDQconfig () throws ConfigurationException, IOException { InputStream inputStream = this.getClass().getClassLoader().getResourceAsStream("/metadig.properties"); String TMP_DIR = System.getProperty("java.io.tmpdir"); File tempFile = new File(TMP_DIR + "/metadig.properties"); - log.debug("Reading config properties in servlet from: " + tempFile); + log.trace("Reading config properties in servlet from: " + tempFile); FileOutputStream out = new FileOutputStream(tempFile); IOUtils.copy(inputStream, out); config = configs.properties(tempFile); - log.debug("Successfully read properties from: " + tempFile); } else { - log.debug("Reading config properties from: " + configFilePath); + log.trace("Reading config properties from: " + configFilePath); config = configs.properties(new File(configFilePath)); - log.debug("Successfully read properties from: " + configFilePath); } } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java b/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java index 7cd516bb..ecbbb554 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/Worker.java @@ -477,19 +477,6 @@ public Run processReport(QueueEntry message) throws InterruptedException, Except } catch (Throwable thrown) { log.error("Error while waiting for group lookup thread completion"); } - // Wait for a few seconds for the 'accounts' -// for (int i = 0; i < 5; i++) { -// try { -// groups = future.get(); -// } catch (Throwable thrown) { -// log.error("Error while waiting for thread completion"); -// } -// // Sleep for 1 second -// -// if (groups.size() > 0 ) break; -// log.debug("Waiting 1 second for DataONE group lookup"); -// Thread.sleep(1000); -// } if (groups != null) { smm.setGroups(groups); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java index 90efc13f..3f9612a3 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java @@ -85,6 +85,7 @@ public 
static void main(String[] argv) throws Exception { cronSchedule = record.get("cron-schedule").trim(); params = record.get("params").trim(); log.debug("Task type: " + taskType); + log.debug("Task name: " + taskName); log.debug("cronSchedule: " + cronSchedule); params = params.startsWith("\"") ? params.substring(1) : params; params = params.endsWith("\"") ? params.substring(0, params.length()-1) : params; @@ -182,7 +183,6 @@ public static void main(String[] argv) throws Exception { } try { - log.debug("Setting task"); // Currently there is only taskType="quality", but there could be more in the future! JobDetail job = null; if(taskType.equals("quality")) { @@ -223,13 +223,11 @@ public static void main(String[] argv) throws Exception { .build(); } - log.debug("Setting trigger"); CronTrigger trigger = newTrigger() .withIdentity(taskName + "-trigger", taskGroup) .withSchedule(cronSchedule(cronSchedule)) .build(); - log.debug("Scheduling task"); scheduler.scheduleJob(job, trigger); } catch (SchedulerException se) { diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 6a11c68c..19fdc7ea 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -124,21 +124,13 @@ public void execute(JobExecutionContext context) JobDataMap dataMap = context.getJobDetail().getJobDataMap(); String taskName = dataMap.getString("taskName"); - log.debug("taskName: " + taskName); String taskType = dataMap.getString("taskType"); - log.debug("taskType: " + taskType); String pidFilter = dataMap.getString("pidFilter"); - log.debug("pidFilter: " + pidFilter); String suiteId = dataMap.getString("suiteId"); - log.debug("suiteId: " + suiteId); String nodeId = dataMap.getString("nodeId"); - log.debug("nodeId: " + nodeId); String startHarvestDatetimeStr = dataMap.getString("startHarvestDatetime"); - log.debug("startHavestDatetimeStr: " + startHarvestDatetimeStr); int harvestDatetimeInc = dataMap.getInt("harvestDatetimeInc"); - log.debug("harvestDatetimeInc: " + harvestDatetimeInc); int countRequested = dataMap.getInt("countRequested"); - log.debug("countRequested: " + countRequested); MultipartRestClient mrc = null; MultipartMNode mnNode = null; MultipartCNode cnNode = null; @@ -162,7 +154,7 @@ public void execute(JobExecutionContext context) throw jee; } - log.debug("Executing task for node: " + nodeId + ", suiteId: " + suiteId); + log.info("Executing task " + taskType + ", " + taskName + " for node: " + nodeId + ", suiteId: " + suiteId); try { mrc = new HttpMultipartRestClient(); @@ -183,7 +175,7 @@ public void execute(JobExecutionContext context) mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); } - // Don't know node type yet from the id, so have to manually check if it's a CN + // Get a connection to the database MDQStore store = null; try { @@ -208,13 +200,9 @@ public void execute(JobExecutionContext context) DateTime currentDT = new DateTime(DateTimeZone.UTC); DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SS'Z'"); String currentDatetimeStr = dtfOut.print(currentDT); - DateTime startDateTimeRange = null; DateTime endDateTimeRange = null; - String lastHarvestDateStr = null; - //edu.ucsb.nceas.mdqengine.model.Node node; - //node = store.getNode(nodeId, jobName); Task task; task = store.getTask(taskName, taskType); @@ -349,8 +337,8 @@ public ListResult 
getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Date endDate = new Date(msSinceEpoch); try { - // Even though MultipartMNode and MultipartCNode have the same parent class, their interfaces are differnt, so polymorphism - // isn't happening here. + // Even though MultipartMNode and MultipartCNode have the same parent class D1Node, the interface for D1Node doesn't + // include listObjects (it should), so we have to maintain a cnNode and mnNode. if(isCN) { objList = cnNode.listObjects(session, startDate, endDate, formatId, nodeRef, identifier, startCount, countRequested); } else { diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index 11d965fe..fe908c2d 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java @@ -3,6 +3,7 @@ import edu.ucsb.nceas.mdqengine.Controller; import edu.ucsb.nceas.mdqengine.MDQconfig; import edu.ucsb.nceas.mdqengine.DataONE; +import edu.ucsb.nceas.mdqengine.exception.MetadigException; import edu.ucsb.nceas.mdqengine.exception.MetadigProcessException; import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; import edu.ucsb.nceas.mdqengine.model.Task; @@ -16,11 +17,7 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.dataone.client.rest.DefaultHttpMultipartRestClient; -import org.dataone.client.rest.MultipartRestClient; -import org.dataone.client.v2.impl.MultipartCNode; import org.dataone.client.v2.impl.MultipartD1Node; -import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.service.types.v1.*; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; @@ -33,8 +30,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** *

@@ -74,7 +69,7 @@ Integer getResultCount() {
}
// Since Quartz will re-instantiate a class every time it
- // gets executed, members non-static member variables can
+ // gets executed, non-static member variables can
// not be used to maintain state!
/**
@@ -146,13 +141,12 @@ public void execute(JobExecutionContext context)
try {
cfg = new MDQconfig();
qualityServiceUrl = cfg.getString("quality.serviceUrl");
- log.debug("nodeId from request: " + nodeId);
+ log.trace("nodeId from request: " + nodeId);
String nodeAbbr = nodeId.replace("urn:node:", "");
authToken = cfg.getString(nodeAbbr + ".authToken");
subjectId = cfg.getString(nodeAbbr + ".subjectId");
- // TODO: Cache the node values from the CN listNode service
nodeServiceUrl = cfg.getString(nodeAbbr + ".serviceUrl");
- log.debug("nodeServiceUrl: " + nodeServiceUrl);
+ log.trace("nodeServiceUrl: " + nodeServiceUrl);
} catch (ConfigurationException | IOException ce) {
JobExecutionException jee = new JobExecutionException("Error executing task.");
jee.initCause(ce);
@@ -271,12 +265,11 @@ public void execute(JobExecutionContext context)
throw jee;
}
} else {
- log.debug("Getting portal pids to process...");
+
Integer allIds = 0;
boolean morePids = true;
while (morePids) {
ArrayList pidsToProcess = null;
- log.debug("startCount: " + startCount);
- log.debug("countRequested:" + countRequested);
+ log.trace("Getting portal pids to process, startCount: " + startCount + ", countRequested: " + countRequested);
try {
result = getPidsToProcess(d1Node, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested);
@@ -288,7 +281,7 @@ public void execute(JobExecutionContext context)
throw jee;
}
- log.info("Found " + resultCount + " seriesIds" + " for date: " + startDTRstr + " at servierUrl: " + nodeServiceUrl);
+ log.trace(taskName + ": found " + resultCount + " seriesIds" + " for date: " + startDTRstr + " at serviceUrl: " + nodeServiceUrl);
for (String pidStr : pidsToProcess) {
try {
submitScorerRequest(qualityServiceUrl, pidStr, suiteId, nodeId, formatFamily);
@@ -304,15 +297,12 @@ public void execute(JobExecutionContext context)
if (resultCount >= countRequested) {
morePids = true;
startCount = startCount + resultCount;
- log.info("Paging through more results, current start is " + startCount);
+ log.trace("Paging through more results, current start is " + startCount);
} else {
morePids = false;
// Record the new "last harvested" date
task.setLastHarvestDatetime(endDTRstr);
- log.debug("taskName: " + task.getTaskName());
- log.debug("taskType: " + task.getTaskType());
- log.debug("lastharvestdate: " + task.getLastHarvestDatetime());
try {
store.saveTask(task);
@@ -360,7 +350,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
String queryStr = "?q=formatId:" + pidFilter + "+-obsoletedBy:*" + "+dateUploaded:[" + startHarvestDatetimeStr + "%20TO%20" + endHarvestDatetimeStr + "]" + "&fl=seriesId&q.op=AND";
- log.debug("query: " + queryStr);
+ log.trace("query: " + queryStr);
// Send the query to DataONE Solr to retrieve portal seriesIds for a given time frame
@@ -370,7 +360,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
int thisResultLength;
// Now setup the xpath to retrieve the ids returned from the collection query.
try { - log.debug("Compiling xpath for seriesId"); + log.trace("Compiling xpath for seriesId"); // Extract the collection query from the Solr result XML XPathFactory xPathfactory = XPathFactory.newInstance(); xpath = xPathfactory.newXPath(); @@ -384,9 +374,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, // Loop through the Solr result. As the result may be large, page through the results, accumulating // the pids returned into a ListResult object. - - //log.debug("Getting portal seriesIds from Solr using subjectId: " + subjectId + ", servicerUrl: " + serviceUrl); - log.debug("Getting portal seriesIds from Solr " ); + log.trace("Getting portal seriesIds from Solr " ); int startPos = startCount; do { @@ -408,13 +396,13 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, } String currentPid = null; thisResultLength = xpathResult.getLength(); - log.debug("Got " + thisResultLength + " pids this query"); + log.trace("Got " + thisResultLength + " pids this query"); if(thisResultLength == 0) break; for (int index = 0; index < xpathResult.getLength(); index++) { node = xpathResult.item(index); currentPid = node.getTextContent(); pids.add(currentPid); - log.debug("adding pid: " + currentPid); + log.trace("adding pid: " + currentPid); } startPos += thisResultLength; @@ -453,7 +441,6 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S // send to service log.debug("submitting scores request : " + scorerServiceUrl); - //post.setEntity((HttpEntity) entity); CloseableHttpClient client = HttpClients.createDefault(); CloseableHttpResponse response = client.execute(post); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index e61bbfcd..23ea5697 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -99,14 +99,6 @@ void setResult(ArrayList result) { ArrayList getResult() { return this.result; } - -// void setResultCount(Integer count) { -// this.resultCount = count; -// } -// -// Integer getResultCount() { -// return this.resultCount; -// } } public static void main(String[] argv) throws Exception { diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java index 3fcca606..9958136c 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java @@ -48,7 +48,7 @@ public class DatabaseStore implements MDQStore { private DataSource dataSource = null; public DatabaseStore () throws MetadigStoreException { - log.debug("Initializing a new DatabaseStore to " + dbUrl + "."); + log.trace("Initializing a new DatabaseStore to " + dbUrl + "."); this.init(); } @@ -57,7 +57,7 @@ public DatabaseStore () throws MetadigStoreException { */ private void init() throws MetadigStoreException { - log.debug("initializing connection"); + log.trace("initializing connection"); String additionalDir = null; try { MDQconfig cfg = new MDQconfig(); @@ -90,7 +90,7 @@ private void init() throws MetadigStoreException { throw(mse); } - log.debug("Connection initialized"); + log.trace("Connection initialized"); PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(); @@ -111,7 +111,6 @@ private void init() throws MetadigStoreException { Suite suite = null; try { URL url = resource.getURL(); - 
//log.debug("Loading suite found at: " + url.toString()); String xml = IOUtils.toString(url.openStream(), "UTF-8"); suite = (Suite) XmlMarshaller.fromXml(xml, Suite.class); } catch (JAXBException | IOException | SAXException e) { @@ -123,7 +122,7 @@ private void init() throws MetadigStoreException { } } if(this.isAvailable()) { - log.debug("Initialized database store: opened database successfully"); + log.trace("Initialized database store: opened database successfully"); } else { throw new MetadigStoreException("Error initializing database, connection not available"); } @@ -153,13 +152,13 @@ public Run getRun(String metadataId, String suiteId) throws MetadigStoreExceptio MetadigStoreException me = new MetadigStoreException("Unable get quality report to the datdabase."); // Select records from the 'runs' table try { - log.debug("preparing statement for query"); + log.trace("preparing statement for query"); String sql = "select * from runs where metadata_id = ? and suite_id = ?"; stmt = conn.prepareStatement(sql); stmt.setString(1, metadataId); stmt.setString(2, suiteId); - log.debug("issuing query: " + sql); + log.trace("issuing query: " + sql); ResultSet rs = stmt.executeQuery(); if(rs.next()) { mId = rs.getString("metadata_id"); @@ -176,9 +175,9 @@ public Run getRun(String metadataId, String suiteId) throws MetadigStoreExceptio // have to be manually added after the JAXB marshalling has created the run object. run.setSequenceId(seqId); run.setIsLatest(isLatest); - log.debug("Retrieved run successfully for metadata id: " + run.getObjectIdentifier()); + log.trace("Retrieved run successfully for metadata id: " + run.getObjectIdentifier()); } else { - log.debug("Run not found for metadata id: " + metadataId + ", suiteId: " + suiteId); + log.trace("Run not found for metadata id: " + metadataId + ", suiteId: " + suiteId); } } catch ( Exception e ) { log.error( e.getClass().getName()+": "+ e.getMessage()); @@ -210,8 +209,6 @@ public void saveRun(Run run) throws MetadigStoreException { String sequenceId = run.getSequenceId(); Boolean isLatest = run.getIsLatest(); String resultStr = null; - //DateTime now = new DateTime(); - //OffsetDateTime dateTime = OffsetDateTime.now(); Timestamp dateTime = Timestamp.from(Instant.now()); run.setTimestamp(dateTime); @@ -288,7 +285,7 @@ public void saveRun(Run run) throws MetadigStoreException { } // Next, insert a record into the child table ('runs') - log.debug("Records created successfully"); + log.trace("Records created successfully"); } /* @@ -296,7 +293,7 @@ public void saveRun(Run run) throws MetadigStoreException { */ public boolean isAvailable() { boolean reachable = false; - log.debug("Checking if store (i.e. sql connection) is available."); + log.trace("Checking if store (i.e. 
sql connection) is available."); try { reachable = conn.isValid(10); } catch (Exception e ) { @@ -310,7 +307,7 @@ public boolean isAvailable() { */ public void renew() throws MetadigStoreException { if(!this.isAvailable()) { - log.debug("Renewing connection to database"); + log.trace("Renewing connection to database"); this.init(); } } @@ -319,7 +316,7 @@ public void shutdown() { try { conn.close(); - log.debug("Successfully closed database"); + log.trace("Successfully closed database"); } catch ( java.sql.SQLException e) { log.error("Error closing database: " + e.getMessage()); } @@ -355,7 +352,7 @@ public void saveTask(Task task) throws MetadigStoreException { } // Next, insert a record into the child table ('runs') - log.debug("Records created successfully"); + log.trace("Records created successfully"); } public Task getTask(String taskName, String taskType) { @@ -368,13 +365,13 @@ public Task getTask(String taskName, String taskType) { // Select records from the 'nodes' table try { - log.debug("preparing statement for query"); + log.trace("preparing statement for query"); String sql = "select * from tasks where task_name = ? and task_type = ?"; stmt = conn.prepareStatement(sql); stmt.setString(1, taskName); stmt.setString(2, taskType); - log.debug("issuing query: " + sql); + log.trace("issuing query: " + sql); ResultSet rs = stmt.executeQuery(); if(rs.next()) { task.setTaskName(rs.getString("task_name")); @@ -383,7 +380,7 @@ public Task getTask(String taskName, String taskType) { rs.close(); stmt.close(); } else { - log.debug("No results returned from query"); + log.trace("No results returned from query"); } } catch ( Exception e ) { log.error( e.getClass().getName()+": "+ e.getMessage()); diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java index c573803d..b9796c29 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java @@ -7,33 +7,30 @@ public interface MDQStore { - public Collection listSuites(); - public Suite getSuite(String id); - public void createSuite(Suite suite); - public void updateSuite(Suite suite); - public void deleteSuite(Suite suite); - - public Collection listChecks(); - public Check getCheck(String id); - public void createCheck(Check check); - public void updateCheck(Check check); - public void deleteCheck(Check check); + Collection listSuites(); + Suite getSuite(String id); + void createSuite(Suite suite); + void updateSuite(Suite suite); + void deleteSuite(Suite suite); + + Collection listChecks(); + Check getCheck(String id); + void createCheck(Check check); + void updateCheck(Check check); + void deleteCheck(Check check); - public Collection listRuns(); - public Run getRun(String suite, String id ) throws MetadigStoreException; - public void saveRun(Run run) throws MetadigStoreException; - public void createRun(Run run); - public void deleteRun(Run run); - - public void shutdown(); - - public boolean isAvailable(); - public void renew() throws MetadigStoreException; -// -// public Node getNode(String nodeId, String jobName); -// public void saveNode(Node node) throws MetadigStoreException; - - public Task getTask(String taskName, String taskType); - public void saveTask(Task task) throws MetadigStoreException; + Collection listRuns(); + Run getRun(String suite, String id ) throws MetadigStoreException; + void saveRun(Run run) throws MetadigStoreException; + void createRun(Run run); + void deleteRun(Run 
run); + + void shutdown(); + + boolean isAvailable(); + void renew() throws MetadigStoreException; + + Task getTask(String taskName, String taskType); + void saveTask(Task task) throws MetadigStoreException; } From ef01e2639accf142080b7cb8559b667ec21d1c76 Mon Sep 17 00:00:00 2001 From: gothub Date: Wed, 19 Aug 2020 13:50:54 -0700 Subject: [PATCH 40/47] CN harvesting is missing some pids bug (#267) --- .../mdqengine/scheduler/RequestReportJob.java | 177 +++++++++--------- 1 file changed, 86 insertions(+), 91 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 19fdc7ea..43ebc9e0 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -37,8 +37,6 @@ import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Date; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** *

@@ -56,37 +54,44 @@ public class RequestReportJob implements Job {
private Log log = LogFactory.getLog(RequestReportJob.class);
class ListResult {
- // The total result count returned from DataONE
- Integer totalResultCount;
+ // The total result count for all object types returned from DataONE. This is the count of all object types
+ // that were retrieved for a given request. The DataONE 'listObjects' service does not provide
+ // parameters to filter by formatId wildcard, so we have to retrieve all pids for a time range
+ // and filter the result list.
+ private Integer totalResultCount = 0;
// The filtered result count returned from DataONE.
// The DataONE listObjects service returns all new pids for all formatIds
// but we are typically only interested in a subset of those, i.e. EML metadata pids,
// so this is the count of pids from the result that we are actually interested in.
- Integer filteredResultCount;
- ArrayList result = new ArrayList<>();
+ private Integer filteredResultCount = 0;
+ private ArrayList result = new ArrayList<>();
+
+ // The scheduler keeps track of the sysmeta 'dateSystemMetadataModified' of the last pid harvested,
+ // which will be used as the starting time of the next harvest.
+ private DateTime lastDateModifiedDT = null;
void setResult(ArrayList result) {
this.result = result;
}
- ArrayList getResult() {
+ public ArrayList getResult() {
return this.result;
}
void setTotalResultCount(Integer count) {
this.totalResultCount = count;
}
- void setFilteredResultCount(Integer count) {
- this.filteredResultCount = count;
+ void setFilteredResultCount(Integer count) { this.filteredResultCount = count; }
+ void setLastDateModified(DateTime date) {
+ log.debug("Setter last modified date, date: " + date.toString());
+ this.lastDateModifiedDT = date;
}
- Integer getTotalResultCount() {
- return this.totalResultCount;
- }
+ public Integer getTotalResultCount() { return this.totalResultCount; }
- Integer getFilteredResultCount() {
- return this.filteredResultCount;
- }
+ public Integer getFilteredResultCount() { return this.filteredResultCount; }
+
+ public DateTime getLastDateModified() { return this.lastDateModifiedDT; }
}
// Since Quartz will re-instantiate a class every time it
- // gets executed, members non-static member variables can
+ // gets executed, non-static member variables can
// not be used to maintain state!
/**
@@ -198,7 +203,7 @@ public void execute(JobExecutionContext context)
// Get current datetime, which may be used for start time range.
DateTimeZone.setDefault(DateTimeZone.UTC);
DateTime currentDT = new DateTime(DateTimeZone.UTC);
- DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SS'Z'");
+ DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
String currentDatetimeStr = dtfOut.print(currentDT);
DateTime startDateTimeRange = null;
DateTime endDateTimeRange = null;
@@ -219,58 +224,63 @@ public void execute(JobExecutionContext context)
lastHarvestDateStr = task.getLastHarvestDatetime();
}
- DateTime lastHarvestDate = new DateTime(lastHarvestDateStr);
+ DateTime lastHarvestDateDT = new DateTime(lastHarvestDateStr);
// Set the search start datetime to the last harvest datetime, unless it is in the
// future. (This can happen when the previous time range end was for the current day,
// as the end datetime range for the previous task run will have been stored as the
// new lastharvestDateTime.)
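The comment above describes clamping the start of the next harvest window when the stored last-harvest datetime has drifted into the future. A minimal standalone Joda-Time sketch of that clamping, under the same assumptions as the patch (method and variable names are illustrative):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class HarvestWindow {
    // Clamp the start of the next harvest window to 'now' when the stored
    // last-harvest datetime is in the future (the previous run saved its
    // end-of-range, which may have been the current instant).
    static DateTime clampStart(DateTime lastHarvest, DateTime now) {
        return lastHarvest.isAfter(now.toInstant()) ? now : new DateTime(lastHarvest);
    }

    public static void main(String[] args) {
        DateTime now = new DateTime(DateTimeZone.UTC);
        System.out.println(clampStart(now.plusMinutes(5), now)); // clamped to 'now'
        System.out.println(clampStart(now.minusDays(1), now));   // unchanged
    }
}

The diff lines that follow show the same decision being made inline in execute().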
- DateTime startDTR = null; - if(lastHarvestDate.isAfter(currentDT.toInstant())) { - startDTR = currentDT; + DateTime startDT = null; + if(lastHarvestDateDT.isAfter(currentDT.toInstant())) { + startDT = currentDT; } else { - startDTR = new DateTime(lastHarvestDate); + startDT = new DateTime(lastHarvestDateDT); } - DateTime endDTR = new DateTime(startDTR); - endDTR = endDTR.plusDays(harvestDatetimeInc); - if(endDTR.isAfter(currentDT.toInstant())) { - endDTR = currentDT; + DateTime endDT = new DateTime(startDT); + endDT = endDT.plusDays(harvestDatetimeInc); + if(endDT.isAfter(currentDT.toInstant())) { + endDT = currentDT; } - // If the start and end harvest dates are the same (happends for a new node), then - // tweek the start so that DataONE listObjects doesn't complain. - if(startDTR == endDTR ) { - startDTR = startDTR.minusMinutes(1); + // If the start and end harvest dates are the same (happens for a new node), then + // tweak the start so that DataONE listObjects doesn't complain. + if(startDT == endDT ) { + startDT = startDT.minusMinutes(1); + log.debug("Reset start back 1 minute to: " + startDT); } - String startDTRstr = dtfOut.print(startDTR); - String endDTRstr = dtfOut.print(endDTR); + // Track the sysmeta dateUploaded of the latest harvested pid. This will become the starting time of + // the next harvest. + DateTime lastDateModifiedDT = startDT; + + String startDTstr = dtfOut.print(startDT); + String endDTstr = dtfOut.print(endDT); Integer startCount = new Integer(0); ListResult result = null; - Integer totalResultCount = null; - Integer filteredResultCount = null; + Integer totalResultCount = 0; + Integer filteredResultCount = 0; + Integer allPidsCnt = 0; boolean morePids = true; while(morePids) { ArrayList pidsToProcess = null; - log.info("Getting pids for node: " + nodeId + ", suiteId: " + suiteId + ", harvest start: " + startDTRstr); - try { - result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, nodeId, pidFilter, startDTRstr, endDTRstr, startCount, countRequested); + result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, nodeId, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT); pidsToProcess = result.getResult(); totalResultCount = result.getTotalResultCount(); filteredResultCount = result.getFilteredResultCount(); + lastDateModifiedDT = result.getLastDateModified(); } catch (Exception e) { JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e); jee.setRefireImmediately(false); throw jee; } - log.info("Found " + filteredResultCount + " pids" + " for node: " + nodeId); + allPidsCnt = pidsToProcess.size(); for (String pidStr : pidsToProcess) { try { - log.info("submitting pid: " + pidStr); + log.debug("submitting pid: " + pidStr); submitReportRequest(cnNode, mnNode, isCN, session, qualityServiceUrl, pidStr, suiteId); } catch (org.dataone.service.exceptions.NotFound nfe) { log.error("Unable to process pid: " + pidStr + nfe.getMessage()); @@ -278,16 +288,24 @@ public void execute(JobExecutionContext context) } catch (Exception e) { log.error("Unable to process pid: " + pidStr + " - " + e.getMessage()); continue; - //JobExecutionException jee = new JobExecutionException("Unable to submit request to create new quality reports", e); - //jee.setRefireImmediately(false); - //throw jee; } } - task.setLastHarvestDatetime(endDTRstr); - log.debug("taskName: " + task.getTaskName()); - log.debug("taskType: " + task.getTaskType()); - log.debug("lastharvestdate: " + task.getLastHarvestDatetime()); + 
// Check if DataONE returned the max number of results. If so, we have to request more by paging through
+ // the results returned pidsToProcess (i.e. DataONE listObjects service). If the returned result is
+ // less than the requested result, then all pids have been retrieved.
+ if(totalResultCount >= countRequested) {
+ morePids = true;
+ startCount = startCount + totalResultCount;
+ log.trace("Paging through more results, current start is " + startCount);
+ } else {
+ morePids = false;
+ }
+ }
+ // Don't update the lastHarvestDateDT if no pids were found.
+ if (allPidsCnt > 0) {
+ task.setLastHarvestDatetime(dtfOut.print(lastDateModifiedDT));
+ log.debug("Saving lastHarvestDate: " + dtfOut.print(lastDateModifiedDT));
try {
store.saveTask(task);
} catch (MetadigStoreException mse) {
@@ -296,24 +314,15 @@ public void execute(JobExecutionContext context)
jee.setRefireImmediately(false);
throw jee;
}
-
- // Check if DataONE returned the max number of results. If so, we have to request more by paging through
- // the results returned pidsToProcess (i.e. DataONE listObjects service).
- if(totalResultCount >= countRequested) {
- morePids = true;
- startCount = startCount + totalResultCount;
- log.info("Paging through more results, current start is " + startCount);
- } else {
- morePids = false;
- }
}
+ log.info(taskName + ": Found " + allPidsCnt + " pids for start: " + startDTstr + ", end: " + endDTstr + " at serviceUrl: " + nodeServiceUrl);
store.shutdown();
}
public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String suiteId, String nodeId, String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr, int startCount,
- int countRequested) throws Exception {
+ int countRequested, DateTime lastDateModifiedDT) throws Exception {
ArrayList pids = new ArrayList();
InputStream qis = null;
@@ -353,15 +362,16 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
String thisFormatId = null;
String thisPid = null;
int pidCount = 0;
+ Date thisDateModified;
if (objList.getCount() > 0) {
for(ObjectInfo oi: objList.getObjectInfoList()) {
thisFormatId = oi.getFormatId().getValue();
thisPid = oi.getIdentifier().getValue();
- log.debug("Checking pid: " + thisPid + ", format: " + thisFormatId);
+ log.trace("Checking pid: " + thisPid + ", format: " + thisFormatId);
- // Check all pid filters. There could be multiple wildcard filters, which are separated
- // by ','.
+ // Check all pid filters to see if this pid's format was found in the list of desired formats.
+ // There could be multiple wildcard filters, which are separated by '|'.
String [] filters = pidFilter.split("\\|");
Boolean found = false;
for(String thisFilter:filters) {
@@ -378,7 +388,16 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
// if (!runExists(thisPid, suiteId, store)) {
pidCount++;
pids.add(thisPid);
- log.info("adding pid " + thisPid + ", formatId: " + thisFormatId);
+ log.trace("adding pid " + thisPid + ", formatId: " + thisFormatId);
+ // If this pid's modified date is after the stored latest encountered modified date, then update
+ // the lastModified date
+ DateTime thisDateModifiedDT = new DateTime(oi.getDateSysMetadataModified());
+ // Add a millisecond to lastDateModifiedDT so that this pid won't be harvested again (in the event
+ // that this is the last pid to be harvested in this round).
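The filter check in the hunk above splits the pidFilter on '|' and tests each pattern against the object's formatId. A standalone sketch of that matching; the filter strings shown are examples only, since the patch does not include the production values:

import java.util.Arrays;

public class FormatIdFilter {
    // Return true if 'formatId' matches any of the '|'-separated regex
    // patterns in 'pidFilter'.
    static boolean matchesAny(String formatId, String pidFilter) {
        return Arrays.stream(pidFilter.split("\\|"))
                     .anyMatch(formatId::matches);
    }

    public static void main(String[] args) {
        String filter = "^eml.*|^http.*eml.*"; // illustrative patterns
        System.out.println(matchesAny("eml://ecoinformatics.org/eml-2.1.1", filter)); // true
        System.out.println(matchesAny("http://www.isotc211.org/2005/gmd", filter));  // false
    }
}

The millisecond bump described just above, and implemented in the diff lines that follow, is what keeps the watermark strictly ahead of the last harvested pid.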
+ if (thisDateModifiedDT.isAfter(lastDateModifiedDT)) {
+ lastDateModifiedDT = thisDateModifiedDT.plusMillis(1) ;
+ log.debug("Updated lastDateModified: " + lastDateModifiedDT.toString());
+ }
// }
}
}
@@ -390,6 +409,8 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
// Set the count for the total number of pids returned from DataONE (all formatIds) for this query
result.setTotalResultCount(objList.getCount());
result.setResult(pids);
+ // Return the sysmeta 'dateSystemMetadataModified' of the last pid harvested.
+ result.setLastDateModified(lastDateModifiedDT);
return result;
}
@@ -445,45 +466,19 @@ public void submitReportRequest(MultipartCNode cnNode, MultipartMNode mnNode, Bo
} else {
objectIS = mnNode.get(session, pid);
}
- log.debug("Retrieved metadata object for pid: " + pidStr);
+ log.trace("Retrieved metadata object for pid: " + pidStr);
} catch (NotAuthorized na) {
- log.error("Not authorized to read pid: " + pid + ", continuing with next pid...");
+ log.error("Not authorized to read pid: " + pid + ", unable to retrieve metadata, continuing with next pid...");
return;
- } catch (Exception e) {
- throw(e);
}
// quality suite service url, i.e. "http://docke-ucsb-1.dataone.org:30433/quality/suites/knb.suite.1/run
qualityServiceUrl = qualityServiceUrl + "/suites/" + suiteId + "/run";
HttpPost post = new HttpPost(qualityServiceUrl);
- try {
- // add document
- SimpleMultipartEntity entity = new SimpleMultipartEntity();
- entity.addFilePart("document", objectIS);
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- TypeMarshaller.marshalTypeToOutputStream(sysmeta, baos);
- entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
-
- // make sure we get XML back
- post.addHeader("Accept", "application/xml");
-
- // send to service
- log.trace("submitting: " + qualityServiceUrl);
- post.setEntity((HttpEntity) entity);
- CloseableHttpClient client = HttpClients.createDefault();
- CloseableHttpResponse response = client.execute(post);
-
- // retrieve results
- HttpEntity reponseEntity = response.getEntity();
- if (reponseEntity != null) {
- runResultIS = reponseEntity.getContent();
- }
- } catch (Exception e) {
- throw(e);
- }
- }
+ // add document
+ SimpleMultipartEntity entity = new SimpleMultipartEntity();
+ entity.addFilePart("document", objectIS);
private Boolean isCN(String serviceUrl) {

From 7e557579f60961b82224b27480bd97eb90593b88 Mon Sep 17 00:00:00 2001
From: gothub
Date: Wed, 19 Aug 2020 13:52:20 -0700
Subject: [PATCH 41/47] Detect D1 client connection type (CN or MN) (#265)
--- .../mdqengine/scheduler/RequestReportJob.java | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 43ebc9e0..3900ac12 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java
@@ -173,7 +173,7 @@ public void execute(JobExecutionContext context)
Session session = DataONE.getSession(subjectId, authToken);
// Don't know node type yet from the id, so have to manually check if it's a CN
- Boolean isCN = isCN(nodeServiceUrl);
+ Boolean isCN = DataONE.isCN(nodeServiceUrl);
if(isCN) {
cnNode = new MultipartCNode(mrc, nodeServiceUrl, session);
} else {
mnNode = new MultipartMNode(mrc, nodeServiceUrl, session);
}
@@ -418,6 +418,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
public
boolean runExists(String pid, String suiteId, MDQStore store) throws MetadigStoreException {
boolean found = false;
+ Date runDateSystemMetadataModified = null;
if(!store.isAvailable()) {
try {
@@ -480,21 +481,23 @@ public void submitReportRequest(MultipartCNode cnNode, MultipartMNode mnNode, Bo
SimpleMultipartEntity entity = new SimpleMultipartEntity();
entity.addFilePart("document", objectIS);
- private Boolean isCN(String serviceUrl) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ TypeMarshaller.marshalTypeToOutputStream(sysmeta, baos);
+ entity.addFilePart("systemMetadata", new ByteArrayInputStream(baos.toByteArray()));
- Boolean isCN = false;
- // Identity node as either a CN or MN based on the serviceUrl
- String pattern = "https*://cn.*?\\.dataone\\.org|https*://cn.*?\\.test\\.dataone\\.org";
- Pattern r = Pattern.compile(pattern);
- Matcher m = r.matcher(serviceUrl);
- if (m.find()) {
- isCN = true;
- log.debug("service URL is for a CN: " + serviceUrl);
- } else {
- log.debug("service URL is not for a CN: " + serviceUrl);
- isCN = false;
- }
+ // make sure we get XML back
+ post.addHeader("Accept", "application/xml");
- return isCN;
+ // send to service
+ log.trace("submitting: " + qualityServiceUrl);
+ post.setEntity((HttpEntity) entity);
+ CloseableHttpClient client = HttpClients.createDefault();
+ CloseableHttpResponse response = client.execute(post);
+
+ // retrieve results
+ HttpEntity responseEntity = response.getEntity();
+ if (responseEntity != null) {
+ runResultIS = responseEntity.getContent();
+ }
}
}

From 594f4b8709a276e55325a7b34c1a532d29e98fc6 Mon Sep 17 00:00:00 2001
From: gothub
Date: Wed, 19 Aug 2020 13:53:33 -0700
Subject: [PATCH 42/47] Reuse CN clients when possible (#264)
--- .../mdqengine/scheduler/RequestScorerJob.java | 46 ++++++------------- 1 file changed, 13 insertions(+), 33 deletions(-)

diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index fe908c2d..7c099f31 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
@@ -103,9 +103,6 @@ public void execute(JobExecutionContext context)
throws JobExecutionException {
String qualityServiceUrl = null;
- String CNsubjectId = null;
- String CNauthToken = null;
- String CNserviceUrl = null;
MDQconfig cfg = null;
JobKey key = context.getJobDetail().getKey();
@@ -123,20 +120,17 @@ public void execute(JobExecutionContext context)
// Number of pids to get each query (this number of pids will be fetched each query until all pids are obtained)
int countRequested = dataMap.getInt("countRequested");
String requestType = null;
- if (taskType.equalsIgnoreCase("score")) {
- requestType = dataMap.getString("requestType");
- }
- // TODO: add formatFamily to scheduler request
String formatFamily = null;
- MultipartRestClient mrc = null;
- MultipartMNode mnNode = null;
- MultipartCNode cnNode = null;
-
+ MultipartD1Node d1Node = null;
String authToken = null;
String subjectId = null;
String nodeServiceUrl = null;
- log.info("Executing task: " + taskName + ", taskType: " + taskType);
+ if (taskType.equalsIgnoreCase("score")) {
+ requestType = dataMap.getString("requestType");
+ }
+
+ log.info("Executing task " + taskType + ", " + taskName + " for node: " + nodeId + ", suiteId: " + suiteId);
try {
cfg = new MDQconfig();
qualityServiceUrl = cfg.getString("quality.serviceUrl");
@@ -153,33 +147,19 @@ public void execute(JobExecutionContext context)
throw jee;
}
try
{ - mrc = new DefaultHttpMultipartRestClient(); - } catch (Exception e) { - log.error("Error creating rest client: " + e.getMessage()); - JobExecutionException jee = new JobExecutionException(e); - jee.setRefireImmediately(false); - throw jee; - } - Session session = DataONE.getSession(subjectId, authToken); - // Don't know node type yet from the id, so have to manually check if it's a CN - Boolean isCN = DataONE.isCN(nodeServiceUrl); - - MultipartD1Node d1Node = null; - if(isCN) { - //cnNode = new MultipartCNode(mrc, nodeServiceUrl, session); - d1Node = new MultipartCNode(mrc, nodeServiceUrl, session); - log.debug("Created cnNode for serviceUrl: " + nodeServiceUrl); - } else { - //mnNode = new MultipartMNode(mrc, nodeServiceUrl, session); - d1Node = new MultipartMNode(mrc, nodeServiceUrl, session); - log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl); + // Get a connection to the DataONE node (CN or MN) + try { + d1Node = DataONE.getMultipartD1Node(session, nodeServiceUrl); + } catch (MetadigException mpe) { + mpe.printStackTrace(); + throw new JobExecutionException(taskName + ": unable to create connection to service URL " + nodeServiceUrl , mpe); } MDQStore store = null; + // Get stored task info from the last task execution try { store = new DatabaseStore(); } catch (Exception e) { From bc176ed3cb7933942cafd3c987d9b55fe2509b27 Mon Sep 17 00:00:00 2001 From: gothub Date: Wed, 19 Aug 2020 13:55:51 -0700 Subject: [PATCH 43/47] Reuse CN clients when possible (#264) --- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 72 ++++++------------- 1 file changed, 20 insertions(+), 52 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java index 23ea5697..fede5a0f 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java @@ -21,11 +21,9 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.util.ClientUtils; -import org.dataone.client.rest.DefaultHttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; -import org.dataone.client.v2.impl.MultipartD1Node; // Don't include org.dataone.client.rest.MultipartD1Node (this is what IDEA selects) -import org.dataone.client.v2.impl.MultipartMNode; +import org.dataone.client.v2.impl.MultipartD1Node; import org.dataone.service.types.v1.Session; import org.dataone.service.types.v1.Subject; import org.dataone.service.types.v1.Group; @@ -148,9 +146,8 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp String nodeServiceUrl = null; String label = null; String title = null; - MultipartRestClient mrc = null; - MultipartMNode mnNode = null; - MultipartCNode cnNode = null; + //MultipartRestClient mrc = null; + MultipartD1Node d1Node = null; GraphType graphType = null; //long startTime = System.nanoTime(); @@ -201,9 +198,6 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // Pids associated with a collection, based on query results using 'collectionQuery' field in solr. ArrayList collectionPids = null; - // The harvesting and evaluation of the collectionQuery is based on the nodeId that is passed in, i.e. 
- // If an MN is specified, then the collection (portal) Solr entry will be obtained from the MN, and the
- // collectionQuery string will also be evaluated on that node.
String nodeAbbr = nodeId.replace("urn:node:", "");
authToken = cfg.getString(nodeAbbr + ".authToken");
subjectId = cfg.getString(nodeAbbr + ".subjectId");
@@ -211,45 +205,20 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp
nodeServiceUrl = cfg.getString(nodeAbbr + ".serviceUrl");
HashMap variables = new HashMap<>();
- // Create the graph.
- // Two types of graphs are currently supported:
- // - a graph for all pids included in a DataONE collection (portal), and a specified suite id
- // - a graph for specified filters: member node, suite id, metadata format
+
MetadigFile mdFile = new MetadigFile();
Graph graph = new Graph();
- // If creating a graph for a collection, get the set of pids associated with the collection.
- // Only scores for these pids will be included in the graph.
-
- try {
- mrc = new DefaultHttpMultipartRestClient();
- } catch (Exception e) {
- log.error("Error creating rest client: " + e.getMessage());
- JobExecutionException jee = new JobExecutionException(e);
- jee.setRefireImmediately(false);
- throw jee;
- }
-
Session session = DataONE.getSession(subjectId, authToken);
- // Don't know node type yet from the id, so have to manually check if it's a CN
- Boolean isCN = DataONE.isCN(nodeServiceUrl);
+ d1Node = DataONE.getMultipartD1Node(session, nodeServiceUrl);
- MultipartD1Node d1Node = null;
- if(isCN) {
- //cnNode = new MultipartCNode(mrc, nodeServiceUrl, session);
- d1Node = new MultipartCNode(mrc, nodeServiceUrl, session);
- log.debug("Created cnNode for serviceUrl: " + nodeServiceUrl);
- } else {
- //mnNode = new MultipartMNode(mrc, nodeServiceUrl, session);
- d1Node = new MultipartMNode(mrc, nodeServiceUrl, session);
- log.debug("Created mnNode for serviceUrl: " + nodeServiceUrl);
- }
-
- // Check if this is a "node" collection. For "node" collections, all scores for a member node
- // are used to create the assessment graph, so we don't need to get the collection pids as is
- // done for portals (by evaluating the Solr collectionQuery). Therefor, getCollectionPids doesn't
- // need to be called and we can proceed directly to getting the quality scores from the quality
- // Solr server.
+ // Quality scores must be retrieved from the quality Solr server from which a graph is created.
+ // There are two types of collections: "node" collections and portal collections.
+ // Check if this is a "node" collection. For "node" collections, all scores from the quality
+ // Solr server with 'datasource' = nodeId are used to create the assessment graph, so we don't need
+ // to get the collection pids, as is done for portals (by evaluating the DataONE Solr collectionQuery).
+ // Therefore, for a "node" collection, getCollectionPids doesn't need to be called and we can proceed directly
+ // to getting the quality scores from the quality Solr server, as sketched below.
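The node-versus-portal branching this comment block describes keys off the collection id itself. A minimal standalone sketch of that check (class and method names are illustrative, not part of the patch):

import java.util.regex.Pattern;

public class CollectionKind {
    private static final Pattern NODE_ID = Pattern.compile("^\\s*urn:node:.*");

    // A 'node' collection is addressed by a DataONE node identifier such as
    // "urn:node:ARCTIC"; anything else is treated as a portal (collection)
    // pid whose member pids come from evaluating its Solr collectionQuery.
    static boolean isNodeCollection(String collectionId) {
        return NODE_ID.matcher(collectionId).matches();
    }

    public static void main(String[] args) {
        System.out.println(isNodeCollection("urn:node:ARCTIC")); // true -> skip getCollectionPids
        System.out.println(isNodeCollection("urn:uuid:1234"));   // false -> evaluate collectionQuery
    }
}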
if (collectionId.matches("^\\s*urn:node:.*")) { graphType = GraphType.CUMULATIVE; log.debug("Processing a member node request, skipping step of getting collection pids (not required)."); @@ -290,6 +259,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp log.info("# of quality scores returned: " + scores.size()); } + // Create the data file used by the graphing method File scoreFile = gfr.createScoreFile(scores); log.debug("Created score file: " + scoreFile.getPath()); @@ -304,13 +274,11 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp // Generate a temporary graph file based on the quality scores log.debug("Creating graph for collection id: " + collectionId); - //String filePath = graph.create(GraphType.CUMULATIVE, title, scoreFile.getPath()); String filePath = graph.create(graphType, title, scoreFile.getPath()); + // Now save the graphics file to permanent storage String outfile; - DateTime createDateTime = DateTime.now(); - mdFile.setCreationDatetime(createDateTime); mdFile.setPid(collectionId); mdFile.setSuiteId(suiteId); @@ -425,17 +393,17 @@ which will be used to query DataONE Solr for all the pids associated with that p org.w3c.dom.Node node = null; String label = null; String rightsHolder = null; - MultipartRestClient mrc = null; - MultipartCNode CNnode = null; + //MultipartRestClient mrc = null; + MultipartCNode cnNode = null; Session CNsession = null; try { CNsession = DataONE.getSession(CNsubjectId, CNauthToken); - // // Only CNs can call the 'subjectInfo' service (aka accounts), so we have to use + // Only CNs can call the 'subjectInfo' service (aka accounts), so we have to use // a MultipartCNode instance here. try { - CNnode = (MultipartCNode) DataONE.getMultipartD1Node(CNsession, CNserviceUrl); + cnNode = (MultipartCNode) DataONE.getMultipartD1Node(CNsession, CNserviceUrl); } catch (Exception ex) { metadigException = new MetadigProcessException("Unable to create multipart D1 node: " + ex.getMessage()); metadigException.initCause(ex); @@ -523,7 +491,7 @@ which will be used to query DataONE Solr for all the pids associated with that p subject.setValue(rightsHolder); // The subject info can only be obtained from a CN, so use the CN auth info for the current DataONE environment, // which should be configured in the metadig.properties file - SubjectInfo subjectInfo = DataONE.getSubjectInfo(subject, CNnode, CNsession); + SubjectInfo subjectInfo = DataONE.getSubjectInfo(subject, cnNode, CNsession); String groupStr = null; groupStr = "(readPermission:" + "\"" + rightsHolder @@ -584,7 +552,7 @@ which will be used to query DataONE Solr for all the pids associated with that p do { //TODO: check that a result was returned // Note: the collectionQuery is always evaluated on the CN, so that the entire DataONE network is queried. 
- xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, CNnode, CNsession); + xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, cnNode, CNsession); if(xmldoc == null) { log.info("no values returned from query"); break; From d1f5a97d271ee2c0a666f69dfc8af81030679d78 Mon Sep 17 00:00:00 2001 From: gothub Date: Thu, 20 Aug 2020 11:16:46 -0700 Subject: [PATCH 44/47] CN harvesting is missing some pids (#267) --- .../mdqengine/scheduler/RequestReportJob.java | 29 ++-- .../mdqengine/scheduler/RequestScorerJob.java | 129 +++++++++++++----- 2 files changed, 107 insertions(+), 51 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 3900ac12..22540674 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -83,7 +83,6 @@ void setTotalResultCount(Integer count) { } void setFilteredResultCount(Integer count) { this.filteredResultCount = count; } void setLastDateModified(DateTime date) { - log.debug("Setter last modified date, date: " + date.toString()); this.lastDateModifiedDT = date; } @@ -91,11 +90,13 @@ void setLastDateModified(DateTime date) { public Integer getFilteredResultCount() { return this.filteredResultCount; } - public DateTime getLastDateModified() { return this.lastDateModifiedDT; } + public DateTime getLastDateModified() { + return this.lastDateModifiedDT; + } } // Since Quartz will re-instantiate a class every time it - // gets executed, members non-static member variables can + // gets executed, non-static member variables can // not be used to maintain state! /** @@ -236,17 +237,17 @@ public void execute(JobExecutionContext context) startDT = new DateTime(lastHarvestDateDT); } - DateTime endDT = new DateTime(startDT); - endDT = endDT.plusDays(harvestDatetimeInc); - if(endDT.isAfter(currentDT.toInstant())) { - endDT = currentDT; - } +// DateTime endDT = new DateTime(startDT); +// endDT = endDT.plusDays(harvestDatetimeInc); +// if(endDT.isAfter(currentDT.toInstant())) { +// endDT = currentDT; +// } + DateTime endDT = new DateTime(currentDT); // If the start and end harvest dates are the same (happens for a new node), then // tweak the start so that DataONE listObjects doesn't complain. if(startDT == endDT ) { startDT = startDT.minusMinutes(1); - log.debug("Reset start back 1 minute to: " + startDT); } // Track the sysmeta dateUploaded of the latest harvested pid. 
This will become the starting time of
@@ -266,7 +267,7 @@ public void execute(JobExecutionContext context)
while(morePids) {
ArrayList pidsToProcess = null;
try {
- result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, nodeId, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT);
+ result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT);
pidsToProcess = result.getResult();
totalResultCount = result.getTotalResultCount();
filteredResultCount = result.getFilteredResultCount();
@@ -280,7 +281,7 @@ public void execute(JobExecutionContext context)
allPidsCnt = pidsToProcess.size();
for (String pidStr : pidsToProcess) {
try {
- log.debug("submitting pid: " + pidStr);
+ log.debug(taskName + ": submitting pid: " + pidStr);
submitReportRequest(cnNode, mnNode, isCN, session, qualityServiceUrl, pidStr, suiteId);
} catch (org.dataone.service.exceptions.NotFound nfe) {
log.error("Unable to process pid: " + pidStr + nfe.getMessage());
@@ -362,7 +363,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
String thisFormatId = null;
String thisPid = null;
int pidCount = 0;
- Date thisDateModified;
+ DateTime thisDateModifiedDT;
if (objList.getCount() > 0) {
@@ -391,11 +392,11 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
log.trace("adding pid " + thisPid + ", formatId: " + thisFormatId);
// If this pid's modified date is after the stored latest encountered modified date, then update
// the lastModified date
- DateTime thisDateModifiedDT = new DateTime(oi.getDateSysMetadataModified());
+ thisDateModifiedDT = new DateTime(oi.getDateSysMetadataModified());
// Add a millisecond to lastDateModifiedDT so that this pid won't be harvested again (in the event
// that this is the last pid to be harvested in this round).
if (thisDateModifiedDT.isAfter(lastDateModifiedDT)) {
- lastDateModifiedDT = thisDateModifiedDT.plusMillis(1) ;
+ lastDateModifiedDT = thisDateModifiedDT.plusMillis(1);
log.debug("Updated lastDateModified: " + lastDateModifiedDT.toString());
}
// }
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index 7c099f31..31dcea61 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
@@ -51,6 +51,10 @@ class ListResult {
Integer resultCount;
ArrayList result = new ArrayList<>();
+ // The scheduler keeps track of Solr 'dateModified' of the last pid harvested,
+ // which will be used as the starting time of the next harvest.
+ private DateTime lastDateModifiedDT = null;
+
void setResult(ArrayList result) {
this.result = result;
}
@@ -66,6 +70,12 @@ void setResultCount(Integer count) {
Integer getResultCount() {
return this.resultCount;
}
+
+ void setLastDateModified(DateTime date) {
+ this.lastDateModifiedDT = date;
+ }
+
+ public DateTime getLastDateModified() { return this.lastDateModifiedDT; }
}
// Since Quartz will re-instantiate a class every time it
@@ -180,10 +190,7 @@ public void execute(JobExecutionContext context)
// Get current datetime, which may be used for start time range.
DateTimeZone.setDefault(DateTimeZone.UTC);
         DateTime currentDT = new DateTime(DateTimeZone.UTC);
-        DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SS'Z'");
-        String currentDatetimeStr = dtfOut.print(currentDT);
-        DateTime startDateTimeRange = null;
-        DateTime endDateTimeRange = null;
+        DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
         String lastHarvestDateStr = null;
 
         Task task;
@@ -202,41 +209,54 @@ public void execute(JobExecutionContext context)
             lastHarvestDateStr = task.getLastHarvestDatetime();
         }
 
-        DateTime lastHarvestDate = new DateTime(lastHarvestDateStr);
+        DateTime lastHarvestDateDT = new DateTime(lastHarvestDateStr);
         // Set the search start datetime to the last harvest datetime, unless it is in the
         // future. (This can happen when the previous time range end was for the current day,
         // as the end datetime range for the previous task run will have been stored as the
         // new lastharvestDateTime.
-        DateTime startDTR = null;
-        if(lastHarvestDate.isAfter(currentDT.toInstant())) {
-            startDTR = currentDT;
+        DateTime startDT = null;
+        if(lastHarvestDateDT.isAfter(currentDT.toInstant())) {
+            startDT = currentDT;
         } else {
-            startDTR = new DateTime(lastHarvestDate);
+            startDT = new DateTime(lastHarvestDateDT);
         }
 
-        DateTime endDTR = new DateTime(startDTR);
-        endDTR = endDTR.plusDays(harvestDatetimeInc);
-        if(endDTR.isAfter(currentDT.toInstant())) {
-            endDTR = currentDT;
-        }
+//        DateTime endDT = new DateTime(startDT);
+//        endDT = endDT.plusDays(harvestDatetimeInc);
+//        if(endDT.isAfter(currentDT.toInstant())) {
+//            endDT = currentDT;
+//        }
+
+        DateTime endDT = new DateTime(currentDT);
 
         // If the start and end harvest dates are the same (happens for a new node), then
         // tweak the start so that DataONE listObjects doesn't complain.
-        if(startDTR == endDTR ) {
-            startDTR = startDTR.minusMinutes(1);
+        if(startDT == endDT ) {
+            startDT = startDT.minusMinutes(1);
         }
 
-        String startDTRstr = dtfOut.print(startDTR);
-        String endDTRstr = dtfOut.print(endDTR);
+        // Track the sysmeta dateUploaded of the latest harvested pid. This will become the starting time of
+        // the next harvest.
+        DateTime lastDateModifiedDT = startDT;
+
+        String startDTstr = dtfOut.print(startDT);
+        String endDTstr = dtfOut.print(endDT);
 
         int startCount = 0;
         RequestScorerJob.ListResult result = null;
-        Integer resultCount = null;
+        Integer resultCount = 0;
 
+        // Two types of score requests can be processed - a "node" request that will get score info for an
+        // entire repository (e.g. urn:node:ARCTIC) or a "portal" request that will get scores for a
+        // specific portal (from the Solr portal entry collectionQuery).
        if(requestType != null && requestType.equalsIgnoreCase("node")) {
            try {
                // For a 'node' scores request, the 'collection' is the entire node, so specify
-                // the nodeId as the collectionid.
+                // the nodeId as the collectionId. It is not necessary to retrieve a collectionQuery for this
+                // 'node' portal, as there is no Solr entry for this type of collection. All quality scores available
+                // in the quality Solr server will be directly retrieved, filtering on the 'nodeId' (datasource)
+                log.info("TaskName: " + taskName + ", taskType: " + taskType + " submitting node request for nodeId: "
+                        + nodeId + ", suiteId: " + suiteId + ", formatFamily: " + formatFamily);
                 submitScorerRequest(qualityServiceUrl, nodeId, suiteId, nodeId, formatFamily);
             } catch (Exception e) {
                 JobExecutionException jee = new JobExecutionException("Unable to submit request to create new node ("
@@ -248,22 +268,26 @@ public void execute(JobExecutionContext context)
             Integer allIds = 0;
             boolean morePids = true;
             while (morePids) {
+                // Get a list of pids selected by a collection (portal) search filter (collectionQuery) and get
+                // the quality scores (from the quality Solr server) for that list of pids.
                 ArrayList pidsToProcess = null;
                 log.trace("Getting portal pids to process, startCount: " + startCount + ", countRequested: " + countRequested);
 
                 try {
-                    result = getPidsToProcess(d1Node, session, pidFilter, startDTRstr, endDTRstr, startCount, countRequested);
+                    result = getPidsToProcess(d1Node, session, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT);
                     pidsToProcess = result.getResult();
                     resultCount = result.getResultCount();
+                    lastDateModifiedDT = result.getLastDateModified();
                 } catch (Exception e) {
                     JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e);
                     jee.setRefireImmediately(false);
                     throw jee;
                 }
 
                 log.trace(taskName + ": found " + resultCount + " seriesIds" + " for date: " + startDTstr + " at serviceUrl: " + nodeServiceUrl);
 
                 for (String pidStr : pidsToProcess) {
                     try {
+                        log.debug(taskName + ": submitting seriesId: " + pidStr);
                         submitScorerRequest(qualityServiceUrl, pidStr, suiteId, nodeId, formatFamily);
                     } catch (Exception e) {
                         JobExecutionException jee = new JobExecutionException("Unable to submit request to create new score graph/data file", e);
@@ -274,6 +298,7 @@ public void execute(JobExecutionContext context)
 
                 // Check if DataONE returned the max number of results. If so, we have to request more by paging through
                 // the results.
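The paging contract used in this loop is easy to get wrong, so here is a self-contained sketch of just its arithmetic. The class, the stubbed fetchPage() method, and the page sizes are illustrative only and are not the engine's API; the real loop calls getPidsToProcess() against DataONE.

    import java.util.ArrayList;
    import java.util.List;

    // Minimal paging sketch: keep requesting pages until a page comes back
    // smaller than the page size ('countRequested'), mirroring the loop above.
    public class PagingSketch {
        static final int TOTAL = 95; // pretend the server holds 95 matching pids

        // Stand-in for the real service call: returns at most 'countRequested'
        // items starting at offset 'startCount'.
        static List<String> fetchPage(int startCount, int countRequested) {
            List<String> page = new ArrayList<>();
            for (int i = startCount; i < Math.min(startCount + countRequested, TOTAL); i++) {
                page.add("pid-" + i);
            }
            return page;
        }

        public static void main(String[] args) {
            int startCount = 0;
            int countRequested = 25;
            int allIds = 0;
            boolean morePids = true;
            while (morePids) {
                List<String> page = fetchPage(startCount, countRequested);
                allIds += page.size();
                if (page.size() >= countRequested) {
                    // A full page came back, so there may be more results.
                    startCount = startCount + page.size();
                } else {
                    morePids = false;
                }
            }
            System.out.println("harvested " + allIds + " pids"); // prints: harvested 95 pids
        }
    }

The stop condition is "short page", not "empty page", which is why the code above updates the start offset only when a full page is returned.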
+ allIds += pidsToProcess.size(); if (resultCount >= countRequested) { morePids = true; startCount = startCount + resultCount; @@ -281,19 +306,23 @@ public void execute(JobExecutionContext context) } else { morePids = false; - // Record the new "last harvested" date - task.setLastHarvestDatetime(endDTRstr); + } + } - try { - store.saveTask(task); - } catch (MetadigStoreException mse) { - log.error("Error saving task: " + task.getTaskName()); - JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); - jee.setRefireImmediately(false); - throw jee; - } + if (allIds > 0) { + // Record the new "last harvested" date + task.setLastHarvestDatetime(dtfOut.print(lastDateModifiedDT)); + log.debug("Saving lastHarvestDate: " + dtfOut.print(lastDateModifiedDT)); + try { + store.saveTask(task); + } catch (MetadigStoreException mse) { + log.error("Error saving task: " + task.getTaskName()); + JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); + jee.setRefireImmediately(false); + throw jee; } } + log.info(taskName + ": found " + allIds + " seriesIds" + " for start: " + startDTstr + ", end: " + endDTstr + " at servierUrl: " + nodeServiceUrl); } store.shutdown(); } @@ -322,14 +351,15 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, org.w3c.dom.NodeList xpathResult = null; XPathExpression fieldXpath = null; + XPathExpression dateModifiedXpath = null; XPath xpath = null; org.w3c.dom.Node node = null; ArrayList pids = new ArrayList(); Document xmldoc = null; - String queryStr = "?q=formatId:" + pidFilter + "+-obsoletedBy:*" + "+dateUploaded:[" + startHarvestDatetimeStr + "%20TO%20" + String queryStr = "?q=formatId:" + pidFilter + "+-obsoletedBy:*" + "+dateModified:[" + startHarvestDatetimeStr + "%20TO%20" + endHarvestDatetimeStr + "]" - + "&fl=seriesId&q.op=AND"; + + "&fl=seriesId,dateModified&q.op=AND"; log.trace("query: " + queryStr); // Send the query to DataONE Solr to retrieve portal seriesIds for a given time frame @@ -345,6 +375,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, XPathFactory xPathfactory = XPathFactory.newInstance(); xpath = xPathfactory.newXPath(); fieldXpath = xpath.compile("//result/doc/str[@name='seriesId']/text()"); + dateModifiedXpath = xpath.compile("//result/doc/date[@name='dateModified']/text()"); } catch (XPathExpressionException xpe) { log.error("Error extracting id from solr result doc: " + xpe.getMessage()); metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage()); @@ -358,16 +389,13 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, int startPos = startCount; do { - //xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, cnNode, mnNode, isCN, session); xmldoc = DataONE.querySolr(queryStr, startPos, countRequested, d1Node, session); if(xmldoc == null) { log.info("no values returned from query"); break; } try { - log.debug("processing xpathresult..."); xpathResult = (org.w3c.dom.NodeList) fieldXpath.evaluate(xmldoc, XPathConstants.NODESET); - log.debug("processed xpathResult"); } catch (XPathExpressionException xpe) { log.error("Error extracting seriesId from solr result doc: " + xpe.getMessage()); metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage()); @@ -385,12 +413,39 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, log.trace("adding pid: " + currentPid); } + // Get 
dateModified for the returned seriesIds + try { + xpathResult = (org.w3c.dom.NodeList) dateModifiedXpath.evaluate(xmldoc, XPathConstants.NODESET); + } catch (XPathExpressionException xpe) { + log.error("Error extracting dateModified from solr result doc: " + xpe.getMessage()); + metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage()); + metadigException.initCause(xpe); + throw metadigException; + } + + DateTime thisDateModified; + thisResultLength = xpathResult.getLength(); + if(thisResultLength == 0) break; + for (int index = 0; index < xpathResult.getLength(); index++) { + node = xpathResult.item(index); + String dateStr = node.getTextContent(); + log.debug("Checking date str: " + dateStr); + thisDateModified = DateTime.parse(dateStr, + DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")); + if(thisDateModified.isAfter(lastDateModifiedDT)) { + lastDateModifiedDT = thisDateModified.plusMillis(1); + log.debug("Updated lastDateModified to " + lastDateModifiedDT); + } + } + startPos += thisResultLength; } while (thisResultLength > 0); RequestScorerJob.ListResult result = new RequestScorerJob.ListResult(); result.setResultCount(pids.size()); result.setResult(pids); + // Return the sysmeta 'dateSystemMetadataModified' of the last pid harvested. + result.setLastDateModified(lastDateModifiedDT); return result; } From 5bfd7a780b380b3feb6ddac493448e2f4aa64d29 Mon Sep 17 00:00:00 2001 From: gothub Date: Thu, 20 Aug 2020 11:17:40 -0700 Subject: [PATCH 45/47] Improve javadocs; code cleanup --- .../mdqengine/scheduler/JobScheduler.java | 8 ++- .../mdqengine/scheduler/RequestReportJob.java | 57 +++++++++++++++++-- .../mdqengine/scheduler/RequestScorerJob.java | 35 ++++++++---- .../ucsb/nceas/mdqengine/scorer/Scorer.java | 14 ++--- 4 files changed, 89 insertions(+), 25 deletions(-) diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java index 3f9612a3..dd72f43b 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java @@ -241,12 +241,18 @@ public static void main(String[] argv) throws Exception { public JobScheduler () { } + /** + * Read a single parameter from the quality engine parameter file + * @param paramName the parameter to read from the config file + * @throws ConfigurationException if there is an exception while reading the config file + * @throws IOException if there is an exception while reading the config file + */ public String readConfig (String paramName) throws ConfigurationException, IOException { String paramValue = null; try { MDQconfig cfg = new MDQconfig(); paramValue = cfg.getString(paramName); - } catch (Exception e) { + } catch (ConfigurationException | IOException e) { log.error("Could not read configuration for param: " + paramName + ": " + e.getMessage()); throw e; } diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 22540674..27a7458b 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -320,8 +320,25 @@ public void execute(JobExecutionContext context) store.shutdown(); } + /** + * Query a DataONE CN or MN to obtain a list of persistent identifiers (pids) for metadata objects have been + * added to the system during a 
specific time period.
+     * @param cnNode a DataONE CN connection client object
+     * @param mnNode a DataONE MN connection client object
+     * @param isCN a logical indicating whether a CN or an MN client object is being used
+     * @param session a DataONE authentication session
+     * @param suiteId the quality suite to check (used to see if this pid has already been processed)
+     * @param pidFilter the DataONE format identifiers to filter for
+     * @param startHarvestDatetimeStr the starting date to harvest pids from
+     * @param endHarvestDatetimeStr the ending date to harvest pids from
+     * @param startCount the start count for paging results from DataONE, for large results
+     * @param countRequested the number of items to get from DataONE on each request
+     * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid
+     * @throws Exception if there is an exception while executing the job.
+     * @return a ListResult object containing the matching pids
+     */
     public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session,
-                                       String suiteId, String nodeId, String pidFilter, String startHarvestDatetimeStr,
+                                       String suiteId, String pidFilter, String startHarvestDatetimeStr,
                                        String endHarvestDatetimeStr, int startCount,
                                        int countRequested, DateTime lastDateModifiedDT) throws Exception {
 
@@ -331,7 +348,6 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
 
         ObjectFormatIdentifier formatId = null;
         NodeReference nodeRef = null;
-        //nodeRef.setValue(nodeId);
         Identifier identifier = null;
         Boolean replicaStatus = false;
 
@@ -356,7 +372,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
             }
             //log.info("Got " + objList.getCount() + " pids for format: " + formatId.getValue() + " pids.");
         } catch (Exception e) {
-            log.error("Error retrieving pids for node " + nodeId + ": " + e.getMessage());
+            log.error("Error retrieving pids: " + e.getMessage());
             throw e;
         }
 
@@ -416,7 +432,24 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
         return result;
     }
 
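The 'pidFilter' parameter documented above is applied against each returned object's formatId. Assuming, as the javadoc wording suggests, a regex-style match over formatIds, the filtering step might look like the sketch below; the filter expression and example formatIds are chosen here for illustration and are not taken from the engine's configuration.

    // Illustration only: match candidate formatIds against a regex-style filter.
    public class PidFilterSketch {
        public static void main(String[] args) {
            String pidFilter = "^eml.*|^http://www.isotc211.org.*";
            String[] formatIds = {
                "eml://ecoinformatics.org/eml-2.1.1",
                "http://www.isotc211.org/2005/gmd",
                "image/png"
            };
            for (String formatId : formatIds) {
                // String.matches() requires the whole formatId to match the pattern.
                boolean harvest = formatId.matches(pidFilter);
                System.out.println(formatId + " -> " + (harvest ? "harvest" : "skip"));
            }
        }
    }

Only metadata formats pass the filter; data objects such as "image/png" are skipped before any report request is submitted.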

+ * An additional check is made to see if the system metadata in the + * run is older than the passed in date. Because the quality engine + * uses fields from sysmeta (obsoletes, obsoletedBy), a run may need + * to be performed on an existing run in order to update the sysmeta, as + * the system is stored in the run object, and this run object is + * parsed when the run is inserted into the Solr index. + *

+ * @param pid the pid to check + * @param suiteId the suite identifier to check (e.g. "FAIR-suite-0.3.1") + * @param store the DataStore object to send the check request to. + * @throws MetadigStoreException + * + */ + public boolean runExists(String pid, String suiteId, MDQStore store, Date dateSystemMetadataModified) throws MetadigStoreException { boolean found = false; Date runDateSystemMetadataModified = null; @@ -440,6 +473,22 @@ public boolean runExists(String pid, String suiteId, MDQStore store) throws Meta return found; } + /** + * Submit a request to the metadig controller to run a quality suite for the specified pid. + *
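The freshness rule the javadoc describes can be stated compactly. The sketch below is not the method's actual body (which also queries the store); it only demonstrates the date comparison that decides whether an existing run still counts, with names chosen for illustration.

    import java.util.Date;

    // Sketch of the re-run rule: a stored run only counts as current if the
    // sysmeta date saved with it is not older than the date now reported for
    // the object; otherwise obsoletes/obsoletedBy changes would be missed.
    public class RunFreshnessSketch {
        static boolean runIsCurrent(Date runDateSystemMetadataModified, Date dateSystemMetadataModified) {
            if (runDateSystemMetadataModified == null) {
                return false; // no sysmeta recorded with the run; re-run to capture it
            }
            return !runDateSystemMetadataModified.before(dateSystemMetadataModified);
        }

        public static void main(String[] args) {
            Date stored = new Date(1_000_000L);
            Date current = new Date(2_000_000L);
            System.out.println(runIsCurrent(stored, current));  // false: sysmeta changed, re-run
            System.out.println(runIsCurrent(current, current)); // true: run is up to date
        }
    }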

+ * The system metadata for a pid is also obtained and sent with the request + *

+ * + * @param cnNode a DataONE CN connection client object + * @param mnNode a DataONE MN connection client object + * @param isCN a logical indicating whether a CN of MN object + * @param session a DataONE authentication session + * @param qualityServiceUrl the URL of the MetaDIG quality service + * @param pidStr the pid to submit the request for + * @param suiteId the suite identifier to submit the request for + * + * @throws Exception + */ public void submitReportRequest(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String qualityServiceUrl, String pidStr, String suiteId) throws Exception { SystemMetadata sysmeta = null; diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java index 31dcea61..1abb1dce 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java @@ -330,22 +330,21 @@ public void execute(JobExecutionContext context) /** * Query a DataONE CN or MN object store for a list of object that match the time range and formatId filters provided. * - * //@param cnNode - * //@param mnNode - * //@param isCN - * @param session - * @param pidFilter - * @param startHarvestDatetimeStr - * @param endHarvestDatetimeStr - * @param startCount - * @param countRequested + * @param d1Node a DataONE CN or MN connection client object + * @param session a DataONE authentication session + * @param pidFilter the DataONE format identifies to filter for + * @param startHarvestDatetimeStr the starting date to harvest pids from + * @param endHarvestDatetimeStr the ending data to harvest pids from + * @param startCount the start count for paging results from DataONE, for large results + * @param countRequested the number of items to get from DataONE on each request + * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid + * @throws Exception if there is an exception while executing the job. * @return a ListResult object containing the matching pids * @throws Exception */ - //public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr, - int startCount, int countRequested) throws Exception { + int startCount, int countRequested, DateTime lastDateModifiedDT) throws Exception { MetadigProcessException metadigException = null; @@ -450,6 +449,18 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session, return result; } + /** + * Submit a requst to the metadig controller to get qualiry score info and create a graph for the specified collection. 
+     *
+     * @param qualityServiceUrl
+     * @param collectionId
+     * @param suiteId
+     * @param nodeId
+     * @param formatFamily
+     *
+     * @throws Exception
+     *
+     */
     public void submitScorerRequest(String qualityServiceUrl, String collectionId, String suiteId, String nodeId, String formatFamily) throws Exception {
 
         InputStream runResultIS = null;
@@ -475,7 +486,7 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S
         post.addHeader("Accept", "application/xml");
 
         // send to service
-        log.debug("submitting scores request : " + scorerServiceUrl);
+        log.trace("submitting scores request : " + scorerServiceUrl);
 
         CloseableHttpClient client = HttpClients.createDefault();
         CloseableHttpResponse response = client.execute(post);
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
index fede5a0f..df56654d 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
@@ -129,6 +129,7 @@ public static void main(String[] argv) throws Exception {
      * A set of quality scores are retrieved from the Quality Solr Server and a quality graph and csv file are created from
      * them. For DataONE collections, the 'collectionQuery' is retrieved from Solr to determine the set of pids to be
      * included.
+     * <p></p>
      *
      */
     final Consumer consumer = new DefaultConsumer(inProcessChannel) {
@@ -330,6 +331,7 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp
         }
     };
 
+        // Initialize the RabbitMQ queue for scorer requests sent by the controller
     inProcessChannel.basicConsume(SCORER_QUEUE_NAME, false, consumer);
 }
 
@@ -337,18 +339,16 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp
      * Retrieve pids associated with a DataONE collection.
      *

First the 'collectionQuery' field is retrieved from DataONE Solr for the collection

- *

Next, a query is issued with the query from collectionQuery field, to retrieve all Solr docs for the collection ids./p> + *

Next, a query is issued with the query from the collectionQuery field, to retrieve all Solr docs for the collection ids./p> * *

Note that in the current design, the collection query is always obtained by querying the node specified in the taskList.csv file, * which is usually an MN, but the collectionQuery is always evaluated on the CN

* * @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc - * @param d1Node - * @param session + * @param d1Node the DataONE connection object for a node + * @param session the DataONE authentication session * @return a List of quality scores fetched from Solr */ - //private ScorerResult getCollectionPids(String collectionId, MultipartCNode cnNode, MultipartMNode mnNode, - // Boolean isCN, Session session) throws MetadigProcessException { private ScorerResult getCollectionPids(String collectionId, MultipartD1Node d1Node, Session session) throws MetadigProcessException { Document xmldoc = null; @@ -363,11 +363,9 @@ which will be used to query DataONE Solr for all the pids associated with that p */ ArrayList pids = new ArrayList<>(); queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; - //queryStr = "?q=seriesId:" + encodeValue(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND"; - //queryStr = "?q=seriesId:" + collectionId + "+-obsoletedBy:*&fl=collectionQuery,label,rightsHolder&q.op=AND"; startPos = 0; - // Just getting 1 row + // Just getting 1 row (for the collectionQuery field) countRequested = 10; // Get the collectionQuery from Solr From bc9b37ad577e78479797b42837283c7df2634f86 Mon Sep 17 00:00:00 2001 From: gothub Date: Wed, 2 Sep 2020 15:45:35 -0700 Subject: [PATCH 46/47] CN harvesting is missing some pids #267 --- .../edu/ucsb/nceas/mdqengine/model/Task.java | 16 +- .../mdqengine/scheduler/JobScheduler.java | 17 ++ .../nceas/mdqengine/scheduler/NodeList.java | 168 +++++++++++ .../mdqengine/scheduler/RequestReportJob.java | 267 +++++++++++------- .../mdqengine/scheduler/RequestScorerJob.java | 51 ++-- .../nceas/mdqengine/store/DatabaseStore.java | 252 ++++++++++++++++- .../nceas/mdqengine/store/InMemoryStore.java | 15 +- .../ucsb/nceas/mdqengine/store/MDQStore.java | 12 +- .../ucsb/nceas/mdqengine/store/MNStore.java | 21 +- src/main/resources/sql/quality-v2.3.0.sql | 26 +- 10 files changed, 682 insertions(+), 163 deletions(-) create mode 100644 src/main/java/edu/ucsb/nceas/mdqengine/scheduler/NodeList.java diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/model/Task.java b/src/main/java/edu/ucsb/nceas/mdqengine/model/Task.java index 5e174d42..f2290b28 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/model/Task.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/model/Task.java @@ -1,10 +1,12 @@ package edu.ucsb.nceas.mdqengine.model; +import java.util.HashMap; + public class Task { private String taskName; private String taskType; - private String lastHarvestDatetime; + private HashMap lastHarvestDatetimes = new HashMap<>(); public void setTaskName(String name) { this.taskName = name; @@ -18,10 +20,16 @@ public String getTaskName() { public String getTaskType() { return taskType; } - public void setLastHarvestDatetime(String lastHarvestDatetime) { - this.lastHarvestDatetime = lastHarvestDatetime; + public void setLastHarvestDatetimes(HashMap lastHarvestDatetimes) { + this.lastHarvestDatetimes = lastHarvestDatetimes; + } + + public void setLastHarvestDatetime(String lastHarvestDatetime, String nodeId) { + this.lastHarvestDatetimes.put(nodeId, lastHarvestDatetime); } - public String getLastHarvestDatetime() { return lastHarvestDatetime; } + public String getLastHarvestDatetime(String nodeId) { + return this.lastHarvestDatetimes.get(nodeId); + } } diff --git 
a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java index dd72f43b..c38e8d1f 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/JobScheduler.java @@ -180,6 +180,16 @@ public static void main(String[] argv) throws Exception { log.debug("fileExcludeMatch: " + fileExcludeMatch); logFile = splitted[++icnt].trim(); log.debug("log file: " + logFile); + } else if (taskType.equals("nodelist")) { + log.debug("Scheduling nodelist update from DataONE, task name: " + taskName + ", task group: " + taskGroup); + String[] splitted = Arrays.stream(params.split(";")) + .map(String::trim) + .toArray(String[]::new); + + int icnt = -1; + log.debug("Split length: " + splitted.length); + nodeId = splitted[++icnt].trim(); + log.debug("nodeId: " + nodeId); } try { @@ -221,6 +231,13 @@ public static void main(String[] argv) throws Exception { .usingJobData("fileExcludeMatch", fileExcludeMatch) .usingJobData("logFile", logFile) .build(); + } else if (taskType.equalsIgnoreCase("nodelist")) { + job = newJob(NodeList.class) + .withIdentity(taskName, taskGroup) + .usingJobData("taskName", taskName) + .usingJobData("taskType", taskType) + .usingJobData("nodeId", nodeId) + .build(); } CronTrigger trigger = newTrigger() diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/NodeList.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/NodeList.java new file mode 100644 index 00000000..5eecc2cd --- /dev/null +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/NodeList.java @@ -0,0 +1,168 @@ +package edu.ucsb.nceas.mdqengine.scheduler; + +import edu.ucsb.nceas.mdqengine.DataONE; +import edu.ucsb.nceas.mdqengine.MDQconfig; +import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException; +import edu.ucsb.nceas.mdqengine.store.DatabaseStore; +import edu.ucsb.nceas.mdqengine.store.MDQStore; +import org.apache.commons.configuration2.ex.ConfigurationException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.client.rest.HttpMultipartRestClient; +import org.dataone.client.rest.MultipartRestClient; +import org.dataone.client.v2.impl.MultipartCNode; +import org.dataone.service.exceptions.NotImplemented; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v1.*; +import org.dataone.service.types.v2.Node; +import org.dataone.service.types.v2.Property; +import org.quartz.*; + +import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.TimeZone; + +/** + *

+ * Run a MetaDIG Quality Engine Scheduler task, for example, + * query a member node for new pids and request that a quality + * report is created for each one. + *

+ * + * @author Peter Slaughter + */ +@PersistJobDataAfterExecution +@DisallowConcurrentExecution +public class NodeList implements Job { + + private Log log = LogFactory.getLog(NodeList.class); + + // Since Quartz will re-instantiate a class every time it + // gets executed, non-static member variables can + // not be used to maintain state! + + /** + *

+ * Called by the {@link org.quartz.Scheduler} when a + * {@link org.quartz.Trigger} fires that is associated with + * the Job. + *

+     *
+     * @throws JobExecutionException if there is an exception while executing the job.
+     */
+    public void execute(JobExecutionContext context)
+            throws JobExecutionException {
+
+        Log log = LogFactory.getLog(NodeList.class);
+        JobKey key = context.getJobDetail().getKey();
+        JobDataMap dataMap = context.getJobDetail().getJobDataMap();
+
+        String taskName = dataMap.getString("taskName");
+        String taskType = dataMap.getString("taskType");
+        String nodeId = dataMap.getString("nodeId");
+        MultipartRestClient mrc = null;
+        MultipartCNode cnNode = null;
+
+        String nodeServiceUrl = null;
+
+        try {
+            MDQconfig cfg = new MDQconfig();
+            String nodeAbbr = nodeId.replace("urn:node:", "");
+            // TODO: Cache the node values from the CN listNode service
+            nodeServiceUrl = cfg.getString(nodeAbbr + ".serviceUrl");
+        } catch (ConfigurationException | IOException ce) {
+            JobExecutionException jee = new JobExecutionException(taskName + ": error executing task.");
+            jee.initCause(ce);
+            throw jee;
+        }
+
+        log.debug("Executing task " + taskType + ", " + taskName + " for node: " + nodeId);
+
+        Session session = DataONE.getSession(null, null);
+
+        try {
+            mrc = new HttpMultipartRestClient();
+        } catch (Exception e) {
+            log.error(taskName + ": error creating rest client: " + e.getMessage());
+            JobExecutionException jee = new JobExecutionException(e);
+            jee.setRefireImmediately(false);
+            throw jee;
+        }
+
+        cnNode = new MultipartCNode(mrc, nodeServiceUrl, session);
+        org.dataone.service.types.v2.NodeList nodeList = null;
+
+        try {
+            nodeList = cnNode.listNodes();
+        } catch (NotImplemented | ServiceFailure e) {
+            e.printStackTrace();
+            throw new JobExecutionException(taskName + ": cannot get the node list from the CN, unable to schedule job", e);
+        }
+
+        // Get a connection to the database
+        MDQStore store = null;
+
+        try {
+            store = new DatabaseStore();
+        } catch (Exception e) {
+            e.printStackTrace();
+            throw new JobExecutionException(taskName + ": cannot create store, unable to schedule job", e);
+        }
+
+        if (!store.isAvailable()) {
+            try {
+                store.renew();
+            } catch (MetadigStoreException e) {
+                e.printStackTrace();
+                throw new JobExecutionException(taskName + ": cannot renew store, unable to schedule job", e);
+            }
+        }
+
+        Property property = null;
+        ArrayList plist = null;
+        for (Node node : nodeList.getNodeList()) {
+            log.debug("node: " + node.getName());
+            log.debug("type: " + node.getType().toString());
+            log.debug("id: " + node.getIdentifier().getValue());
+            log.debug("state: " + node.getState().toString());
+            log.debug("is synchronized: " + node.isSynchronize());
+
+            if (! node.isSynchronize()) {
+                log.debug(taskName + ": Skipping unsynchronized node " + node.getIdentifier().getValue());
+                continue;
+            } else if (node.getType().toString().equalsIgnoreCase("MN")) {
+                log.debug(taskName + ": saving node " + node.getIdentifier().getValue());
+                try {
+                    store.saveNode(node);
+                } catch (MetadigStoreException mse) {
+                    mse.printStackTrace();
+                    throw new JobExecutionException("Cannot save node " + node.getIdentifier().getValue() + " to store", mse);
+                }
+            } else {
+                log.debug(taskName + ": skipping CN node: " + node.getIdentifier().getValue());
+            }
+        }
+
+        // For debugging purposes: retrieve and print out all node entries if trace logging is enabled.
+ if (log.isTraceEnabled()) { + log.trace("Retrieving and printing out all saved node harvest dates..."); + + ArrayList nodes = store.getNodes(); + for (Node node : nodes) { + log.trace("identifier: " + node.getIdentifier().getValue()); + + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + dateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + String lastHarvestDatetimeStr = dateFormat.format(node.getSynchronization().getLastHarvested()); + + log.trace("harvest: " + lastHarvestDatetimeStr); + log.trace("synchronize: " + node.isSynchronize()); + log.trace("state: " + node.getState().toString()); + log.trace("baseURL: " + node.getBaseURL()); + } + } + } +} + diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java index 27a7458b..acbecf1c 100644 --- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java +++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestReportJob.java @@ -19,6 +19,7 @@ import org.dataone.client.rest.MultipartRestClient; import org.dataone.client.v2.impl.MultipartCNode; import org.dataone.client.v2.impl.MultipartMNode; +import org.dataone.service.types.v2.Node; import org.dataone.mimemultipart.SimpleMultipartEntity; import org.dataone.service.exceptions.NotAuthorized; import org.dataone.service.types.v1.*; @@ -155,17 +156,17 @@ public void execute(JobExecutionContext context) // TODO: Cache the node values from the CN listNode service nodeServiceUrl = cfg.getString(nodeAbbr + ".serviceUrl"); } catch (ConfigurationException | IOException ce) { - JobExecutionException jee = new JobExecutionException("Error executing task."); + JobExecutionException jee = new JobExecutionException(taskName + ": error executing task."); jee.initCause(ce); throw jee; } - log.info("Executing task " + taskType + ", " + taskName + " for node: " + nodeId + ", suiteId: " + suiteId); + log.debug("Executing task " + taskType + ", " + taskName + " for node: " + nodeId + ", suiteId: " + suiteId); try { mrc = new HttpMultipartRestClient(); } catch (Exception e) { - log.error("Error creating rest client: " + e.getMessage()); + log.error(taskName + ": error creating rest client: " + e.getMessage()); JobExecutionException jee = new JobExecutionException(e); jee.setRefireImmediately(false); throw jee; @@ -200,123 +201,170 @@ public void execute(JobExecutionContext context) } } - // Set UTC as the default time zone for all DateTime operations. - // Get current datetime, which may be used for start time range. - DateTimeZone.setDefault(DateTimeZone.UTC); - DateTime currentDT = new DateTime(DateTimeZone.UTC); - DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); - String currentDatetimeStr = dtfOut.print(currentDT); - DateTime startDateTimeRange = null; - DateTime endDateTimeRange = null; - String lastHarvestDateStr = null; - - Task task; - task = store.getTask(taskName, taskType); - // If a 'task' entry has not been saved for this task name yet, then a 'lastHarvested' - // DataTime will not be available, in which case the 'startHarvestDataTime' from the - // config file will be used. 
- if(task.getLastHarvestDatetime() == null) { - task = new Task(); - task.setTaskName(taskName); - task.setTaskType(taskType); - lastHarvestDateStr = startHarvestDatetimeStr; - task.setLastHarvestDatetime(lastHarvestDateStr); - } else { - lastHarvestDateStr = task.getLastHarvestDatetime(); - } + ArrayList nodes = new ArrayList<>(); - DateTime lastHarvestDateDT = new DateTime(lastHarvestDateStr); - // Set the search start datetime to the last harvest datetime, unless it is in the - // future. (This can happen when the previous time range end was for the current day, - // as the end datetime range for the previous task run will have been stored as the - // new lastharvestDateTime. - DateTime startDT = null; - if(lastHarvestDateDT.isAfter(currentDT.toInstant())) { - startDT = currentDT; + if (isCN) { + nodes = store.getNodes(); } else { - startDT = new DateTime(lastHarvestDateDT); + Node node = store.getNode(nodeId); + if (node.getIdentifier().getValue() == null) { + String msg = ("Node entry not found for node: " + nodeId); + log.error(msg); + JobExecutionException jee = new JobExecutionException(msg); + jee.setRefireImmediately(false); + throw jee; + } else { + log.trace("Got node " + node.getIdentifier().getValue()); + nodes.add(node); + } } -// DateTime endDT = new DateTime(startDT); -// endDT = endDT.plusDays(harvestDatetimeInc); -// if(endDT.isAfter(currentDT.toInstant())) { -// endDT = currentDT; -// } - DateTime endDT = new DateTime(currentDT); - - // If the start and end harvest dates are the same (happens for a new node), then - // tweak the start so that DataONE listObjects doesn't complain. - if(startDT == endDT ) { - startDT = startDT.minusMinutes(1); - } + String harvestNodeId = null; + for (Node node : nodes) { - // Track the sysmeta dateUploaded of the latest harvested pid. This will become the starting time of - // the next harvest. - DateTime lastDateModifiedDT = startDT; + harvestNodeId = node.getIdentifier().getValue(); + // If processing a CN, check each MN to see if it is being synchronized and if it + // is up. + if (isCN) { - String startDTstr = dtfOut.print(startDT); - String endDTstr = dtfOut.print(endDT); + // The NodeList task doesn't save CN entries from the DataONE 'listNodes()' service, but check + // just in case. + if (node.getType().equals(NodeType.CN)) { + log.debug("Harvesting from CN, skipping CN entry from node list for " + node.getIdentifier().getValue()); + continue; + } - Integer startCount = new Integer(0); - ListResult result = null; - Integer totalResultCount = 0; - Integer filteredResultCount = 0; - Integer allPidsCnt = 0; + if (! node.isSynchronize() || ! 
node.getState().equals(NodeState.UP)) { + log.trace("Skipping disabled node: " + node.getIdentifier().getValue() + ", sync: " + node.isSynchronize() + + ", status: " + node.getState().toString()); + continue; + } - boolean morePids = true; - while(morePids) { - ArrayList pidsToProcess = null; - try { - result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT); - pidsToProcess = result.getResult(); - totalResultCount = result.getTotalResultCount(); - filteredResultCount = result.getFilteredResultCount(); - lastDateModifiedDT = result.getLastDateModified(); - } catch (Exception e) { - JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e); - jee.setRefireImmediately(false); - throw jee; - } + DateTime mnLastHarvestDT = new DateTime(node.getSynchronization().getLastHarvested(), DateTimeZone.UTC); + DateTime oneMonthAgoDT = new DateTime(DateTimeZone.UTC).minusMonths(1); - allPidsCnt = pidsToProcess.size(); - for (String pidStr : pidsToProcess) { - try { - log.debug(taskName + ": submitting pid: " + pidStr); - submitReportRequest(cnNode, mnNode, isCN, session, qualityServiceUrl, pidStr, suiteId); - } catch (org.dataone.service.exceptions.NotFound nfe) { - log.error("Unable to process pid: " + pidStr + nfe.getMessage()); - continue; - } catch (Exception e) { - log.error("Unable to process pid: " + pidStr + " - " + e.getMessage()); + if (mnLastHarvestDT.isBefore(oneMonthAgoDT.toInstant())) { + DateTimeZone.setDefault(DateTimeZone.UTC); + DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + log.trace("Skipping node " + node.getIdentifier().getValue() + " that hasn't been sync'd since " + dtfOut.print(mnLastHarvestDT)); continue; } } - // Check if DataONE returned the max number of results. If so, we have to request more by paging through - // the results returned pidsToProcess (i.e. DataONE listObjects service). If the returned result is - // less than the requested result, then all pids have been retrieved. - if(totalResultCount >= countRequested) { - morePids = true; - startCount = startCount + totalResultCount; - log.trace("Paging through more results, current start is " + startCount); + log.trace("Harvesting node: " + node.getIdentifier().getValue()); + + // Set UTC as the default time zone for all DateTime operations. + // Get current datetime, which may be used for start time range. + DateTimeZone.setDefault(DateTimeZone.UTC); + DateTime currentDT = new DateTime(DateTimeZone.UTC); + DateTimeFormatter dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + String lastHarvestDateStr = null; + + Task task; + task = store.getTask(taskName, taskType, harvestNodeId); + // If a 'task' entry has not been saved for this task name yet, then a 'lastHarvested' + // DataTime will not be available, in which case the 'startHarvestDataTime' from the + // config file will be used. + if (task.getLastHarvestDatetime(harvestNodeId) == null) { + task.setTaskName(taskName); + task.setTaskType(taskType); + lastHarvestDateStr = startHarvestDatetimeStr; + task.setLastHarvestDatetime(lastHarvestDateStr, harvestNodeId); } else { - morePids = false; + lastHarvestDateStr = task.getLastHarvestDatetime(harvestNodeId); } - } - // Don't update the lastHarvestDateDT if no pids were found. 
- if (allPidsCnt > 0) { - task.setLastHarvestDatetime(dtfOut.print(lastDateModifiedDT)); - log.debug("Saving lastHarvestDate: " + dtfOut.print(lastDateModifiedDT)); - try { - store.saveTask(task); - } catch (MetadigStoreException mse) { - log.error("Error saving task: " + task.getTaskName()); - JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); - jee.setRefireImmediately(false); - throw jee; + + DateTime lastHarvestDateDT = new DateTime(lastHarvestDateStr); + // Set the search start datetime to the last harvest datetime, unless it is in the + // future. (This can happen when the previous time range end was for the current day, + // as the end datetime range for the previous task run will have been stored as the + // new lastharvestDateTime. + DateTime startDT = null; + if (lastHarvestDateDT.isAfter(currentDT.toInstant())) { + startDT = currentDT; + } else { + startDT = new DateTime(lastHarvestDateDT); + } + + DateTime endDT = new DateTime(currentDT); + + // If the start and end harvest dates are the same (happens for a new node), then + // tweak the start so that DataONE listObjects doesn't complain. + if (startDT == endDT) { + startDT = startDT.minusMinutes(1); + } + + // Track the sysmeta dateUploaded of the latest harvested pid. This will become the starting time of + // the next harvest. + DateTime lastDateModifiedDT = startDT; + + String startDTstr = dtfOut.print(startDT); + String endDTstr = dtfOut.print(endDT); + + log.trace("start time: " + startDTstr); + + Integer startCount = new Integer(0); + ListResult result = null; + Integer totalResultCount = 0; + Integer filteredResultCount = 0; + Integer allPidsCnt = 0; + + log.trace("Getting pids for nodeId: " + harvestNodeId); + boolean morePids = true; + while (morePids) { + ArrayList pidsToProcess = null; + try { + result = getPidsToProcess(cnNode, mnNode, isCN, session, suiteId, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT, harvestNodeId, taskName); + pidsToProcess = result.getResult(); + totalResultCount = result.getTotalResultCount(); + filteredResultCount = result.getFilteredResultCount(); + lastDateModifiedDT = result.getLastDateModified(); + } catch (Exception e) { + JobExecutionException jee = new JobExecutionException("Unable to get pids to process", e); + jee.setRefireImmediately(false); + throw jee; + } + + allPidsCnt = pidsToProcess.size(); + for (String pidStr : pidsToProcess) { + try { + log.debug(taskName + ": submitting pid: " + pidStr); + submitReportRequest(cnNode, mnNode, isCN, session, qualityServiceUrl, pidStr, suiteId); + } catch (org.dataone.service.exceptions.NotFound nfe) { + log.error("Unable to process pid: " + pidStr + nfe.getMessage()); + continue; + } catch (Exception e) { + log.error("Unable to process pid: " + pidStr + " - " + e.getMessage()); + continue; + } + } + + // Check if DataONE returned the max number of results. If so, we have to request more by paging through + // the results returned pidsToProcess (i.e. DataONE listObjects service). If the returned result is + // less than the requested result, then all pids have been retrieved. + if (totalResultCount >= countRequested) { + morePids = true; + startCount = startCount + totalResultCount; + log.trace("Paging through more results, current start is " + startCount); + } else { + morePids = false; + } + } + // Don't update the lastHarvestDateDT if no pids were found. 
+ if (allPidsCnt > 0) { + task.setLastHarvestDatetime(dtfOut.print(lastDateModifiedDT), harvestNodeId); + log.trace("Saving lastHarvestDate: " + dtfOut.print(lastDateModifiedDT) + " for node: " + harvestNodeId); + try { + store.saveTask(task, harvestNodeId); + } catch (MetadigStoreException mse) { + log.error("Error saving task: " + task.getTaskName()); + JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse); + jee.setRefireImmediately(false); + throw jee; + } + log.info(taskName + ": found " + allPidsCnt + " pids for nodeId: " + harvestNodeId + ", start: " + startDTstr + ", end: " + endDTstr + ", servierUrl: " + nodeServiceUrl); } } - log.info(taskName + ": Found " + allPidsCnt + " pids for start: " + startDTstr + ", end: " + endDTstr + " at servierUrl: " + nodeServiceUrl); store.shutdown(); } @@ -334,13 +382,14 @@ public void execute(JobExecutionContext context) * @param startCount the start count for paging results from DataONE, for large results * @param countRequested the number of items to get from DataONE on each request * @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid + * @param nodeIdFilter filter results for this nodeId (applies only to CN) * @throws Exception if there is an exception while executing the job. * @return a ListResult object containing the matching pids */ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String suiteId, String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr, int startCount, - int countRequested, DateTime lastDateModifiedDT) throws Exception { + int countRequested, DateTime lastDateModifiedDT, String nodeIdFilter, String taskName) throws Exception { ArrayList pids = new ArrayList(); InputStream qis = null; @@ -364,15 +413,19 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, try { // Even though MultipartMNode and MultipartCNode have the same parent class D1Node, the interface for D1Node doesn't - // include listObjects (it should), so we have to maintain a cnNode and mnNode. + // include listObjects, as the parameters differ from CN to MN, so we have to use a different object for each. if(isCN) { + log.trace("Getting pids for cn, for nodeid: " + nodeIdFilter); + nodeRef = new NodeReference(); + nodeRef.setValue(nodeIdFilter); objList = cnNode.listObjects(session, startDate, endDate, formatId, nodeRef, identifier, startCount, countRequested); } else { + log.trace("Getting pids for mn"); objList = mnNode.listObjects(session, startDate, endDate, formatId, identifier, replicaStatus, startCount, countRequested); } //log.info("Got " + objList.getCount() + " pids for format: " + formatId.getValue() + " pids."); } catch (Exception e) { - log.error("Error retrieving pids: " + e.getMessage()); + log.error(taskName + ": error retrieving pids: " + e.getMessage()); throw e; } @@ -413,7 +466,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, // that this is the last pid to be harvested in this round. 
if (thisDateModifiedDT.isAfter(lastDateModifiedDT)) {
                         lastDateModifiedDT = thisDateModifiedDT.plusMillis(1);
-                        log.debug("Updated lastDateMoidifed: " + lastDateModifiedDT.toString());
+                        log.debug("New value for lastDateModified: " + lastDateModifiedDT.toString());
                     }
 //                }
             }
 
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
index 31dcea61..b98fbd4c 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/scheduler/RequestScorerJob.java
@@ -140,7 +140,7 @@ public void execute(JobExecutionContext context)
             requestType = dataMap.getString("requestType");
         }
 
-        log.info("Executing task " + taskType + ", " + taskName + " for node: " + nodeId + ", suiteId: " + suiteId);
+        log.debug("Executing task " + taskType + ", " + taskName + " for node: " + nodeId + ", suiteId: " + suiteId);
 
         try {
             cfg = new MDQconfig();
             nodeServiceUrl = cfg.getString(nodeAbbr + ".serviceUrl");
             log.trace("nodeServiceUrl: " + nodeServiceUrl);
         } catch (ConfigurationException | IOException ce) {
-            JobExecutionException jee = new JobExecutionException("Error executing task.");
+            JobExecutionException jee = new JobExecutionException(taskName + ": Error executing task: " + ce.getMessage());
             jee.initCause(ce);
             throw jee;
         }
 
+        if(nodeServiceUrl == null) {
+            String msg = taskName + ": Unable to read serviceUrl from config file for: " + nodeId;
+            throw new JobExecutionException(msg);
+        }
+
         Session session = DataONE.getSession(subjectId, authToken);
 
         // Get a connection to the DataONE node (CN or MN)
@@ -194,19 +199,19 @@ public void execute(JobExecutionContext context)
         String lastHarvestDateStr = null;
 
         Task task;
-        task = store.getTask(taskName, taskType);
+        task = store.getTask(taskName, taskType, nodeId);
         // If a 'task' entry has not been saved for this task name yet, then a 'lastHarvested'
         // DataTime will not be available, in which case the 'startHarvestDataTime' from the
         // config file will be used.
-        if(task.getLastHarvestDatetime() == null) {
+        if(task.getLastHarvestDatetime(nodeId) == null) {
             task = new Task();
             task.setTaskName(taskName);
             task.setTaskType(taskType);
             lastHarvestDateStr = startHarvestDatetimeStr;
-            task.setLastHarvestDatetime(lastHarvestDateStr);
+            task.setLastHarvestDatetime(lastHarvestDateStr, nodeId);
         } else {
-            lastHarvestDateStr = task.getLastHarvestDatetime();
+            lastHarvestDateStr = task.getLastHarvestDatetime(nodeId);
         }
 
         DateTime lastHarvestDateDT = new DateTime(lastHarvestDateStr);
@@ -221,12 +226,6 @@ public void execute(JobExecutionContext context)
             startDT = new DateTime(lastHarvestDateDT);
         }
 
-//        DateTime endDT = new DateTime(startDT);
-//        endDT = endDT.plusDays(harvestDatetimeInc);
-//        if(endDT.isAfter(currentDT.toInstant())) {
-//            endDT = currentDT;
-//        }
-
         DateTime endDT = new DateTime(currentDT);
 
         // If the start and end harvest dates are the same (happens for a new node), then
@@ -274,7 +273,7 @@ public void execute(JobExecutionContext context)
                 log.trace("Getting portal pids to process, startCount: " + startCount + ", countRequested: " + countRequested);
 
                 try {
-                    result = getPidsToProcess(d1Node, session, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT);
+                    result = getPidsToProcess(d1Node, session, pidFilter, startDTstr, endDTstr, startCount, countRequested, lastDateModifiedDT, taskName);
                     pidsToProcess = result.getResult();
                     resultCount = result.getResultCount();
                     lastDateModifiedDT = result.getLastDateModified();
@@ -311,18 +310,18 @@ public void execute(JobExecutionContext context)
 
             if (allIds > 0) {
                 // Record the new "last harvested" date
-                task.setLastHarvestDatetime(dtfOut.print(lastDateModifiedDT));
+                task.setLastHarvestDatetime(dtfOut.print(lastDateModifiedDT), nodeId);
                 log.debug("Saving lastHarvestDate: " + dtfOut.print(lastDateModifiedDT));
                 try {
-                    store.saveTask(task);
+                    store.saveTask(task, nodeId);
                 } catch (MetadigStoreException mse) {
                     log.error("Error saving task: " + task.getTaskName());
                     JobExecutionException jee = new JobExecutionException("Unable to save new harvest date", mse);
                     jee.setRefireImmediately(false);
                     throw jee;
                 }
+                log.info(taskName + ": found " + allIds + " seriesIds" + " for start: " + startDTstr + ", end: " + endDTstr + " at serviceUrl: " + nodeServiceUrl);
             }
-            log.info(taskName + ": found " + allIds + " seriesIds" + " for start: " + startDTstr + ", end: " + endDTstr + " at servierUrl: " + nodeServiceUrl);
         }
         store.shutdown();
     }
@@ -344,7 +343,7 @@ public void execute(JobExecutionContext context)
      */
     public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
             String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr,
-            int startCount, int countRequested, DateTime lastDateModifiedDT) throws Exception {
+            int startCount, int countRequested, DateTime lastDateModifiedDT, String taskName) throws Exception {
 
@@ -376,7 +375,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
             fieldXpath = xpath.compile("//result/doc/str[@name='seriesId']/text()");
             dateModifiedXpath = xpath.compile("//result/doc/date[@name='dateModified']/text()");
         } catch (XPathExpressionException xpe) {
-            log.error("Error extracting id from solr result doc: " + xpe.getMessage());
+            log.error(taskName + ": error extracting id from solr result doc: " + xpe.getMessage());
             metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage());
             metadigException.initCause(xpe);
             throw metadigException;
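The start and end window computed above can be restated as a short runnable sketch using Joda-Time, which the engine already uses. The stored "last harvest" value is faked here; note also that the sketch compares with equals() where the diff compares object references with ==, which appears to be the intended check.

    import org.joda.time.DateTime;
    import org.joda.time.DateTimeZone;

    // Condensed sketch of the harvest window: start from the stored per-node
    // 'last harvest' datetime (clamped to 'now' if it ran ahead), end at now,
    // and nudge the start back a minute when the two coincide so that the
    // DataONE listObjects call accepts the range.
    public class HarvestWindowSketch {
        public static void main(String[] args) {
            DateTime currentDT = new DateTime(DateTimeZone.UTC);
            DateTime lastHarvestDateDT = new DateTime("2020-08-01T00:00:00.000Z", DateTimeZone.UTC);

            DateTime startDT = lastHarvestDateDT.isAfter(currentDT.toInstant())
                    ? currentDT : lastHarvestDateDT;
            DateTime endDT = currentDT;
            if (startDT.equals(endDT)) {
                startDT = startDT.minusMinutes(1);
            }
            System.out.println("harvest window: " + startDT + " .. " + endDT);
        }
    }

After a successful harvest, the saved "last harvest" value is the newest dateModified seen plus one millisecond, so the next window starts just past the last pid already processed.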
@@ -396,7 +395,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
         try {
             xpathResult = (org.w3c.dom.NodeList) fieldXpath.evaluate(xmldoc, XPathConstants.NODESET);
         } catch (XPathExpressionException xpe) {
-            log.error("Error extracting seriesId from solr result doc: " + xpe.getMessage());
+            log.error(taskName + ": error extracting seriesId from solr result doc: " + xpe.getMessage());
             metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage());
             metadigException.initCause(xpe);
             throw metadigException;
@@ -416,7 +415,7 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
             try {
                 xpathResult = (org.w3c.dom.NodeList) dateModifiedXpath.evaluate(xmldoc, XPathConstants.NODESET);
             } catch (XPathExpressionException xpe) {
-                log.error("Error extracting dateModified from solr result doc: " + xpe.getMessage());
+                log.error(taskName + ": error extracting dateModified from solr result doc: " + xpe.getMessage());
                 metadigException = new MetadigProcessException("Unable to get collection pids: " + xpe.getMessage());
                 metadigException.initCause(xpe);
                 throw metadigException;
@@ -450,13 +449,13 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
     }
 
     /**
-     * Submit a requst to the metadig controller to get qualiry score info and create a graph for the specified collection.
+     * Submit a request to the metadig controller to get quality score info and create a graph for the specified collection.
      *
-     * @param qualityServiceUrl
-     * @param collectionId
-     * @param suiteId
-     * @param nodeId
-     * @param formatFamily
+     * @param qualityServiceUrl the URL of the MetaDIG quality service
+     * @param collectionId the DataONE collection (portal) seriesId
+     * @param suiteId the quality suite to run for the collection
+     * @param nodeId the DataONE node identifier that the collection is hosted on
+     * @param formatFamily the format identifier family (e.g. "eml" for all EML format identifier versions)
      *
      * @throws Exception
     
     */
     public void submitScorerRequest(String qualityServiceUrl, String collectionId, String suiteId, String nodeId, String formatFamily) throws Exception {
 
         InputStream runResultIS = null;
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java
index 9958136c..8a83abce 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/DatabaseStore.java
@@ -9,6 +9,8 @@ import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.dataone.service.types.v1.*;
+import org.dataone.service.types.v2.Node;
 import org.dataone.service.util.TypeMarshaller;
 import org.springframework.core.io.Resource;
 import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
@@ -22,11 +24,11 @@ import java.io.UnsupportedEncodingException;
 import java.net.URL;
 import java.sql.*;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
 import java.time.Instant;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Properties;
+import java.util.*;
+import java.util.Date;
 
 /**
  * Persistent storage for quality runs.
@@ -322,27 +324,24 @@ public void shutdown() {
     }
 
-    public void saveTask(Task task) throws MetadigStoreException {
+    public void saveTask(Task task, String nodeId) throws MetadigStoreException {
 
         PreparedStatement stmt = null;
 
         // Perform an 'upsert' on the 'nodes' table - if a record exists for the 'metadata_id, suite_id' already,
         // then update the record with the incoming data.
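Before the upsert itself, it may help to see how the per-node harvest dates are meant to round-trip through this store after the change. The sketch below uses only methods that appear in this patch series (Task.setLastHarvestDatetime(String, String), DatabaseStore.saveTask(Task, String), getTask(String, String, String)); the task name, node ids, dates, and the already-constructed 'store' instance are assumed for illustration.

    // Sketch (not engine code): a Task now carries one lastHarvestDatetime per
    // nodeId, and DatabaseStore persists one (task_name, task_type, node_id) row
    // per node in the node_harvest table.
    Task task = new Task();
    task.setTaskName("quality-knb");   // hypothetical task name
    task.setTaskType("quality");
    task.setLastHarvestDatetime("2020-08-20T00:00:00.000Z", "urn:node:KNB");
    task.setLastHarvestDatetime("2020-08-19T12:00:00.000Z", "urn:node:ARCTIC");

    store.saveTask(task, "urn:node:KNB");     // upserts the KNB row
    store.saveTask(task, "urn:node:ARCTIC");  // upserts the ARCTIC row

    Task restored = store.getTask("quality-knb", "quality", "urn:node:KNB");
    String last = restored.getLastHarvestDatetime("urn:node:KNB");

Keeping one row per node is what lets a single CN task resume each member node's harvest from its own last-seen modification date.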
try { - String sql = "INSERT INTO tasks (task_name, task_type, last_harvest_datetime) VALUES (?, ?, ?)" + String sql = "INSERT INTO tasks (task_name, task_type) VALUES (?, ?)" + " ON CONFLICT ON CONSTRAINT task_name_task_type" - + " DO UPDATE SET (task_name, task_type, last_harvest_datetime) = (?, ?, ?);"; + + " DO NOTHING"; stmt = conn.prepareStatement(sql); stmt.setString(1, task.getTaskName()); stmt.setString(2, task.getTaskType()); - stmt.setString(3, task.getLastHarvestDatetime()); - stmt.setString(4, task.getTaskName()); - stmt.setString(5, task.getTaskType()); - stmt.setString(6, task.getLastHarvestDatetime()); stmt.executeUpdate(); stmt.close(); conn.commit(); + saveNodeHarvest(task, nodeId); //conn.close(); } catch (SQLException e) { log.error( e.getClass().getName()+": "+ e.getMessage()); @@ -355,7 +354,7 @@ public void saveTask(Task task) throws MetadigStoreException { log.trace("Records created successfully"); } - public Task getTask(String taskName, String taskType) { + public Task getTask(String taskName, String taskType, String nodeId) { //return runs.get(id); Result result = new Result(); @@ -376,12 +375,13 @@ public Task getTask(String taskName, String taskType) { if(rs.next()) { task.setTaskName(rs.getString("task_name")); task.setTaskType(rs.getString("task_type")); - task.setLastHarvestDatetime(rs.getString("last_harvest_datetime")); rs.close(); stmt.close(); } else { log.trace("No results returned from query"); } + + task.setLastHarvestDatetimes(getNodeHarvestDatetimes(task.getTaskName(), task.getTaskType(), nodeId)); } catch ( Exception e ) { log.error( e.getClass().getName()+": "+ e.getMessage()); } @@ -389,6 +389,232 @@ public Task getTask(String taskName, String taskType) { return(task); } + public HashMap getNodeHarvestDatetimes(String taskName, String taskType, String nodeId) { + + //return runs.get(id); + Result result = new Result(); + PreparedStatement stmt = null; + String lastDT = null; + Task task = new Task(); + + HashMap nodeHarvestDates = new HashMap<>(); + // Select records from the 'nodes' table + try { + String sql = "select * from node_harvest where task_name = ? and task_type = ? and node_id = ?"; + stmt = conn.prepareStatement(sql); + stmt.setString(1, taskName); + stmt.setString(2, taskType); + stmt.setString(3, nodeId); + + log.trace("issuing query: " + sql); + ResultSet rs = stmt.executeQuery(); + while (rs.next()) { + nodeHarvestDates.put(nodeId, rs.getString("last_harvest_datetime")); + } + rs.close(); + stmt.close(); + } catch ( Exception e ) { + log.error( e.getClass().getName()+": "+ e.getMessage()); + } + + return(nodeHarvestDates); + } + + + public void saveNodeHarvest(Task task, String nodeId) throws MetadigStoreException { + + PreparedStatement stmt = null; + + // Perform an 'upsert' on the 'nodes' table - if a record exists for the 'metadata_id, suite_id' already, + // then update the record with the incoming data. 
+    public void saveNodeHarvest(Task task, String nodeId) throws MetadigStoreException {
+
+        PreparedStatement stmt = null;
+
+        // Perform an 'upsert' on the 'node_harvest' table - if a record already exists for this
+        // 'task_name, task_type, node_id' key, then update it with the incoming harvest datetime.
+        try {
+            String sql = "INSERT INTO node_harvest (task_name, task_type, node_id, last_harvest_datetime) VALUES (?, ?, ?, ?)"
+                    + " ON CONFLICT ON CONSTRAINT node_harvest_task_name_task_type_node_id_uc"
+                    + " DO UPDATE SET (task_name, task_type, node_id, last_harvest_datetime) = (?, ?, ?, ?);";
+
+            stmt = conn.prepareStatement(sql);
+            stmt.setString(1, task.getTaskName());
+            stmt.setString(2, task.getTaskType());
+            stmt.setString(3, nodeId);
+            stmt.setString(4, task.getLastHarvestDatetime(nodeId));
+            stmt.setString(5, task.getTaskName());
+            stmt.setString(6, task.getTaskType());
+            stmt.setString(7, nodeId);
+            stmt.setString(8, task.getLastHarvestDatetime(nodeId));
+            stmt.executeUpdate();
+            stmt.close();
+            conn.commit();
+            //conn.close();
+        } catch (SQLException e) {
+            log.error( e.getClass().getName()+": "+ e.getMessage());
+            MetadigStoreException me = new MetadigStoreException("Unable to save the last harvest date to the database.");
+            me.initCause(e);
+            throw(me);
+        }
+
+        log.trace("Records created successfully");
+    }
+
+    /**
+     * Save a DataONE node entry to the 'nodes' table, updating the existing record for this
+     * node 'identifier' if one is already present.
+     */
+    public void saveNode(Node node) throws MetadigStoreException {
+
+        PreparedStatement stmt = null;
+
+        // Perform an 'upsert' on the 'nodes' table - if a record already exists for this node
+        // 'identifier', then update it with the incoming data.
+        try {
+            String sql = "INSERT INTO nodes " +
+                    " (identifier, name, type, state, synchronize, last_harvest, baseURL) VALUES (?, ?, ?, ?, ?, ?, ?) " +
+                    " ON CONFLICT ON CONSTRAINT node_id_pk DO UPDATE SET " +
+                    " (identifier, name, type, state, synchronize, last_harvest, baseURL) = (?, ?, ?, ?, ?, ?, ?);";
+
+            DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+            dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+            String lastHarvestDatetimeStr = dateFormat.format(node.getSynchronization().getLastHarvested());
+
+            stmt = conn.prepareStatement(sql);
+            stmt.setString(1, node.getIdentifier().getValue());
+            stmt.setString(2, node.getName());
+            stmt.setString(3, node.getType().toString());
+            stmt.setString(4, node.getState().toString());
+            stmt.setBoolean(5, node.isSynchronize());
+            stmt.setString(6, lastHarvestDatetimeStr);
+            stmt.setString(7, node.getBaseURL());
+            stmt.setString(8, node.getIdentifier().getValue());
+            stmt.setString(9, node.getName());
+            stmt.setString(10, node.getType().toString());
+            stmt.setString(11, node.getState().toString());
+            stmt.setBoolean(12, node.isSynchronize());
+            stmt.setString(13, lastHarvestDatetimeStr);
+            stmt.setString(14, node.getBaseURL());
+            stmt.executeUpdate();
+            stmt.close();
+            conn.commit();
+        } catch (SQLException e) {
+            log.error( e.getClass().getName()+": "+ e.getMessage());
+            MetadigStoreException me = new MetadigStoreException("Unable to save node " + node.getIdentifier().getValue() + " to database.");
+            me.initCause(e);
+            throw(me);
+        }
+
+        log.trace("Records created successfully");
+    }
+
+    /**
+     * Get the cached entry for one DataONE node from the 'nodes' table.
+     */
+    public Node getNode(String nodeId) {
+
+        PreparedStatement stmt = null;
+        Node node = new Node();
+
+        // Select the record for this node from the 'nodes' table
+        try {
+            log.trace("preparing statement for query");
+            String sql = "select * from nodes where identifier = ?";
+            stmt = conn.prepareStatement(sql);
+            stmt.setString(1, nodeId);
+
+            log.trace("issuing query: " + sql);
+            ResultSet rs = stmt.executeQuery();
+            if(rs.next()) {
+                node = extractNodeFields(rs);
+                rs.close();
+                stmt.close();
+            } else {
+                log.trace("No results returned for nodeId: " + nodeId);
+            }
+        } catch ( Exception e ) {
+            log.error( e.getClass().getName()+": "+ e.getMessage());
+        }
+
+        return(node);
+    }
+
+    /**
+     * Get all DataONE node entries cached in the 'nodes' table.
+     */
+    public ArrayList<Node> getNodes() {
+
+        PreparedStatement stmt = null;
+        ArrayList<Node> nodes = new ArrayList<>();
+        ResultSet rs = null;
+        Node node;
+
+        // Select all records from the 'nodes' table
+        try {
+            log.trace("preparing statement for query");
+            String sql = "select * from nodes;";
+            stmt = conn.prepareStatement(sql);
+
+            log.trace("issuing query: " + sql);
+            rs = stmt.executeQuery();
+            while(rs.next()) {
+                node = extractNodeFields(rs);
+                nodes.add(node);
+            }
+        } catch ( Exception e ) {
+            log.error(e.getClass().getName() + ": " + e.getMessage());
+        }
+
+        try {
+            rs.close();
+            stmt.close();
+        } catch (Exception e) {
+            log.error("Error closing statement or result set: " + e.getMessage());
+        }
+
+        log.trace(nodes.size() + " nodes found in node table.");
+
+        return(nodes);
+    }
+
+    // Copy the fields of one 'nodes' table row into a new DataONE Node object.
+    public Node extractNodeFields(ResultSet resultSet) {
+
+        Node node = new Node();
+        try {
+            NodeReference nodeReference = new NodeReference();
+            nodeReference.setValue(resultSet.getString("identifier"));
+            node.setIdentifier(nodeReference);
+            node.setName(resultSet.getString("name"));
+
+            switch (resultSet.getString("type")) {
+                case "CN":
+                    node.setType(NodeType.CN);
+                    break;
+                case "MN":
+                    node.setType(NodeType.MN);
+                    break;
+                case "MONITOR":
+                    node.setType(NodeType.MONITOR);
+                    break;
+            }
+
+            switch (resultSet.getString("state")) {
+                case "UP":
+                    node.setState(NodeState.UP);
+                    break;
+                case "DOWN":
+                    node.setState(NodeState.DOWN);
+                    break;
+                default:
+                    node.setState(NodeState.UNKNOWN);
+                    break;
+            }
+
+            node.setSynchronize(resultSet.getBoolean("synchronize"));
+
+            Synchronization synchronization = new Synchronization();
+            SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+            formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
+            Date lastHarvestDate = formatter.parse(resultSet.getString("last_harvest"));
+            synchronization.setLastHarvested(lastHarvestDate);
+            node.setSynchronization(synchronization);
+
+            node.setBaseURL(resultSet.getString("baseURL"));
+        } catch (java.sql.SQLException | java.text.ParseException e) {
+            log.error("Error retrieving node from database: " + e);
+        }
+
+        return node;
+    }
+
     @Override
     public void createRun(Run run) {
         runs.put(run.getId(), run);
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java
index af7637a0..e3f47e7a 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/InMemoryStore.java
@@ -9,6 +9,7 @@
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.dataone.service.types.v2.Node;
 import org.springframework.core.io.Resource;
 import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
 import org.xml.sax.SAXException;
@@ -16,6 +17,7 @@
 import javax.xml.bind.JAXBException;
 import java.io.IOException;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
@@ -210,13 +212,22 @@ public void deleteRun(Run run) {
 
 //    public void saveNode(Node node) throws MetadigStoreException { }
 
     @Override
-    public Task getTask(String taskName, String taskType) { return new Task(); }
+    public Task getTask(String taskName, String taskType, String nodeId) { return new Task(); }
 
     @Override
-    public void saveTask(Task task) throws MetadigStoreException { }
+    public void saveTask(Task task, String nodeId) throws MetadigStoreException { }
 
     @Override
     public void shutdown() {};
 
+    @Override
+    public Node getNode(String nodeId) { return new Node(); };
+
+    @Override
+    public void saveNode(Node node) throws MetadigStoreException {};
+
+    @Override
+    public ArrayList<Node> getNodes() { return new ArrayList<>(); };
+
 }
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java
index b9796c29..ad64d726 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/MDQStore.java
@@ -2,8 +2,11 @@
 import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException;
 import edu.ucsb.nceas.mdqengine.model.*;
+import org.dataone.service.types.v2.Node;
 
+import java.util.ArrayList;
 import java.util.Collection;
 
 public interface MDQStore {
@@ -30,7 +33,12 @@ public interface MDQStore {
     boolean isAvailable();
     void renew() throws MetadigStoreException;
 
-    Task getTask(String taskName, String taskType);
-    void saveTask(Task task) throws MetadigStoreException;
+    Task getTask(String taskName, String taskType, String nodeId);
+    void saveTask(Task task, String nodeId) throws MetadigStoreException;
+
+    Node getNode(String nodeId);
+    void saveNode(Node node) throws MetadigStoreException;
+
+    ArrayList<Node> getNodes();
 }
diff --git a/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java b/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java
index 4613577e..593e3e36 100644
--- a/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java
+++ b/src/main/java/edu/ucsb/nceas/mdqengine/store/MNStore.java
@@ -19,6 +19,7 @@
 import org.dataone.service.types.v1.Session;
 import org.dataone.service.types.v1.Subject;
 import org.dataone.service.types.v1.util.ChecksumUtil;
+import org.dataone.service.types.v2.Node;
 import org.dataone.service.types.v2.SystemMetadata;
 
 import javax.xml.bind.JAXBException;
@@ -328,17 +329,11 @@ public void deleteRun(Run run) {
     @Override
     public void renew() {}
 
-//    @Override
-//    public Node getNode(String nodeId, String jobName) { return new Node(); }
-//
-//    @Override
-//    public void saveNode(Node node) throws MetadigStoreException { }
-
     @Override
-    public Task getTask(String taskName, String taskType) { return new Task(); }
+    public Task getTask(String taskName, String taskType, String nodeId) { return new Task(); }
 
     @Override
-    public void saveTask(Task task) throws MetadigStoreException { }
+    public void saveTask(Task task, String nodeId) throws MetadigStoreException { }
 
     @Override
     public void shutdown() {};
@@ -346,4 +341,14 @@ public void saveTask(Task task) throws MetadigStoreException { }
 
     @Override
     public void saveRun(Run run) {}
 
+    @Override
+    public Node getNode(String nodeId) { return new Node(); };
+
+    @Override
+    public void saveNode(Node node) throws MetadigStoreException {};
+
+    @Override
+    public ArrayList<Node> getNodes() { return new ArrayList<>(); };
+
 }
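Taken together, the three stores above implement the new per-node harvest bookkeeping. A minimal sketch of how a harvest task might drive it end to end: getTask(), saveTask() and Task.getLastHarvestDatetime() appear in the diffs above, while the no-argument DatabaseStore constructor and the single-node Task setter used here are hypothetical, named only for illustration.

    import edu.ucsb.nceas.mdqengine.exception.MetadigStoreException;
    import edu.ucsb.nceas.mdqengine.model.Task;
    import edu.ucsb.nceas.mdqengine.store.DatabaseStore;
    import edu.ucsb.nceas.mdqengine.store.MDQStore;

    public class PerNodeHarvestSketch {
        public static void main(String[] args) throws MetadigStoreException {
            String nodeId = "urn:node:KNB";
            MDQStore store = new DatabaseStore(); // hypothetical no-arg constructor

            // getTask() now also loads the per-node harvest dates from 'node_harvest'.
            Task task = store.getTask("quality-knb", "quality", nodeId);
            String lastHarvest = task.getLastHarvestDatetime(nodeId);
            System.out.println("Harvesting pids modified since " + lastHarvest);

            // ... query the DataONE 'listObjects' service from lastHarvest forward ...

            // Record the new harvest position for this node only; saveTask() upserts
            // the 'tasks' row and delegates the datetime to saveNodeHarvest().
            task.setLastHarvestDatetime(nodeId, "2020-08-28T00:00:00.000Z"); // hypothetical setter
            store.saveTask(task, nodeId);
            store.shutdown();
        }
    }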
diff --git a/src/main/resources/sql/quality-v2.3.0.sql b/src/main/resources/sql/quality-v2.3.0.sql
index 3c4e7dfb..45a26865 100644
--- a/src/main/resources/sql/quality-v2.3.0.sql
+++ b/src/main/resources/sql/quality-v2.3.0.sql
@@ -24,12 +24,22 @@ alter table identifiers owner to metadig;
 create table tasks (
    task_name TEXT not null,
    task_type TEXT not null,
-   last_harvest_datetime TEXT not null,
    CONSTRAINT task_name_task_type PRIMARY KEY (task_name, task_type)
 );
 
 alter table tasks owner to metadig;
 
+create table node_harvest (
+   task_name TEXT not null,
+   task_type TEXT not null,
+   node_id TEXT not null,
+   last_harvest_datetime TEXT not null,
+   CONSTRAINT node_harvest_task_name_task_type_fk FOREIGN KEY (task_name, task_type) REFERENCES tasks (task_name, task_type),
+   CONSTRAINT node_harvest_task_name_task_type_node_id_uc UNIQUE (task_name, task_type, node_id)
+);
+
+alter table node_harvest owner to metadig;
+
 create TABLE runs (
    metadata_id TEXT not null,
    suite_id TEXT not null,
@@ -62,3 +72,17 @@ create TABLE filestore (
 
 alter table filestore owner to metadig;
 
+create TABLE nodes (
+   identifier TEXT not null,
+   name TEXT not null,
+   type TEXT not null,
+   state TEXT not null,
+   synchronize boolean not null,
+   last_harvest TEXT not null,
+   baseURL TEXT not null,
+   CONSTRAINT node_id_pk PRIMARY KEY (identifier)
+);
+
+alter table nodes owner to metadig;
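The 'nodes' table caches the DataONE node registry, and the 'nodelist' task in taskList.csv (next patch) refreshes it hourly from a CN. A minimal sketch of such a refresh, again assuming the hypothetical no-argument DatabaseStore constructor; the DataONE client classes are the same ones Scorer.java already imports:

    import edu.ucsb.nceas.mdqengine.store.DatabaseStore;
    import org.dataone.client.rest.DefaultHttpMultipartRestClient;
    import org.dataone.client.v2.impl.MultipartCNode;
    import org.dataone.service.types.v2.Node;

    public class NodeListRefreshSketch {
        public static void main(String[] args) throws Exception {
            // Read the current node registry from a CN; listNodes() needs no authentication.
            MultipartCNode cn = new MultipartCNode(new DefaultHttpMultipartRestClient(),
                    "https://cn.dataone.org/cn");

            // Upsert each node into the 'nodes' table via the new saveNode() method.
            // Note: saveNode() expects synchronization info to be present on each node.
            DatabaseStore store = new DatabaseStore(); // hypothetical no-arg constructor
            for (Node node : cn.listNodes().getNodeList()) {
                store.saveNode(node);
            }
            store.shutdown();
        }
    }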
From 54a1c4efa885c4435787f1185540bd5df8618352 Mon Sep 17 00:00:00 2001
From: gothub
Date: Wed, 2 Sep 2020 16:02:18 -0700
Subject: [PATCH 47/47] Add portal harvest task for mn-ucsb-1 (#256)

This is the current taskList.csv, which includes additional entries for mn-ucsb-1
---
 src/main/resources/configuration/taskList.csv | 51 +++++++++++++++----
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/src/main/resources/configuration/taskList.csv b/src/main/resources/configuration/taskList.csv
index e1351e9a..80976d46 100644
--- a/src/main/resources/configuration/taskList.csv
+++ b/src/main/resources/configuration/taskList.csv
@@ -1,9 +1,8 @@
 task-type,task-name,task-group,cron-schedule,params
-# task type, task name, task group, cron schedule, "formatId filter (regex); suite id; node id; D1 node base url; harvest begin date; harvest increment (days);requestCount"
-# - task type: currently 'quality' and 'score' task are supported.
-# - task name: any unique string, i.e. 'quality-knb'
-# - task group: currently only 'metadig' is used
-# - nodeId
+# task type, job name, job group, cron schedule, "formatId filter (regex); suite id; node id; D1 node base url; harvest begin date; harvest increment (days);requestCount"
+# - task type: the type of task: 'quality', 'score', 'filestore' or 'nodelist'
+# - job name: any unique string, e.g. 'quality-knb'
+# - job group: currently only 'metadig' is used
 # - cron schedule:
 #   - seconds, minutes, hours, day of month, month, day of week, year
 # - params
 #   - formatId filter: the formatIds of pids to process, specified as a regular expression
 #   - suite id: the metadig suite id
 #   - node id: a DataONE node URN - data will be filtered using this (DataONE sysmeta "datasource")
 #   - D1 node base url: the base service URL for an MN or CN that will be used to query for pids to be processed
 #   - harvest begin date: the first date to use for the DataONE 'listObjects' service
 #   - harvest increment (days): the time span for each search
 #   - requestCount: the number of items to request from DataONE listObjects
-score,score-DataONE-fair,metadig,35 0/1 * * * ?,".*portal.*;FAIR.suite.1;urn:node:CN;2019-12-01T00:00:00.00Z;1;100;refresh"
-quality,quality-arctic,metadig,20 0/1 * * * ?,"^eml.*|^http.*eml.*;arctic.data.center.suite.1;urn:node:ARCTIC;1;100"
-filestore,ingest,metadig,0 0/1 * * * ?,"stage;;*.*;README.txt;filestore-ingest.log"
-
+# - requestType: for score tasks, determines the type of portal processing ("portal" or "node")
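+#
+# Example (hypothetical entry for illustration, not an active task): score portal documents
+# on a fictitious node 'urn:node:EXAMPLE' with the FAIR suite, firing at second 25 of every
+# minute and harvesting forward from 2020-08-28 in 1-day windows of up to 100 pids per request:
+#   score,portal-EXAMPLE-FAIR,metadig,25 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:EXAMPLE;2020-08-28T00:00:00.00Z;1;100;portal"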
+#
+# Dataset quality scoring tasks
+quality,quality-knb,metadig,0 0/1 * * * ?,"^eml.*|^http.*eml.*;knb.suite.1;urn:node:KNB;2020-08-28T14:05:48.764Z;1;1000"
+quality,quality-arctic,metadig,5 0/1 * * * ?,"^eml.*|^http.*eml.*;arctic.data.center.suite.1;urn:node:ARCTIC;2020-08-27T00:00:00.000Z;1;1000"
+quality,quality-dataone-fair,metadig,10 0/1 * * * ?,"^eml.*|^http.*eml.*|.*www.isotc211.org.*;FAIR-suite-0.3.1;urn:node:CN;2020-08-28T00:00:00.000Z;1;1000"
+quality,quality-ess-dive,metadig,15 0/1 * * * ?,"^eml.*|^http.*eml.*;ess-dive.data.center.suite.1;urn:node:ESS_DIVE;2020-08-27T20:38:19.953Z;1;1000;"
+#
+# Portal scoring tasks
+score,portal-KNB-FAIR,metadig,5 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:KNB;2020-08-28T00:00:00.00Z;1;100;portal"
+score,portal-ARCTIC-FAIR,metadig,10 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:ARCTIC;2020-08-28T00:00:00.00Z;1;100;portal"
+score,portal-mnUCSB1-FAIR,metadig,15 0/1 * * * ?,"*portals*;FAIR-suite-0.3.1;urn:node:mnUCSB1;2020-08-28T00:00:00.00Z;1;100;portal"
+#
+# Note: Portal harvesting for DataONE portals created on search.dataone.org will be performed on mnUCSB1, as MetacatUI sends create and
+# update requests made on search.dataone.org to this host. We want to harvest portals as soon as they are created, without waiting for
+# mnUCSB1 to sync to the CN and for the CN to index them, so the following entry is obsolete and no longer used:
+# score,portal-CN-FAIR,metadig,35 0/1 * * * ?,"*portals*;FAIR.suite-0.3.1;urn:node:CN;2020-08-24T00:00:00.00Z;1;100;portal"
+#
+# Tasks for creating member node metadata assessment graphs
+score,mn-portal-ARCTIC-FAIR,metadig,0 0 2 * * ?,";FAIR-suite-0.3.1;urn:node:ARCTIC;2020-08-28T00:00:00.00Z;1;1000;node"
+score,mn-portal-KNB-FAIR,metadig,0 1 2 * * ?,";FAIR-suite-0.3.1;urn:node:KNB;2020-08-28T00:00:00.00Z;1;1000;node"
+score,mn-portal-ESS-DIVE-FAIR,metadig,0 2 2 * * ?,";FAIR-suite-0.3.1;urn:node:ESS_DIVE;2020-08-28T00:00:00.00Z;1;1000;node"
+score,mn-portal-CA_OPC-FAIR,metadig,0 3 2 * * ?,";FAIR-suite-0.3.1;urn:node:CA_OPC;2020-08-28T00:00:00.00Z;1;1000;node"
+score,mn-portal-DataONE-FAIR,metadig,0 4 2 * * ?,";FAIR-suite-0.3.1;urn:node:CN;2020-08-28T00:00:00.00Z;1;1000;node"
+#
+# Task for ingesting files into the file store from /data/metadig/store/stage/{code,data,graph,metadata}
+# filestore,ingest,metadig,0 0/1 * * * ?,"stage;;*.*;README.txt;filestore-ingest.log"
+#
+# Admin NOTE: it appears that the DataONE HttpMultipartRestClient can't handle two clients being created at the same time, even when
+# they are created by different threads. This needs to be investigated further, and potentially a bug should be logged in Redmine.
+# Until then, an easy workaround is to ensure that no two tasks start at the same time, so adjust the cron schedules accordingly.
+#
+# Node list from DataONE
+nodelist,MN-NODE-LIST,metadig,0 0 0/1 * * ?,"urn:node:CN"
\ No newline at end of file