Skip to content

Commit

Permalink
Improve javadocs; code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
gothub committed Aug 20, 2020
1 parent d1f5a97 commit 5bfd7a7
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,18 @@ public static void main(String[] argv) throws Exception {
/**
* Create a JobScheduler instance. This no-argument constructor has an empty body
* and performs no initialization of its own.
*/
public JobScheduler () {
}

/**
* Read a single parameter from the quality engine parameter file
* @param paramName the parameter to read from the config file
* @throws ConfigurationException if there is an exception while reading the config file
* @throws IOException if there is an exception while reading the config file
*/
public String readConfig (String paramName) throws ConfigurationException, IOException {
String paramValue = null;
try {
MDQconfig cfg = new MDQconfig();
paramValue = cfg.getString(paramName);
} catch (Exception e) {
} catch (ConfigurationException | IOException e) {
log.error("Could not read configuration for param: " + paramName + ": " + e.getMessage());
throw e;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,25 @@ public void execute(JobExecutionContext context)
store.shutdown();
}

/**
* Query a DataONE CN or MN to obtain a list of persistent identifiers (pids) for metadata objects that have been
* added to the system during a specific time period.
* @param cnNode a DataONE CN connection client object
* @param mnNode a DataONE MN connection client object
* @param isCN a logical indicating whether a CN or MN object is being used
* @param session a DataONE authentication session
* @param suiteId the quality suite to check (if this pid has already been processed)
* @param pidFilter the DataONE format identifiers to filter for
* @param startHarvestDatetimeStr the starting date to harvest pids from
* @param endHarvestDatetimeStr the ending date to harvest pids from
* @param startCount the start count for paging results from DataONE, for large results
* @param countRequested the number of items to get from DataONE on each request
* @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid
* @throws Exception if there is an exception while executing the job.
* @return a ListResult object containing the matching pids
*/
public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session,
String suiteId, String nodeId, String pidFilter, String startHarvestDatetimeStr,
String suiteId, String pidFilter, String startHarvestDatetimeStr,
String endHarvestDatetimeStr, int startCount,
int countRequested, DateTime lastDateModifiedDT) throws Exception {

Expand All @@ -331,7 +348,6 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,

ObjectFormatIdentifier formatId = null;
NodeReference nodeRef = null;
//nodeRef.setValue(nodeId);
Identifier identifier = null;
Boolean replicaStatus = false;

Expand All @@ -356,7 +372,7 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
}
//log.info("Got " + objList.getCount() + " pids for format: " + formatId.getValue() + " pids.");
} catch (Exception e) {
log.error("Error retrieving pids for node " + nodeId + ": " + e.getMessage());
log.error("Error retrieving pids: " + e.getMessage());
throw e;
}

Expand Down Expand Up @@ -416,7 +432,24 @@ public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode,
return result;
}

public boolean runExists(String pid, String suiteId, MDQStore store) throws MetadigStoreException {

/**
* Check if the specified quality suite has already been run for a pid.
* <p>
* An additional check is made to see if the system metadata in the
* run is older than the passed in date. Because the quality engine
* uses fields from sysmeta (obsoletes, obsoletedBy), a run may need
* to be performed on an existing run in order to update the sysmeta, as
* the sysmeta is stored in the run object, and this run object is
* parsed when the run is inserted into the Solr index.
* </p>
* @param pid the pid to check
* @param suiteId the suite identifier to check (e.g. "FAIR-suite-0.3.1")
* @param store the DataStore object to send the check request to.
* @throws MetadigStoreException
*
*/
public boolean runExists(String pid, String suiteId, MDQStore store, Date dateSystemMetadataModified) throws MetadigStoreException {

boolean found = false;
Date runDateSystemMetadataModified = null;
Expand All @@ -440,6 +473,22 @@ public boolean runExists(String pid, String suiteId, MDQStore store) throws Meta
return found;
}

/**
* Submit a request to the metadig controller to run a quality suite for the specified pid.
* <p>
* The system metadata for a pid is also obtained and sent with the request
* </p>
*
* @param cnNode a DataONE CN connection client object
* @param mnNode a DataONE MN connection client object
* @param isCN a logical indicating whether a CN or MN object is being used
* @param session a DataONE authentication session
* @param qualityServiceUrl the URL of the MetaDIG quality service
* @param pidStr the pid to submit the request for
* @param suiteId the suite identifier to submit the request for
*
* @throws Exception
*/
public void submitReportRequest(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session, String qualityServiceUrl, String pidStr, String suiteId) throws Exception {

SystemMetadata sysmeta = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,22 +330,21 @@ public void execute(JobExecutionContext context)
/**
* Query a DataONE CN or MN object store for a list of objects that match the time range and formatId filters provided.
*
* //@param cnNode
* //@param mnNode
* //@param isCN
* @param session
* @param pidFilter
* @param startHarvestDatetimeStr
* @param endHarvestDatetimeStr
* @param startCount
* @param countRequested
* @param d1Node a DataONE CN or MN connection client object
* @param session a DataONE authentication session
* @param pidFilter the DataONE format identifiers to filter for
* @param startHarvestDatetimeStr the starting date to harvest pids from
* @param endHarvestDatetimeStr the ending date to harvest pids from
* @param startCount the start count for paging results from DataONE, for large results
* @param countRequested the number of items to get from DataONE on each request
* @param lastDateModifiedDT the sysmeta 'dateSystemMetadataModified' value of the last harvested pid
* @throws Exception if there is an exception while executing the job.
* @return a ListResult object containing the matching pids
* @throws Exception
*/
//public ListResult getPidsToProcess(MultipartCNode cnNode, MultipartMNode mnNode, Boolean isCN, Session session,
public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
String pidFilter, String startHarvestDatetimeStr, String endHarvestDatetimeStr,
int startCount, int countRequested) throws Exception {
int startCount, int countRequested, DateTime lastDateModifiedDT) throws Exception {

MetadigProcessException metadigException = null;

Expand Down Expand Up @@ -450,6 +449,18 @@ public ListResult getPidsToProcess(MultipartD1Node d1Node, Session session,
return result;
}

/**
* Submit a request to the metadig controller to get quality score info and create a graph for the specified collection.
*
* @param qualityServiceUrl
* @param collectionId
* @param suiteId
* @param nodeId
* @param formatFamily
*
* @throws Exception
*
*/
public void submitScorerRequest(String qualityServiceUrl, String collectionId, String suiteId, String nodeId, String formatFamily) throws Exception {

InputStream runResultIS = null;
Expand All @@ -475,7 +486,7 @@ public void submitScorerRequest(String qualityServiceUrl, String collectionId, S
post.addHeader("Accept", "application/xml");

// send to service
log.debug("submitting scores request : " + scorerServiceUrl);
log.trace("submitting scores request : " + scorerServiceUrl);
CloseableHttpClient client = HttpClients.createDefault();
CloseableHttpResponse response = client.execute(post);

Expand Down
14 changes: 6 additions & 8 deletions src/main/java/edu/ucsb/nceas/mdqengine/scorer/Scorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ public static void main(String[] argv) throws Exception {
* A set of quality scores are retrieved from the Quality Solr Server and a quality graph and csv file are created from
* them. For DataONE collections, the 'collectionQuery' is retrieved from Solr to determine the set of pids to be
* included.
* </p>
*
*/
final Consumer consumer = new DefaultConsumer(inProcessChannel) {
Expand Down Expand Up @@ -330,25 +331,24 @@ public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProp
}
};

// Initialize the RabbitMQ queue for scorer requests sent by the controller
inProcessChannel.basicConsume(SCORER_QUEUE_NAME, false, consumer);
}

/**
* Retrieve pids associated with a DataONE collection.
*
* <p>First the 'collectionQuery' field is retrieved from DataONE Solr for the collection</p>
* <p>Next, a query is issued with the query from collectionQuery field, to retrieve all Solr docs for the collection ids.</p>
* <p>Next, a query is issued with the query from the collectionQuery field, to retrieve all Solr docs for the collection ids.</p>
*
* <p>Note that in the current design, the collection query is always obtained by querying the node specified in the taskList.csv file,
* which is usually an MN, but the collectionQuery is always evaluated on the CN</p>
*
* @param collectionId a DataONE project id to fetch scores for, e.g. urn:uuid:f137095e-4266-4474-aa5f-1e1fcaa5e2dc
* @param d1Node
* @param session
* @param d1Node the DataONE connection object for a node
* @param session the DataONE authentication session
* @return a List of quality scores fetched from Solr
*/
//private ScorerResult getCollectionPids(String collectionId, MultipartCNode cnNode, MultipartMNode mnNode,
// Boolean isCN, Session session) throws MetadigProcessException {
private ScorerResult getCollectionPids(String collectionId, MultipartD1Node d1Node, Session session) throws MetadigProcessException {

Document xmldoc = null;
Expand All @@ -363,11 +363,9 @@ which will be used to query DataONE Solr for all the pids associated with that p
*/
ArrayList<String> pids = new ArrayList<>();
queryStr = "?q=seriesId:" + escapeSpecialChars(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND";
//queryStr = "?q=seriesId:" + encodeValue(collectionId) + "+-obsoletedBy:*" + "&fl=collectionQuery,label,rightsHolder&q.op=AND";
//queryStr = "?q=seriesId:" + collectionId + "+-obsoletedBy:*&fl=collectionQuery,label,rightsHolder&q.op=AND";

startPos = 0;
// Just getting 1 row
// Just getting 1 row (for the collectionQuery field)
countRequested = 10;

// Get the collectionQuery from Solr
Expand Down

0 comments on commit 5bfd7a7

Please sign in to comment.