From 2eeb75c277b83a855b23eafcca92d9bdfd1ebb53 Mon Sep 17 00:00:00 2001 From: "Gantner, Florian Klaus" Date: Thu, 13 Jun 2024 16:19:58 +0200 Subject: [PATCH] extend datacite import mapping with additional fields --- ...AttributeConditionMetadataContributor.java | 334 ++++++++++++++++++ .../contributor/MappingValueProcessor.java | 39 ++ ...DataCiteImportMetadataSourceServiceIT.java | 23 ++ ...atacite-to-dspace-resourcetypes.properties | 13 + .../spring/api/datacite-integration.xml | 72 ++++ 5 files changed, 481 insertions(+) create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedJsonPathAttributeConditionMetadataContributor.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MappingValueProcessor.java create mode 100644 dspace/config/crosswalks/datacite/mapConverter-datacite-to-dspace-resourcetypes.properties diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedJsonPathAttributeConditionMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedJsonPathAttributeConditionMetadataContributor.java new file mode 100644 index 00000000000..bfaadba675d --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedJsonPathAttributeConditionMetadataContributor.java @@ -0,0 +1,334 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.metadatamapping.contributor; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.Objects; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.importer.external.metadatamapping.MetadataFieldConfig; +import org.dspace.importer.external.metadatamapping.MetadataFieldMapping; +import org.dspace.importer.external.metadatamapping.MetadatumDTO; + +/** + * A simple JsonPath Metadata processor + * that allow extract value from json object + * by configuring the path in the query variable via the bean. + * moreover this can also perform more compact extractions + * by configuring specific json processor in "metadataProcessor" + * + * In addition from some array where `query` points to the values can be selected using some additional condition check. + * This can be expressed in jsonpath, but not in jsonpointer expression. + * + * e.g. + * query /descriptions + * conditionkey "descriptionType" + * conditionvalue "Abstract" + * retrievedvalue "description" + * + * and json + * + * "descriptions": [ + * {"description":"Description A","descriptionType":"Abstract"}, + * {"description":"Description B","descriptionType":"Other"} + * ] + * + * should deliver the value "Description A". + * In addition there is some metadataPostProcessor modifying and changing the retrieved value, + * e.g. mapping to controlled values + * + * @author Mykhaylo Boychuk (mykhaylo.boychuk@4science.com) + * @author Florian Gantner (florian.gantner@uni-bamberg.de) + */ +public class EnhancedJsonPathAttributeConditionMetadataContributor implements MetadataContributor { + + private final static Logger log = LogManager.getLogger(); + + private String query; + + private String conditionKey; + + private String conditionValue; + + private String elementAttribute; + + private MetadataFieldConfig field; + + protected JsonPathMetadataProcessor metadataProcessor; + + protected JsonPathMetadataProcessor metadataPostProcessor; + + /** + * Initialize SimpleJsonPathMetadataContributor with a query, prefixToNamespaceMapping and MetadataFieldConfig + * + * @param query The JSonPath query + * @param field the matadata field to map the result of the Json path query + * MetadataFieldConfig + */ + public EnhancedJsonPathAttributeConditionMetadataContributor(String query, MetadataFieldConfig field) { + this.query = query; + this.field = field; + } + + + /** + * Unused by this implementation + */ + @Override + public void setMetadataFieldMapping(MetadataFieldMapping> rt) { + + } + + /** + * Empty constructor for SimpleJsonPathMetadataContributor + */ + public EnhancedJsonPathAttributeConditionMetadataContributor() { + + } + + /** + * Return the MetadataFieldConfig used while retrieving MetadatumDTO + * + * @return MetadataFieldConfig + */ + public MetadataFieldConfig getField() { + return field; + } + + /** + * Setting the MetadataFieldConfig + * + * @param field MetadataFieldConfig used while retrieving MetadatumDTO + */ + public void setField(MetadataFieldConfig field) { + this.field = field; + } + + /** + * Return query used to create the JSonPath + * + * @return the query this instance is based on + */ + public String getQuery() { + return query; + } + + /** + * Return query used to create the JSonPath + * + */ + public void setQuery(String query) { + this.query = query; + } + + /** + * Return condition attribute key which is checked + * + * @return the attribute key which is checked + */ + public String getConditionKey() { + return conditionKey; + } + + /** + * The key of the json attribute which is checked for the condition + * + * @param conditionKey + */ + public void setConditionKey(String conditionKey) { + this.conditionKey = conditionKey; + } + + /** + * Return condition attribute value which is checked + * + * @return the attribute value which is checked + */ + public String getConditionValue() { + return conditionValue; + } + + /** + * The value of the json attribute which must match the condition + * + * @param conditionValue + */ + public void setConditionValue(String conditionValue) { + this.conditionValue = conditionValue; + } + + /** + * Return element attribute key where the final value is retrieved from + * + * @return the attribute key for the element + */ + public String getElementAttribute() { + return elementAttribute; + } + + /** + * The json attribute where the value is retrieved from + * + * @param elementAttribute + */ + public void setElementAttribute(String elementAttribute) { + this.elementAttribute = elementAttribute; + } + + /** + * Used to process data got by jsonpath expression, like arrays to stringify, change date format or else + * If it is null, toString will be used. + * + * @param metadataProcessor + */ + public void setMetadataProcessor(JsonPathMetadataProcessor metadataProcessor) { + this.metadataProcessor = metadataProcessor; + } + + /** + * Used to process data got by jsonpath expression, like arrays to stringify, change date format or else + * If it is null, toString will be used. + * + * @param metadataPostProcessor + */ + public void setMetadataPostProcessor(JsonPathMetadataProcessor metadataPostProcessor) { + this.metadataPostProcessor = metadataPostProcessor; + } + + /** + * Retrieve the metadata associated with the given object. + * The toString() of the resulting object will be used. + * + * @param fullJson A class to retrieve metadata from. + * @return a collection of import records. Only the identifier of the found records may be put in the record. + */ + @Override + public Collection contributeMetadata(String fullJson) { + Collection metadata = new ArrayList<>(); + Collection metadataValue = new ArrayList<>(); + if (Objects.nonNull(metadataProcessor)) { + metadataValue = metadataProcessor.processMetadata(fullJson); + } else { + JsonNode jsonNode = convertStringJsonToJsonNode(fullJson); + JsonNode node = jsonNode.at(query); + if (node.isArray()) { + Iterator nodes = node.iterator(); + while (nodes.hasNext()) { + JsonNode nodeV = nodes.next(); + + if (!checkCondition(nodeV)) { + continue; + } + + if (getElementAttribute() != null) { + JsonNode element = nodeV.get(getElementAttribute()); + if (element != null && !element.isMissingNode()) { + String nodeValue = getStringValue(element); + if (StringUtils.isNotBlank(nodeValue)) { + metadataValue.add(nodeValue); + } + } + } else { + String nodeValue = getStringValue(nodeV); + if (StringUtils.isNotBlank(nodeValue)) { + metadataValue.add(nodeValue); + } + } + } + } else if (!node.isNull() && StringUtils.isNotBlank(node.toString())) { + if (getElementAttribute() != null && checkCondition(node)) { + JsonNode element = node.get(getElementAttribute()); + if (element != null && !element.isMissingNode()) { + String nodeValue = getStringValue(element); + if (StringUtils.isNotBlank(nodeValue)) { + metadataValue.add(nodeValue); + } + } + } else if (checkCondition(node)) { + String nodeValue = getStringValue(node); + if (StringUtils.isNotBlank(nodeValue)) { + metadataValue.add(nodeValue); + } + } + } + } + if (metadataPostProcessor != null) { + Collection postmetadataValues = new ArrayList<>(); + for (String value: metadataValue) { + Collection postmetadataValue = metadataPostProcessor.processMetadata(value); + if (postmetadataValue != null) { + postmetadataValues.addAll(postmetadataValue); + } + } + metadataValue = postmetadataValues; + } + + for (String value : metadataValue) { + MetadatumDTO metadatumDto = new MetadatumDTO(); + metadatumDto.setValue(value); + metadatumDto.setElement(field.getElement()); + metadatumDto.setQualifier(field.getQualifier()); + metadatumDto.setSchema(field.getSchema()); + metadata.add(metadatumDto); + } + return metadata; + } + + /** + * Check some jsonNode if some jsonpointer expression matches some condition for some match. + * The match is parsed as some regex condition + * @param node + * @return true if node matches the condition or false if not + */ + private boolean checkCondition(JsonNode node) { + if (getConditionKey() == null && getConditionValue() == null) { + return true; + } + if (getConditionKey() != null) { + JsonNode conditionnode = node.get(getConditionKey()); + if (getConditionValue() == null && conditionnode != null && !conditionnode.isMissingNode()) { + return true; + } else if (conditionnode != null && !conditionnode.isMissingNode() && + conditionnode.toString().matches(getConditionValue())) { + return true; + } + } else if (getConditionKey() == null && node.toString().matches(getConditionValue())) { + return true; + } + return false; + } + + private String getStringValue(JsonNode node) { + if (node.isTextual()) { + return node.textValue(); + } + if (node.isNumber()) { + return node.numberValue().toString(); + } + log.error("It wasn't possible to convert the value of the following JsonNode:" + node.asText()); + return StringUtils.EMPTY; + } + + private JsonNode convertStringJsonToJsonNode(String json) { + ObjectMapper mapper = new ObjectMapper(); + JsonNode body = null; + try { + body = mapper.readTree(json); + } catch (JsonProcessingException e) { + log.error("Unable to process json response.", e); + } + return body; + } +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MappingValueProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MappingValueProcessor.java new file mode 100644 index 00000000000..6d8a2036fbe --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MappingValueProcessor.java @@ -0,0 +1,39 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.metadatamapping.contributor; + +import java.util.ArrayList; +import java.util.Collection; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.util.SimpleMapConverter; + +/** + * This Processor allows to map text values to controlled list of vocabularies using some mapConverter + * + * @author Florian Gantner (florian.gantner@uni-bamberg.de) + */ +public class MappingValueProcessor implements JsonPathMetadataProcessor { + + private final static Logger log = LogManager.getLogger(); + + private SimpleMapConverter converter; + + @Override + public Collection processMetadata(String value) { + Collection mappedValues = new ArrayList<>(); + mappedValues.add(converter.getValue(value)); + return mappedValues; + } + + public void setConverter(SimpleMapConverter converter) { + this.converter = converter; + } + +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteImportMetadataSourceServiceIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteImportMetadataSourceServiceIT.java index 35b4810d929..899ba8c96a7 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteImportMetadataSourceServiceIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DataCiteImportMetadataSourceServiceIT.java @@ -127,6 +127,24 @@ private ArrayList getRecords() { MetadatumDTO author8 = createMetadatumDTO("dc", "contributor", "author", "Schleicher, Dierk"); MetadatumDTO author9 = createMetadatumDTO("dc", "contributor", "author", "Stock, Benedikt"); MetadatumDTO author10 = createMetadatumDTO("dc", "contributor", "author", "Zelmanov, Efim"); + MetadatumDTO dateissued = createMetadatumDTO("dc", "date", "issued", "2022"); + MetadatumDTO publisher = createMetadatumDTO("dc", "publisher", null, "arXiv"); + MetadatumDTO type = createMetadatumDTO("dc", "type", null, "Working Paper"); + MetadatumDTO abstrct = createMetadatumDTO("dc", "description", "abstract", "A proof is one of the most" + + " important concepts of mathematics. However, there is a striking difference between how a proof " + + "is defined in theory and how it " + + "is used in practice. This puts the unique status of mathematics as exact science into peril. " + + "Now may be the time to reconcile theory and practice, i.e. precision and intuition, through the" + + " advent of computer proof assistants. For the most time this has been a topic for experts in specialized" + + " communities. However, mathematical proofs have become increasingly sophisticated, stretching the" + + " boundaries of what is humanly comprehensible, so that leading mathematicians have asked for formal" + + " verification of their proofs. At the same time, major theorems in mathematics have recently been" + + " computer-verified by people from outside of these communities, even by beginning students. This" + + " article investigates the gap between the different definitions of a proof and possibilities to" + + " build bridges. It is written as a polemic or a collage by different members of the communities in" + + " mathematics and computer science at different stages of their careers, challenging well-known" + + " preconceptions and exploring new perspectives."); + MetadatumDTO description = createMetadatumDTO("dc", "description", null, "17 pages, 1 figure"); metadatums.add(title); metadatums.add(doi); metadatums.add(author1); @@ -139,6 +157,11 @@ private ArrayList getRecords() { metadatums.add(author8); metadatums.add(author9); metadatums.add(author10); + metadatums.add(dateissued); + metadatums.add(publisher); + metadatums.add(type); + metadatums.add(abstrct); + metadatums.add(description); ImportRecord firstRecord = new ImportRecord(metadatums); diff --git a/dspace/config/crosswalks/datacite/mapConverter-datacite-to-dspace-resourcetypes.properties b/dspace/config/crosswalks/datacite/mapConverter-datacite-to-dspace-resourcetypes.properties new file mode 100644 index 00000000000..536365cda04 --- /dev/null +++ b/dspace/config/crosswalks/datacite/mapConverter-datacite-to-dspace-resourcetypes.properties @@ -0,0 +1,13 @@ +# Mapping from Datacite ResourceGeneralTypes to dspace types +Audiovisual = Video +JournalArticle = Article +Book = Book +BookChapter = Book chapter +Dataset = Dataset +InteractiveResource = Learning Object +Image = Image +Preprint = Preprint +Software = Software +Report = Technical Report +Dissertation = Thesis +Text = Working Paper diff --git a/dspace/config/spring/api/datacite-integration.xml b/dspace/config/spring/api/datacite-integration.xml index 236ec0a3bda..efc6af42bd8 100644 --- a/dspace/config/spring/api/datacite-integration.xml +++ b/dspace/config/spring/api/datacite-integration.xml @@ -21,7 +21,13 @@ + + + + + + @@ -59,4 +65,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +