From 0691ad26adace58fa1f7b34ab9ae79b0fd96939d Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Mon, 19 Jun 2023 16:36:25 +0200 Subject: [PATCH 1/6] [DSC-782][CST-6963] New metadata and solr index for file type feat: - new consumer for bitstream to copy metadatas as file type - new solr index for child bitstream type - new discovery configuration - new dspace types. --- .../FileTypeMetadataEnhancerConsumer.java | 274 +++++++++++ .../discovery/SolrServiceFileInfoPlugin.java | 202 ++++++-- .../FileTypeMetadataEnhancerConsumerIT.java | 432 ++++++++++++++++++ dspace/config/dspace.cfg | 6 +- dspace/config/registries/dspace-types.xml | 7 + dspace/config/spring/api/discovery.xml | 51 +++ 6 files changed, 944 insertions(+), 28 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumer.java create mode 100644 dspace-api/src/test/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumerIT.java diff --git a/dspace-api/src/main/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumer.java b/dspace-api/src/main/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumer.java new file mode 100644 index 00000000000..b5c51e93e76 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumer.java @@ -0,0 +1,274 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.filetype.consumer; + +import static org.dspace.util.FunctionalUtils.throwingConsumerWrapper; +import static org.dspace.util.FunctionalUtils.throwingMapperWrapper; + +import java.sql.SQLException; +import java.text.MessageFormat; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import 
java.util.Optional; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.codec.binary.StringUtils; +import org.dspace.content.Bitstream; +import org.dspace.content.Bundle; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.MetadataField; +import org.dspace.content.MetadataFieldName; +import org.dspace.content.MetadataValue; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.BitstreamService; +import org.dspace.content.service.ItemService; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.core.exception.SQLRuntimeException; +import org.dspace.event.Consumer; +import org.dspace.event.Event; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FileTypeMetadataEnhancerConsumer implements Consumer { + + private static final Logger logger = LoggerFactory.getLogger(FileTypeMetadataEnhancerConsumer.class); + + protected static final MetadataFieldName entityTypeMetadata = new MetadataFieldName("dc", "type"); + protected static final MetadataFieldName fileTypeMetadata = new MetadataFieldName("dspace", "file", "type"); + private static final List itemMetadatas = List.of(fileTypeMetadata); + private static final List bitstreamMetadatas = List.of(entityTypeMetadata); + private static final Map bitstreamToItemMetadatasMap = Map.of( + entityTypeMetadata.toString(), fileTypeMetadata + ); + + private BitstreamService bitstreamService; + private ItemService itemService; + + private Set bitstreamAlreadyProcessed = new HashSet<>(); + private Set itemsToProcess = new HashSet<>(); + + @Override + public void initialize() throws Exception { + this.bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); + this.itemService = ContentServiceFactory.getInstance().getItemService(); + } + + @Override + public void 
consume(Context ctx, Event event) throws Exception { + if (Constants.BITSTREAM == event.getSubjectType()) { + this.handleBitStreamConsumer( + ctx, + Optional.ofNullable((Bitstream) event.getObject(ctx)) + .orElse(this.loadBitstream(ctx, event)), + event + ); + } else if (Constants.ITEM == event.getSubjectType() && Event.CREATE == event.getEventType()) { + this.handleItemConsumer( + ctx, + Optional.ofNullable((Item) event.getObject(ctx)) + .orElse(this.loadItem(ctx, event)) + ); + } else { + logger.warn( + "Can't consume the DSPaceObject with id {}, only BITSTREAM and ITEMS'CREATION events are consumable!", + event.getSubjectID() + ); + } + } + + @Override + public void end(Context ctx) throws Exception { + bitstreamAlreadyProcessed.clear(); + this.itemsToProcess + .stream() + .forEach(item -> this.handleItemConsumer(ctx, item)); + itemsToProcess.clear(); + } + + @Override + public void finish(Context ctx) throws Exception {} + + private Bitstream loadBitstream(Context ctx, Event event) { + Bitstream found = null; + try { + found = this.bitstreamService.find(ctx, event.getSubjectID()); + } catch (SQLException e) { + logger.error("Error while retrieving the bitstream with ID: " + event.getSubjectID(), e); + throw new SQLRuntimeException("Error while retrieving the bitstream with ID: " + event.getSubjectID(), e); + } + return found; + } + + private Item loadItem(Context ctx, Event event) { + Item found = null; + try { + found = this.itemService.find(ctx, event.getSubjectID()); + } catch (SQLException e) { + logger.error("Error while retrieving the bitstream with ID: " + event.getSubjectID(), e); + throw new SQLRuntimeException("Error while retrieving the bitstream with ID: " + event.getSubjectID(), e); + } + return found; + } + + private void handleBitStreamConsumer(Context ctx, Bitstream bitstream, Event event) { + + if (bitstream == null || this.alreadyProcessed(bitstream)) { + return; + } + List bitstreamItems = List.of(); + try { + bitstreamItems = 
bitstream.getBundles() + .stream() + .filter(bundle -> "ORIGINAL".equals(bundle.getName())) + .map(Bundle::getItems) + .flatMap(Collection::stream) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + bitstreamAlreadyProcessed.add(bitstream); + bitstreamItems + .stream() + .forEach(item -> this.itemsToProcess.add(item)); + } + } + + private void handleItemConsumer(Context ctx, Item item) { + + if (item == null) { + return; + } + + try { + Item loadedItem = this.itemService.find(ctx, item.getID()); + Map> grouped = + Optional.ofNullable(loadedItem) + .map(i -> i.getBundles("ORIGINAL")) + .filter(bundles -> !bundles.isEmpty()) + .map(bundles -> bundles.get(0)) + .map(Bundle::getBitstreams) + .filter(bitstreams -> !bitstreams.isEmpty()) + .map(bitstreams -> getMetadatasForItem(ctx, bitstreams).collect(Collectors.toList())) + .map(metadatas -> groupByMetadataField(metadatas)) + .filter(metadatas -> !metadatas.isEmpty()) + .orElse(Map.of()); + + this.itemService.removeMetadataValues(ctx, loadedItem, getRemovableMetadatas(loadedItem)); + + grouped + .entrySet() + .stream() + .map(entry -> + Map.entry(bitstreamToItemMetadatasMap.get(entry.getKey().toString('.')), entry.getValue()) + ) + .filter(entry -> entry.getKey() != null) + .forEach( + throwingConsumerWrapper(entry -> + this.addMetadata( + ctx, + loadedItem, + entry.getKey(), + entry.getValue() + ) + ) + ); + + } catch (SQLException e) { + logger.error(MessageFormat.format("Error while processing item {}!", item.getID().toString()), e); + throw new SQLRuntimeException(e); + } + + } + + private void addMetadata(Context ctx, Item loadedItem, MetadataFieldName metadata, List value) + throws SQLException { + this.itemService.addMetadata( + ctx, + loadedItem, + metadata.schema, + metadata.element, + metadata.qualifier, + null, + value + ); + } + + private Stream getMetadatasForItem(Context ctx, List bitstreams) { + return bitstreams + .stream() + .map( + 
throwingMapperWrapper(bitstream -> + this.bitstreamService.find(ctx, bitstream.getID()), + null + ) + ) + .filter(Objects::nonNull) + .flatMap(bitstream -> filterBitstreamMetadatasForItem(bitstream)); + } + + private Stream filterBitstreamMetadatasForItem(Bitstream bitstream) { + return bitstream.getMetadata() + .stream() + .filter( + metadataFilter( + bitstreamMetadatas + ) + ); + } + + private Map> groupByMetadataField(List metadatas) { + return this.collectByGroupingMetadataFieldMappingValue(metadatas.stream()); + } + + private Map> collectByGroupingMetadataFieldMappingValue(Stream stream) { + return stream + .collect( + Collectors.groupingBy( + MetadataValue::getMetadataField, + Collectors.mapping(MetadataValue::getValue, Collectors.toList()) + ) + ); + } + + private boolean alreadyProcessed(Bitstream bitstream) { + return bitstreamAlreadyProcessed.contains(bitstream); + } + + private List getRemovableMetadatas(DSpaceObject dspaceObject) { + return dspaceObject + .getMetadata() + .stream() + .filter( + metadataFilter( + itemMetadatas + ) + ) + .collect(Collectors.toList()); + } + + private Predicate metadataFilter(List metadataFields) { + return metadata -> + metadataFields + .stream() + .filter(field -> + StringUtils.equals(field.schema, metadata.getSchema()) && + StringUtils.equals(field.element, metadata.getElement()) && + StringUtils.equals(field.qualifier, metadata.getQualifier()) + ) + .findFirst() + .isPresent(); + } +} diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java index ab56e4692e3..b4305dc3dd0 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java @@ -8,15 +8,24 @@ package org.dspace.discovery; import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import 
java.util.function.BiFunction; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.common.SolrInputDocument; import org.dspace.content.Bitstream; import org.dspace.content.Bundle; -import org.dspace.content.Item; +import org.dspace.content.MetadataFieldName; +import org.dspace.content.MetadataValue; import org.dspace.core.Context; import org.dspace.discovery.indexobject.IndexableItem; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; /** *

@@ -36,41 +45,180 @@ * * * @author Martin Walk + * @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com) + * */ public class SolrServiceFileInfoPlugin implements SolrServiceIndexPlugin { - private static final Logger log = LogManager.getLogger(SolrServiceFileInfoPlugin.class); + /** + * Class used to map a target metadata into a solr index using {@code SolrInputDocument} + * + * @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com) + * + * @param + */ + private static class SolrFieldMetadataMapper { + private final MetadataFieldName metadata; + private final BiFunction> fieldAdder; + + public SolrFieldMetadataMapper(MetadataFieldName metadata, + BiFunction> fieldAdder) { + super(); + this.metadata = metadata; + this.fieldAdder = fieldAdder; + } + + public void map(SolrInputDocument document, String field, T value) { + this.fieldAdder.apply(document, field).accept(value); + } + + public MetadataFieldName getMetadata() { + return metadata; + } + + } + + private static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd"); private static final String BUNDLE_NAME = "ORIGINAL"; private static final String SOLR_FIELD_NAME_FOR_FILENAMES = "original_bundle_filenames"; private static final String SOLR_FIELD_NAME_FOR_DESCRIPTIONS = "original_bundle_descriptions"; + private static final String SOLR_FIELD_NAME_FOR_OAIRE_LICENSE_CONDITION = "original_bundle_oaire_licenseCondition"; + private static final String SOLR_FIELD_NAME_FOR_DATACITE_RIGHTS = "original_bundle_datacite_rights"; + private static final String SOLR_FIELD_NAME_FOR_DATACITE_AVAILABLE = "original_bundle_datacite_available"; + private static final String SOLR_FIELD_NAME_FOR_FILETYPE = "dspace_file_type"; + private static final String SOLR_POSTFIX_FILTER = "_filter"; + private static final String SOLR_POSTFIX_KEYWORD = "_keyword"; + // used for facets and filters of type Date to correctly search them and visualize in facets. 
+ private static final String SOLR_POSTFIX_YEAR = ".year"; + private static final MetadataFieldName METADATA_DATACITE_RIGHTS = new MetadataFieldName("datacite", "rights"); + private static final MetadataFieldName METADATA_DATACITE_AVAILABLE = new MetadataFieldName("datacite", "available"); + private static final MetadataFieldName METADATA_LICENSE_CONDITION = + new MetadataFieldName("oaire", "licenseCondition"); + private static final MetadataFieldName METADATA_FILE_TYPE = new MetadataFieldName("dc", "type"); + + private static final SolrFieldMetadataMapper OAIRE_LICENSE_MAPPER = + new SolrFieldMetadataMapper( + METADATA_LICENSE_CONDITION, + (document, fieldName) -> value -> { + addField(document, fieldName, value); + addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); + } + ); + + private static final SolrFieldMetadataMapper DATACITE_RIGHTS_MAPPER = + new SolrFieldMetadataMapper( + METADATA_DATACITE_RIGHTS, + (document, fieldName) -> value -> { + addField(document, fieldName, value); + addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); + } + ); + + private static final SolrFieldMetadataMapper DATACITE_AVAILABLE_MAPPER = + new SolrFieldMetadataMapper( + METADATA_DATACITE_AVAILABLE, + (document, fieldName) -> value -> { + addField(document, fieldName, value); + addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); + addField(document, fieldName.concat(SOLR_POSTFIX_YEAR), dtf.parseLocalDate(value).getYear()); + } + ); + + private static final SolrFieldMetadataMapper FILE_TYPE_MAPPER = + new SolrFieldMetadataMapper( + METADATA_FILE_TYPE, + (document, fieldName) -> value -> { + addField(document, fieldName, value); + addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, 
fieldName.concat(SOLR_POSTFIX_FILTER), value); + } + ); + + private static final Map> mappableMetadatas = Stream.of( + Map.entry(SOLR_FIELD_NAME_FOR_OAIRE_LICENSE_CONDITION, OAIRE_LICENSE_MAPPER), + Map.entry(SOLR_FIELD_NAME_FOR_DATACITE_RIGHTS, DATACITE_RIGHTS_MAPPER), + Map.entry(SOLR_FIELD_NAME_FOR_DATACITE_AVAILABLE, DATACITE_AVAILABLE_MAPPER), + Map.entry(SOLR_FIELD_NAME_FOR_FILETYPE, FILE_TYPE_MAPPER) + ) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + + private static void addField(SolrInputDocument document, String name, Object value) { + document.addField(name, value); + } @Override public void additionalIndex(Context context, IndexableObject indexableObject, SolrInputDocument document) { if (indexableObject instanceof IndexableItem) { - Item item = ((IndexableItem) indexableObject).getIndexedObject(); - List bundles = item.getBundles(); - if (bundles != null) { - for (Bundle bundle : bundles) { - String bundleName = bundle.getName(); - if ((bundleName != null) && bundleName.equals(BUNDLE_NAME)) { - List bitstreams = bundle.getBitstreams(); - if (bitstreams != null) { - for (Bitstream bitstream : bitstreams) { - try { - document.addField(SOLR_FIELD_NAME_FOR_FILENAMES, bitstream.getName()); - - String description = bitstream.getDescription(); - if ((description != null) && !description.isEmpty()) { - document.addField(SOLR_FIELD_NAME_FOR_DESCRIPTIONS, description); - } - } catch (Exception e) { - log.warn("Error occurred during update index for item {}", item.getID()); - } - } - } - } + generateBundleIndex(document, ((IndexableItem) indexableObject).getIndexedObject().getBundles()); + } + } + + private void generateBundleIndex(SolrInputDocument document, List bundles) { + if (bundles != null) { + for (Bundle bundle : bundles) { + String bundleName = bundle.getName(); + if (bundleName != null && bundleName.equals(BUNDLE_NAME)) { + generateBitstreamIndex(document, bundle.getBitstreams()); } } } } -} \ No newline at end of file + + 
/** + * Method that adds index to {@link SolrInputDocument}, iterates between {@code bitstreams} and {@code mappableMetadatas} + * then applies the corresponding mapping function to the bitstream + * + * @param document solr document + * @param bitstreams list of bitstreams to analyze + */ + private void generateBitstreamIndex(SolrInputDocument document, List bitstreams) { + if (document != null && bitstreams != null) { + for (Bitstream bitstream : bitstreams) { + addField(document, SOLR_FIELD_NAME_FOR_FILENAMES, bitstream.getName()); + + Optional.ofNullable(bitstream.getDescription()) + .filter(StringUtils::isNotEmpty) + .ifPresent( + (description) -> + addField(document, SOLR_FIELD_NAME_FOR_DESCRIPTIONS,description) + ); + + mappableMetadatas + .entrySet() + .stream() + .forEach( + entry -> + this.addNonNullMetadataValueField(bitstream, entry.getValue(), document, entry.getKey()) + ); + } + } + } + + /** + * Method that iterates bitstream's metadatas, verifies if is mappable and then maps the ones configured + * using the {@link SolrFieldMetadataMapper} function. 
+ * + * @param bitstream that contains metadatas to verify + * @param metadataMapper the mapper that will be applied to the metadatas + * @param document solrdocument + * @param fieldName solr index name + */ + private void addNonNullMetadataValueField(Bitstream bitstream, SolrFieldMetadataMapper metadataMapper, + SolrInputDocument document, String fieldName) { + bitstream.getMetadata() + .stream() + .filter(metadata -> + StringUtils.equals(metadataMapper.getMetadata().schema, metadata.getSchema()) && + StringUtils.equals(metadataMapper.getMetadata().element, metadata.getElement()) && + StringUtils.equals(metadataMapper.getMetadata().qualifier, metadata.getQualifier()) + ) + .map(MetadataValue::getValue) + .filter(Objects::nonNull) + .findFirst() + .ifPresent(value -> metadataMapper.map(document, fieldName, (T) value)); + } +} diff --git a/dspace-api/src/test/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumerIT.java b/dspace-api/src/test/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumerIT.java new file mode 100644 index 00000000000..bfa29ab330d --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/app/filetype/consumer/FileTypeMetadataEnhancerConsumerIT.java @@ -0,0 +1,432 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.filetype.consumer; + +import static org.dspace.app.matcher.MetadataValueMatcher.with; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.not; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.SQLException; +import java.text.ParseException; +import java.util.function.Predicate; + +import org.apache.commons.codec.binary.StringUtils; +import org.apache.tools.ant.filters.StringInputStream; 
+import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.authorize.AuthorizeException; +import org.dspace.builder.BitstreamBuilder; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.builder.ResourcePolicyBuilder; +import org.dspace.content.Bitstream; +import org.dspace.content.Collection; +import org.dspace.content.Item; +import org.dspace.content.MetadataFieldName; +import org.dspace.content.MetadataValue; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.BitstreamService; +import org.dspace.content.service.ItemService; +import org.dspace.core.Constants; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +public class FileTypeMetadataEnhancerConsumerIT extends AbstractIntegrationTestWithDatabase { + + private Collection collection; + + private final BitstreamService bitstreamService = ContentServiceFactory.getInstance() + .getBitstreamService(); + private final ItemService itemService = ContentServiceFactory.getInstance() + .getItemService(); + + @Before + public void setup() { + context.turnOffAuthorisationSystem(); + + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); + + collection = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection 1").build(); + + context.restoreAuthSystemState(); + } + + @Test + public void testWithoutBitstreams() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + context.turnOffAuthorisationSystem(); + Item item = ItemBuilder.createItem(context, collection).build(); + context.restoreAuthSystemState(); + context.commit(); + + item = context.reloadEntity(item); + + assertThat(item.getMetadata(), not(hasItem(with("dc.type", null)))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file_type", null)))); + + 
context.turnOffAuthorisationSystem(); + this.itemService.update(context, item); + context.restoreAuthSystemState(); + + item = context.reloadEntity(item); + + assertThat(item.getMetadata(), not(hasItem(with("dc.type", null)))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file.type", null)))); + } + + @Test + public void testWithoutEntityType() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + context.turnOffAuthorisationSystem(); + Item item = ItemBuilder.createItem(context, collection).build(); + Bitstream bitstream = BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), not(hasItem(with("dc.type", null)))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file.type", null)))); + } + + @Test + public void testWithEntityTypeDelete() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + context.turnOffAuthorisationSystem(); + Item item = ItemBuilder.createItem(context, collection).build(); + Bitstream bitstream = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .build(); + + ResourcePolicyBuilder + .createResourcePolicy(context) + .withDspaceObject(bitstream) + .withAction(Constants.READ) + .withUser(admin) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + context.turnOffAuthorisationSystem(); + + this.bitstreamService.delete(context, bitstream); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), not(hasItem(with("dc.type", null)))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file.type", null)))); + } + + @Test + public void 
testWithEntityType() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + final String type = "Publication"; + context.turnOffAuthorisationSystem(); + final Item item = + ItemBuilder + .createItem(context, collection) + .build(); + Bitstream bitstream = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type))); + } + + @Test + public void testWithTypeEdited() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + String type = "Publication"; + context.turnOffAuthorisationSystem(); + Item item = + ItemBuilder + .createItem(context, collection) + .build(); + Bitstream bitstream = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + + context.turnOffAuthorisationSystem(); + + type = "Thesis"; + this.bitstreamService.setMetadataSingleValue(context, bitstream, + FileTypeMetadataEnhancerConsumer.entityTypeMetadata, null, type); + this.bitstreamService.update(context, bitstream); + + context.restoreAuthSystemState(); + 
context.commit(); + + bitstream = context.reloadEntity(bitstream); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + } + + @Test + public void testWithTypeDeleted() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + final String type = "Publication"; + context.turnOffAuthorisationSystem(); + Item item = + ItemBuilder + .createItem(context, collection) + .build(); + Bitstream bitstream = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + final MetadataValue entityType = bitstream.getMetadata() + .stream() + .filter(metadataFilter(FileTypeMetadataEnhancerConsumer.entityTypeMetadata)) + .findFirst() + .orElseThrow(); + bitstream.getMetadata().remove(entityType); + context.turnOffAuthorisationSystem(); + + this.bitstreamService.update(context, bitstream); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), not(hasItem(with("dc.type", Mockito.any())))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file.type", Mockito.any())))); + } + + @Test + public void testWithMultipleEntityType() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + final String type = "Publication"; + final String type1 = "Thesis"; + context.turnOffAuthorisationSystem(); + final Item item = + ItemBuilder + .createItem(context, collection) + .build(); + Bitstream bitstream = + BitstreamBuilder + 
.createBitstream(context, item, new StringInputStream("test")) + .withType(type) + .build(); + final Bitstream bitstream1 = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type1) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(bitstream1.getMetadata(), hasItem(with("dc.type", type1))); + assertThat(bitstream1.getMetadata(), not(hasItem(with("dspace.file.type", type1)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type1)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type, null, 0, -1))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type1, null, 1, -1))); + } + + @Test + public void testWithMultipleEntityTypeEdited() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + String type = "Publication"; + String type1 = "Thesis"; + context.turnOffAuthorisationSystem(); + Item item = + ItemBuilder + .createItem(context, collection) + .build(); + Bitstream bitstream = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type) + .build(); + Bitstream bitstream1 = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type1) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + bitstream1 = context.reloadEntity(bitstream1); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(bitstream1.getMetadata(), hasItem(with("dc.type", type1))); + 
assertThat(bitstream1.getMetadata(), not(hasItem(with("dspace.file.type", type1)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type1)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type, null, 0, -1))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type1, null, 1, -1))); + + context.turnOffAuthorisationSystem(); + + type = "Journal"; + this.bitstreamService.setMetadataSingleValue( + context, + bitstream, + FileTypeMetadataEnhancerConsumer.entityTypeMetadata, + null, + type + ); + this.bitstreamService.update(context, bitstream); + + type1 = "Journal Article"; + this.bitstreamService.setMetadataSingleValue( + context, + bitstream1, + FileTypeMetadataEnhancerConsumer.entityTypeMetadata, + null, + type1 + ); + this.bitstreamService.update(context, bitstream1); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + bitstream1 = context.reloadEntity(bitstream1); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(bitstream1.getMetadata(), hasItem(with("dc.type", type1))); + assertThat(bitstream1.getMetadata(), not(hasItem(with("dspace.file.type", type1)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type1)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type, null, 0, -1))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type1, null, 1, -1))); + } + + @Test + public void testWithMultipleEntityTypeDelete() + throws FileNotFoundException, SQLException, AuthorizeException, IOException, ParseException { + final String type = "Publication"; + final String type1 = "Thesis"; + 
context.turnOffAuthorisationSystem(); + Item item = + ItemBuilder + .createItem(context, collection) + .build(); + Bitstream bitstream = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type) + .build(); + Bitstream bitstream1 = + BitstreamBuilder + .createBitstream(context, item, new StringInputStream("test")) + .withType(type1) + .build(); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + bitstream1 = context.reloadEntity(bitstream1); + + assertThat(bitstream.getMetadata(), hasItem(with("dc.type", type))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(bitstream1.getMetadata(), hasItem(with("dc.type", type1))); + assertThat(bitstream1.getMetadata(), not(hasItem(with("dspace.file.type", type1)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type1)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type, null, 0, -1))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type1, null, 1, -1))); + + context.turnOffAuthorisationSystem(); + + this.bitstreamService.clearMetadata( + context, + bitstream, + FileTypeMetadataEnhancerConsumer.entityTypeMetadata.schema, + FileTypeMetadataEnhancerConsumer.entityTypeMetadata.element, + FileTypeMetadataEnhancerConsumer.entityTypeMetadata.qualifier, + null + ); + this.bitstreamService.update(context, bitstream); + + context.restoreAuthSystemState(); + context.commit(); + + bitstream = context.reloadEntity(bitstream); + bitstream1 = context.reloadEntity(bitstream1); + item = context.reloadEntity(item); + + assertThat(bitstream.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(bitstream.getMetadata(), not(hasItem(with("dspace.file.type", type)))); + assertThat(bitstream1.getMetadata(), hasItem(with("dc.type", type1))); + 
assertThat(bitstream1.getMetadata(), not(hasItem(with("dspace.file.type", type1)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type)))); + assertThat(item.getMetadata(), not(hasItem(with("dc.type", type1)))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file.type", type, null, 0, -1)))); + assertThat(item.getMetadata(), not(hasItem(with("dspace.file.type", type1, null, 1, -1)))); + assertThat(item.getMetadata(), hasItem(with("dspace.file.type", type1, null, 0, -1))); + } + + private Predicate metadataFilter(MetadataFieldName metadataField) { + return metadata -> + StringUtils.equals(metadataField.schema, metadata.getSchema()) && + StringUtils.equals(metadataField.element, metadata.getElement()) && + StringUtils.equals(metadataField.qualifier, metadata.getQualifier()); + } +} diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index 65405e795af..8ee78908939 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -793,7 +793,7 @@ event.dispatcher.default.class = org.dspace.event.BasicDispatcher # Add doi here if you are using org.dspace.identifier.DOIIdentifierProvider to generate DOIs. # Adding doi here makes DSpace send metadata updates to your doi registration agency. # Add rdf here, if you are using dspace-rdf to export your repository content as RDF. 
-event.dispatcher.default.consumers = versioning, discovery, eperson, dedup, crisconsumer, orcidqueue, audit, nbeventsdelete, referenceresolver, orcidwebhook, itemenhancer, customurl, reciprocal +event.dispatcher.default.consumers = versioning, discovery, eperson, dedup, crisconsumer, orcidqueue, audit, nbeventsdelete, referenceresolver, orcidwebhook, itemenhancer, customurl, reciprocal, filetypemetadataenhancer # The noindex dispatcher will not create search or browse indexes (useful for batch item imports) @@ -873,6 +873,10 @@ event.consumer.orcidqueue.filters = Item+Install|Modify|Modify_Metadata|Delete|R event.consumer.reciprocal.class = org.dspace.content.authority.ReciprocalItemAuthorityConsumer event.consumer.reciprocal.filters = Item+INSTALL|MODIFY_METADATA|MODIFY +# FileType consumer +event.consumer.filetypemetadataenhancer.class = org.dspace.app.filetype.consumer.FileTypeMetadataEnhancerConsumer +event.consumer.filetypemetadataenhancer.filters = Item+Create|Modify_Metadata:Bitstream+Create|Modify_Metadata|Delete + # ...set to true to enable testConsumer messages to standard output #testConsumer.verbose = true diff --git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index eb71b9edd9d..094f2543a42 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -51,6 +51,13 @@ enabled Stores a boolean text value (true or false) to indicate if the iiif feature is enabled or not for the dspace object. 
If absent the value is derived from the parent dspace object + + + dspace + file + type + Stores the file types of the item's child bitstreams on the item itself + dspace diff --git a/dspace/config/spring/api/discovery.xml b/dspace/config/spring/api/discovery.xml index 1ddf0da887e..d9dfa136a83 100644 --- a/dspace/config/spring/api/discovery.xml +++ b/dspace/config/spring/api/discovery.xml @@ -3628,4 +3628,55 @@ + + + + + + dc.identifier.doi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From c8d232e2dc01b3dd4a4760cafa48f0e5d716db72 Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Mon, 19 Jun 2023 16:37:24 +0200 Subject: [PATCH 2/6] [DSC-782][CST-6963] Indexing bundle-bitstreams metadatas feat: - indexing description, checksum, mime-type size-bytes from bundle --- .../discovery/SolrServiceFileInfoPlugin.java | 262 +++++++++++------- dspace/config/spring/api/discovery.xml | 2 +- 2 files changed, 166 insertions(+), 98 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java index b4305dc3dd0..4b2aaed4686 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java @@ -7,6 +7,8 @@ */ package org.dspace.discovery; +import java.sql.SQLException; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -19,13 +21,17 @@ import org.apache.commons.lang3.StringUtils; import org.apache.solr.common.SolrInputDocument; import org.dspace.content.Bitstream; +import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; +import org.dspace.content.MetadataField; import org.dspace.content.MetadataFieldName; import org.dspace.content.MetadataValue; import org.dspace.core.Context; import org.dspace.discovery.indexobject.IndexableItem; import
org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** *

@@ -58,26 +64,26 @@ public class SolrServiceFileInfoPlugin implements SolrServiceIndexPlugin { * @param */ private static class SolrFieldMetadataMapper { - private final MetadataFieldName metadata; + private final String solrField; private final BiFunction> fieldAdder; - public SolrFieldMetadataMapper(MetadataFieldName metadata, - BiFunction> fieldAdder) { + public SolrFieldMetadataMapper( + String metadata, + BiFunction> fieldAdder + ) { super(); - this.metadata = metadata; + this.solrField = metadata; this.fieldAdder = fieldAdder; } - public void map(SolrInputDocument document, String field, T value) { - this.fieldAdder.apply(document, field).accept(value); - } - - public MetadataFieldName getMetadata() { - return metadata; + public void map(SolrInputDocument document, T value) { + this.fieldAdder.apply(document, this.solrField).accept(value); } } + private static final Logger logger = LoggerFactory.getLogger(SolrServiceFileInfoPlugin.class); + private static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd"); private static final String BUNDLE_NAME = "ORIGINAL"; private static final String SOLR_FIELD_NAME_FOR_FILENAMES = "original_bundle_filenames"; @@ -85,65 +91,92 @@ public MetadataFieldName getMetadata() { private static final String SOLR_FIELD_NAME_FOR_OAIRE_LICENSE_CONDITION = "original_bundle_oaire_licenseCondition"; private static final String SOLR_FIELD_NAME_FOR_DATACITE_RIGHTS = "original_bundle_datacite_rights"; private static final String SOLR_FIELD_NAME_FOR_DATACITE_AVAILABLE = "original_bundle_datacite_available"; - private static final String SOLR_FIELD_NAME_FOR_FILETYPE = "dspace_file_type"; + private static final String SOLR_FIELD_NAME_FOR_MIMETYPE = "original_bundle_mime_type"; + private static final String SOLR_FIELD_NAME_FOR_CHECKSUM = "original_bundle_checksum"; + private static final String SOLR_FIELD_NAME_FOR_SIZEBYTES = "original_bundle_sizebytes"; + private static final String SOLR_FIELD_NAME_FOR_SHORT_DESCRIPTION = 
"original_bundle_short_description"; private static final String SOLR_POSTFIX_FILTER = "_filter"; private static final String SOLR_POSTFIX_KEYWORD = "_keyword"; + private static final String BITSTREAM_METADATA_SOLR_PREFIX_KEYWORD = "bitstreams."; // used for facets and filters of type Date to correctly search them and visualize in facets. private static final String SOLR_POSTFIX_YEAR = ".year"; private static final MetadataFieldName METADATA_DATACITE_RIGHTS = new MetadataFieldName("datacite", "rights"); private static final MetadataFieldName METADATA_DATACITE_AVAILABLE = new MetadataFieldName("datacite", "available"); private static final MetadataFieldName METADATA_LICENSE_CONDITION = - new MetadataFieldName("oaire", "licenseCondition"); - private static final MetadataFieldName METADATA_FILE_TYPE = new MetadataFieldName("dc", "type"); + new MetadataFieldName("oaire", "licenseCondition"); - private static final SolrFieldMetadataMapper OAIRE_LICENSE_MAPPER = - new SolrFieldMetadataMapper( - METADATA_LICENSE_CONDITION, - (document, fieldName) -> value -> { - addField(document, fieldName, value); - addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); - addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); - } - ); + private static final BiFunction> defaultSolrIndexAdder = + (document, fieldName) -> value -> { + Collection fieldValues = document.getFieldValues(fieldName); + if (fieldValues == null || !fieldValues.contains(value)) { + addField(document, fieldName, value); + addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); + } + }; - private static final SolrFieldMetadataMapper DATACITE_RIGHTS_MAPPER = - new SolrFieldMetadataMapper( - METADATA_DATACITE_RIGHTS, - (document, fieldName) -> value -> { + private static final BiFunction> simpleSolrIndexAdder = + (document, fieldName) -> value -> { + Collection fieldValues = document.getFieldValues(fieldName); + if 
(fieldValues == null || !fieldValues.contains(value)) { addField(document, fieldName, value); - addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); - addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); } - ); + }; - private static final SolrFieldMetadataMapper DATACITE_AVAILABLE_MAPPER = - new SolrFieldMetadataMapper( - METADATA_DATACITE_AVAILABLE, - (document, fieldName) -> value -> { - addField(document, fieldName, value); - addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); - addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); - addField(document, fieldName.concat(SOLR_POSTFIX_YEAR), dtf.parseLocalDate(value).getYear()); - } - ); + private static final BiFunction> bitstreamMetadataSolrIndexAdder = + (document, fieldName) -> value -> { + String baseIndex = BITSTREAM_METADATA_SOLR_PREFIX_KEYWORD.concat(fieldName); + Collection fieldValues = document.getFieldValues(baseIndex); + if (fieldValues == null || !fieldValues.contains(value)) { + addField(document, baseIndex, value); + addField(document, baseIndex.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, baseIndex.concat(SOLR_POSTFIX_FILTER), value); + } + }; - private static final SolrFieldMetadataMapper FILE_TYPE_MAPPER = - new SolrFieldMetadataMapper( - METADATA_FILE_TYPE, - (document, fieldName) -> value -> { - addField(document, fieldName, value); - addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); - addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); - } - ); + private static final BiFunction> yearSolrIndexAdder = + (document, fieldName) -> value -> { + Collection fieldValues = document.getFieldValues(fieldName); + if (fieldValues == null || !fieldValues.contains(value)) { + addField(document, fieldName, value); + addField(document, fieldName.concat(SOLR_POSTFIX_KEYWORD), value); + addField(document, fieldName.concat(SOLR_POSTFIX_FILTER), value); + addField(document, fieldName.concat(SOLR_POSTFIX_YEAR), 
dtf.parseLocalDate(value).getYear()); + } + }; + + private static final SolrFieldMetadataMapper getFieldMapper( + String solrField, + BiFunction> adder + ) { + return new SolrFieldMetadataMapper(solrField, adder); + } + + private static final SolrFieldMetadataMapper OAIRE_LICENSE_MAPPER = + new SolrFieldMetadataMapper( + SOLR_FIELD_NAME_FOR_OAIRE_LICENSE_CONDITION, + defaultSolrIndexAdder + ); + + private static final SolrFieldMetadataMapper DATACITE_RIGHTS_MAPPER = + new SolrFieldMetadataMapper( + SOLR_FIELD_NAME_FOR_DATACITE_RIGHTS, + defaultSolrIndexAdder + ); + + private static final SolrFieldMetadataMapper DATACITE_AVAILABLE_MAPPER = + new SolrFieldMetadataMapper( + SOLR_FIELD_NAME_FOR_DATACITE_AVAILABLE, + yearSolrIndexAdder + ); - private static final Map> mappableMetadatas = Stream.of( - Map.entry(SOLR_FIELD_NAME_FOR_OAIRE_LICENSE_CONDITION, OAIRE_LICENSE_MAPPER), - Map.entry(SOLR_FIELD_NAME_FOR_DATACITE_RIGHTS, DATACITE_RIGHTS_MAPPER), - Map.entry(SOLR_FIELD_NAME_FOR_DATACITE_AVAILABLE, DATACITE_AVAILABLE_MAPPER), - Map.entry(SOLR_FIELD_NAME_FOR_FILETYPE, FILE_TYPE_MAPPER) - ) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + private static final Map> mappableMetadatas = + Stream.of( + Map.entry(METADATA_LICENSE_CONDITION.toString(), OAIRE_LICENSE_MAPPER), + Map.entry(METADATA_DATACITE_RIGHTS.toString(), DATACITE_RIGHTS_MAPPER), + Map.entry(METADATA_DATACITE_AVAILABLE.toString(), DATACITE_AVAILABLE_MAPPER) + ) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); private static void addField(SolrInputDocument document, String name, Object value) { @@ -153,16 +186,16 @@ private static void addField(SolrInputDocument document, String name, Object val @Override public void additionalIndex(Context context, IndexableObject indexableObject, SolrInputDocument document) { if (indexableObject instanceof IndexableItem) { - generateBundleIndex(document, ((IndexableItem) indexableObject).getIndexedObject().getBundles()); + 
generateBundleIndex(context, document, ((IndexableItem) indexableObject).getIndexedObject().getBundles()); } } - private void generateBundleIndex(SolrInputDocument document, List bundles) { + private void generateBundleIndex(Context context, SolrInputDocument document, List bundles) { if (bundles != null) { for (Bundle bundle : bundles) { String bundleName = bundle.getName(); if (bundleName != null && bundleName.equals(BUNDLE_NAME)) { - generateBitstreamIndex(document, bundle.getBitstreams()); + generateBitstreamIndex(context, document, bundle.getBitstreams()); } } } @@ -175,50 +208,85 @@ private void generateBundleIndex(SolrInputDocument document, List bundle * @param document solr document * @param bitstreams list of bitstreams to analyze */ - private void generateBitstreamIndex(SolrInputDocument document, List bitstreams) { + private void generateBitstreamIndex(Context context, SolrInputDocument document, List bitstreams) { if (document != null && bitstreams != null) { for (Bitstream bitstream : bitstreams) { - addField(document, SOLR_FIELD_NAME_FOR_FILENAMES, bitstream.getName()); - - Optional.ofNullable(bitstream.getDescription()) - .filter(StringUtils::isNotEmpty) - .ifPresent( - (description) -> - addField(document, SOLR_FIELD_NAME_FOR_DESCRIPTIONS,description) - ); - - mappableMetadatas - .entrySet() - .stream() - .forEach( - entry -> - this.addNonNullMetadataValueField(bitstream, entry.getValue(), document, entry.getKey()) - ); + + indexBitstreamFields(context, document, bitstream); + + indexBitstreamsMetadatadas(document, bitstream); } } } - /** - * Method that iterates bitstream's metadatas, verifies if is mappable and then maps the ones configured - * using the {@link SolrFieldMetadataMapper} function. 
- * - * @param bitstream that contains metadatas to verify - * @param metadataMapper the mapper that will be applied to the metadatas - * @param document solrdocument - * @param fieldName solr index name - */ - private void addNonNullMetadataValueField(Bitstream bitstream, SolrFieldMetadataMapper metadataMapper, - SolrInputDocument document, String fieldName) { - bitstream.getMetadata() - .stream() - .filter(metadata -> - StringUtils.equals(metadataMapper.getMetadata().schema, metadata.getSchema()) && - StringUtils.equals(metadataMapper.getMetadata().element, metadata.getElement()) && - StringUtils.equals(metadataMapper.getMetadata().qualifier, metadata.getQualifier()) - ) - .map(MetadataValue::getValue) - .filter(Objects::nonNull) - .findFirst() - .ifPresent(value -> metadataMapper.map(document, fieldName, (T) value)); + private void indexBitstreamFields(Context context, SolrInputDocument document, Bitstream bitstream) { + simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_FILENAMES).accept(bitstream.getName()); + + Optional.ofNullable(bitstream.getDescription()) + .filter(StringUtils::isNotEmpty) + .ifPresent( + (description) -> + simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_DESCRIPTIONS).accept(description) + ); + + try { + Optional formatOptional = + Optional.ofNullable(bitstream.getFormat(context)) + .filter(Objects::nonNull); + + formatOptional + .map(BitstreamFormat::getMIMEType) + .filter(StringUtils::isNotBlank) + .ifPresent(format -> + defaultSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_MIMETYPE).accept(format) + ); + + formatOptional + .map(BitstreamFormat::getShortDescription) + .ifPresent(format -> + simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_SHORT_DESCRIPTION).accept(format) + ); + } catch (SQLException e) { + logger.error("Error while retrievig bitstream format", e); + throw new RuntimeException("Error while retrievig bitstream format", e); + } + + Optional.of(bitstream.getChecksum()) + 
.filter(StringUtils::isNotBlank) + .map(checksum -> bitstream.getChecksumAlgorithm() + ":" + bitstream.getChecksum()) + .ifPresent(checksum -> + defaultSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_CHECKSUM).accept(checksum) + ); + + Optional.of(bitstream.getSizeBytes()) + .filter(l -> l > 0) + .map(String::valueOf) + .ifPresent(size -> simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_SIZEBYTES).accept(size)); + } + + private void indexBitstreamsMetadatadas(SolrInputDocument document, Bitstream bitstream) { + bitstream + .getMetadata() + .stream() + .filter(metadata -> metadata != null && StringUtils.isNotBlank(metadata.getValue())) + .forEach(metadata -> { + MetadataField metadataField = metadata.getMetadataField(); + String bitstreamMetadata = metadataField.toString('.'); + Optional.ofNullable(mappableMetadatas.get(bitstreamMetadata)) + .filter(Objects::nonNull) + .orElse( + getFieldMapper( + metadataField.toString(), + bitstreamMetadataSolrIndexAdder + ) + ) + .map(document, metadata.getValue()); + }); + } + + private boolean areEquals(MetadataFieldName metadataFieldName, MetadataValue metadata) { + return StringUtils.equals(metadataFieldName.schema, metadata.getSchema()) && + StringUtils.equals(metadataFieldName.element, metadata.getElement()) && + StringUtils.equals(metadataFieldName.qualifier, metadata.getQualifier()); } } diff --git a/dspace/config/spring/api/discovery.xml b/dspace/config/spring/api/discovery.xml index d9dfa136a83..773bf797285 100644 --- a/dspace/config/spring/api/discovery.xml +++ b/dspace/config/spring/api/discovery.xml @@ -3671,7 +3671,7 @@ - + From 1c8baf669b7794f29a118834f91055f7aff7557b Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Mon, 19 Jun 2023 16:41:01 +0200 Subject: [PATCH 3/6] [DSC-782][CST-6963] Updated local.cfg for test folder --- dspace-api/src/test/data/dspaceFolder/config/local.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 08eb9871058..fb8b2863506 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -157,11 +157,11 @@ wos.apiKey = submission.lookup.epo.consumerKey= submission.lookup.epo.consumerSecretKey= -event.dispatcher.default.consumers = versioning, discovery, eperson, dedup, crisconsumer, audit, nbeventsdelete, referenceresolver, orcidwebhook, iiif, itemenhancer, customurl, reciprocal +event.dispatcher.default.consumers = versioning, discovery, eperson, dedup, crisconsumer, audit, nbeventsdelete, referenceresolver, orcidwebhook, iiif, itemenhancer, customurl, reciprocal, filetypemetadataenhancer # setup a dispatcher also with the cris consumer event.dispatcher.cris-default.class = org.dspace.event.BasicDispatcher -event.dispatcher.cris-default.consumers = versioning, discovery, eperson, dedup, crisconsumer, orcidqueue, audit, referenceresolver, orcidwebhook, itemenhancer, customurl +event.dispatcher.cris-default.consumers = versioning, discovery, eperson, dedup, crisconsumer, orcidqueue, audit, referenceresolver, orcidwebhook, itemenhancer, customurl, filetypemetadataenhancer # Enable a test authority control on dc.language.iso field choices.plugin.dc.language.iso = common_iso_languages @@ -213,4 +213,4 @@ logging.server.include-stacktrace-for-httpcode = 400, 401, 404, 403, 422 # Configuration required for thorough testing of browse links webui.browse.link.1 = author:dc.contributor.* -webui.browse.link.2 = subject:dc.subject.* \ No newline at end of file +webui.browse.link.2 = subject:dc.subject.* From 524dd01dc5f321b0ec9afa074e5ac5d3bc9d8787 Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Mon, 19 Jun 2023 17:36:06 +0200 Subject: [PATCH 4/6] [DSC-782] Fixes compilation errors --- .../java/org/dspace/util/FunctionalUtils.java | 28 +++++++++++++++++++ 
.../org/dspace/util/ThrowingConsumer.java | 12 ++++++++ .../java/org/dspace/util/ThrowingMapper.java | 12 ++++++++ 3 files changed, 52 insertions(+) create mode 100644 dspace-api/src/main/java/org/dspace/util/ThrowingConsumer.java create mode 100644 dspace-api/src/main/java/org/dspace/util/ThrowingMapper.java diff --git a/dspace-api/src/main/java/org/dspace/util/FunctionalUtils.java b/dspace-api/src/main/java/org/dspace/util/FunctionalUtils.java index 422c2405a87..66921d04179 100644 --- a/dspace-api/src/main/java/org/dspace/util/FunctionalUtils.java +++ b/dspace-api/src/main/java/org/dspace/util/FunctionalUtils.java @@ -8,6 +8,8 @@ package org.dspace.util; import java.util.Objects; +import java.util.function.Consumer; +import java.util.function.Function; import java.util.function.Predicate; import java.util.function.Supplier; @@ -58,4 +60,30 @@ public static T getCheckDefaultOrBuild(Predicate defaultValueChecker, T d return builder.get(); } + public static Consumer throwingConsumerWrapper( + ThrowingConsumer throwingConsumer) { + return i -> { + try { + throwingConsumer.accept(i); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + } + + public static Function throwingMapperWrapper( + ThrowingMapper throwingConsumer, + R defaultValue + ) { + return i -> { + R value = defaultValue; + try { + value = throwingConsumer.accept(i); + } catch (Exception e) { + throw new RuntimeException(e); + } + return value; + }; + } + } diff --git a/dspace-api/src/main/java/org/dspace/util/ThrowingConsumer.java b/dspace-api/src/main/java/org/dspace/util/ThrowingConsumer.java new file mode 100644 index 00000000000..a04fea3ef41 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/util/ThrowingConsumer.java @@ -0,0 +1,12 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package 
org.dspace.util; + +public interface ThrowingConsumer { + void accept(T t) throws E; +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/util/ThrowingMapper.java b/dspace-api/src/main/java/org/dspace/util/ThrowingMapper.java new file mode 100644 index 00000000000..ac4767a8570 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/util/ThrowingMapper.java @@ -0,0 +1,12 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.util; + +public interface ThrowingMapper { + R accept(T t) throws E; +} \ No newline at end of file From e4703a77fb60cb6e4b80e5d05e692cd3335edb00 Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Tue, 20 Jun 2023 12:01:39 +0200 Subject: [PATCH 5/6] [DSC-782] Fixes NPE --- .../discovery/SolrServiceFileInfoPlugin.java | 46 +++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java index 4b2aaed4686..36fc8f38192 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java @@ -220,13 +220,17 @@ private void generateBitstreamIndex(Context context, SolrInputDocument document, } private void indexBitstreamFields(Context context, SolrInputDocument document, Bitstream bitstream) { - simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_FILENAMES).accept(bitstream.getName()); + addAndHandleException( + simpleSolrIndexAdder, document, bitstream, SOLR_FIELD_NAME_FOR_FILENAMES, bitstream.getName() + ); Optional.ofNullable(bitstream.getDescription()) .filter(StringUtils::isNotEmpty) .ifPresent( (description) -> - simpleSolrIndexAdder.apply(document, 
SOLR_FIELD_NAME_FOR_DESCRIPTIONS).accept(description) + addAndHandleException( + simpleSolrIndexAdder, document, bitstream, SOLR_FIELD_NAME_FOR_DESCRIPTIONS, description + ) ); try { @@ -238,30 +242,56 @@ private void indexBitstreamFields(Context context, SolrInputDocument document, B .map(BitstreamFormat::getMIMEType) .filter(StringUtils::isNotBlank) .ifPresent(format -> - defaultSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_MIMETYPE).accept(format) + addAndHandleException( + defaultSolrIndexAdder, document, bitstream, SOLR_FIELD_NAME_FOR_MIMETYPE, format + ) ); formatOptional .map(BitstreamFormat::getShortDescription) .ifPresent(format -> - simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_SHORT_DESCRIPTION).accept(format) + addAndHandleException( + simpleSolrIndexAdder, document, bitstream, SOLR_FIELD_NAME_FOR_SHORT_DESCRIPTION, format + ) ); } catch (SQLException e) { logger.error("Error while retrievig bitstream format", e); throw new RuntimeException("Error while retrievig bitstream format", e); } - Optional.of(bitstream.getChecksum()) + Optional.ofNullable(bitstream.getChecksum()) .filter(StringUtils::isNotBlank) .map(checksum -> bitstream.getChecksumAlgorithm() + ":" + bitstream.getChecksum()) .ifPresent(checksum -> - defaultSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_CHECKSUM).accept(checksum) + addAndHandleException( + defaultSolrIndexAdder, document, bitstream, SOLR_FIELD_NAME_FOR_CHECKSUM, checksum + ) ); - Optional.of(bitstream.getSizeBytes()) + Optional.ofNullable(bitstream.getSizeBytes()) .filter(l -> l > 0) .map(String::valueOf) - .ifPresent(size -> simpleSolrIndexAdder.apply(document, SOLR_FIELD_NAME_FOR_SIZEBYTES).accept(size)); + .ifPresent(size -> + addAndHandleException( + simpleSolrIndexAdder, document, bitstream, SOLR_FIELD_NAME_FOR_SIZEBYTES, size + ) + ); + } + + protected void addAndHandleException( + BiFunction> solrIndexAdder, + SolrInputDocument document, Bitstream bitstream, + String field, String value + ) { + try { 
+ solrIndexAdder.apply(document, field).accept(value); + } catch (Exception e) { + logger.warn( + "Error occurred during the update of index field {} for bitstream {}", + field, + bitstream.getID() + ); + } } private void indexBitstreamsMetadatadas(SolrInputDocument document, Bitstream bitstream) { From 61cbe84f1f467bee9cacf49e03f05d47dfb2d1ef Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Tue, 20 Jun 2023 10:31:45 +0000 Subject: [PATCH 6/6] Checkstyle fix for SolrServiceFileInfoPlugin.java --- .../dspace/discovery/SolrServiceFileInfoPlugin.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java index 36fc8f38192..a2c3056ae38 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceFileInfoPlugin.java @@ -116,12 +116,12 @@ public void map(SolrInputDocument document, T value) { }; private static final BiFunction> simpleSolrIndexAdder = - (document, fieldName) -> value -> { - Collection fieldValues = document.getFieldValues(fieldName); - if (fieldValues == null || !fieldValues.contains(value)) { - addField(document, fieldName, value); - } - }; + (document, fieldName) -> value -> { + Collection fieldValues = document.getFieldValues(fieldName); + if (fieldValues == null || !fieldValues.contains(value)) { + addField(document, fieldName, value); + } + }; private static final BiFunction> bitstreamMetadataSolrIndexAdder = (document, fieldName) -> value -> {