Skip to content

Commit

Permalink
beacon file script
Browse files Browse the repository at this point in the history
Script for generating files according to the BEACON standard (see https://gbv.github.io/beaconspec/beacon.html). It gets all (visible) entities that carry some identifier and prints them into one process result file or, optionally, dumps them to the handler log. The format is mainly popular among GLAM systems using the GND identifier in the DACH area. The header lines can be set in the configuration.
  • Loading branch information
floriangantner committed Oct 26, 2023
1 parent cc63d56 commit dc4cba8
Show file tree
Hide file tree
Showing 5 changed files with 327 additions and 0 deletions.
226 changes: 226 additions & 0 deletions dspace-api/src/main/java/org/dspace/beacon/BeaconFileScript.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.beacon;

import static org.dspace.core.Constants.READ;
import static org.dspace.eperson.Group.ANONYMOUS;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.TimeZone;
import java.util.UUID;

import org.apache.commons.cli.ParseException;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.scripts.DSpaceRunnable;
import org.dspace.services.ConfigurationService;
import org.dspace.utils.DSpace;

/**
 * Script to generate the beacon list.
 * The configuration for the metadata field and the resolver can be taken from the beacon.cfg file.
 * It is possible to use one field for some identifier (e.g. dc.identifier.other) and identify the corresponding values
 * using their resolver,
 * e.g. all values in dc.identifier.other which start with <a href="http://d-nb.info/gnd/">...</a> .
 * Alternative resolvers can be
 * specified (optionally) which are later normalized to the main identifier.
 * Output (exactly one of the two has to be chosen):
 * - verbose: on handler log for debugging
 * - file: print result as process result file with the given filename from the parameter
 * For the specification see: <a href="https://gbv.github.io/beaconspec/beacon.html">...</a>
 *
 * @author Florian Gantner ([email protected])
 *
 */
public class BeaconFileScript extends DSpaceRunnable<BeaconFileScriptConfiguration<BeaconFileScript>> {

    /** Prefix of the configuration keys whose values become header lines. */
    private static final String HEADER_KEY_PREFIX = "beacon.header.";

    private AuthorizeService authorizeService;

    private ItemService itemService;

    private ConfigurationService configurationService;

    private Context context;

    /** {@code true} when the result is written to the handler log (option -v). */
    private boolean verbose;

    /** Name of the process result file (option -f), {@code null} when the option is absent. */
    private String printFile = null;

    /** Metadata field holding the identifiers, read from {@code beacon.metadatafield}. */
    private String metadatafield;

    /**
     * Resolves the required services and reads the command line options and the configured metadata field.
     *
     * @throws ParseException declared by the overridden method
     */
    @Override
    public void setup() throws ParseException {
        this.authorizeService = AuthorizeServiceFactory.getInstance().getAuthorizeService();
        this.itemService = ContentServiceFactory.getInstance().getItemService();
        this.configurationService = new DSpace().getConfigurationService();
        this.metadatafield = configurationService.getProperty("beacon.metadatafield");
        this.verbose = commandLine.hasOption('v');
        if (commandLine.hasOption('f')) {
            this.printFile = commandLine.getOptionValue('f');
        }
    }

    /**
     * Builds the beacon file and sends it to the selected output.
     * The context is read-only for the verbose output and read-write when a process result file is written.
     *
     * @throws IllegalArgumentException if not exactly one output option was given or the user is not an admin
     * @throws Exception                if the current user or the admin state cannot be determined
     */
    @Override
    public void internalRun() throws Exception {

        // both "no option" and "both options" are rejected
        if (verbose == (printFile != null)) {
            throw new IllegalArgumentException("Exactly one of the output options (-v or -f) must be specified");
        }

        context = new Context(verbose ? Context.Mode.READ_ONLY : Context.Mode.READ_WRITE);

        // the context is opened above, so every failure from here on has to release it again
        try {
            assignCurrentUserInContext();
            assignSpecialGroupsInContext();

            if (!this.authorizeService.isAdmin(context)) {
                throw new IllegalArgumentException("The user cannot generate the beacon file");
            }

            writeBeacon(buildHeader());
        } finally {
            if (context.isValid()) {
                context.close();
            }
        }
    }

    /**
     * Creates the header lines: the fixed FORMAT line, one line per configured {@code beacon.header.*} property
     * and the TIMESTAMP line.
     *
     * @return the header block, every line terminated by the system line separator
     */
    private String buildHeader() {
        String eol = System.lineSeparator();
        StringBuilder header = new StringBuilder();

        header.append("#FORMAT: Beacon").append(eol);
        for (String propertykey : configurationService.getPropertyKeys("beacon.header")) {
            String value = configurationService.getProperty(propertykey);
            if (value == null) {
                continue;
            }
            // strip the prefix only; a plain replace would also touch later occurrences inside the key
            String key = propertykey.startsWith(HEADER_KEY_PREFIX)
                ? propertykey.substring(HEADER_KEY_PREFIX.length()) : propertykey;
            // Locale.ROOT keeps the upper-casing independent of the default locale of the JVM
            header.append("#").append(key.toUpperCase(java.util.Locale.ROOT))
                  .append(": ").append(value).append(eol);
        }

        Date date = new Date(System.currentTimeMillis());
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX");
        sdf.setTimeZone(TimeZone.getTimeZone(ZoneId.of("Europe/Berlin")));
        header.append("#TIMESTAMP: ").append(sdf.format(date)).append(eol);

        return header.toString();
    }

    /**
     * Appends one identifier line per visible item to the header and sends the result to the selected output.
     * Any failure is reported to the handler and aborts the context, so a failed file write is no longer
     * followed by the success message.
     *
     * @param header the header block created by {@link #buildHeader()}
     */
    private void writeBeacon(String header) {
        String mainresolver = configurationService.getProperty("beacon.mainresolver");
        String[] additionalresolvers = configurationService.getArrayProperty("beacon.additionalresolver");

        try {
            // read once, the value does not change while the items are processed
            boolean filterByResolver =
                configurationService.getBooleanProperty("beacon.metadatafield.filterresolver");
            StringBuilder sb = new StringBuilder(header);

            // filter inactive/hidden profiles
            Iterator<Item> items = itemService.findArchivedByMetadataField(context, metadatafield, Item.ANY);
            List<Item> itemlist = new ArrayList<>();
            while (items.hasNext()) {
                Item item = items.next();
                if (item.isDiscoverable() && isVisible(item)) {
                    itemlist.add(item);
                }
            }

            for (Item item : itemlist) {
                List<MetadataValue> mvals = itemService.getMetadataByMetadataString(item, metadatafield);
                for (MetadataValue mval : mvals) {
                    String identifier =
                        normalizeIdentifier(mval.getValue(), mainresolver, additionalresolvers, filterByResolver);
                    if (identifier != null) {
                        sb.append(identifier).append(System.lineSeparator());
                        // only the first usable value of an item is listed
                        break;
                    }
                }
            }

            String result = sb.toString();
            if (verbose) {
                handler.logInfo(result);
            } else {
                handler.writeFilestream(context, printFile,
                    new ByteArrayInputStream(result.getBytes(StandardCharsets.UTF_8)), "BEACON");
            }
            context.complete();
            handler.logInfo("Beacon file completed successfully");
        } catch (Exception e) {
            handler.handleException(e);
            context.abort();
        }
    }

    /**
     * Removes the resolver prefix from a metadata value.
     *
     * @param value               the raw metadata value, may be {@code null}
     * @param mainresolver        the main resolver prefix, may be {@code null}
     * @param additionalresolvers further accepted resolver prefixes, may be {@code null}
     * @param filterByResolver    whether values without a known resolver prefix are dropped
     * @return the value without its resolver prefix, the unchanged value if no resolver matches and filtering is
     *         off, or {@code null} if the value has to be skipped
     */
    private static String normalizeIdentifier(String value, String mainresolver, String[] additionalresolvers,
                                              boolean filterByResolver) {
        if (value == null) {
            return null;
        }
        String resolver = matchingResolver(value, mainresolver, additionalresolvers);
        if (resolver == null) {
            return filterByResolver ? null : value;
        }
        // cut the leading resolver only, the remainder of the value stays untouched
        return value.substring(resolver.length());
    }

    /**
     * Finds the resolver the value starts with. The main resolver is checked before the additional ones.
     *
     * @return the matching resolver prefix or {@code null} if none matches
     */
    private static String matchingResolver(String value, String mainresolver, String[] additionalresolvers) {
        if (mainresolver != null && value.startsWith(mainresolver)) {
            return mainresolver;
        }
        if (additionalresolvers != null) {
            for (String additionalresolver : additionalresolvers) {
                if (additionalresolver != null && value.startsWith(additionalresolver)) {
                    return additionalresolver;
                }
            }
        }
        return null;
    }

    /**
     * Looks up the script configuration registered under the bean name {@code beacon-file}.
     *
     * @return the configuration of this script
     */
    @Override
    @SuppressWarnings("unchecked")
    public BeaconFileScriptConfiguration<BeaconFileScript> getScriptConfiguration() {
        return new DSpace().getServiceManager().getServiceByName("beacon-file",
            BeaconFileScriptConfiguration.class);
    }

    /**
     * Sets the eperson that started the script, if there is one, as current user of the context.
     */
    private void assignCurrentUserInContext() throws SQLException {
        UUID uuid = getEpersonIdentifier();
        if (uuid != null) {
            EPerson ePerson = EPersonServiceFactory.getInstance().getEPersonService().find(context, uuid);
            context.setCurrentUser(ePerson);
        }
    }

    /**
     * Copies the special groups known to the handler into the context.
     */
    private void assignSpecialGroupsInContext() throws SQLException {
        for (UUID uuid : handler.getSpecialGroups()) {
            context.setSpecialGroup(uuid);
        }
    }

    /**
     * Checks whether the item has a READ policy for the anonymous group.
     *
     * @param item the item to check
     * @return {@code true} if at least one resource policy grants READ to the group named like
     *         {@code Group.ANONYMOUS}
     */
    public boolean isVisible(Item item) {
        return item.getResourcePolicies().stream()
            .filter(policy -> policy.getGroup() != null)
            .anyMatch(policy -> READ == policy.getAction() && ANONYMOUS.equals(policy.getGroup().getName()));
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.beacon;

import java.sql.SQLException;

import org.apache.commons.cli.Options;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.core.Context;
import org.dspace.scripts.configuration.ScriptConfiguration;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * {@link ScriptConfiguration} of the {@link BeaconFileScript}.
 * Execution is limited to administrators; the script accepts the options {@code -v} (verbose) and
 * {@code -f} (file).
 *
 * @author Florian Gantner ([email protected])
 */
public class BeaconFileScriptConfiguration<T extends BeaconFileScript> extends ScriptConfiguration<T> {

    @Autowired
    private AuthorizeService authorizeService;

    private Class<T> dspaceRunnableClass;

    /**
     * @param context the context whose user is checked
     * @return {@code true} if the authorize service reports an admin for the context
     * @throws RuntimeException wrapping the {@link SQLException} of a failed check
     */
    @Override
    public boolean isAllowedToExecute(Context context) {
        final boolean admin;
        try {
            admin = authorizeService.isAdmin(context);
        } catch (SQLException e) {
            throw new RuntimeException("SQLException occurred when checking if the current user is an admin", e);
        }
        return admin;
    }

    /**
     * Creates the option set on first use and stores it in the inherited {@code options} field.
     *
     * @return the options of the script
     */
    @Override
    public Options getOptions() {
        if (options != null) {
            return options;
        }

        Options beaconOptions = new Options();

        // -v: optional flag without argument
        beaconOptions.addOption("v", "verbose", false, "print out result on handler log");
        beaconOptions.getOption("v").setType(boolean.class);
        beaconOptions.getOption("v").setRequired(false);

        // -f: optional, takes the file name as argument
        beaconOptions.addOption("f", "file", true, "print to file and specify filename, e.g. beacon.txt");
        beaconOptions.getOption("f").setType(String.class);
        beaconOptions.getOption("f").setRequired(false);

        super.options = beaconOptions;
        return super.options;
    }

    @Override
    public Class<T> getDspaceRunnableClass() {
        return dspaceRunnableClass;
    }

    @Override
    public void setDspaceRunnableClass(Class<T> dspaceRunnableClass) {
        this.dspaceRunnableClass = dspaceRunnableClass;
    }

}
1 change: 1 addition & 0 deletions dspace/config/dspace.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -1997,3 +1997,4 @@ include = ${module_dir}/pushocr.cfg
include = ${module_dir}/pushocr.force.cfg
include = ${module_dir}/cleanup-authority-metadata-relation.cfg
include = ${module_dir}/ror.cfg
include = ${module_dir}/beacon.cfg
28 changes: 28 additions & 0 deletions dspace/config/modules/beacon.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# These Properties fill the header lines in the generated beacon file
# the metadatafield containing the gnd identifier
beacon.metadatafield = dc.identifier.gnd

# if enabled, only values starting with the mainresolver or one of the additionalresolver values are used
# this can be applied when the identifier field contains multiple identifiers, e.g. dc.identifier.other with prefixes
#beacon.metadatafield.filterresolver = true

# the main resolver
# values from beacon.additionalresolver are normalized to this value
beacon.mainresolver = http://d-nb.info/gnd/
# these are additional resolvers which are normalized. repeatable
#beacon.additionalresolver = https://d-nb.info/gnd/
beacon.additionalresolver = https://d-nb.info/gnd/

## Header Lines
# FORMAT AND TIMESTAMP will always be added.
# the keys under beacon.header will be used for the HEADER Line starting with #
beacon.header.description = Professorinnen- und Professorenkatalog der Otto-Friedrich-Universität Bamberg
beacon.header.homepage = https://professorenkatalog.uni-bamberg.de
#beacon.header.contact = [email protected]
beacon.header.creator = Professorinnen- und Professorenkatalog der Otto-Friedrich-Universität Bamberg
beacon.header.message = Professorinnen- und Professorenkatalog der Otto-Friedrich-Universität Bamberg
beacon.header.prefix = ${beacon.mainresolver}{ID}
# target link. might differ when the gnd can be resolved under some own path, e.g. /gnd/{ID}
beacon.header.target = https://professorenkatalog.uni-bamberg.de/search?configuration\=default&q\=dc.identifier.gnd:{ID}
beacon.header.feed = https://professorenkatalog.uni-bamberg.de/beacon
beacon.header.update = MONTHLY
5 changes: 5 additions & 0 deletions dspace/config/spring/api/scripts.xml
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,9 @@
<property name="dspaceRunnableClass" value="org.dspace.app.metadata.export.MetadataSchemaExportCliScript"/>
</bean>

<!-- Beacon file script: the bean id "beacon-file" is the name under which BeaconFileScript looks up
     its configuration; dspaceRunnableClass names the script class that is executed. -->
<bean id="beacon-file" class="org.dspace.beacon.BeaconFileScriptConfiguration">
<property name="description" value="Generate some beacon file of some configured identifier and print it to the specified option (handler, file)"/>
<property name="dspaceRunnableClass" value="org.dspace.beacon.BeaconFileScript"/>
</bean>

</beans>

0 comments on commit dc4cba8

Please sign in to comment.