From b4f41353d07d41d3751a03c019083e81f46e0fc8 Mon Sep 17 00:00:00 2001
From: MichaelRoeder
Date: Wed, 17 Jun 2015 17:31:43 +0200
Subject: [PATCH] Added a simple example class showing the generation of a NIF corpus.

---
 .../aksw/gerbil/CorpusGenerationExample.java | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 src/test/java/org/aksw/gerbil/CorpusGenerationExample.java

diff --git a/src/test/java/org/aksw/gerbil/CorpusGenerationExample.java b/src/test/java/org/aksw/gerbil/CorpusGenerationExample.java
new file mode 100644
index 000000000..de90ad5bb
--- /dev/null
+++ b/src/test/java/org/aksw/gerbil/CorpusGenerationExample.java
@@ -0,0 +1,77 @@
+package org.aksw.gerbil;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.aksw.gerbil.io.nif.DocumentListParser;
+import org.aksw.gerbil.io.nif.DocumentListWriter;
+import org.aksw.gerbil.io.nif.NIFParser;
+import org.aksw.gerbil.io.nif.NIFWriter;
+import org.aksw.gerbil.io.nif.impl.TurtleNIFParser;
+import org.aksw.gerbil.io.nif.impl.TurtleNIFWriter;
+import org.aksw.gerbil.transfer.nif.Document;
+import org.aksw.gerbil.transfer.nif.NIFTransferPrefixMapping;
+import org.aksw.gerbil.transfer.nif.data.Annotation;
+import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
+import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
+import org.junit.Ignore;
+
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+
+/**
+ * This class contains a simple example, showing how a developer could create a
+ * simple NIF corpus.
+ *
+ * @author Michael Röder (roeder@informatik.uni-leipzig.de)
+ *
+ */
+@Ignore
+class CorpusGenerationExample {
+    /** Builds one annotated document, prints its NIF string, re-parses it and writes it into a Jena model. */
+    public static void main(String[] args) {
+
+        String text = "Japan (Japanese: 日本 Nippon or Nihon) is a stratovolcanic archipelago of 6,852 islands.";
+        Document document = new DocumentImpl(text, "http://example.org/document0");
+
+        // Add the marking for "Japan" (offset 0 in text, 5 characters)
+        Set<String> uris = new HashSet<String>();
+        uris.add("http://example.org/Japan");
+        Set<String> types = new HashSet<String>();
+        types.add("http://example.org/Country");
+        types.add("http://example.org/StratovolcanicArchipelago");
+        document.addMarking(new TypedNamedEntity(0, 5, uris, types));
+
+        // Add the marking for "stratovolcanic archipelago" (offset 42 in text, 26 characters)
+        uris = new HashSet<String>();
+        uris.add("http://example.org/StratovolcanicArchipelago");
+        types = new HashSet<String>();
+        types.add("http://example.org/Archipelago");
+        types.add("http://www.w3.org/2000/01/rdf-schema#Class");
+        document.addMarking(new TypedNamedEntity(42, 26, uris, types));
+
+        // Add a marking showing that this document has geographical content
+        uris = new HashSet<String>();
+        uris.add("http://example.org/Geography");
+        document.addMarking(new Annotation(uris));
+
+        List<Document> documents = new ArrayList<Document>();
+        documents.add(document);
+
+        // Writing our new list of documents to a String
+        NIFWriter writer = new TurtleNIFWriter();
+        String nifString = writer.writeNIF(documents);
+        System.out.println(nifString);
+
+        // After generating a NIF corpus, it can be helpful to parse the NIF using a `NIFParser` instance.
+        NIFParser parser = new TurtleNIFParser();
+        parser.parseNIF(nifString);
+
+        // Instead of text containing the NIF information, a jena RDF `Model` can be created.
+        DocumentListWriter listWriter = new DocumentListWriter();
+        Model nifModel = ModelFactory.createDefaultModel();
+        listWriter.writeDocumentsToModel(nifModel, documents);
+    }
+}