Skip to content

Commit

Permalink
Added a simple example class showing the generation of a NIF corpus.
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelRoeder committed Jun 17, 2015
1 parent 64e6e96 commit b4f4135
Showing 1 changed file with 77 additions and 0 deletions.
77 changes: 77 additions & 0 deletions src/test/java/org/aksw/gerbil/CorpusGenerationExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package org.aksw.gerbil;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.aksw.gerbil.io.nif.DocumentListParser;
import org.aksw.gerbil.io.nif.DocumentListWriter;
import org.aksw.gerbil.io.nif.NIFParser;
import org.aksw.gerbil.io.nif.NIFWriter;
import org.aksw.gerbil.io.nif.impl.TurtleNIFParser;
import org.aksw.gerbil.io.nif.impl.TurtleNIFWriter;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.NIFTransferPrefixMapping;
import org.aksw.gerbil.transfer.nif.data.Annotation;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
import org.junit.Ignore;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

/**
* This class contains a simple example, showing how a developer could create a
* simple NIF corpus.
*
* @author Michael Röder ([email protected])
*
*/
@Ignore
class CorpusGenerationExample {

public static void main(String[] args) {

String text = "Japan (Japanese: 日本 Nippon or Nihon) is a stratovolcanic archipelago of 6,852 islands.";
Document document = new DocumentImpl(text, "http://example.org/document0");

// Add the marking for "Japan"
Set<String> uris = new HashSet<String>();
uris.add("http://example.org/Japan");
Set<String> types = new HashSet<String>();
types.add("http://example.org/Country");
types.add("http://example.org/StratovolcanicArchipelago");
document.addMarking(new TypedNamedEntity(0, 5, uris, types));

// Add the marking for "stratovolcanic archipelago"
uris = new HashSet<String>();
uris.add("http://example.org/StratovolcanicArchipelago");
types = new HashSet<String>();
types.add("http://example.org/Archipelago");
types.add("http://www.w3.org/2000/01/rdf-schema#Class");
document.addMarking(new TypedNamedEntity(42, 26, uris, types));

// Add a marking showing that this document has geographical content
uris = new HashSet<String>();
uris.add("http://example.org/Geography");
document.addMarking(new Annotation(uris));

List<Document> documents = new ArrayList<Document>();
documents.add(document);

// Writing our new list of documents to a String
NIFWriter writer = new TurtleNIFWriter();
String nifString = writer.writeNIF(documents);
System.out.println(nifString);

// After generating a NIF corpus, it can be helpful to parse the NIF using a `NIFParser` instance.
NIFParser parser = new TurtleNIFParser();
parser.parseNIF(nifString);

// Instead of text containing the NIF information, a jena RDF `Model` can be created.
DocumentListWriter listWriter = new DocumentListWriter();
Model nifModel = ModelFactory.createDefaultModel();
listWriter.writeDocumentsToModel(nifModel, documents);
}
}

0 comments on commit b4f4135

Please sign in to comment.