Skip to content

Commit

Permalink
Made it possible to change the index at runtime; however, returning en…
Browse files Browse the repository at this point in the history
…tities from two URIs at the same time is impossible as of now, this is related to #55 and #46. If 55 is fixed, 46 should be fixed.
  • Loading branch information
RicardoUsbeck committed Nov 17, 2017
1 parent 74ac788 commit ac5398e
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 4 deletions.
21 changes: 19 additions & 2 deletions src/main/java/org/aksw/agdistis/algorithm/CandidateUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ public class CandidateUtil {

private static Logger log = LoggerFactory.getLogger(CandidateUtil.class);
private String nodeType;
/**
 * Overrides the node type of this candidate utility.
 *
 * The constructor initialises this value from the AGDISTIS_NODE_TYPE
 * environment variable or, if that is unset, from the "nodeType" property;
 * this setter replaces it afterwards.
 *
 * @param nodeType
 *            the new node type (presumably a resource URI prefix such as
 *            "http://dbpedia.org/resource/" -- TODO confirm against usage)
 */
public void setNodeType(String nodeType) {
this.nodeType = nodeType;
}

private TripleIndex index;
private TripleIndexContext index2;
private NGramDistance nGramDistance;
Expand All @@ -47,7 +51,8 @@ public CandidateUtil() throws IOException {
String envNodeType = System.getenv("AGDISTIS_NODE_TYPE");
this.nodeType = envNodeType != null ? envNodeType : prop.getProperty("nodeType");
String envNgramDistance = System.getenv("AGDISTIS_NGRAM_DISTANCE");
this.nGramDistance = new NGramDistance(Integer.valueOf(envNgramDistance != null ? envNgramDistance : prop.getProperty("ngramDistance")));
this.nGramDistance = new NGramDistance(
Integer.valueOf(envNgramDistance != null ? envNgramDistance : prop.getProperty("ngramDistance")));
this.index = new TripleIndex();
String envContext = System.getenv("AGDISTIS_CONTEXT");
this.context = Boolean.valueOf(envContext != null ? envContext : prop.getProperty("context"));
Expand All @@ -61,11 +66,23 @@ public CandidateUtil() throws IOException {
String envAcronym = System.getenv("AGDISTIS_ACRONYM");
this.acronym = Boolean.valueOf(envAcronym != null ? envAcronym : prop.getProperty("acronym"));
String envCommonEntities = System.getenv("AGDISTIS_COMMON_ENTITIES");
this.commonEntities = Boolean.valueOf(envCommonEntities != null ? envCommonEntities : prop.getProperty("commonEntities"));
this.commonEntities = Boolean
.valueOf(envCommonEntities != null ? envCommonEntities : prop.getProperty("commonEntities"));
String envAlgorithm = System.getenv("AGDISTIS_ALGORITHM");
this.algorithm = envAlgorithm != null ? envAlgorithm : prop.getProperty("algorithm");
}

/**
 * Replaces the triple index used by this candidate utility and rebuilds the
 * {@code DomainWhiteLister} on top of the new index.
 *
 * If the white lister cannot be created, the failure is logged together with
 * its cause and the previous white lister stays in place; the new index has
 * already been stored at that point. No exception is propagated to the
 * caller.
 *
 * @param index
 *            the triple index to use from now on
 */
public void setIndex(TripleIndex index) {
    try {
        this.index = index;
        this.domainWhiteLister = new DomainWhiteLister(index);
    } catch (IOException e) {
        // Hand the exception to the logger so the stack trace ends up in the
        // configured log output instead of being dumped on stderr.
        log.error("Could not set new index in Candidate Util due to DomainWhiteLister", e);
    }
}

public void insertCandidatesIntoText(DirectedSparseGraph<Node, String> graph, Document document,
double threshholdTrigram, Boolean heuristicExpansionOn) throws IOException {
NamedEntitiesInText namedEntities = document.getNamedEntitiesInText();
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/org/aksw/agdistis/algorithm/NEDAlgo_HITS.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,5 +147,26 @@ public void setMaxDepth(int maxDepth) {
/**
 * Stores the flag that switches heuristic expansion on or off.
 *
 * @param value
 *            the new value of the heuristic-expansion flag
 */
public void setHeuristicExpansionOn(Boolean value) {
this.heuristicExpansionOn = value;
}
/**
 * Returns the edge type currently configured on this algorithm instance.
 *
 * @return the stored edge type
 */
public String getEdgeType() {
    return this.edgeType;
}

/**
 * Overrides the edge type of this algorithm instance.
 *
 * @param edgeType
 *            the new edge type (presumably a property URI prefix such as
 *            "http://dbpedia.org/ontology/" -- TODO confirm against usage)
 */
public void setEdgeType(String edgeType) {
this.edgeType = edgeType;
}

/**
 * Returns the node type currently configured on this algorithm instance.
 *
 * @return the stored node type
 */
public String getNodeType() {
    return this.nodeType;
}

/**
 * Overrides the node type of this algorithm instance and passes the same
 * value on to the candidate helper held in {@code cu}, so that both use the
 * same node type.
 *
 * @param nodeType
 *            the new node type
 */
public void setNodeType(String nodeType) {
    this.nodeType = nodeType;
    cu.setNodeType(nodeType);
}

/**
 * Replaces the triple index of this algorithm instance and passes the same
 * index on to the candidate helper held in {@code cu}.
 *
 * @param index
 *            the triple index to use from now on
 */
public void setIndex(TripleIndex index) {
    this.index = index;
    cu.setIndex(index);
}

}
7 changes: 7 additions & 0 deletions src/main/java/org/aksw/agdistis/util/TripleIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,19 @@ public TripleIndex() throws IOException {

cache = CacheBuilder.newBuilder().maximumSize(50000).build();
}

/**
 * Points this triple index at a different Lucene index directory at runtime.
 *
 * The new directory and reader are opened first; only when that succeeded
 * are they swapped in. Afterwards the query cache is emptied, because its
 * entries were computed against the previous index, and the previously open
 * reader and directory are closed so that switching indexes does not leak
 * file handles or memory mappings.
 *
 * @param index
 *            file-system path of the Lucene index directory to open
 * @throws IOException
 *             if the new index cannot be opened (the current index then
 *             stays in use) or if closing the previous index fails
 */
public void setIndex(String index) throws IOException {
    MMapDirectory newDirectory = new MMapDirectory(new File(index));
    DirectoryReader newReader;
    boolean opened = false;
    try {
        newReader = DirectoryReader.open(newDirectory);
        opened = true;
    } finally {
        if (!opened) {
            // Do not leak the freshly opened directory when no reader
            // could be created on top of it.
            newDirectory.close();
        }
    }

    // Remember the old resources so they can be released after the swap.
    java.io.Closeable oldReader = ireader;
    java.io.Closeable oldDirectory = directory;

    directory = newDirectory;
    ireader = newReader;
    isearcher = new IndexSearcher(newReader);

    // Cached results belong to the previous index and would be stale now.
    cache.invalidateAll();

    try {
        if (oldReader != null) {
            oldReader.close();
        }
    } finally {
        if (oldDirectory != null) {
            oldDirectory.close();
        }
    }
}

/**
 * Convenience overload of the four-argument search that uses
 * {@code defaultMaxNumberOfDocsRetrievedFromIndex} as the result limit.
 *
 * @param subject
 *            subject passed through to the four-argument search
 * @param predicate
 *            predicate passed through to the four-argument search
 * @param object
 *            object passed through to the four-argument search
 * @return the triples returned by the four-argument search
 */
public List<Triple> search(String subject, String predicate, String object) {
    return this.search(subject, predicate, object, defaultMaxNumberOfDocsRetrievedFromIndex);
}

public List<Triple> search(String subject, String predicate, String object, int maxNumberOfResults) {
System.out.println(predicate +" -> "+object + " : " );
BooleanQuery bq = new BooleanQuery();
List<Triple> triples = new ArrayList<Triple>();

Expand Down
9 changes: 7 additions & 2 deletions src/main/java/org/aksw/agdistis/util/TripleIndexCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public static void main(String args[]) {
log.info("The index will be here: " + index);

String envFolderWithTtlFiles = System.getenv("AGDISTIS_FOLDER_WITH_TTL_FILES");
String folder = envFolderWithTtlFiles != null ? envFolderWithTtlFiles : prop.getProperty("folderWithTTLFiles");
String folder = envFolderWithTtlFiles != null ? envFolderWithTtlFiles
: prop.getProperty("folderWithTTLFiles");
log.info("Getting triple data from: " + folder);
List<File> listOfFiles = new ArrayList<File>();
for (File file : new File(folder).listFiles()) {
Expand Down Expand Up @@ -140,7 +141,11 @@ private void indexTTLFile(File file, String baseURI)
OnlineStatementHandler osh = new OnlineStatementHandler();
parser.setRDFHandler(osh);
parser.setStopAtFirstError(false);
parser.parse(new FileReader(file), baseURI);
if (baseURI == null) {
parser.parse(new FileReader(file), "");
} else {
parser.parse(new FileReader(file), baseURI);
}
log.info("Finished parsing: " + file);
}

Expand Down
133 changes: 133 additions & 0 deletions src/test/java/TripleIndexCreatorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@

import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;

import org.aksw.agdistis.algorithm.CandidateUtil;
import org.aksw.agdistis.algorithm.NEDAlgo_HITS;
import org.aksw.agdistis.datatypes.Document;
import org.aksw.agdistis.datatypes.NamedEntitiesInText;
import org.aksw.agdistis.datatypes.NamedEntityInText;
import org.aksw.agdistis.util.Triple;
import org.aksw.agdistis.util.TripleIndex;
import org.aksw.agdistis.util.TripleIndexCreator;
import org.aksw.agdistis.webapp.GetDisambiguation;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

/**
 * Exercises switching the triple index at runtime: a small index is built
 * from a test TTL file, plugged into {@link NEDAlgo_HITS} and a pre-annotated
 * sentence is disambiguated against it with two different node types.
 *
 * The test currently only prints the results; the assertions are still
 * disabled (see the TODO notes below).
 */
public class TripleIndexCreatorTest {

    Logger log = LoggerFactory.getLogger(TripleIndexCreatorTest.class);
    private TripleIndex index;

    /**
     * Opens the default triple index before each test. A failure is only
     * logged, in which case {@code index} stays {@code null}.
     */
    @Before
    public void init() {
        try {
            index = new TripleIndex();
        } catch (IOException e) {
            log.error(
                    "Can not load index or DBpedia repository due to either wrong properties in agdistis.properties or missing index at location",
                    e);
        }
    }

    /**
     * Closes the triple index after each test, if one could be opened.
     */
    @After
    public void close() {
        if (index == null) {
            // init() failed; there is nothing to close.
            return;
        }
        try {
            index.close();
        } catch (IOException e) {
            log.error("Can not close index", e);
        }
    }

    /**
     * Tests https://github.com/dice-group/AGDISTIS/issues/46: AGDISTIS breaks
     * if it needs to return URIs from two different knowledge bases.
     *
     * @throws IOException
     *             if the test index cannot be created or opened
     */
    @Test
    public void testMinimalOntologyExample() throws IOException {
        // load test data into index
        TripleIndexCreator tic = new TripleIndexCreator();
        File file = new File("src/test/resources/test_evertec.ttl");
        File folder = new File("src/test/resources/evertec");
        if (folder.exists()) {
            // File.delete() refuses to remove a non-empty directory, so an
            // index left over from a previous run has to be removed
            // recursively.
            deleteRecursively(folder);
        }
        tic.createIndex(Lists.newArrayList(file), folder.getAbsolutePath(), null);

        // set the properties correctly
        NEDAlgo_HITS agdistis = new NEDAlgo_HITS();
        agdistis.setNodeType("http://fairhair.ai/kg/resource/");
        agdistis.setEdgeType("http://dbpedia.org/ontology/");

        // load index
        index.setIndex(folder.getAbsolutePath());
        agdistis.setIndex(index);

        // test index
        String evertec = "Evertec";
        String evertecURL = "http://fairhair.ai/kg/resource/Evertec";
        String puertoRico = "Puerto Rico";
        String puertoRicoURL = "http://dbpedia.org/resource/Puerto_Rico";

        // Expected label -> URI mapping; it is only consumed once the
        // disabled assertion in disambiguateAndPrint is enabled.
        HashMap<String, String> correct = new HashMap<String, String>();
        correct.put(evertec, evertecURL);
        correct.put(puertoRico, puertoRicoURL);

        // NOTE(review): there is no space between "in" and the second
        // <entity> tag -- confirm whether that is intended.
        String preAnnotatedText = "<entity>" + evertec + "</entity> is a company in<entity>" + puertoRico
                + "</entity>.";

        disambiguateAndPrint(agdistis, preAnnotatedText);

        // TODO if we change the node type, Puerto Rico gets found but not Evertec
        agdistis.setNodeType("http://dbpedia.org/resource/");

        disambiguateAndPrint(agdistis, preAnnotatedText);
    }

    /**
     * Runs AGDISTIS on the given pre-annotated text and prints every named
     * entity followed by the "label -> URI" pairs that were found.
     *
     * @param agdistis
     *            the configured algorithm instance
     * @param preAnnotatedText
     *            text whose entities are marked with {@code <entity>} tags
     * @throws IOException
     *             if the disambiguation fails with an I/O error
     */
    private void disambiguateAndPrint(NEDAlgo_HITS agdistis, String preAnnotatedText) throws IOException {
        Document d = GetDisambiguation.textToDocument(preAnnotatedText);
        agdistis.run(d, null);

        NamedEntitiesInText namedEntities = d.getNamedEntitiesInText();
        HashMap<NamedEntityInText, String> results = new HashMap<NamedEntityInText, String>();
        for (NamedEntityInText namedEntity : namedEntities) {
            System.out.println(namedEntity);
            results.put(namedEntity, namedEntity.getNamedEntityUri());
        }
        for (NamedEntityInText namedEntity : results.keySet()) {
            System.out.println(namedEntity.getLabel() + " -> " + results.get(namedEntity));
            // TODO enable an assertion here that compares the URI found for
            // namedEntity.getLabel() with the expected URI once issue #46 is
            // fixed.
        }
    }

    /**
     * Deletes a file, or a directory together with everything below it.
     *
     * @param target
     *            the file or directory to remove
     */
    private void deleteRecursively(File target) {
        File[] children = target.listFiles(); // null when target is not a directory
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        if (!target.delete()) {
            log.warn("Could not delete {}", target.getAbsolutePath());
        }
    }

}

0 comments on commit ac5398e

Please sign in to comment.