diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/quad/QuadString.java b/hdt-api/src/main/java/org/rdfhdt/hdt/quad/QuadString.java index e767dae7..1dfc93cb 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/quad/QuadString.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/quad/QuadString.java @@ -30,6 +30,10 @@ public void clear() { @Override public boolean equals(Object other) { if (!(other instanceof QuadString)) { + if (context.length() == 0) { + // not a quad string, maybe it is a TripleString + return super.equals(other); + } return false; } QuadString qs = (QuadString) other; @@ -116,6 +120,9 @@ public QuadString tripleToString() { @Override public String toString() { + if (context.length() == 0) { + return super.toString(); + } return super.toString() + " " + context; } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/impl/HashQuadDictionary.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/impl/HashQuadDictionary.java index 4897e633..181d8540 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/impl/HashQuadDictionary.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/impl/HashQuadDictionary.java @@ -123,14 +123,21 @@ public void reorganize(TempTriples triples) { mapPred.setNewID(j, this.stringToId(mapPred.getString(j), TripleComponentRole.PREDICATE)); } - for(long j=0;j iterator long size=0; while(iterator.hasNext()) { TripleString triple = iterator.next(); - triples.insert( - dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), - dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), - dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT) - ); + if (dictionary.supportGraphs()) { + triples.insert( + dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), + dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), + dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT), + dictionary.insert(triple.getGraph(), TripleComponentRole.GRAPH) + ); + size+=triple.getSubject().length()+triple.getPredicate().length()+triple.getObject().length()+triple.getGraph().length()+5; // Spaces and final dot + } else { + triples.insert( + dictionary.insert(triple.getSubject(), TripleComponentRole.SUBJECT), + dictionary.insert(triple.getPredicate(), TripleComponentRole.PREDICATE), + dictionary.insert(triple.getObject(), TripleComponentRole.OBJECT) + ); + size+=triple.getSubject().length()+triple.getPredicate().length()+triple.getObject().length()+4; // Spaces and final dot + } num++; - size+=triple.getSubject().length()+triple.getPredicate().length()+triple.getObject().length()+4; // Spaces and final dot ListenerUtil.notifyCond(listener, "Loaded "+num+" triples", num, 0, 100); } dictionary.endProcessing(); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/quads/impl/BitmapQuadsIteratorG.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/quads/impl/BitmapQuadsIteratorG.java index 18663cbf..c90dbbfd 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/quads/impl/BitmapQuadsIteratorG.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/quads/impl/BitmapQuadsIteratorG.java @@ -10,7 +10,7 @@ public class BitmapQuadsIteratorG extends BitmapTriplesIterator { // resolves ???G, S??G, SP?G, SPOG queries - private Bitmap bitmapGraph; // the graph bitmap for the search + private final Bitmap bitmapGraph; // the graph bitmap for the search public BitmapQuadsIteratorG(BitmapTriples triples, TripleID pattern) { super(); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/rdf/parsers/JenaNodeFormatter.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/rdf/parsers/JenaNodeFormatter.java index 7cb8ff68..f3b4da69 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/rdf/parsers/JenaNodeFormatter.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/rdf/parsers/JenaNodeFormatter.java @@ -26,6 +26,7 @@ import org.apache.jena.datatypes.xsd.impl.RDFLangString; import org.apache.jena.graph.Node; import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.sparql.core.Quad; /** * Converts a Jena {@link Node} to a String format that will round trip back to the same Node via @@ -42,7 +43,7 @@ public static String format(RDFNode n) { } public static String format(Node node) { - if (node == null) { + if (node == null || Quad.isDefaultGraph(node)) { return ""; } if (node.isURI()) { diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/TriplesList.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/TriplesList.java index 6c573603..65e0f573 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/TriplesList.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/TriplesList.java @@ -536,7 +536,7 @@ public void replaceAllIds( (int)mapGraph.getNewID(triple.getGraph() -1) ); } else { - throw new RuntimeException("You must call the replaceAllIds method without a DictionaryIDMapping for graphs if the triples are not quads."); + throw new IllegalArgumentException("You must call the replaceAllIds method without a DictionaryIDMapping for graphs if the triples are not quads."); } } } @@ -550,7 +550,7 @@ public void replaceAllIds( sorted=false; for(TripleIDInt triple : arrayOfTriples) { if (triple.isQuad()) { - throw new RuntimeException("You must call the replaceAllIds method with a DictionaryIDMapping for graphs if the triples are quads."); + throw new IllegalArgumentException("You must call the replaceAllIds method with a DictionaryIDMapping for graphs if the triples are quads."); } else { triple.setAll( (int)mapSubj.getNewID(triple.getSubject() -1), diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/hdt/HDTManagerTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/hdt/HDTManagerTest.java index e5223a1a..e6c4c468 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/hdt/HDTManagerTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/hdt/HDTManagerTest.java @@ -17,9 +17,11 @@ import org.rdfhdt.hdt.dictionary.impl.MultipleBaseDictionary; import org.rdfhdt.hdt.enums.CompressionType; import org.rdfhdt.hdt.enums.RDFNotation; +import org.rdfhdt.hdt.enums.TripleComponentRole; import org.rdfhdt.hdt.exceptions.NotFoundException; import org.rdfhdt.hdt.exceptions.ParserException; import org.rdfhdt.hdt.hdt.impl.diskimport.CompressionResult; +import org.rdfhdt.hdt.iterator.utils.FetcherIterator; import org.rdfhdt.hdt.iterator.utils.PipedCopyIterator; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.options.HDTOptions; @@ -28,8 +30,10 @@ import org.rdfhdt.hdt.rdf.RDFFluxStop; import org.rdfhdt.hdt.rdf.RDFParserFactory; import org.rdfhdt.hdt.triples.IteratorTripleID; +import org.rdfhdt.hdt.triples.IteratorTripleString; import org.rdfhdt.hdt.triples.TripleID; import org.rdfhdt.hdt.triples.TripleString; +import org.rdfhdt.hdt.triples.impl.BitmapTriplesIteratorPositionTest; import org.rdfhdt.hdt.triples.impl.utils.HDTTestUtils; import org.rdfhdt.hdt.util.LargeFakeDataSetStreamSupplier; import org.rdfhdt.hdt.util.StopWatch; @@ -40,6 +44,7 @@ import org.rdfhdt.hdt.util.string.CharSequenceComparator; import org.rdfhdt.hdt.util.string.ReplazableString; +import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -49,11 +54,13 @@ import java.util.Collection; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Random; +import java.util.Set; import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -67,7 +74,8 @@ HDTManagerTest.DynamicDiskTest.class, HDTManagerTest.DynamicCatTreeTest.class, HDTManagerTest.FileDynamicTest.class, - HDTManagerTest.StaticTest.class + HDTManagerTest.StaticTest.class, + HDTManagerTest.HDTQTest.class, }) public class HDTManagerTest { public static class HDTManagerTestBase extends AbstractMapMemoryTest implements ProgressListener { @@ -254,6 +262,7 @@ protected static void printHex(CharSequence seq) { } System.out.println(); } + protected static void printBin(CharSequence seq) { ByteString bs = ByteString.of(seq); byte[] buffer = bs.getBuffer(); @@ -302,9 +311,9 @@ public static Collection params() { for (String mode : modes) { params.addAll(List.of( new Object[]{"base-w" + threads + "-" + mode, SIZE_VALUE * 8, 20, 50, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""}, - new Object[]{"duplicates-w" + threads + "-" + mode, SIZE_VALUE * 8, 10, 50, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""}, - new Object[]{"large-literals-w" + threads + "-" + mode, SIZE_VALUE * 2, 20, 250, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""}, - new Object[]{"quiet-w" + threads + "-" + mode, SIZE_VALUE * 8, 10, 50, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""} + new Object[]{"duplicates-w" + threads + "-" + mode, SIZE_VALUE * 8, 10, 50, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""}, + new Object[]{"large-literals-w" + threads + "-" + mode, SIZE_VALUE * 2, 20, 250, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""}, + new Object[]{"quiet-w" + threads + "-" + mode, SIZE_VALUE * 8, 10, 50, threads, mode, false, dict[0], dict[1], SIZE_VALUE, ""} )); } } @@ -964,7 +973,245 @@ public void diffMultiSectTest() throws ParserException, IOException, NotFoundExc } } } + } + + @RunWith(Parameterized.class) + public static class HDTQTest extends HDTManagerTestBase { + @Parameterized.Parameters(name = "default graph:{0}") + public static Collection params() { + return List.of(true, false); + } + + @Parameterized.Parameter + public boolean useDefaultGraph; + + private LargeFakeDataSetStreamSupplier createSupplier() { + // fake data generation + return LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(10000, 42) + .withNoDefaultGraph(!useDefaultGraph) + .withQuads(true); + } + + private void hdtqTesd(LargeFakeDataSetStreamSupplier supplier, Path d) throws NotFoundException, IOException { + // run test + Comparator csc = CharSequenceComparator.getInstance(); + try (HDT h = HDTManager.mapIndexedHDT(d)) { + Path indexFile = d.resolveSibling(d.getFileName() + HDTVersion.get_index_suffix("-")); + assertTrue("can't find " + indexFile, Files.exists(indexFile)); + supplier.reset(); + Iterator it = supplier.createTripleStringStream(); + Set dataset = new HashSet<>(); + while (it.hasNext()) { + dataset.add(it.next().tripleToString()); + } + + supplier.reset(); + long count = 0; + for (TripleString ts : (Iterable)supplier::createTripleStringStream) { + count++; + TripleString tsstr = ts.tripleToString(); + assertTrue("can't find " + tsstr, dataset.contains(tsstr)); + CharSequence graph = ts.getGraph(); + if (graph.length() == 0) { + IteratorTripleString it2 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject()); + + // search until we have no graph + while (true) { + assertTrue(it2.hasNext()); + TripleString ts2 = it2.next(); + if (ts2.getGraph().length() == 0) { + assertEquals(ts, ts2); + break; + } + } + } else { + IteratorTripleString it2 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject(), graph); + assertTrue(it2.hasNext()); + TripleString ts2 = it2.next(); + assertEquals(ts, ts2); + assertFalse(it2.hasNext()); + + // empty search to check wildcard + IteratorTripleString it3 = h.search(ts.getSubject(), ts.getPredicate(), ts.getObject(), ""); + while (true) { + assertTrue(it3.hasNext()); + TripleString ts3 = it3.next(); + if (csc.compare(ts3.getGraph(), graph) == 0) { + assertEquals(ts, ts3); + break; + } + } + } + } + + assertEquals(dataset.size(), count); + + { + IteratorTripleString itSearch = h.search("", "", "", ""); + long count2 = 0; + while (itSearch.hasNext()) { + count2++; + TripleString ts = itSearch.next(); + TripleString tsstr = ts.tripleToString(); + assertTrue("can't find " + tsstr, dataset.contains(tsstr)); + + } + assertEquals(dataset.size(), count2); + } + + // FOQ INDEX TEST + + StringBuilder roleDesc = new StringBuilder(); + for (TripleComponentRole role : TripleComponentRole.values()) { + Set dataset2 = new HashSet<>(dataset); + roleDesc.append(",").append(role); + + Iterator roleIt; + switch (role) { + case OBJECT: { + Iterator sh = h.getDictionary().getShared().getSortedEntries(); + Iterator ob = h.getDictionary().getObjects().getSortedEntries(); + roleIt = new FetcherIterator<>() { + @Override + protected CharSequence getNext() { + if (sh.hasNext()) { + return sh.next(); + } + if (ob.hasNext()) { + return ob.next(); + } + return null; + } + }; + } + break; + case SUBJECT: { + Iterator sh = h.getDictionary().getShared().getSortedEntries(); + Iterator su = h.getDictionary().getSubjects().getSortedEntries(); + roleIt = new FetcherIterator<>() { + @Override + protected CharSequence getNext() { + if (sh.hasNext()) { + return sh.next(); + } + if (su.hasNext()) { + return su.next(); + } + return null; + } + }; + } + break; + case PREDICATE: + roleIt = h.getDictionary().getPredicates().getSortedEntries(); + break; + case GRAPH: + roleIt = h.getDictionary().getGraphs().getSortedEntries(); + break; + default: + throw new AssertionError(); + } + + long componentId = 0; + Set components = new HashSet<>(); + while (roleIt.hasNext()) { + CharSequence component = roleIt.next(); + String str = component.toString(); + components.add(component.toString()); + long cid = componentId++; + + Iterator eid; + switch (role) { + case OBJECT: + eid = h.search("", "", component, ""); + break; + case SUBJECT: + eid = h.search(component, "", "", ""); + break; + case PREDICATE: + eid = h.search("", component, "", ""); + break; + case GRAPH: + eid = h.search("", "", "", component); + break; + default: + throw new AssertionError(); + } + + while (eid.hasNext()) { + TripleString tsstr = eid.next().tripleToString(); + if (role == TripleComponentRole.GRAPH && !tsstr.getGraph().equals(str)) { + // the default graph "" is searching all the graphs, so we need + // to check that we are using the right one. + continue; + } + if (!dataset2.remove(tsstr)) { + BitmapTriplesIteratorPositionTest.printIterator(eid); + fail("can't remove " + tsstr + + "\nfor " + role + "=" + component + "(" + cid + ")" + + "\ndone: " + roleDesc.substring(1) + + "\n" + String.join(",", components) + ); + } + } + } + assertTrue(dataset2.isEmpty()); + } + + + } + } + + @Test + public void iteratorStreamGenerationTest() throws IOException, ParserException, NotFoundException { + LargeFakeDataSetStreamSupplier supplier = createSupplier(); + Iterator it = supplier.createTripleStringStream(); + HDTOptions spec = HDTOptions.of( + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD, + HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION + ); + Path root = tempDir.newFolder().toPath(); + try { + Path d = root.resolve("d.hdt"); + try (HDT hdt = HDTManager.generateHDT(it, HDTTestUtils.BASE_URI, spec, ProgressListener.ignore())) { + hdt.saveToHDT(d.toAbsolutePath().toString(), ProgressListener.ignore()); + } + hdtqTesd(supplier, d); + } finally { + PathUtils.deleteDirectory(root); + } + } + + @Test + public void fileReadGenerationTest() throws IOException, ParserException, NotFoundException { + LargeFakeDataSetStreamSupplier supplier = createSupplier(); + Iterator it = supplier.createTripleStringStream(); + + HDTOptions spec = HDTOptions.of( + HDTOptionsKeys.TEMP_DICTIONARY_IMPL_KEY, HDTOptionsKeys.TEMP_DICTIONARY_IMPL_VALUE_HASH_QUAD, + HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_FOUR_QUAD_SECTION + ); + Path root = tempDir.newFolder().toPath(); + try { + Path nq = root.resolve("d.nq"); + try (BufferedWriter writer = Files.newBufferedWriter(nq)) { + while (it.hasNext()) { + it.next().dumpNtriple(writer); + } + writer.flush(); + } + Path d = root.resolve("d.hdt"); + try (HDT hdt = HDTManager.generateHDT(nq.toAbsolutePath().toString(), + HDTTestUtils.BASE_URI, RDFNotation.NQUAD, spec, ProgressListener.ignore())) { + hdt.saveToHDT(d.toAbsolutePath().toString(), ProgressListener.ignore()); + } + hdtqTesd(supplier, d); + } finally { + PathUtils.deleteDirectory(root); + } + } } @Ignore("handTests") @@ -984,6 +1231,7 @@ public void qzdqzdTest() throws ParserException, IOException { } } + @Test public void bigDiskTest() throws ParserException, IOException { LargeFakeDataSetStreamSupplier supplier = LargeFakeDataSetStreamSupplier diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorPositionTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorPositionTest.java index de3227f0..9630c43d 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorPositionTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/triples/impl/BitmapTriplesIteratorPositionTest.java @@ -102,7 +102,7 @@ public BitmapTriplesIteratorPositionTest(String dictionaryType, int subjects, in * * @param it Iterator */ - private void printIterator(Object it) { + public static void printIterator(Object it) { for (int depth = 0; ; depth++) { System.out.println("[" + depth + "] Used iterator: " + it.getClass()); try { diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/LargeFakeDataSetStreamSupplier.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/LargeFakeDataSetStreamSupplier.java index c36b4a17..3546eceb 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/LargeFakeDataSetStreamSupplier.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/LargeFakeDataSetStreamSupplier.java @@ -133,6 +133,7 @@ public static LargeFakeDataSetStreamSupplier createInfinite(long seed) { private TripleString buffer; private TripleString next; private boolean nquad; + private boolean noDefaultGraph; private LargeFakeDataSetStreamSupplier(long maxSize, long maxTriples, long seed) { this.maxSize = maxSize; @@ -284,7 +285,7 @@ private CharSequence createGraph() { return ""; } int rnd = random.nextInt(10); - if (rnd < 4) { + if (rnd < 4 && !noDefaultGraph) { return ""; // no graph } if (rnd == 4) { @@ -545,6 +546,18 @@ public LargeFakeDataSetStreamSupplier withMaxGraph(int maxGraph) { this.maxGraph = maxGraph; return this; } + /** + * do not use default graph with quad generation + * + * @param noDefaultGraph no default graph + * @return this + */ + public LargeFakeDataSetStreamSupplier withNoDefaultGraph(boolean noDefaultGraph) { + this.noDefaultGraph = noDefaultGraph; + return this; + } + + /** * Stream connected to a thread to interrupt in case of Exception