From 553805795ee7f65ad95263d08ca2c3245df0d5c9 Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Fri, 18 Oct 2024 15:39:07 +0530 Subject: [PATCH] Handle delete cases for star tree Signed-off-by: Bharathwaj G --- .../lucene/index/DocValuesProducerUtil.java | 33 ++++++ .../Composite912DocValuesReader.java | 8 +- .../Composite912DocValuesWriter.java | 39 ++++--- .../startree/builder/BaseStarTreeBuilder.java | 1 - .../StarTreeDocValuesFormatTests.java | 100 ++++++++++++++++++ 5 files changed, 158 insertions(+), 23 deletions(-) create mode 100644 server/src/main/java/org/apache/lucene/index/DocValuesProducerUtil.java diff --git a/server/src/main/java/org/apache/lucene/index/DocValuesProducerUtil.java b/server/src/main/java/org/apache/lucene/index/DocValuesProducerUtil.java new file mode 100644 index 0000000000000..3aebec56b82d9 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/index/DocValuesProducerUtil.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.index; + +import org.apache.lucene.codecs.DocValuesProducer; + +import java.util.Collections; +import java.util.Set; + +/** + * Utility class for DocValuesProducers + * @opensearch.internal + */ +public class DocValuesProducerUtil { + /** + * Returns the segment doc values producers for the given doc values producer. + * If the given doc values producer is not a segment doc values producer, an empty set is returned. + * @param docValuesProducer the doc values producer + * @return the segment doc values producers + */ + public static Set getSegmentDocValuesProducers(DocValuesProducer docValuesProducer) { + if (docValuesProducer instanceof SegmentDocValuesProducer) { + return (((SegmentDocValuesProducer) docValuesProducer).dvProducers); + } + return Collections.emptySet(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java index bb8a07d856d87..637d3250fda3f 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java @@ -185,7 +185,13 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState // populates the dummy list of field infos to fetch doc id set iterators for respective fields. // the dummy field info is used to fetch the doc id set iterators for respective fields based on field name FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields)); - this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context); + this.readState = new SegmentReadState( + readState.directory, + readState.segmentInfo, + fieldInfos, + readState.context, + readState.segmentSuffix + ); // initialize star-tree doc values producer diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java index 2225870afae8e..dd35091dece2f 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java @@ -12,6 +12,7 @@ import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesProducerUtil; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; @@ -35,7 +36,6 @@ import org.opensearch.index.mapper.CompositeMappedFieldType; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.mapper.StarTreeMapper; import java.io.IOException; import java.util.ArrayList; @@ -221,12 +221,8 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, } // we have all the required fields to build composite fields if (compositeFieldSet.isEmpty()) { - for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) { - if (mappedType instanceof StarTreeMapper.StarTreeFieldType) { - try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) { - starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, compositeDocValuesConsumer); - } - } + try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) { + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, compositeDocValuesConsumer); } } } @@ -285,9 +281,20 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { if (mergeState.docValuesProducers[i] instanceof CompositeIndexReader) { reader = (CompositeIndexReader) mergeState.docValuesProducers[i]; } else { - continue; + Set docValuesProducers = DocValuesProducerUtil.getSegmentDocValuesProducers( + mergeState.docValuesProducers[i] + ); + for (DocValuesProducer docValuesProducer : docValuesProducers) { + if (docValuesProducer instanceof CompositeIndexReader) { + reader = (CompositeIndexReader) docValuesProducer; + List compositeFieldInfo = reader.getCompositeIndexFields(); + if (compositeFieldInfo.isEmpty() == false) { + break; + } + } + } } - + if (reader == null) continue; List compositeFieldInfo = reader.getCompositeIndexFields(); for (CompositeIndexFieldInfo fieldInfo : compositeFieldInfo) { if (fieldInfo.getType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) { @@ -295,17 +302,6 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { if (compositeIndexValues instanceof StarTreeValues) { StarTreeValues starTreeValues = (StarTreeValues) compositeIndexValues; List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), new ArrayList<>()); - if (starTreeField == null) { - starTreeField = starTreeValues.getStarTreeField(); - } - // assert star tree configuration is same across segments - else { - if (starTreeField.equals(starTreeValues.getStarTreeField()) == false) { - throw new IllegalArgumentException( - "star tree field configuration must match the configuration of the field being merged" - ); - } - } fieldsList.add(starTreeValues); starTreeSubsPerField.put(fieldInfo.getField(), fieldsList); } @@ -340,7 +336,8 @@ private static SegmentWriteState getSegmentWriteState(SegmentWriteState segmentW segmentInfo, segmentWriteState.fieldInfos, segmentWriteState.segUpdates, - segmentWriteState.context + segmentWriteState.context, + segmentWriteState.segmentSuffix ); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index c3ea04d52e892..3054e8e66b601 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -735,7 +735,6 @@ private SequentialDocValuesIterator getIteratorForNumericField( * @throws IOException throws an exception if we are unable to add the doc */ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOException { - appendStarTreeDocument(starTreeDocument); numStarTreeDocs++; } diff --git a/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java index 4fe0199f89f41..08707cf2d6b1f 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java @@ -15,11 +15,14 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene912.Lucene912Codec; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; import org.apache.lucene.tests.index.RandomIndexWriter; @@ -58,9 +61,12 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.STAR_TREE_DOCS_COUNT; import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.assertStarTreeDocuments; /** @@ -207,6 +213,100 @@ public void testStarTreeDocValues() throws IOException { directory.close(); } + public void testStarTreeKeywordDocValuesWithDeletions() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + int iterations = 3; + Map map = new HashMap<>(); + List allIds = new ArrayList<>(); + for (int iter = 0; iter < iterations; iter++) { + // Add 10 documents + for (int i = 0; i < 10; i++) { + String id = String.valueOf(random().nextInt() + i); + allIds.add(id); + Document doc = new Document(); + doc.add(new StringField("_id", id, Field.Store.YES)); + int fieldValue = random().nextInt(5) + 1; + doc.add(new SortedNumericDocValuesField("field", fieldValue)); + + int sndvValue = random().nextInt(3); + + doc.add(new SortedNumericDocValuesField("sndv", sndvValue)); + int dvValue = random().nextInt(3); + + doc.add(new SortedNumericDocValuesField("dv", dvValue)); + map.put(sndvValue + "-" + dvValue, fieldValue + map.getOrDefault(sndvValue + "-" + dvValue, 0)); + iw.addDocument(doc); + } + iw.flush(); + } + iw.commit(); + // Delete random number of documents + int docsToDelete = random().nextInt(9); // Delete up to 9 documents + for (int i = 0; i < docsToDelete; i++) { + if (!allIds.isEmpty()) { + String idToDelete = allIds.remove(random().nextInt(allIds.size() - 1)); + iw.deleteDocuments(new Term("_id", idToDelete)); + allIds.remove(idToDelete); + } + } + iw.flush(); + iw.commit(); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = DirectoryReader.open(directory); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Assert star tree documents + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] actualStarTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT)) + ); + for (StarTreeDocument starDoc : actualStarTreeDocuments) { + Long sndvVal = null; + if (starDoc.dimensions[0] != null) { + sndvVal = starDoc.dimensions[0]; + } + Long dvVal = null; + if (starDoc.dimensions[1] != null) { + dvVal = starDoc.dimensions[1]; + } + if (starDoc.metrics[0] != null) { + double metric = (double) starDoc.metrics[0]; + if (map.containsKey(sndvVal + "-" + dvVal)) { + assertEquals((long) map.get(sndvVal + "-" + dvVal), (long) metric); + } + } + } + } + } + ir.close(); + directory.close(); + } + private XContentBuilder getExpandedMapping() throws IOException { return topMapping(b -> { b.startObject("composite");