From 20107a3b9486e091a81fd84beecf93ef21c22eed Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Wed, 4 Sep 2024 09:19:46 -0700 Subject: [PATCH] Fixed merge logic for multiple shards case Signed-off-by: Martin Gaievski --- .../search/query/TopDocsMerger.java | 19 +++++- .../search/query/TopDocsMergerTests.java | 59 +++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java b/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java index a77ff458e..23819521c 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java @@ -55,9 +55,12 @@ class TopDocsMerger { * @return merged TopDocsAndMaxScore object */ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) { - if (Objects.isNull(newTopDocs) || Objects.isNull(newTopDocs.topDocs) || newTopDocs.topDocs.totalHits.value == 0) { + if (isEmpty(newTopDocs)) { return source; } + if (isEmpty(source)) { + return newTopDocs; + } TotalHits mergedTotalHits = getMergedTotalHits(source, newTopDocs); TopDocsAndMaxScore result = new TopDocsAndMaxScore( getTopDocs(getMergedScoreDocs(source.topDocs.scoreDocs, newTopDocs.topDocs.scoreDocs), mergedTotalHits), @@ -66,6 +69,20 @@ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAn return result; } + /** + * Checks if TopDocsAndMaxScore is null, has no top docs or zero total hits + * @param topDocsAndMaxScore + * @return + */ + private static boolean isEmpty(final TopDocsAndMaxScore topDocsAndMaxScore) { + if (Objects.isNull(topDocsAndMaxScore) + || Objects.isNull(topDocsAndMaxScore.topDocs) + || topDocsAndMaxScore.topDocs.totalHits.value == 0) { + return true; + } + return false; + } + private TotalHits getMergedTotalHits(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) { // merged value is a lower bound - if both are equal_to than merged will also be equal_to, // otherwise assign greater_than_or_equal diff --git a/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java b/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java index d10ca0668..9c2718687 100644 --- a/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java +++ b/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java @@ -176,6 +176,65 @@ public void testMergeScoreDocs_whenBothTopDocsHasNoHits_thenSuccessful() { assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[3].score, 0); } + @SneakyThrows + public void testMergeScoreDocs_whenSomeSegmentsHasNoHits_thenSuccessful() { + // Given + TopDocsMerger topDocsMerger = new TopDocsMerger(null); + + // When + // first segment has no results, and we merge with non-empty segment + TopDocs topDocsOriginal = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {}); + TopDocsAndMaxScore topDocsAndMaxScoreOriginal = new TopDocsAndMaxScore(topDocsOriginal, 0); + TopDocs topDocsNew = new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { + createStartStopElementForHybridSearchResults(0), + createDelimiterElementForHybridSearchResults(0), + new ScoreDoc(0, 0.5f), + new ScoreDoc(2, 0.3f), + createStartStopElementForHybridSearchResults(0) } + ); + TopDocsAndMaxScore topDocsAndMaxScoreNew = new TopDocsAndMaxScore(topDocsNew, 0.5f); + TopDocsAndMaxScore mergedTopDocsAndMaxScore = topDocsMerger.merge(topDocsAndMaxScoreOriginal, topDocsAndMaxScoreNew); + + // Then + assertNotNull(mergedTopDocsAndMaxScore); + + assertEquals(0.5f, mergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION); + assertEquals(2, mergedTopDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, mergedTopDocsAndMaxScore.topDocs.totalHits.relation); + assertEquals(5, mergedTopDocsAndMaxScore.topDocs.scoreDocs.length); + // check format, all elements one by one + ScoreDoc[] scoreDocs = mergedTopDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, 0); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, 0); + assertScoreDoc(scoreDocs[2], 0, 0.5f); + assertScoreDoc(scoreDocs[3], 2, 0.3f); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[4].score, 0); + + // When + // source object has results, and we merge with empty segment + TopDocs topDocsNewEmpty = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {}); + TopDocsAndMaxScore topDocsAndMaxScoreNewEmpty = new TopDocsAndMaxScore(topDocsNewEmpty, 0); + TopDocsAndMaxScore finalMergedTopDocsAndMaxScore = topDocsMerger.merge(mergedTopDocsAndMaxScore, topDocsAndMaxScoreNewEmpty); + + // Then + // merged object remains unchanged + assertNotNull(finalMergedTopDocsAndMaxScore); + + assertEquals(0.5f, finalMergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION); + assertEquals(2, finalMergedTopDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, finalMergedTopDocsAndMaxScore.topDocs.totalHits.relation); + assertEquals(5, finalMergedTopDocsAndMaxScore.topDocs.scoreDocs.length); + // check format, all elements one by one + ScoreDoc[] finalScoreDocs = finalMergedTopDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[0].score, 0); + assertEquals(MAGIC_NUMBER_DELIMITER, finalScoreDocs[1].score, 0); + assertScoreDoc(finalScoreDocs[2], 0, 0.5f); + assertScoreDoc(finalScoreDocs[3], 2, 0.3f); + assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[4].score, 0); + } + @SneakyThrows public void testThreeSequentialMerges_whenAllTopDocsHasHits_thenSuccessful() { TopDocsMerger topDocsMerger = new TopDocsMerger(null);