Skip to content

Commit

Permalink
Neural sparse query two-phase search processor's bwc test (#777) (#831)
Browse files Browse the repository at this point in the history
* Poc of pipeline

Signed-off-by: conggguan <[email protected]>

* Complete some settings for two phase pipeline.

Signed-off-by: conggguan <[email protected]>

* Change the implement of two-phase from QueryBuilderVistor to custom process funciton.

Signed-off-by: conggguan <[email protected]>

* Add It and fix some bug on the state of multy same neuralsparsequerybuilder.

Signed-off-by: conggguan <[email protected]>

* Simplify some logic, and correct some format.

Signed-off-by: conggguan <[email protected]>

* Optimize some format.

Signed-off-by: conggguan <[email protected]>

* Add some test case.

Signed-off-by: conggguan <[email protected]>

* Optimize some logic for zhichao-aws's comments.

Signed-off-by: conggguan <[email protected]>

* Optimize a line without application.

Signed-off-by: conggguan <[email protected]>

* Add some comments, remove some redundant lines, fix some format.

Signed-off-by: conggguan <[email protected]>

* Remove a redundant null check, fix a if format.

Signed-off-by: conggguan <[email protected]>

* Fix a typo for a comment, camelcase format for some variable.

Signed-off-by: conggguan <[email protected]>

* Add some comments to illustrate the influence of the modify on 2-phase search pipeline to neural sparse query builder.

Signed-off-by: conggguan <[email protected]>

* Add restart and rolling upgrade bwc test for neural sparse two phase processor.

Signed-off-by: conggguan <[email protected]>

* Spotless on qa.

Signed-off-by: conggguan <[email protected]>

* Update change log for two-phase BWC test.

Signed-off-by: conggguan <[email protected]>

* Remove redundant lines of two-phase BWC test.

Signed-off-by: conggguan <[email protected]>

* Add changelog.

Signed-off-by: conggguan <[email protected]>

* Add the PR link and number for the CHANGELOG.md.

Signed-off-by: conggguan <[email protected]>

* [Fix] NeuralSparseTwoPhaseProcessorIT created wrong ingest pipeline, fix it to correct API.

Signed-off-by: conggguan <[email protected]>

---------

Signed-off-by: conggguan <[email protected]>
Signed-off-by: conggguan <[email protected]>
(cherry picked from commit b0b5128)

Co-authored-by: conggguan <[email protected]>
  • Loading branch information
opensearch-trigger-bot[bot] and conggguan committed Jul 13, 2024
1 parent ceee5a5 commit 0200245
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800))
### Infrastructure
- Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769))
- Add backward test cases for neural sparse two phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777))
### Documentation
### Maintenance
### Refactoring
14 changes: 14 additions & 0 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -155,6 +162,13 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
import org.opensearch.neuralsearch.util.TestUtils;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;

public class NeuralSparseTwoPhaseProcessorIT extends AbstractRestartUpgradeRestTestCase {

private static final String NEURAL_SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-dense";
private static final String NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse";
private static final String TEST_ENCODING_FIELD = "passage_embedding";
private static final String TEST_TEXT_FIELD = "passage_text";
private static final String TEXT_1 = "Hello world a b";

public void testNeuralSparseQueryTwoPhaseProcessor_NeuralSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
if (isRunningAgainstOldCluster()) {
String modelId = uploadSparseEncodingModel();
loadModel(modelId);
neuralSparseQueryBuilder.modelId(modelId);
createPipelineForSparseEncodingProcessor(modelId, NEURAL_SPARSE_INGEST_PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
NEURAL_SPARSE_INGEST_PIPELINE_NAME
);
addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));
createNeuralSparseTwoPhaseSearchProcessor(NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME);
updateIndexSettings(
getIndexNameForTest(),
Settings.builder().put("index.search.default_pipeline", NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME)
);
Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits");
assertNotNull(resultWith2PhasePipeline);
} else {
String modelId = null;
try {
modelId = TestUtils.getModelId(getIngestionPipeline(NEURAL_SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(modelId);
neuralSparseQueryBuilder.modelId(modelId);
Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits");
assertNotNull(resultWith2PhasePipeline);
} finally {
wipeOfTestResources(
getIndexNameForTest(),
NEURAL_SPARSE_INGEST_PIPELINE_NAME,
modelId,
NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME
);
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"request_processors": [
{
"neural_sparse_two_phase_processor": {
"tag": "neural-sparse",
"description": "This processor is making two-phase rescorer.",
"enabled": true,
"two_phase_parameter": {
"prune_ratio": %f,
"expansion_rate": %f,
"max_window_size": %d
}
}
}
]
}
39 changes: 39 additions & 0 deletions qa/rolling-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}


nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -156,6 +166,16 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}


nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -213,6 +233,16 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}


nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -270,6 +300,15 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
import org.opensearch.neuralsearch.util.TestUtils;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;

public class NeuralSparseTwoPhaseProcessorIT extends AbstractRollingUpgradeTestCase {
// add prefix to avoid conflicts with other IT class, since don't wipe resources after first round
private static final String SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-sparse";
private static final String SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse";
private static final String TEST_ENCODING_FIELD = "passage_embedding";
private static final String TEST_TEXT_FIELD = "passage_text";
private static final String TEXT_1 = "Hello world a b";
private String sparseModelId = "";

// test of NeuralSparseTwoPhaseProcessor supports neural_sparse query's two phase speed up
// the feature is introduced from 2.15
public void testNeuralSparseTwoPhaseProcessorIT_NeuralSparseSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
// will set the model_id after we obtain the id
NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);

switch (getClusterType()) {
case OLD:
sparseModelId = uploadSparseEncodingModel();
loadModel(sparseModelId);
neuralSparseQueryBuilder.modelId(sparseModelId);
createPipelineForSparseEncodingProcessor(sparseModelId, SPARSE_INGEST_PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
SPARSE_INGEST_PIPELINE_NAME
);
addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));
createNeuralSparseTwoPhaseSearchProcessor(SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME);
updateIndexSettings(
getIndexNameForTest(),
Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME)
);
assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"));
break;
case MIXED:
sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(sparseModelId);
neuralSparseQueryBuilder.modelId(sparseModelId);
assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"));
break;
case UPGRADED:
try {
sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(sparseModelId);
neuralSparseQueryBuilder.modelId(sparseModelId);
assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"));
} finally {
wipeOfTestResources(
getIndexNameForTest(),
SPARSE_INGEST_PIPELINE_NAME,
sparseModelId,
SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME
);
}
break;
default:
throw new IllegalStateException("Unexpected value: " + getClusterType());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"request_processors": [
{
"neural_sparse_two_phase_processor": {
"tag": "neural-sparse",
"description": "This processor is making two-phase rescorer.",
"enabled": true,
"two_phase_parameter": {
"prune_ratio": %f,
"expansion_rate": %f,
"max_window_size": %d
}
}
}
]
}

0 comments on commit 0200245

Please sign in to comment.