-
Notifications
You must be signed in to change notification settings - Fork 64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Feature] Support for Default Model Id #337
Changes from 17 commits
dac5012
6903f94
88ba2e9
c06d07a
7c3a302
dee67ca
b13eb80
badced1
9c010e7
d0b70c7
c9ada83
4be198c
e9db552
e9efd72
ff10aba
c3d0649
ef5f939
29017ba
ce28954
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.neuralsearch.processor; | ||
|
||
import java.util.Map; | ||
|
||
import lombok.Getter; | ||
import lombok.Setter; | ||
|
||
import org.opensearch.action.search.SearchRequest; | ||
import org.opensearch.common.Nullable; | ||
import org.opensearch.index.query.QueryBuilder; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.neuralsearch.query.visitor.NeuralSearchQueryVisitor; | ||
import org.opensearch.search.pipeline.AbstractProcessor; | ||
import org.opensearch.search.pipeline.Processor; | ||
import org.opensearch.search.pipeline.SearchRequestProcessor; | ||
|
||
/** | ||
* Neural Search Query Request Processor, It modifies the search request with neural query clause | ||
* and adds model Id if not present in the search query. | ||
*/ | ||
@Setter | ||
@Getter | ||
public class EnrichingQueryDefaultProcessor extends AbstractProcessor implements SearchRequestProcessor { | ||
|
||
/** | ||
* Key to reference this processor type from a search pipeline. | ||
*/ | ||
public static final String TYPE = "enriching_query_defaults"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. neural_query_enricher name seems better here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @martin-gaievski I am finally going with neural_query_enricher There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agree, that sounds reasonable, please update the class with that new name |
||
|
||
private final String modelId; | ||
|
||
private final Map<String, Object> neuralFieldDefaultIdMap; | ||
|
||
/** | ||
* Returns the type of the processor. | ||
* | ||
* @return The processor type. | ||
*/ | ||
@Override | ||
public String getType() { | ||
return TYPE; | ||
} | ||
|
||
private EnrichingQueryDefaultProcessor( | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
@Nullable String modelId, | ||
@Nullable Map<String, Object> neuralFieldDefaultIdMap | ||
) { | ||
super(tag, description, ignoreFailure); | ||
this.modelId = modelId; | ||
this.neuralFieldDefaultIdMap = neuralFieldDefaultIdMap; | ||
} | ||
|
||
/** | ||
* Processes the Search Request. | ||
* | ||
* @return The Search Request. | ||
*/ | ||
@Override | ||
public SearchRequest processRequest(SearchRequest searchRequest) { | ||
QueryBuilder queryBuilder = searchRequest.source().query(); | ||
queryBuilder.visit(new NeuralSearchQueryVisitor(modelId, neuralFieldDefaultIdMap)); | ||
return searchRequest; | ||
} | ||
|
||
public static class Factory implements Processor.Factory<SearchRequestProcessor> { | ||
private static final String DEFAULT_MODEL_ID = "default_model_id"; | ||
private static final String NEURAL_FIELD_DEFAULT_ID = "neural_field_default_id"; | ||
|
||
/** | ||
* Create the processor object. | ||
* | ||
* @return EnrichingQueryDefaultProcessor | ||
*/ | ||
@Override | ||
public EnrichingQueryDefaultProcessor create( | ||
Map<String, Processor.Factory<SearchRequestProcessor>> processorFactories, | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
Map<String, Object> config, | ||
PipelineContext pipelineContext | ||
) throws IllegalArgumentException { | ||
String modelId; | ||
try { | ||
modelId = (String) config.remove(DEFAULT_MODEL_ID); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why config.remove? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Took reference from OS core There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a better way to get the string. Check this: https://github.com/opensearch-project/neural-search/blob/main/src/main/java/org/opensearch/neuralsearch/processor/factory/TextEmbeddingProcessorFactory.java#L40C9-L40C89 |
||
} catch (ClassCastException e) { | ||
throw new IllegalArgumentException("model Id must of String type"); | ||
} | ||
Map<String, Object> neuralInfoMap = ConfigurationUtils.readOptionalMap(TYPE, tag, config, NEURAL_FIELD_DEFAULT_ID); | ||
|
||
if (modelId == null && neuralInfoMap == null) { | ||
throw new IllegalArgumentException("model Id or neural info map either of them should be provided"); | ||
} | ||
|
||
return new EnrichingQueryDefaultProcessor(tag, description, ignoreFailure, modelId, neuralInfoMap); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.neuralsearch.query.visitor; | ||
|
||
import java.util.Map; | ||
|
||
import lombok.AllArgsConstructor; | ||
|
||
import org.apache.lucene.search.BooleanClause; | ||
import org.opensearch.index.query.QueryBuilder; | ||
import org.opensearch.index.query.QueryBuilderVisitor; | ||
import org.opensearch.neuralsearch.query.NeuralQueryBuilder; | ||
|
||
/** | ||
* Neural Search Query Visitor. It visits each and every component of query buikder tree. | ||
*/ | ||
@AllArgsConstructor | ||
public class NeuralSearchQueryVisitor implements QueryBuilderVisitor { | ||
|
||
private final String modelId; | ||
private final Map<String, Object> neuralFieldMap; | ||
|
||
/** | ||
* Accept method accepts every query builder from the search request, | ||
* and processes it if the required conditions in accept method are satisfied. | ||
*/ | ||
@Override | ||
public void accept(QueryBuilder queryBuilder) { | ||
vibrantvarun marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (queryBuilder instanceof NeuralQueryBuilder) { | ||
NeuralQueryBuilder neuralQueryBuilder = (NeuralQueryBuilder) queryBuilder; | ||
if (neuralQueryBuilder.modelId() == null) { | ||
if (neuralFieldMap != null | ||
&& neuralQueryBuilder.fieldName() != null | ||
&& neuralFieldMap.get(neuralQueryBuilder.fieldName()) != null) { | ||
String fieldDefaultModelId = (String) neuralFieldMap.get(neuralQueryBuilder.fieldName()); | ||
neuralQueryBuilder.modelId(fieldDefaultModelId); | ||
} else if (modelId != null) { | ||
neuralQueryBuilder.modelId(modelId); | ||
} else { | ||
throw new IllegalArgumentException( | ||
"model id must be provided in neural query or a default model id must be set in search request processor" | ||
); | ||
} | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Retrieves the child visitor from the Visitor object. | ||
* | ||
* @return The sub Query Visitor | ||
*/ | ||
@Override | ||
public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { | ||
return this; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.neuralsearch.util; | ||
|
||
import lombok.AccessLevel; | ||
import lombok.NoArgsConstructor; | ||
import lombok.extern.log4j.Log4j2; | ||
|
||
import org.opensearch.Version; | ||
import org.opensearch.cluster.service.ClusterService; | ||
|
||
/** | ||
* Class abstracts information related to underlying OpenSearch cluster | ||
*/ | ||
@NoArgsConstructor(access = AccessLevel.PRIVATE) | ||
@Log4j2 | ||
public class NeuralSearchClusterUtil { | ||
vibrantvarun marked this conversation as resolved.
Show resolved
Hide resolved
|
||
private ClusterService clusterService; | ||
|
||
private static NeuralSearchClusterUtil instance; | ||
|
||
/** | ||
* Return instance of the cluster context, must be initialized first for proper usage | ||
* @return instance of cluster context | ||
*/ | ||
public static synchronized NeuralSearchClusterUtil instance() { | ||
if (instance == null) { | ||
instance = new NeuralSearchClusterUtil(); | ||
} | ||
return instance; | ||
} | ||
|
||
/** | ||
* Initializes instance of cluster context by injecting dependencies | ||
* @param clusterService | ||
*/ | ||
public void initialize(final ClusterService clusterService) { | ||
this.clusterService = clusterService; | ||
} | ||
|
||
/** | ||
* Return minimal OpenSearch version based on all nodes currently discoverable in the cluster | ||
* @return minimal installed OpenSearch version, default to Version.CURRENT which is typically the latest version | ||
*/ | ||
public Version getClusterMinVersion() { | ||
return this.clusterService.state().getNodes().getMinNodeVersion(); | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should start getting rid of these kind of inits and move towards singleton pattern without this kind of inits.
May be an AI for maintainers