Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

4829 minimum should match #5488

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ pub struct BoolQuery {
filter: Vec<ElasticQueryDslInner>,
#[serde(default)]
pub boost: Option<NotNaNf32>,
#[serde(default)]
pub minimum_should_match: Option<usize>,
}

impl BoolQuery {
Expand All @@ -57,6 +59,7 @@ impl BoolQuery {
should: children,
filter: Vec::new(),
boost: None,
minimum_should_match: None,
}
}
}
Expand All @@ -75,6 +78,7 @@ impl ConvertibleToQueryAst for BoolQuery {
must_not: convert_vec(self.must_not)?,
should: convert_vec(self.should)?,
filter: convert_vec(self.filter)?,
minimum_should_match: self.minimum_should_match,
};
Ok(bool_query_ast.into())
}
Expand All @@ -90,6 +94,8 @@ impl From<BoolQuery> for ElasticQueryDslInner {
mod tests {
use crate::elastic_query_dsl::bool_query::BoolQuery;
use crate::elastic_query_dsl::term_query::term_query_from_field_value;
use crate::elastic_query_dsl::ConvertibleToQueryAst;
use crate::query_ast::QueryAst;

#[test]
fn test_dsl_bool_query_deserialize_simple() {
Expand All @@ -111,6 +117,7 @@ mod tests {
should: Vec::new(),
filter: Vec::new(),
boost: None,
minimum_should_match: None
}
);
}
Expand All @@ -130,6 +137,7 @@ mod tests {
should: Vec::new(),
filter: vec![term_query_from_field_value("product_id", "2").into(),],
boost: None,
minimum_should_match: None,
}
);
}
Expand All @@ -152,7 +160,28 @@ mod tests {
should: Vec::new(),
filter: Vec::new(),
boost: None,
minimum_should_match: None,
}
);
}

#[test]
fn test_dsl_query_with_minimum_should_match() {
let bool_query_json = r#"{
"should": [
{ "term": {"product_id": {"value": "1" }} },
{ "term": {"product_id": {"value": "2" }} },
{ "term": {"product_id": {"value": "3" }} }
],
"minimum_should_match": 2
}"#;
let bool_query: BoolQuery = serde_json::from_str(bool_query_json).unwrap();
assert_eq!(bool_query.should.len(), 3);
assert_eq!(bool_query.minimum_should_match, Some(2));
let QueryAst::Bool(bool_query_ast) = bool_query.convert_to_query_ast().unwrap() else {
panic!();
};
assert_eq!(bool_query_ast.should.len(), 3);
assert_eq!(bool_query_ast.minimum_should_match, Some(2));
}
}
3 changes: 3 additions & 0 deletions quickwit/quickwit-query/src/query_ast/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ pub struct BoolQuery {
pub should: Vec<QueryAst>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub filter: Vec<QueryAst>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub minimum_should_match: Option<usize>,
}

impl From<BoolQuery> for QueryAst {
Expand All @@ -65,6 +67,7 @@ impl BuildTantivyAst for BoolQuery {
with_validation: bool,
) -> Result<TantivyQueryAst, InvalidQuery> {
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery::default();
boolean_query.minimum_should_match = self.minimum_should_match;
for must in &self.must {
let must_leaf = must.build_tantivy_ast_call(
schema,
Expand Down
2 changes: 2 additions & 0 deletions quickwit/quickwit-query/src/query_ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ impl QueryAst {
must_not,
should,
filter,
minimum_should_match,
}) => {
let must = parse_user_query_in_asts(must, default_search_fields)?;
let must_not = parse_user_query_in_asts(must_not, default_search_fields)?;
Expand All @@ -92,6 +93,7 @@ impl QueryAst {
must_not,
should,
filter,
minimum_should_match,
}
.into())
}
Expand Down
75 changes: 46 additions & 29 deletions quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use tantivy::query::{
AllQuery as TantivyAllQuery, ConstScoreQuery as TantivyConstScoreQuery,
AllQuery as TantivyAllQuery, BooleanQuery, ConstScoreQuery as TantivyConstScoreQuery,
EmptyQuery as TantivyEmptyQuery,
};
use tantivy::query_grammar::Occur;
Expand Down Expand Up @@ -160,6 +160,7 @@ pub(crate) struct TantivyBoolQuery {
pub must_not: Vec<TantivyQueryAst>,
pub should: Vec<TantivyQueryAst>,
pub filter: Vec<TantivyQueryAst>,
pub minimum_should_match: Option<usize>,
}

fn simplify_asts(asts: Vec<TantivyQueryAst>) -> Vec<TantivyQueryAst> {
Expand All @@ -186,6 +187,7 @@ impl TantivyBoolQuery {
self.should = simplify_asts(self.should);
self.must_not = simplify_asts(self.must_not);
self.filter = simplify_asts(self.filter);

for must_children in [&mut self.must, &mut self.filter] {
for child in must_children {
if child.const_predicate() == Some(MatchAllOrNone::MatchNone) {
Expand All @@ -197,6 +199,7 @@ impl TantivyBoolQuery {
&& self.must.is_empty()
&& self.filter.is_empty()
&& self.must_not.is_empty()
&& self.minimum_should_match.unwrap_or(0) == 0
{
// This is just a convention mimicking Elastic/Commonsearch's behavior.
return TantivyQueryAst::match_all();
Expand All @@ -211,7 +214,7 @@ impl TantivyBoolQuery {
continue;
}
};
if must_bool.should.is_empty() {
if must_bool.should.is_empty() && must_bool.minimum_should_match.is_none() {
new_must.append(&mut must_bool.must);
self.filter.append(&mut must_bool.filter);
self.must_not.append(&mut must_bool.must_not);
Expand All @@ -230,10 +233,10 @@ impl TantivyBoolQuery {
continue;
}
};
if filter_bool.should.is_empty() {
if filter_bool.should.is_empty() && filter_bool.minimum_should_match.is_none() {
new_filter.append(&mut filter_bool.must);
new_filter.append(&mut filter_bool.filter);
// must_not doeen't contribute to score, no need to move it to some filter_not kind
// must_not doesn't contribute to score, no need to move it to some filter_not kind
// of thing
self.must_not.append(&mut filter_bool.must_not);
} else {
Expand All @@ -243,24 +246,27 @@ impl TantivyBoolQuery {
self.filter = new_filter;

let mut new_should = Vec::with_capacity(self.should.len());
for should in self.should {
let mut should_bool = match should {
TantivyQueryAst::Bool(bool_query) => bool_query,
_ => {
new_should.push(should);
continue;
if self.minimum_should_match.is_none() {
for should in self.should {
let mut should_bool = match should {
TantivyQueryAst::Bool(bool_query) => bool_query,
_ => {
new_should.push(should);
continue;
}
};
if should_bool.must.is_empty()
&& should_bool.filter.is_empty()
&& should_bool.must_not.is_empty()
&& should_bool.minimum_should_match.is_none()
{
new_should.append(&mut should_bool.should);
} else {
new_should.push(TantivyQueryAst::Bool(should_bool));
}
};
if should_bool.must.is_empty()
&& should_bool.filter.is_empty()
&& should_bool.must_not.is_empty()
{
new_should.append(&mut should_bool.should);
} else {
new_should.push(TantivyQueryAst::Bool(should_bool));
}
self.should = new_should;
}
self.should = new_should;

// TODO we could turn must_not(must_not(abc, def)) into should(filter(abc), filter(def)),
// we can't simply have should(abc, def) because of scoring, and should(filter(abc, def))
Expand Down Expand Up @@ -317,14 +323,15 @@ impl TantivyBoolQuery {
} else {
let num_children =
self.must.len() + self.should.len() + self.must_not.len() + self.filter.len();
if num_children == 1 {
if num_children == 1 && self.minimum_should_match.is_none() {
if let Some(ast) = self.must.pop().or(self.should.pop()) {
return ast;
}
// We do not optimize a single filter clause for the moment.
// We do need a mechanism to make sure we keep the boost of 0.
}
}

TantivyQueryAst::Bool(self)
}
}
Expand Down Expand Up @@ -360,7 +367,13 @@ impl From<TantivyBoolQuery> for Box<dyn TantivyQuery> {
Box::new(TantivyConstScoreQuery::new(filter_query, 0.0f32)),
));
}
Box::new(tantivy::query::BooleanQuery::from(clause))
let tantivy_bool_query = if let Some(minimum_should_match) = bool_query.minimum_should_match
{
BooleanQuery::with_minimum_required_clauses(clause, minimum_should_match)
} else {
BooleanQuery::from(clause)
};
Box::new(tantivy::query::BooleanQuery::from(tantivy_bool_query))
}
}

Expand Down Expand Up @@ -881,14 +894,18 @@ mod tests {
let filter = proptest::collection::vec(element.clone(), 0..4);
let should = proptest::collection::vec(element.clone(), 0..4);
let must_not = proptest::collection::vec(element.clone(), 0..4);
(must, filter, should, must_not).prop_map(|(must, filter, should, must_not)| {
TantivyQueryAst::Bool(TantivyBoolQuery {
must,
filter,
should,
must_not,
})
})
let minimum_should_match = (0usize..2).prop_map(|n: usize| n.checked_sub(1));
(must, filter, should, must_not, minimum_should_match).prop_map(
|(must, filter, should, must_not, minimum_should_match)| {
TantivyQueryAst::Bool(TantivyBoolQuery {
must,
filter,
should,
must_not,
minimum_should_match,
})
},
)
})
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ fn build_request_for_es_api(
must_not: Vec::new(),
should: Vec::new(),
filter: queries,
minimum_should_match: None,
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,9 @@ expected:
total:
value: 100
---
# elasticsearch accepts this query
engines:
- quickwit
json:
query:
query_string:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,11 @@ json:
range:
repo.name:
gte: "h"
lte: "z"
lte: "i"
expected:
hits:
total:
value: 62
value: 1
relation: "eq"
---
# This field is a JSON field and has fast field normalization.
Expand All @@ -218,6 +218,8 @@ expected:
---
# This field is a JSON field and has fast field normalization.
# That means it is case insensitive
engines:
- quickwit
json:
query:
range:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,60 @@ expected:
hits:
total:
value: 100


---
json:
query:
bool:
should:
- {"query_string": {"query": "type:PushEvent"}}
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 1
expected:
hits:
total:
value: 69
---
json:
query:
bool:
should:
- {"query_string": {"query": "type:PushEvent"}}
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 2
expected:
hits:
total:
value: 3
---
json:
query:
bool:
should:
- {"query_string": {"query": "type:PushEvent"}}
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 3
expected:
hits:
total:
value: 0
---
json:
query:
bool:
must:
- {"query_string": {"query": "type:PushEvent"}}
should:
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 1
expected:
hits:
total:
value: 2
Loading
Loading