From 46a4dc4f1b84ff5be15ad859c87168af9d0ad4ce Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Wed, 20 Sep 2023 12:09:21 +0200 Subject: [PATCH] add support for exist in query language (#3800) * add support for exist in query language * upgrade tantivy --- quickwit/Cargo.lock | 18 +++++++++--------- quickwit/Cargo.toml | 2 +- .../src/actors/merge_executor.rs | 2 +- .../src/models/indexed_split.rs | 2 +- .../src/query_ast/user_input_query.rs | 5 ++++- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index b19dd06d32e..4eabf46cb23 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -4206,7 +4206,7 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "stable_deref_trait", ] @@ -7201,7 +7201,7 @@ dependencies = [ [[package]] name = "tantivy" version = "0.21.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "aho-corasick", "arc-swap", @@ -7255,7 +7255,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.5.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "bitpacking", ] @@ -7263,7 +7263,7 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.2.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "fastdivide", "fnv", @@ -7278,7 +7278,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "async-trait", "byteorder", @@ -7301,7 +7301,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.21.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "nom", ] @@ -7309,7 +7309,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.2.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "tantivy-common", "tantivy-fst", @@ -7319,7 +7319,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.2.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "murmurhash32", "tantivy-common", @@ -7328,7 +7328,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.2.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=1932513#19325132b7b94b8c0614d74d7a58f2408e469ebc" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=0241a05b#0241a05b90280ab78523f58bea9a3f21ae29a7e8" dependencies = [ "serde", ] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index be0a1d171dc..3e963abc83e 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -231,7 +231,7 @@ quickwit-serve = { version = "0.6.3", path = "./quickwit-serve" } quickwit-storage = { version = "0.6.3", path = "./quickwit-storage" } quickwit-telemetry = { version = "0.6.3", path = "./quickwit-telemetry" } -tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "1932513", default-features = false, features = [ +tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "0241a05b", default-features = false, features = [ "mmap", "lz4-compression", "zstd-compression", diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index 525027219aa..24cd6faf168 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -466,7 +466,7 @@ impl MergeExecutor { ctx.record_progress(); let _protect_guard = ctx.protect_zone(); - let mut index_writer = union_index.writer_with_num_threads(1, 3_000_000)?; + let mut index_writer = union_index.writer_with_num_threads(1, 15_000_000)?; let num_delete_tasks = delete_tasks.len(); if num_delete_tasks > 0 { let doc_mapper = doc_mapper_opt diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index ce285e2fcd9..45f2abc133b 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -98,7 +98,7 @@ impl IndexedSplitBuilder { let controlled_directory = ControlledDirectory::new(box_mmap_directory, io_controls); let index_writer = - index_builder.single_segment_index_writer(controlled_directory.clone(), 10_000_000)?; + index_builder.single_segment_index_writer(controlled_directory.clone(), 15_000_000)?; Ok(Self { split_attrs: SplitAttrs { pipeline_id, diff --git a/quickwit/quickwit-query/src/query_ast/user_input_query.rs b/quickwit/quickwit-query/src/query_ast/user_input_query.rs index 450a9485065..cab0d9cea9a 100644 --- a/quickwit/quickwit-query/src/query_ast/user_input_query.rs +++ b/quickwit/quickwit-query/src/query_ast/user_input_query.rs @@ -30,7 +30,9 @@ use tantivy::tokenizer::TokenizerManager; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::tantivy_query_ast::TantivyQueryAst; -use crate::query_ast::{self, BuildTantivyAst, FullTextMode, FullTextParams, QueryAst}; +use crate::query_ast::{ + self, BuildTantivyAst, FieldPresenceQuery, FullTextMode, FullTextParams, QueryAst, +}; use crate::{BooleanOperand, InvalidQuery, JsonLiteral}; const DEFAULT_PHRASE_QUERY_MAX_EXPANSION: u32 = 50; @@ -161,6 +163,7 @@ fn convert_user_input_ast_to_query_ast( let term_set_query = query_ast::TermSetQuery { terms_per_field }; Ok(term_set_query.into()) } + UserInputLeaf::Exists { field } => Ok(FieldPresenceQuery { field }.into()), }, UserInputAst::Boost(underlying, boost) => { let query_ast = convert_user_input_ast_to_query_ast(