From 8a59e05509673221da132b70c5830fce05a7357a Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 31 Jul 2023 14:18:23 +0900 Subject: [PATCH] Added support for exists query, as defined in Elasticsearch Field exists does not consider types, only field names. Field capability will have to be handled differently unfortunately. This works by introducing an internal (but normal) "u64" field that stores the postings lists for field existence. For performance/RAM reasons, the field's full path is not stored as a string but instead we compute a u64-fnv hash using the path from root to leaf. If the hash behaves ideally, collisions are very unlikely, even when taking the birthday paradox into account. When dealing with complex JSON with the raw tokenizer this feature can double the number of tokens we deal with, and has an impact on performance. For this reason, it is added as an opt-in option in the DocMapper (`index_field_presence`, disabled by default). Like Elasticsearch, we only store field existence of indexed fields. Also in order to handle refinement like expand_dots, we work over the built tantivy Document and reuse the existing resolution logic. 
On 1.4GB of gharchive (which is close to a worst-case scenario), see the following performance/index size change: With field_exists enabled - Indexing Throughput: 41 MB/s - Index size: 701M With field_exists disabled - Indexing Throughput: 46 MB/s - Index size: 698M --- quickwit/Cargo.lock | 4 + quickwit/quickwit-common/Cargo.toml | 1 + quickwit/quickwit-common/src/lib.rs | 3 + quickwit/quickwit-common/src/path_hasher.rs | 68 ++++++ quickwit/quickwit-common/src/shared_consts.rs | 21 ++ .../quickwit-config/src/index_config/mod.rs | 4 + quickwit/quickwit-doc-mapper/Cargo.toml | 1 + .../src/default_doc_mapper/default_mapper.rs | 204 +++++++++++++++--- .../default_mapper_builder.rs | 3 + .../quickwit-doc-mapper/src/doc_mapper.rs | 8 +- quickwit/quickwit-doc-mapper/src/lib.rs | 8 +- .../quickwit-doc-mapper/src/tag_pruning.rs | 1 + .../file-backed-index/v0.4.expected.json | 1 + .../file-backed-index/v0.5.expected.json | 1 + .../file-backed-index/v0.6.expected.json | 1 + .../test-data/file-backed-index/v0.6.json | 1 + .../index-metadata/v0.4.expected.json | 1 + .../index-metadata/v0.5.expected.json | 1 + .../index-metadata/v0.6.expected.json | 1 + .../test-data/index-metadata/v0.6.json | 1 + quickwit/quickwit-query/Cargo.toml | 2 + .../src/elastic_query_dsl/exists_query.rs | 36 ++++ .../src/elastic_query_dsl/mod.rs | 4 + .../src/query_ast/field_presence.rs | 127 +++++++++++ quickwit/quickwit-query/src/query_ast/mod.rs | 10 + .../quickwit-query/src/query_ast/visitor.rs | 6 + quickwit/quickwit-query/src/tokenizers/mod.rs | 2 +- .../es_compatibility/0011-exists-query.yaml | 33 +++ .../es_compatibility/_setup.quickwit.yaml | 1 + 29 files changed, 519 insertions(+), 36 deletions(-) create mode 100644 quickwit/quickwit-common/src/path_hasher.rs create mode 100644 quickwit/quickwit-common/src/shared_consts.rs create mode 100644 quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs create mode 100644 quickwit/quickwit-query/src/query_ast/field_presence.rs create mode 
100644 quickwit/rest-api-tests/scenarii/es_compatibility/0011-exists-query.yaml diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 1fad4d9f25c..a3c5b2715d8 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -4918,6 +4918,7 @@ dependencies = [ "byte-unit", "dyn-clone", "env_logger", + "fnv", "futures", "home", "hostname", @@ -5064,6 +5065,7 @@ dependencies = [ "nom", "once_cell", "proptest", + "quickwit-common", "quickwit-datetime", "quickwit-macros", "quickwit-proto", @@ -5430,12 +5432,14 @@ dependencies = [ "anyhow", "base64 0.21.2", "criterion", + "fnv", "hex", "lindera-core", "lindera-dictionary", "lindera-tokenizer", "once_cell", "proptest", + "quickwit-common", "quickwit-datetime", "serde", "serde_json", diff --git a/quickwit/quickwit-common/Cargo.toml b/quickwit/quickwit-common/Cargo.toml index 185a232497b..dba7b0bf244 100644 --- a/quickwit/quickwit-common/Cargo.toml +++ b/quickwit/quickwit-common/Cargo.toml @@ -16,6 +16,7 @@ async-trait = { workspace = true } byte-unit = { workspace = true } dyn-clone = { workspace = true } env_logger = { workspace = true } +fnv = { workspace = true } futures = { workspace = true } home = { workspace = true } hostname = { workspace = true } diff --git a/quickwit/quickwit-common/src/lib.rs b/quickwit/quickwit-common/src/lib.rs index 5b390432416..02d8072083d 100644 --- a/quickwit/quickwit-common/src/lib.rs +++ b/quickwit/quickwit-common/src/lib.rs @@ -27,11 +27,13 @@ pub mod io; mod kill_switch; pub mod metrics; pub mod net; +mod path_hasher; mod progress; pub mod pubsub; pub mod rand; pub mod rendezvous_hasher; pub mod runtimes; +pub mod shared_consts; pub mod sorted_iter; pub mod stream_utils; @@ -49,6 +51,7 @@ use std::str::FromStr; pub use coolid::new_coolid; pub use kill_switch::KillSwitch; +pub use path_hasher::PathHasher; pub use progress::{Progress, ProtectedZoneGuard}; pub use stream_utils::{BoxStream, ServiceStream}; use tracing::{error, info}; diff --git 
a/quickwit/quickwit-common/src/path_hasher.rs b/quickwit/quickwit-common/src/path_hasher.rs new file mode 100644 index 00000000000..505802a5fd9 --- /dev/null +++ b/quickwit/quickwit-common/src/path_hasher.rs @@ -0,0 +1,68 @@ +// Copyright (C) 2023 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::hash::Hasher; + +/// Mini wrapper over the FnvHasher to incrementally hash nodes +/// in a tree. +/// +/// The wrapper does not do too much. Its main purpose to +/// work around the lack of Clone in the fnv Hasher +/// and enforce a 0 byte separator between segments. +#[derive(Default)] +pub struct PathHasher { + hasher: fnv::FnvHasher, +} + +impl Clone for PathHasher { + #[inline(always)] + fn clone(&self) -> PathHasher { + PathHasher { + hasher: fnv::FnvHasher::with_key(self.hasher.finish()), + } + } +} + +impl PathHasher { + /// Helper function, mostly for tests. + pub fn hash_path(segments: &[&[u8]]) -> u64 { + let mut hasher = Self::default(); + for segment in segments { + hasher.append(segment); + } + hasher.finish() + } + + /// Appends a new segment to our path. + /// + /// In order to avoid natural collisions, (e.g. 
&["ab", "c"] and &["a", "bc"]), + /// we add a null byte between each segment as a separator. + #[inline] + pub fn append(&mut self, payload: &[u8]) { + self.hasher.write(payload); + // We use 255 as a separator as all utf8 bytes contain a 0 + // in position 0-5. + self.hasher.write(&[255u8]); + } + + #[inline] + pub fn finish(&self) -> u64 { + self.hasher.finish() + } +} diff --git a/quickwit/quickwit-common/src/shared_consts.rs b/quickwit/quickwit-common/src/shared_consts.rs new file mode 100644 index 00000000000..44891328fea --- /dev/null +++ b/quickwit/quickwit-common/src/shared_consts.rs @@ -0,0 +1,21 @@ +// Copyright (C) 2023 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +/// Field name reserved for storing the dynamically indexed fields. 
+pub const FIELD_PRESENCE_FIELD_NAME: &str = "_field_presence"; diff --git a/quickwit/quickwit-config/src/index_config/mod.rs b/quickwit/quickwit-config/src/index_config/mod.rs index 547781a3ff0..e589079e327 100644 --- a/quickwit/quickwit-config/src/index_config/mod.rs +++ b/quickwit/quickwit-config/src/index_config/mod.rs @@ -66,6 +66,8 @@ pub struct DocMapping { #[serde(default)] pub store_source: bool, #[serde(default)] + pub index_field_presence: bool, + #[serde(default)] pub timestamp_field: Option, #[serde_multikey( deserializer = Mode::from_parts, @@ -433,6 +435,7 @@ impl TestableForRegression for IndexConfig { ) .unwrap(); let doc_mapping = DocMapping { + index_field_presence: true, field_mappings: vec![ tenant_id_mapping, timestamp_mapping, @@ -517,6 +520,7 @@ pub fn build_doc_mapper( ) -> anyhow::Result> { let builder = DefaultDocMapperBuilder { store_source: doc_mapping.store_source, + index_field_presence: doc_mapping.index_field_presence, default_search_fields: search_settings.default_search_fields.clone(), timestamp_field: doc_mapping.timestamp_field.clone(), field_mappings: doc_mapping.field_mappings.clone(), diff --git a/quickwit/quickwit-doc-mapper/Cargo.toml b/quickwit/quickwit-doc-mapper/Cargo.toml index 34a4e8a8971..f817f89e6c8 100644 --- a/quickwit/quickwit-doc-mapper/Cargo.toml +++ b/quickwit/quickwit-doc-mapper/Cargo.toml @@ -32,6 +32,7 @@ utoipa = { workspace = true } quickwit-datetime = { workspace = true } quickwit-macros = { workspace = true } +quickwit-common = { workspace = true } quickwit-query = { workspace = true } [dev-dependencies] diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 514f1cd4a06..7483383d8cd 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -21,12 +21,16 @@ use std::collections::{BTreeMap, BTreeSet, 
HashSet}; use std::num::NonZeroU32; use anyhow::{bail, Context}; +use fnv::FnvHashSet; +use quickwit_common::PathHasher; use quickwit_query::create_default_quickwit_tokenizer_manager; use quickwit_query::query_ast::QueryAst; use serde::{Deserialize, Serialize}; use serde_json::{self, Value as JsonValue}; use tantivy::query::Query; -use tantivy::schema::{Field, FieldType, Schema, Value as TantivyValue, STORED}; +use tantivy::schema::{ + Field, FieldType, FieldValue, Schema, Value as TantivyValue, INDEXED, STORED, +}; use tantivy::tokenizer::TokenizerManager; use tantivy::Document; @@ -40,10 +44,11 @@ use crate::query_builder::build_query; use crate::routing_expression::RoutingExpr; use crate::{ Cardinality, DocMapper, DocParsingError, Mode, QueryParserError, TokenizerEntry, WarmupInfo, - DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME, + DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME, }; -/// Default [`DocMapper`] implementation +const FIELD_PRESENCE_FIELD: Field = Field::from_field_id(0u32); + /// which defines a set of rules to map json fields /// to tantivy index fields. /// @@ -55,6 +60,9 @@ pub struct DefaultDocMapper { /// This field is only valid when using the schema associated with the default /// doc mapper, and therefore cannot be used in the `query` method. source_field: Option, + /// Indexes field presence. It is necessary to enable this in order to run exists + /// queries. + index_field_presence: bool, /// Field in which the dynamically mapped fields should be stored. /// This field is only valid when using the schema associated with the default /// doc mapper, and therefore cannot be used in the `query` method. 
@@ -131,6 +139,16 @@ impl TryFrom for DefaultDocMapper { fn try_from(builder: DefaultDocMapperBuilder) -> anyhow::Result { let mut schema_builder = Schema::builder(); + let field_presence_field = schema_builder.add_u64_field(FIELD_PRESENCE_FIELD_NAME, INDEXED); + assert_eq!(field_presence_field, FIELD_PRESENCE_FIELD); + + let dynamic_field = if let Mode::Dynamic(json_options) = &builder.mode { + Some(schema_builder.add_json_field(DYNAMIC_FIELD_NAME, json_options.clone())) + } else { + None + }; + + // Adding regular fields. let field_mappings = build_mapping_tree(&builder.field_mappings, &mut schema_builder)?; let source_field = if builder.store_source { Some(schema_builder.add_json_field(SOURCE_FIELD_NAME, STORED)) @@ -142,12 +160,6 @@ impl TryFrom for DefaultDocMapper { validate_timestamp_field(timestamp_field_path, &field_mappings)?; }; - let dynamic_field = if let Mode::Dynamic(json_options) = &builder.mode { - Some(schema_builder.add_json_field(DYNAMIC_FIELD_NAME, json_options.clone())) - } else { - None - }; - let schema = schema_builder.build(); let tokenizer_manager = create_default_quickwit_tokenizer_manager(); @@ -226,6 +238,7 @@ impl TryFrom for DefaultDocMapper { let required_fields = Vec::new(); Ok(DefaultDocMapper { schema, + index_field_presence: builder.index_field_presence, source_field, dynamic_field, default_search_field_names, @@ -326,6 +339,7 @@ impl From for DefaultDocMapperBuilder { }; Self { store_source: default_doc_mapper.source_field.is_some(), + index_field_presence: default_doc_mapper.index_field_presence, timestamp_field: default_doc_mapper .timestamp_field_name() .map(ToString::to_string), @@ -378,6 +392,63 @@ fn extract_single_obj( } } +#[inline] +fn populate_field_presence_for_json_value( + json_value: &JsonValue, + path_hasher: &PathHasher, + is_expand_dots_enabled: bool, + output: &mut FnvHashSet, +) { + match json_value { + JsonValue::Null => {} + JsonValue::Bool(_) | JsonValue::Number(_) | JsonValue::String(_) => { + 
output.insert(path_hasher.finish()); + } + JsonValue::Array(items) => { + for item in items { + populate_field_presence_for_json_value( + item, + path_hasher, + is_expand_dots_enabled, + output, + ); + } + } + JsonValue::Object(json_obj) => { + populate_field_presence_for_json_obj( + json_obj, + path_hasher.clone(), + is_expand_dots_enabled, + output, + ); + } + } +} + +fn populate_field_presence_for_json_obj( + json_obj: &JsonObject, + path_hasher: PathHasher, + is_expand_dots_enabled: bool, + output: &mut FnvHashSet, +) { + for (field_key, field_value) in json_obj { + let mut child_path_hasher = path_hasher.clone(); + if is_expand_dots_enabled { + for segment in field_key.split('.') { + child_path_hasher.append(segment.as_bytes()); + } + } else { + child_path_hasher.append(field_key.as_bytes()); + }; + populate_field_presence_for_json_value( + field_value, + &child_path_hasher, + is_expand_dots_enabled, + output, + ); + } +} + #[typetag::serde(name = "default")] impl DocMapper for DefaultDocMapper { fn doc_from_json_obj( @@ -409,6 +480,42 @@ impl DocMapper for DefaultDocMapper { } } + // The capacity is inexact here. 
+ + if self.index_field_presence { + let mut field_presence_hashes: FnvHashSet = FnvHashSet::with_capacity_and_hasher( + document.field_values().len(), + Default::default(), + ); + for FieldValue { field, value } in document.field_values() { + let field_entry = self.schema.get_field_entry(*field); + if !field_entry.is_indexed() { + continue; + } + let mut path_hasher: PathHasher = PathHasher::default(); + path_hasher.append(&field.field_id().to_le_bytes()[..]); + if let tantivy::schema::Value::JsonObject(json_obj) = value { + let is_expand_dots_enabled: bool = + if let FieldType::JsonObject(json_options) = field_entry.field_type() { + json_options.is_expand_dots_enabled() + } else { + false + }; + populate_field_presence_for_json_obj( + json_obj, + path_hasher, + is_expand_dots_enabled, + &mut field_presence_hashes, + ); + } else { + field_presence_hashes.insert(path_hasher.finish()); + } + } + for field_presence_hash in field_presence_hashes { + document.add_field_value(FIELD_PRESENCE_FIELD, field_presence_hash); + } + } + self.check_missing_required_fields(&document)?; Ok((partition, document)) } @@ -475,8 +582,9 @@ impl DocMapper for DefaultDocMapper { #[cfg(test)] mod tests { - use std::collections::HashMap; + use std::collections::{HashMap, HashSet}; + use quickwit_common::PathHasher; use quickwit_query::query_ast::query_ast_from_user_text; use serde_json::{self, json, Value as JsonValue}; use tantivy::schema::{FieldType, IndexRecordOption, Type, Value as TantivyValue}; @@ -484,7 +592,8 @@ mod tests { use super::DefaultDocMapper; use crate::default_doc_mapper::field_mapping_entry::DEFAULT_TOKENIZER_NAME; use crate::{ - DefaultDocMapperBuilder, DocMapper, DocParsingError, DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME, + DefaultDocMapperBuilder, DocMapper, DocParsingError, DYNAMIC_FIELD_NAME, + FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME, }; fn example_json_doc_value() -> JsonValue { @@ -545,10 +654,12 @@ mod tests { let schema = doc_mapper.schema(); // 8 property 
entry + 1 field "_source" + two fields values for "tags" field // + 2 values inf "server.status" field + 2 values in "server.payload" field - assert_eq!(document.len(), 16); + // + 12 values for field presence. + assert_eq!(document.len(), 28); let expected_json_paths_and_values: HashMap = serde_json::from_str(EXPECTED_JSON_PATHS_AND_VALUES).unwrap(); - document.field_values().iter().for_each(|field_value| { + let mut field_presences: HashSet = HashSet::new(); + for field_value in document.field_values() { let field_name = schema.get_field_name(field_value.field()); if field_name == SOURCE_FIELD_NAME { assert_eq!(field_value.value().as_json(), json_doc.as_object()); @@ -557,6 +668,9 @@ mod tests { field_value.value().as_json(), json!({"response_date2": "2021-12-19T16:39:57+00:00"}).as_object() ); + } else if field_name == FIELD_PRESENCE_FIELD_NAME { + let field_presence_u64 = field_value.value().as_u64().unwrap(); + field_presences.insert(field_presence_u64); } else { let value = serde_json::to_string(field_value.value()).unwrap(); let is_value_in_expected_values = expected_json_paths_and_values @@ -571,7 +685,20 @@ mod tests { panic!("Could not find: {value:?} in {expected_json_paths_and_values:?}"); } } - }); + } + assert_eq!(field_presences.len(), 12); + let timestamp_field = schema.get_field("timestamp").unwrap(); + let attributes_field = schema.get_field("attributes.server").unwrap(); + assert!( + field_presences.contains(&PathHasher::hash_path(&[×tamp_field + .field_id() + .to_le_bytes()[..]])) + ); + assert!( + field_presences.contains(&PathHasher::hash_path(&[&attributes_field + .field_id() + .to_le_bytes()[..]])) + ); } #[test] @@ -854,6 +981,7 @@ mod tests { fn test_parse_document_with_tag_fields() { let doc_mapper = r#"{ "default_search_fields": [], + "index_field_presence": true, "timestamp_field": null, "tag_fields": ["city"], "store_source": true, @@ -883,8 +1011,8 @@ mod tests { .doc_from_json_obj(json_doc_value.as_object().unwrap().clone()) 
.unwrap(); - // 2 properties, + 1 value for "_source" - assert_eq!(document.len(), 3); + // 2 properties, + 1 value for "_source" + 2 for field presence. + assert_eq!(document.len(), 5); let expected_json_paths_and_values: HashMap = serde_json::from_str( r#"{ "city": ["tokio"], @@ -892,10 +1020,14 @@ mod tests { }"#, ) .unwrap(); + let mut field_presences: HashSet = HashSet::default(); document.field_values().iter().for_each(|field_value| { let field_name = schema.get_field_name(field_value.field()); if field_name == SOURCE_FIELD_NAME { assert_eq!(field_value.value().as_json(), json_doc_value.as_object()); + } else if field_name == FIELD_PRESENCE_FIELD_NAME { + let field_value_hash = field_value.value().as_u64().unwrap(); + field_presences.insert(field_value_hash); } else { let value = serde_json::to_string(field_value.value()).unwrap(); let is_value_in_expected_values = expected_json_paths_and_values @@ -909,6 +1041,19 @@ mod tests { assert!(is_value_in_expected_values); } }); + assert_eq!(field_presences.len(), 2); + let city_field = schema.get_field("city").unwrap(); + let image_field = schema.get_field("image").unwrap(); + assert!( + field_presences.contains(&PathHasher::hash_path(&[&city_field + .field_id() + .to_le_bytes()])) + ); + assert!( + field_presences.contains(&PathHasher::hash_path(&[&image_field + .field_id() + .to_le_bytes()])) + ); } #[test] @@ -1085,16 +1230,16 @@ mod tests { let default_doc_mapper: DefaultDocMapper = serde_json::from_str(r#"{ "mode": "lenient" }"#).unwrap(); let schema = default_doc_mapper.schema(); - assert_eq!(schema.num_fields(), 0); + assert_eq!(schema.num_fields(), 1); assert!(default_doc_mapper.default_search_field_names.is_empty()); } #[test] - fn test_dymamic_mode_schema() { + fn test_dynamic_mode_schema() { let default_doc_mapper: DefaultDocMapper = serde_json::from_str(r#"{ "mode": "dynamic" }"#).unwrap(); let schema = default_doc_mapper.schema(); - assert_eq!(schema.num_fields(), 1); + assert_eq!(schema.num_fields(), 
2); let dynamic_field = schema.get_field(DYNAMIC_FIELD_NAME).unwrap(); let dynamic_field_entry = schema.get_field_entry(dynamic_field); assert_eq!(dynamic_field_entry.field_type().value_type(), Type::Json); @@ -1103,7 +1248,7 @@ mod tests { } #[test] - fn test_dymamic_mode_schema_not_indexed() { + fn test_dynamic_mode_schema_not_indexed() { let default_doc_mapper: DefaultDocMapper = serde_json::from_str( r#"{ "mode": "dynamic", @@ -1115,14 +1260,13 @@ mod tests { ) .unwrap(); let schema = default_doc_mapper.schema(); - assert_eq!(schema.num_fields(), 1); + assert_eq!(schema.num_fields(), 2); let dynamic_field = schema.get_field(DYNAMIC_FIELD_NAME).unwrap(); let dynamic_field_entry = schema.get_field_entry(dynamic_field); - if let FieldType::JsonObject(json_opt) = dynamic_field_entry.field_type() { - assert_eq!(json_opt.is_indexed(), false); - } else { + let FieldType::JsonObject(json_opt) = dynamic_field_entry.field_type() else { panic!("Expected a json object"); - } + }; + assert_eq!(json_opt.is_indexed(), false); default_doc_mapper.default_search_field_names.is_empty(); } @@ -1336,7 +1480,7 @@ mod tests { assert_eq!( default_doc_mapper_query_aux(&doc_mapper, "body.dynamic_field:hello"), Ok( - r#"TermQuery(Term(field=0, type=Json, path=dynamic_field, type=Str, "hello"))"# + r#"TermQuery(Term(field=2, type=Json, path=dynamic_field, type=Str, "hello"))"# .to_string() ) ); @@ -1359,11 +1503,11 @@ mod tests { .unwrap(); assert_eq!( default_doc_mapper_query_aux(&doc_mapper, "identity.username:toto").unwrap(), - r#"TermQuery(Term(field=0, type=Str, "toto"))"# + r#"TermQuery(Term(field=2, type=Str, "toto"))"# ); assert_eq!( default_doc_mapper_query_aux(&doc_mapper, r#"identity\.username:toto"#).unwrap(), - r#"TermQuery(Term(field=1, type=Str, "toto"))"# + r#"TermQuery(Term(field=3, type=Str, "toto"))"# ); } @@ -1380,11 +1524,11 @@ mod tests { .unwrap(); assert_eq!( default_doc_mapper_query_aux(&doc_mapper, "identity.username:toto").unwrap(), - r#"TermQuery(Term(field=0, 
type=Json, path=username, type=Str, "toto"))"# + r#"TermQuery(Term(field=2, type=Json, path=username, type=Str, "toto"))"# ); assert_eq!( default_doc_mapper_query_aux(&doc_mapper, r#"identity\.username:toto"#).unwrap(), - r#"TermQuery(Term(field=1, type=Str, "toto"))"# + r#"TermQuery(Term(field=3, type=Str, "toto"))"# ); } diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs index 98821da874d..e216c2c8365 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper_builder.rs @@ -39,6 +39,9 @@ pub struct DefaultDocMapperBuilder { /// Stores the original source document when set to true. #[serde(default)] pub store_source: bool, + /// Indexes field presence. + #[serde(default)] + pub index_field_presence: bool, /// Name of the fields that are searched by default, unless overridden. 
#[serde(default)] pub default_search_fields: Vec, diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs index ff449964b2b..a2174cffc72 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs @@ -359,7 +359,7 @@ mod tests { let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap(); assert_eq!( format!("{query:?}"), - r#"TermQuery(Term(field=0, type=Json, path=toto.titi, type=Str, "hello"))"# + r#"TermQuery(Term(field=2, type=Json, path=toto.titi, type=Str, "hello"))"# ); } @@ -378,7 +378,7 @@ mod tests { let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap(); assert_eq!( format!("{query:?}"), - r#"TermQuery(Term(field=0, type=Json, path=toto.titi, type=Str, "hello"))"# + r#"TermQuery(Term(field=1, type=Json, path=toto.titi, type=Str, "hello"))"# ); } @@ -397,7 +397,7 @@ mod tests { let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap(); assert_eq!( format!("{query:?}"), - r#"BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Json, path=toto, type=I64, 5))), (Should, TermQuery(Term(field=0, type=Json, path=toto, type=Str, "5")))] }"# + r#"BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Json, path=toto, type=I64, 5))), (Should, TermQuery(Term(field=1, type=Json, path=toto, type=Str, "5")))] }"# ); } @@ -567,7 +567,7 @@ mod tests { let (query, _) = doc_mapper.query(schema, &query_ast, false).unwrap(); assert_eq!( format!("{query:?}"), - r#"TermQuery(Term(field=0, type=Str, "JPN:す"))"# + r#"TermQuery(Term(field=2, type=Str, "JPN:す"))"# ); } } diff --git a/quickwit/quickwit-doc-mapper/src/lib.rs b/quickwit/quickwit-doc-mapper/src/lib.rs index ec6573a239b..07542d73f63 100644 --- a/quickwit/quickwit-doc-mapper/src/lib.rs +++ b/quickwit/quickwit-doc-mapper/src/lib.rs @@ -45,6 +45,7 @@ use default_doc_mapper::{ }; pub use doc_mapper::{DocMapper, JsonObject, NamedField, TermRange, WarmupInfo}; 
pub use error::{DocParsingError, QueryParserError}; +use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME; /// Field name reserved for storing the source document. pub const SOURCE_FIELD_NAME: &str = "_source"; @@ -53,7 +54,11 @@ pub const SOURCE_FIELD_NAME: &str = "_source"; pub const DYNAMIC_FIELD_NAME: &str = "_dynamic"; /// Quickwit reserved field names. -const QW_RESERVED_FIELD_NAMES: &[&str] = &[SOURCE_FIELD_NAME, DYNAMIC_FIELD_NAME]; +const QW_RESERVED_FIELD_NAMES: &[&str] = &[ + SOURCE_FIELD_NAME, + DYNAMIC_FIELD_NAME, + FIELD_PRESENCE_FIELD_NAME, +]; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum Cardinality { @@ -86,6 +91,7 @@ pub fn default_doc_mapper_for_test() -> DefaultDocMapper { const JSON_CONFIG_VALUE: &str = r#" { "store_source": true, + "index_field_presence": true, "default_search_fields": [ "body", "attributes.server", "attributes.server\\.status" ], diff --git a/quickwit/quickwit-doc-mapper/src/tag_pruning.rs b/quickwit/quickwit-doc-mapper/src/tag_pruning.rs index debf5fa9fe6..78184a0194d 100644 --- a/quickwit/quickwit-doc-mapper/src/tag_pruning.rs +++ b/quickwit/quickwit-doc-mapper/src/tag_pruning.rs @@ -109,6 +109,7 @@ fn extract_unsimplified_tags_filter_ast(query_ast: QueryAst) -> UnsimplifiedTagF QueryAst::UserInput(_user_text_query) => { panic!("Extract unsimplified should only be called on AST without UserInputQuery."); } + QueryAst::FieldPresence(_) => UnsimplifiedTagFilterAst::Uninformative, } } diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json index e9bd9c16174..0b8311d0238 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json @@ -28,6 +28,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": false, "field_mappings": [ { "coerce": true, diff --git 
a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json index 44c2e6bc1e3..097763ad774 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json @@ -28,6 +28,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": false, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json index 399410c0ffb..5a475a379a9 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json @@ -28,6 +28,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": true, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json index 399410c0ffb..5a475a379a9 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json @@ -28,6 +28,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": true, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json index 6537a85f335..7b8bb82779e 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json @@ -17,6 +17,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": false, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json 
b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json index 6537a85f335..7b8bb82779e 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json @@ -17,6 +17,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": false, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json index 1fb511afa8b..46bd1d830ea 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json @@ -17,6 +17,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": true, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json index 1fb511afa8b..46bd1d830ea 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json @@ -17,6 +17,7 @@ "stored": true, "tokenizer": "raw" }, + "index_field_presence": true, "field_mappings": [ { "coerce": true, diff --git a/quickwit/quickwit-query/Cargo.toml b/quickwit/quickwit-query/Cargo.toml index ca74be4b287..21fc89382ba 100644 --- a/quickwit/quickwit-query/Cargo.toml +++ b/quickwit/quickwit-query/Cargo.toml @@ -12,6 +12,7 @@ documentation = "https://quickwit.io/docs/" [dependencies] anyhow = { workspace = true } base64 = { workspace = true } +fnv = { workspace = true } hex = { workspace = true } lindera-core = { workspace = true, optional = true} lindera-dictionary = { workspace = true, optional = true } @@ -25,6 +26,7 @@ thiserror = { workspace = true } tracing = { workspace = true } whichlang = { workspace = true, optional = true } +quickwit-common = { workspace = true } 
quickwit-datetime = { workspace = true } [dev-dependencies] diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs new file mode 100644 index 00000000000..a11313ea726 --- /dev/null +++ b/quickwit/quickwit-query/src/elastic_query_dsl/exists_query.rs @@ -0,0 +1,36 @@ +// Copyright (C) 2023 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +use serde::{Deserialize, Serialize}; + +use crate::elastic_query_dsl::ConvertableToQueryAst; +use crate::query_ast::{self, QueryAst}; + +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)] +pub struct ExistsQuery { + field: String, +} + +impl ConvertableToQueryAst for ExistsQuery { + fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> { + Ok(QueryAst::FieldPresence(query_ast::FieldPresenceQuery { + field: self.field, + })) + } +} diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs index b5294b87a60..537b8c83d2b 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs @@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize}; mod bool_query; +mod exists_query; mod match_query; mod one_field_map; mod phrase_prefix_query; @@ -34,6 +35,7 @@ pub(crate) use query_string_query::QueryStringQuery; use range_query::RangeQuery; use term_query::TermQuery; +use crate::elastic_query_dsl::exists_query::ExistsQuery; use crate::elastic_query_dsl::match_query::MatchQuery; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::QueryAst; @@ -57,6 +59,7 @@ enum ElasticQueryDslInner { Match(MatchQuery), MatchPhrasePrefix(MatchPhrasePrefix), Range(RangeQuery), + Exists(ExistsQuery), } #[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] @@ -97,6 +100,7 @@ impl ConvertableToQueryAst for ElasticQueryDslInner { } Self::Range(range_query) => range_query.convert_to_query_ast(), Self::Match(match_query) => match_query.convert_to_query_ast(), + Self::Exists(exists_query) => exists_query.convert_to_query_ast(), } } } diff --git a/quickwit/quickwit-query/src/query_ast/field_presence.rs b/quickwit/quickwit-query/src/query_ast/field_presence.rs new file mode 100644 index 00000000000..1b0a4cf2d49 --- /dev/null +++ b/quickwit/quickwit-query/src/query_ast/field_presence.rs @@ -0,0 +1,127 @@ +// Copyright (C) 2023 Quickwit, Inc.
+// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME; +use quickwit_common::PathHasher; +use serde::{Deserialize, Serialize}; +use tantivy::schema::{Field, IndexRecordOption, Schema as TantivySchema}; +use tantivy::tokenizer::TokenizerManager; +use tantivy::Term; + +use crate::query_ast::tantivy_query_ast::TantivyQueryAst; +use crate::query_ast::{BuildTantivyAst, QueryAst}; +use crate::{find_field_or_hit_dynamic, InvalidQuery}; + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct FieldPresenceQuery { + pub(crate) field: String, +} + +impl From<FieldPresenceQuery> for QueryAst { + fn from(field_presence_query: FieldPresenceQuery) -> Self { + QueryAst::FieldPresence(field_presence_query) + } +} + +fn compute_field_presence_hash(field: Field, field_path: &str) -> u64 { + let mut path_hasher: PathHasher = PathHasher::default(); + path_hasher.append(&field.field_id().to_le_bytes()[..]); + let mut escaped = false; + let mut current_segment = String::new(); + for c in field_path.chars() { + if escaped { + escaped = false; + current_segment.push(c); + continue; + } + match c { + '\\' => { + escaped = true; + } + '.'
=> { + path_hasher.append(current_segment.as_bytes()); + current_segment.clear(); + } + _ => { + current_segment.push(c); + } + } + } + if !current_segment.is_empty() { + path_hasher.append(current_segment.as_bytes()); + } + path_hasher.finish() +} + +impl BuildTantivyAst for FieldPresenceQuery { + fn build_tantivy_ast_impl( + &self, + schema: &TantivySchema, + _tokenizer_manager: &TokenizerManager, + _search_fields: &[String], + _with_validation: bool, + ) -> Result<TantivyQueryAst, InvalidQuery> { + let field_presence_field = schema.get_field(FIELD_PRESENCE_FIELD_NAME).map_err(|_| { + InvalidQuery::SchemaError("Field presence is not available for this split.".to_string()) + })?; + let (field, _field_entry, path) = find_field_or_hit_dynamic(&self.field, schema)?; + let field_presence_hash = compute_field_presence_hash(field, path); + let field_presence_term: Term = + Term::from_field_u64(field_presence_field, field_presence_hash); + let field_presence_term_query = + tantivy::query::TermQuery::new(field_presence_term, IndexRecordOption::Basic); + Ok(TantivyQueryAst::from(field_presence_term_query)) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_field_presence_single() { + let field_presence_term: u64 = + compute_field_presence_hash(Field::from_field_id(17u32), "attributes"); + assert_eq!( + field_presence_term, + PathHasher::hash_path(&[&17u32.to_le_bytes()[..], b"attributes"]) + ); + } + + #[test] + fn test_field_presence_hash_simple() { + let field_presence_term: u64 = + compute_field_presence_hash(Field::from_field_id(17u32), "attributes.color"); + assert_eq!( + field_presence_term, + PathHasher::hash_path(&[&17u32.to_le_bytes()[..], b"attributes", b"color"]) + ); + } + + #[test] + fn test_field_presence_hash_escaped_dot() { + let field_presence_term: u64 = + compute_field_presence_hash(Field::from_field_id(17u32), r#"attributes\.color.hello"#); + assert_eq!( + field_presence_term, + PathHasher::hash_path(&[&17u32.to_le_bytes()[..], b"attributes.color", b"hello"])
+ ); + } +} diff --git a/quickwit/quickwit-query/src/query_ast/mod.rs b/quickwit/quickwit-query/src/query_ast/mod.rs index 8decbccb96b..6708dae41f1 100644 --- a/quickwit/quickwit-query/src/query_ast/mod.rs +++ b/quickwit/quickwit-query/src/query_ast/mod.rs @@ -23,6 +23,7 @@ use tantivy::schema::Schema as TantivySchema; use tantivy::tokenizer::TokenizerManager; mod bool_query; +mod field_presence; mod full_text_query; mod phrase_prefix_query; mod range_query; @@ -34,6 +35,7 @@ pub(crate) mod utils; mod visitor; pub use bool_query::BoolQuery; +pub use field_presence::FieldPresenceQuery; pub use full_text_query::{FullTextMode, FullTextParams, FullTextQuery}; pub use phrase_prefix_query::PhrasePrefixQuery; pub use range_query::RangeQuery; @@ -52,6 +54,7 @@ pub enum QueryAst { Bool(BoolQuery), Term(TermQuery), TermSet(TermSetQuery), + FieldPresence(FieldPresenceQuery), FullText(FullTextQuery), PhrasePrefix(PhrasePrefixQuery), Range(RangeQuery), @@ -94,6 +97,7 @@ impl QueryAst { | ast @ QueryAst::PhrasePrefix(_) | ast @ QueryAst::MatchAll | ast @ QueryAst::MatchNone + | ast @ QueryAst::FieldPresence(_) | ast @ QueryAst::Range(_) => Ok(ast), QueryAst::UserInput(user_text_query) => { user_text_query.parse_user_query(default_search_fields) @@ -226,6 +230,12 @@ impl BuildTantivyAst for QueryAst { search_fields, with_validation, ), + QueryAst::FieldPresence(field_presence) => field_presence.build_tantivy_ast_call( + schema, + tokenizer_manager, + search_fields, + with_validation, + ), } } } diff --git a/quickwit/quickwit-query/src/query_ast/visitor.rs b/quickwit/quickwit-query/src/query_ast/visitor.rs index d7b783ae42f..398f8d9852d 100644 --- a/quickwit/quickwit-query/src/query_ast/visitor.rs +++ b/quickwit/quickwit-query/src/query_ast/visitor.rs @@ -18,6 +18,7 @@ // along with this program. If not, see <http://www.gnu.org/licenses/>.
use crate::not_nan_f32::NotNaNf32; +use crate::query_ast::field_presence::FieldPresenceQuery; use crate::query_ast::user_input_query::UserInputQuery; use crate::query_ast::{ BoolQuery, FullTextQuery, PhrasePrefixQuery, QueryAst, RangeQuery, TermQuery, TermSetQuery, @@ -41,6 +42,7 @@ pub trait QueryAstVisitor<'a> { QueryAst::MatchNone => self.visit_match_none(), QueryAst::Boost { underlying, boost } => self.visit_boost(underlying, *boost), QueryAst::UserInput(user_text_query) => self.visit_user_text(user_text_query), + QueryAst::FieldPresence(exists) => self.visit_exists(exists), } } @@ -99,4 +101,8 @@ pub trait QueryAstVisitor<'a> { fn visit_user_text(&mut self, _user_text_query: &'a UserInputQuery) -> Result<(), Self::Err> { Ok(()) } + + fn visit_exists(&mut self, _exists_query: &'a FieldPresenceQuery) -> Result<(), Self::Err> { + Ok(()) + } } diff --git a/quickwit/quickwit-query/src/tokenizers/mod.rs b/quickwit/quickwit-query/src/tokenizers/mod.rs index 479acf4573b..07a27c86689 100644 --- a/quickwit/quickwit-query/src/tokenizers/mod.rs +++ b/quickwit/quickwit-query/src/tokenizers/mod.rs @@ -37,11 +37,11 @@ pub const DEFAULT_REMOVE_TOKEN_LENGTH: usize = 255; /// Quickwit's tokenizer/analyzer manager. 
pub fn create_default_quickwit_tokenizer_manager() -> TokenizerManager { let tokenizer_manager = TokenizerManager::default(); + let raw_tokenizer = TextAnalyzer::builder(RawTokenizer::default()) .filter(RemoveLongFilter::limit(DEFAULT_REMOVE_TOKEN_LENGTH)) .build(); tokenizer_manager.register("raw", raw_tokenizer); - let chinese_tokenizer = TextAnalyzer::builder(ChineseTokenizer) .filter(RemoveLongFilter::limit(DEFAULT_REMOVE_TOKEN_LENGTH)) .filter(LowerCaser) diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0011-exists-query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0011-exists-query.yaml new file mode 100644 index 00000000000..8de231391e5 --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0011-exists-query.yaml @@ -0,0 +1,33 @@ +json: + query: + exists: + field: type +expected: + hits: + total: + value: 100 +--- +json: + query: + exists: + field: thisfielddoesnotexists +expected: + hits: + total: + value: 0 +--- +json: + query: + exists: + field: payload.size +expected: + hits: + total: + value: 60 +--- +# Fortunately, ES does not accept this quirky syntax in the +# case of exists query. +json: + query: + exists: payload.size +status_code: 400 diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml index 59a9487b7d7..45397b04a8d 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml @@ -12,6 +12,7 @@ json: version: "0.6" index_id: gharchive doc_mapping: + index_field_presence: true timestamp_field: created_at mode: dynamic field_mappings: