From 8c2caf5a10fb376d5a0466ab3a3fc5562734fea4 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Thu, 3 Aug 2023 18:00:24 -0400 Subject: [PATCH] Add coerce and output format options for numeric fields (#3704) --- docs/configuration/index-config.md | 16 +- quickwit/quickwit-doc-mapper/Cargo.toml | 5 +- .../default_doc_mapper/field_mapping_entry.rs | 88 +++++-- .../default_doc_mapper/field_mapping_type.rs | 3 +- .../src/default_doc_mapper/mapping_tree.rs | 247 +++++++++++++++--- .../quickwit-doc-mapper/src/doc_mapper.rs | 7 +- .../file-backed-index/v0.4.expected.json | 2 + .../test-data/file-backed-index/v0.4.json | 4 +- .../file-backed-index/v0.5.expected.json | 2 + .../test-data/file-backed-index/v0.5.json | 4 +- .../file-backed-index/v0.6.expected.json | 2 + .../test-data/file-backed-index/v0.6.json | 2 + .../index-metadata/v0.4.expected.json | 2 + .../index-metadata/v0.5.expected.json | 2 + .../index-metadata/v0.6.expected.json | 2 + .../test-data/index-metadata/v0.6.json | 2 + 16 files changed, 315 insertions(+), 75 deletions(-) diff --git a/docs/configuration/index-config.md b/docs/configuration/index-config.md index 8f8e6db2b6e..1fb19b9c504 100644 --- a/docs/configuration/index-config.md +++ b/docs/configuration/index-config.md @@ -164,7 +164,7 @@ Indexing with position is required to run phrase queries. Quickwit handles three numeric types: `i64`, `u64`, and `f64`. -Numeric values can be stored in a fast field (the equivalent of Lucene's `DocValues`) which is a column-oriented storage. +Numeric values can be stored in a fast field (the equivalent of Lucene's `DocValues`), which is a column-oriented storage used for range queries and aggregations. Example of a mapping for an u64 field: @@ -179,12 +179,14 @@ fast: true **Parameters for i64, u64 and f64 field** -| Variable | Description | Default value | -| ------------- | ------------- | ------------- | -| `description` | Optional description for the field. | `None` | -| `stored` | Whether the field values are stored in the document store | `true` | -| `indexed` | Whether the field values are indexed | `true` | -| `fast` | Whether the field values are stored in a fast field | `false` | +| Variable | Description | Default value | +| --------------- | ------------- | ------------- | +| `description` | Optional description for the field. | `None` | +| `stored` | Whether the field values are stored in the document store. | `true` | +| `indexed` | Whether the field values are indexed. | `true` | +| `fast` | Whether the field values are stored in a fast field. | `false` | +| `coerce` | Whether to convert numbers passed as strings to integers or floats. | `true` | +| `output_format` | JSON type used to return numbers in search results. Possible values are `number` or `string`. | `number` | #### `datetime` type diff --git a/quickwit/quickwit-doc-mapper/Cargo.toml b/quickwit/quickwit-doc-mapper/Cargo.toml index 85900ecd303..34a4e8a8971 100644 --- a/quickwit/quickwit-doc-mapper/Cargo.toml +++ b/quickwit/quickwit-doc-mapper/Cargo.toml @@ -38,11 +38,12 @@ quickwit-query = { workspace = true } criterion = { workspace = true } matches = { workspace = true } proptest = { workspace = true } -quickwit-proto = { workspace = true } -quickwit-query = { workspace = true, features = ["testsuite"] } serde_yaml = { workspace = true } time = { workspace = true } +quickwit-proto = { workspace = true } +quickwit-query = { workspace = true, features = ["testsuite"] } + [features] multilang = ["quickwit-query/multilang"] testsuite = ["multilang"] diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs index b342997be51..16bc953c926 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs @@ -95,9 +95,40 @@ pub struct QuickwitNumericOptions { pub indexed: bool, #[serde(default)] pub fast: bool, + #[serde(default = "default_as_true")] + pub coerce: bool, + #[serde(default)] + pub output_format: NumericOutputFormat, } impl Default for QuickwitNumericOptions { + fn default() -> Self { + Self { + description: None, + indexed: true, + stored: true, + fast: false, + coerce: true, + output_format: NumericOutputFormat::default(), + } + } +} + +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, utoipa::ToSchema)] +#[serde(deny_unknown_fields)] +pub struct QuickwitBoolOptions { + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(default = "default_as_true")] + pub stored: bool, + #[serde(default = "default_as_true")] + pub indexed: bool, + #[serde(default)] + pub fast: bool, +} + +impl Default for QuickwitBoolOptions { fn default() -> Self { Self { description: None, @@ -150,15 +181,15 @@ pub enum BinaryFormat { impl BinaryFormat { pub fn as_str(&self) -> &str { match self { - BinaryFormat::Base64 => "base64", - BinaryFormat::Hex => "hex", + Self::Base64 => "base64", + Self::Hex => "hex", } } pub fn format_to_json(&self, value: &[u8]) -> JsonValue { match self { - BinaryFormat::Base64 => BASE64_STANDARD.encode(value).into(), - BinaryFormat::Hex => hex::encode(value).into(), + Self::Base64 => BASE64_STANDARD.encode(value).into(), + Self::Hex => hex::encode(value).into(), } } @@ -172,14 +203,12 @@ impl BinaryFormat { )); }; let payload = match self { - BinaryFormat::Base64 => { - BASE64_STANDARD - .decode(&byte_str) - .map_err(|base64_decode_err| { - format!("Expected base64 string, got `{byte_str}`: {base64_decode_err}") - })? - } - BinaryFormat::Hex => hex::decode(&byte_str).map_err(|hex_decode_err| { + Self::Base64 => BASE64_STANDARD + .decode(&byte_str) + .map_err(|base64_decode_err| { + format!("Expected base64 string, got `{byte_str}`: {base64_decode_err}") + })?, + Self::Hex => hex::decode(&byte_str).map_err(|hex_decode_err| { format!("Expected hex string, got `{byte_str}`: {hex_decode_err}") })?, }; @@ -187,6 +216,14 @@ impl BinaryFormat { } } +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum NumericOutputFormat { + #[default] + Number, + String, +} + #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, utoipa::ToSchema)] #[serde(deny_unknown_fields)] pub struct QuickwitIpAddrOptions { @@ -618,8 +655,8 @@ fn deserialize_mapping_type( Ok(FieldMappingType::F64(numeric_options, cardinality)) } Type::Bool => { - let numeric_options: QuickwitNumericOptions = serde_json::from_value(json)?; - Ok(FieldMappingType::Bool(numeric_options, cardinality)) + let bool_options: QuickwitBoolOptions = serde_json::from_value(json)?; + Ok(FieldMappingType::Bool(bool_options, cardinality)) } Type::IpAddr => { let ip_addr_options: QuickwitIpAddrOptions = serde_json::from_value(json)?; @@ -685,8 +722,8 @@ fn typed_mapping_to_json_params( FieldMappingType::Text(text_options, _) => serialize_to_map(&text_options), FieldMappingType::U64(options, _) | FieldMappingType::I64(options, _) - | FieldMappingType::F64(options, _) - | FieldMappingType::Bool(options, _) => serialize_to_map(&options), + | FieldMappingType::F64(options, _) => serialize_to_map(&options), + FieldMappingType::Bool(options, _) => serialize_to_map(&options), FieldMappingType::Bytes(options, _) => serialize_to_map(&options), FieldMappingType::IpAddr(options, _) => serialize_to_map(&options), FieldMappingType::DateTime(date_time_options, _) => serialize_to_map(&date_time_options), @@ -1062,7 +1099,7 @@ mod tests { #[test] fn test_deserialize_i64_parsing_error_with_text_options() { - let result = serde_json::from_str::( + let error = serde_json::from_str::( r#" { "name": "my_field_name", @@ -1070,12 +1107,13 @@ mod tests { "tokenizer": "basic" } "#, - ); - let error = result.unwrap_err(); + ) + .unwrap_err(); + assert_eq!( error.to_string(), "Error while parsing field `my_field_name`: unknown field `tokenizer`, expected one \ - of `description`, `stored`, `indexed`, `fast`" + of `description`, `stored`, `indexed`, `fast`, `coerce`, `output_format`" ); } @@ -1146,6 +1184,8 @@ mod tests { "stored": true, "fast": false, "indexed": true, + "coerce": true, + "output_format": "number" }) ); Ok(()) @@ -1165,7 +1205,7 @@ mod tests { .unwrap_err() .to_string(), "Error while parsing field `my_field_name`: unknown field `tokenizer`, expected one \ - of `description`, `stored`, `indexed`, `fast`" + of `description`, `stored`, `indexed`, `fast`, `coerce`, `output_format`" ); } @@ -1232,6 +1272,8 @@ mod tests { "stored": true, "fast": false, "indexed": true, + "coerce": true, + "output_format": "number" }) ); } @@ -1256,6 +1298,8 @@ mod tests { "stored": true, "fast": false, "indexed": true, + "coerce": true, + "output_format": "number" }) ); } @@ -1616,6 +1660,8 @@ mod tests { "stored": true, "fast": false, "indexed": true, + "coerce": true, + "output_format": "number" }) ); } diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs index 2724df7220c..e98fd9766ee 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs @@ -20,6 +20,7 @@ use tantivy::schema::Type; use super::date_time_type::QuickwitDateTimeOptions; +use super::field_mapping_entry::QuickwitBoolOptions; use crate::default_doc_mapper::field_mapping_entry::{ QuickwitBytesOptions, QuickwitIpAddrOptions, QuickwitJsonOptions, QuickwitNumericOptions, QuickwitObjectOptions, QuickwitTextOptions, @@ -41,7 +42,7 @@ pub(crate) enum FieldMappingType { /// 64-bit float mapping type configuration. F64(QuickwitNumericOptions, Cardinality), /// Bool mapping type configuration. - Bool(QuickwitNumericOptions, Cardinality), + Bool(QuickwitBoolOptions, Cardinality), /// IP Address mapping type configuration. IpAddr(QuickwitIpAddrOptions, Cardinality), /// Bytes mapping type configuration. diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs index 731a68440a8..4db71471f9c 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs @@ -33,6 +33,7 @@ use tantivy::{DateOptions, Document}; use tracing::warn; use super::date_time_type::QuickwitDateTimeOptions; +use super::field_mapping_entry::{NumericOutputFormat, QuickwitBoolOptions}; use crate::default_doc_mapper::field_mapping_entry::{ QuickwitBytesOptions, QuickwitIpAddrOptions, QuickwitNumericOptions, QuickwitObjectOptions, QuickwitTextOptions, @@ -42,15 +43,15 @@ use crate::{Cardinality, DocParsingError, FieldMappingEntry, ModeType}; #[derive(Clone, Debug)] pub enum LeafType { - Text(QuickwitTextOptions), + Bool(QuickwitBoolOptions), + Bytes(QuickwitBytesOptions), + DateTime(QuickwitDateTimeOptions), + F64(QuickwitNumericOptions), I64(QuickwitNumericOptions), U64(QuickwitNumericOptions), - F64(QuickwitNumericOptions), - Bool(QuickwitNumericOptions), IpAddr(QuickwitIpAddrOptions), - DateTime(QuickwitDateTimeOptions), - Bytes(QuickwitBytesOptions), Json(QuickwitJsonOptions), + Text(QuickwitTextOptions), } impl LeafType { @@ -63,9 +64,9 @@ impl LeafType { Err(format!("Expected JSON string, got `{json_val}`.")) } } - LeafType::I64(_) => i64::from_json(json_val), - LeafType::U64(_) => u64::from_json(json_val), - LeafType::F64(_) => f64::from_json(json_val), + LeafType::I64(numeric_options) => i64::from_json(json_val, numeric_options.coerce), + LeafType::U64(numeric_options) => u64::from_json(json_val, numeric_options.coerce), + LeafType::F64(numeric_options) => f64::from_json(json_val, numeric_options.coerce), LeafType::Bool(_) => { if let JsonValue::Bool(val) = json_val { Ok(TantivyValue::Bool(val)) @@ -181,9 +182,6 @@ fn extract_json_val( fn value_to_json(value: TantivyValue, leaf_type: &LeafType) -> Option { match (&value, leaf_type) { (TantivyValue::Str(_), LeafType::Text(_)) - | (TantivyValue::I64(_), LeafType::I64(_)) - | (TantivyValue::U64(_), LeafType::U64(_)) - | (TantivyValue::F64(_), LeafType::F64(_)) | (TantivyValue::Bool(_), LeafType::Bool(_)) | (TantivyValue::IpAddr(_), LeafType::IpAddr(_)) | (TantivyValue::JsonObject(_), LeafType::Json(_)) => { @@ -202,6 +200,15 @@ fn value_to_json(value: TantivyValue, leaf_type: &LeafType) -> Option .expect("Invalid datetime is not allowed."); Some(json_value) } + (TantivyValue::F64(f64_val), LeafType::F64(numeric_options)) => { + f64_val.to_json(numeric_options.output_format) + } + (TantivyValue::I64(i64_val), LeafType::I64(numeric_options)) => { + i64_val.to_json(numeric_options.output_format) + } + (TantivyValue::U64(u64_val), LeafType::U64(numeric_options)) => { + u64_val.to_json(numeric_options.output_format) + } _ => { warn!( "The value type `{:?}` doesn't match the requested type `{:?}`", @@ -231,41 +238,90 @@ fn insert_json_val( doc_json.insert(last_field_name.to_string(), json_val); } -trait NumVal: Sized + Into { +trait NumVal: Sized + FromStr + ToString + Into { fn from_json_number(num: &serde_json::Number) -> Option; - fn from_json(json_val: JsonValue) -> Result { - if let JsonValue::Number(num_val) = json_val { - Ok(Self::from_json_number(&num_val) + fn from_json(json_val: JsonValue, coerce: bool) -> Result { + match json_val { + JsonValue::Number(num_val) => Self::from_json_number(&num_val) + .map(Self::into) .ok_or_else(|| { format!( "Expected {}, got inconvertible JSON number `{}`.", type_name::(), num_val ) - })? - .into()) - } else { - Err(format!("Expected JSON number, got `{json_val}`.",)) + }), + JsonValue::String(str_val) => { + if coerce { + str_val.parse::().map(Self::into).map_err(|_| { + format!( + "Failed to coerce JSON string `\"{str_val}\"` to {}.", + type_name::() + ) + }) + } else { + Err(format!( + "Expected JSON number, got string `\"{str_val}\"`. Enable coercion to {} \ + with the `coerce` parameter in the field mapping.", + type_name::() + )) + } + } + _ => { + let message = if coerce { + format!("Expected JSON number or string, got `{json_val}`.") + } else { + format!("Expected JSON number, got `{json_val}`.") + }; + Err(message) + } } } + + fn to_json(&self, output_format: NumericOutputFormat) -> Option; } impl NumVal for u64 { fn from_json_number(num: &serde_json::Number) -> Option { num.as_u64() } + + fn to_json(&self, output_format: NumericOutputFormat) -> Option { + let json_value = match output_format { + NumericOutputFormat::String => JsonValue::String(self.to_string()), + NumericOutputFormat::Number => JsonValue::Number(serde_json::Number::from(*self)), + }; + Some(json_value) + } } impl NumVal for i64 { fn from_json_number(num: &serde_json::Number) -> Option { num.as_i64() } + + fn to_json(&self, output_format: NumericOutputFormat) -> Option { + let json_value = match output_format { + NumericOutputFormat::String => JsonValue::String(self.to_string()), + NumericOutputFormat::Number => JsonValue::Number(serde_json::Number::from(*self)), + }; + Some(json_value) + } } impl NumVal for f64 { fn from_json_number(num: &serde_json::Number) -> Option { num.as_f64() } + + fn to_json(&self, output_format: NumericOutputFormat) -> Option { + match output_format { + NumericOutputFormat::String => Some(JsonValue::String(self.to_string())), + NumericOutputFormat::Number => { + serde_json::Number::from_f64(*self).map(JsonValue::Number) + } + } + } } #[derive(Clone, Default)] @@ -496,7 +552,25 @@ fn build_mapping_tree_from_entries<'a>( Ok(mapping_node) } -fn get_numeric_options(quickwit_numeric_options: &QuickwitNumericOptions) -> NumericOptions { +fn get_numeric_options_for_bool_field( + quickwit_bool_options: &QuickwitBoolOptions, +) -> NumericOptions { + let mut numeric_options = NumericOptions::default(); + if quickwit_bool_options.stored { + numeric_options = numeric_options.set_stored(); + } + if quickwit_bool_options.indexed { + numeric_options = numeric_options.set_indexed(); + } + if quickwit_bool_options.fast { + numeric_options = numeric_options.set_fast(); + } + numeric_options +} + +fn get_numeric_options_for_numeric_field( + quickwit_numeric_options: &QuickwitNumericOptions, +) -> NumericOptions { let mut numeric_options = NumericOptions::default(); if quickwit_numeric_options.stored { numeric_options = numeric_options.set_stored(); @@ -621,7 +695,7 @@ fn build_mapping_from_field_type<'a>( Ok(MappingTree::Leaf(mapping_leaf)) } FieldMappingType::I64(options, cardinality) => { - let numeric_options = get_numeric_options(options); + let numeric_options = get_numeric_options_for_numeric_field(options); let field = schema_builder.add_i64_field(&field_name, numeric_options); let mapping_leaf = MappingLeaf { field, @@ -631,7 +705,7 @@ fn build_mapping_from_field_type<'a>( Ok(MappingTree::Leaf(mapping_leaf)) } FieldMappingType::U64(options, cardinality) => { - let numeric_options = get_numeric_options(options); + let numeric_options = get_numeric_options_for_numeric_field(options); let field = schema_builder.add_u64_field(&field_name, numeric_options); let mapping_leaf = MappingLeaf { field, @@ -641,7 +715,7 @@ fn build_mapping_from_field_type<'a>( Ok(MappingTree::Leaf(mapping_leaf)) } FieldMappingType::F64(options, cardinality) => { - let numeric_options = get_numeric_options(options); + let numeric_options = get_numeric_options_for_numeric_field(options); let field = schema_builder.add_f64_field(&field_name, numeric_options); let mapping_leaf = MappingLeaf { field, @@ -651,7 +725,7 @@ fn build_mapping_from_field_type<'a>( Ok(MappingTree::Leaf(mapping_leaf)) } FieldMappingType::Bool(options, cardinality) => { - let numeric_options = get_numeric_options(options); + let numeric_options = get_numeric_options_for_bool_field(options); let field = schema_builder.add_bool_field(&field_name, numeric_options); let mapping_leaf = MappingLeaf { field, @@ -723,8 +797,8 @@ mod tests { use super::{value_to_json, LeafType, MappingLeaf}; use crate::default_doc_mapper::date_time_type::QuickwitDateTimeOptions; use crate::default_doc_mapper::field_mapping_entry::{ - BinaryFormat, QuickwitBytesOptions, QuickwitIpAddrOptions, QuickwitNumericOptions, - QuickwitTextOptions, + BinaryFormat, NumericOutputFormat, QuickwitBoolOptions, QuickwitBytesOptions, + QuickwitIpAddrOptions, QuickwitNumericOptions, QuickwitTextOptions, }; use crate::Cardinality; @@ -818,11 +892,35 @@ mod tests { ); } + #[test] + fn test_parse_u64_coercion() { + let leaf = LeafType::U64(QuickwitNumericOptions::default()); + assert_eq!( + leaf.value_from_json(json!("20")).unwrap(), + TantivyValue::U64(20u64) + ); + assert_eq!( + leaf.value_from_json(json!("foo")).unwrap_err(), + "Failed to coerce JSON string `\"foo\"` to u64." + ); + + let numeric_options = QuickwitNumericOptions { + coerce: false, + ..Default::default() + }; + let leaf = LeafType::U64(numeric_options); + assert_eq!( + leaf.value_from_json(json!("20")).unwrap_err(), + "Expected JSON number, got string `\"20\"`. Enable coercion to u64 with the `coerce` \ + parameter in the field mapping." + ); + } + #[test] fn test_parse_u64_negative_should_error() { let leaf = LeafType::U64(QuickwitNumericOptions::default()); assert_eq!( - leaf.value_from_json(json!(-20i64)).err().unwrap(), + leaf.value_from_json(json!(-20i64)).unwrap_err(), "Expected u64, got inconvertible JSON number `-20`." ); } @@ -840,7 +938,7 @@ mod tests { fn test_parse_i64_from_f64_should_error() { let leaf = LeafType::I64(QuickwitNumericOptions::default()); assert_eq!( - leaf.value_from_json(json!(20.2f64)).err().unwrap(), + leaf.value_from_json(json!(20.2f64)).unwrap_err(), "Expected i64, got inconvertible JSON number `20.2`." ); } @@ -866,7 +964,7 @@ mod tests { #[test] fn test_parse_bool_mapping() { - let leaf = LeafType::Bool(QuickwitNumericOptions::default()); + let leaf = LeafType::Bool(QuickwitBoolOptions::default()); assert_eq!( leaf.value_from_json(json!(true)).unwrap(), TantivyValue::Bool(true) @@ -875,7 +973,7 @@ mod tests { #[test] fn test_parse_bool_multivalued() { - let typ = LeafType::Bool(QuickwitNumericOptions::default()); + let typ = LeafType::Bool(QuickwitBoolOptions::default()); let field = Field::from_field_id(10); let leaf_entry = MappingLeaf { field, @@ -1001,7 +1099,8 @@ mod tests { .unwrap_err(); assert_eq!( parse_err.to_string(), - "The field `root.my_field` could not be parsed: Expected JSON number, got `[1,2]`." + "The field `root.my_field` could not be parsed: Expected JSON number or string, got \ + `[1,2]`." ); } @@ -1158,20 +1257,90 @@ mod tests { } #[test] - fn test_serialize_bytes() { - let base64 = QuickwitBytesOptions::default(); - let hex = QuickwitBytesOptions { + fn test_tantivy_value_to_json_value_bytes() { + let bytes_options_base64 = QuickwitBytesOptions::default(); + assert_eq!( + value_to_json( + TantivyValue::Bytes(vec![1, 2, 3]), + &LeafType::Bytes(bytes_options_base64) + ) + .unwrap(), + serde_json::json!("AQID") + ); + + let bytes_options_hex = QuickwitBytesOptions { output_format: BinaryFormat::Hex, - ..QuickwitBytesOptions::default() + ..Default::default() }; + assert_eq!( + value_to_json( + TantivyValue::Bytes(vec![1, 2, 3]), + &LeafType::Bytes(bytes_options_hex) + ) + .unwrap(), + serde_json::json!("010203") + ); + } + #[test] + fn test_tantivy_value_to_json_value_f64() { + let numeric_options_number = QuickwitNumericOptions::default(); assert_eq!( - value_to_json(TantivyValue::Bytes(vec![1, 2, 3]), &LeafType::Bytes(base64)).unwrap(), - serde_json::json!("AQID") + value_to_json( + TantivyValue::F64(0.1), + &LeafType::F64(numeric_options_number) + ) + .unwrap(), + serde_json::json!(0.1) ); + + let numeric_options_str = QuickwitNumericOptions { + output_format: NumericOutputFormat::String, + ..Default::default() + }; assert_eq!( - value_to_json(TantivyValue::Bytes(vec![1, 2, 3]), &LeafType::Bytes(hex)).unwrap(), - serde_json::json!("010203") + value_to_json(TantivyValue::F64(0.1), &LeafType::F64(numeric_options_str)).unwrap(), + serde_json::json!("0.1") + ); + } + + #[test] + fn test_tantivy_value_to_json_value_i64() { + let numeric_options_number = QuickwitNumericOptions::default(); + assert_eq!( + value_to_json( + TantivyValue::I64(-1), + &LeafType::I64(numeric_options_number) + ) + .unwrap(), + serde_json::json!(-1) + ); + + let numeric_options_str = QuickwitNumericOptions { + output_format: NumericOutputFormat::String, + ..Default::default() + }; + assert_eq!( + value_to_json(TantivyValue::I64(-1), &LeafType::I64(numeric_options_str)).unwrap(), + serde_json::json!("-1") + ); + } + + #[test] + fn test_tantivy_value_to_json_value_u64() { + let numeric_options_number = QuickwitNumericOptions::default(); + assert_eq!( + value_to_json(TantivyValue::U64(1), &LeafType::U64(numeric_options_number)).unwrap(), + serde_json::json!(1u64) + ); + + let numeric_options_str = QuickwitNumericOptions { + output_format: NumericOutputFormat::String, + ..Default::default() + }; + assert_eq!( + value_to_json(TantivyValue::U64(1), &LeafType::U64(numeric_options_str)).unwrap(), + serde_json::json!("1") ); } diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs index 8df827b0e9c..ff449964b2b 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper.rs @@ -230,9 +230,9 @@ mod tests { use quickwit_query::query_ast::{query_ast_from_user_text, UserInputQuery}; use quickwit_query::BooleanOperand; - use tantivy::schema::{Field, FieldType, IndexRecordOption, Term}; + use tantivy::schema::{Field, FieldType, Term}; - use crate::default_doc_mapper::{FieldMappingType, QuickwitJsonOptions, TextIndexingOptions}; + use crate::default_doc_mapper::{FieldMappingType, QuickwitJsonOptions}; use crate::{ Cardinality, DefaultDocMapper, DefaultDocMapperBuilder, DocMapper, DocParsingError, FieldMappingEntry, Mode, TermRange, WarmupInfo, DYNAMIC_FIELD_NAME, @@ -530,9 +530,10 @@ mod tests { #[cfg(feature = "testsuite")] fn test_doc_mapper_query_with_multilang_field() { use quickwit_query::query_ast::TermQuery; + use tantivy::schema::IndexRecordOption; use crate::default_doc_mapper::{ - QuickwitTextOptions, QuickwitTextTokenizer, TokenizerType, + QuickwitTextOptions, QuickwitTextTokenizer, TextIndexingOptions, TokenizerType, }; use crate::{TokenizerConfig, TokenizerEntry}; let mut doc_mapper_builder = DefaultDocMapperBuilder::default(); diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json index 9227720ae7e..e9bd9c16174 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.expected.json @@ -30,9 +30,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.json index 464f1f3fad7..14b9755308f 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.4.json @@ -25,7 +25,9 @@ "indexed": true, "name": "tenant_id", "stored": true, - "type": "u64" + "type": "u64", + "coerce": true, + "output_format": "number" }, { "fast": true, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json index 3184db35284..44c2e6bc1e3 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.expected.json @@ -30,9 +30,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.json index fcb3f0863d5..8aa022f6406 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.5.json @@ -25,7 +25,9 @@ "indexed": true, "name": "tenant_id", "stored": true, - "type": "u64" + "type": "u64", + "coerce": true, + "output_format": "number" }, { "fast": true, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json index b6206ec38d7..399410c0ffb 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.expected.json @@ -30,9 +30,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json index b6206ec38d7..399410c0ffb 100644 --- a/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json +++ b/quickwit/quickwit-metastore/test-data/file-backed-index/v0.6.json @@ -30,9 +30,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json index f3620dd8787..6537a85f335 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.4.expected.json @@ -19,9 +19,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json index f3620dd8787..6537a85f335 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.5.expected.json @@ -19,9 +19,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json index db773b05b33..1fb511afa8b 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.expected.json @@ -19,9 +19,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" }, diff --git a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json index db773b05b33..1fb511afa8b 100644 --- a/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json +++ b/quickwit/quickwit-metastore/test-data/index-metadata/v0.6.json @@ -19,9 +19,11 @@ }, "field_mappings": [ { + "coerce": true, "fast": true, "indexed": true, "name": "tenant_id", + "output_format": "number", "stored": true, "type": "u64" },