Skip to content

Commit

Permalink
Add coerce and output format options for numeric fields
Browse files Browse the repository at this point in the history
  • Loading branch information
guilload committed Aug 2, 2023
1 parent 65d9566 commit 3b7aec6
Show file tree
Hide file tree
Showing 4 changed files with 257 additions and 58 deletions.
5 changes: 3 additions & 2 deletions quickwit/quickwit-doc-mapper/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@ quickwit-query = { workspace = true }
criterion = { workspace = true }
matches = { workspace = true }
proptest = { workspace = true }
quickwit-proto = { workspace = true }
quickwit-query = { workspace = true, features = ["testsuite"] }
serde_yaml = { workspace = true }
time = { workspace = true }

quickwit-proto = { workspace = true }
quickwit-query = { workspace = true, features = ["testsuite"] }

[features]
multilang = ["quickwit-query/multilang"]
testsuite = ["multilang"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,40 @@ pub struct QuickwitNumericOptions {
pub indexed: bool,
#[serde(default)]
pub fast: bool,
#[serde(default = "default_as_true")]
pub coerce: bool,
#[serde(default)]
pub output_format: NumericOutputFormat,
}

impl Default for QuickwitNumericOptions {
    /// Returns the baseline options for a numeric field: no description, stored and
    /// indexed, not a fast field, coercion enabled, and the default output format.
    fn default() -> Self {
        // Resolved first so the struct literal below can use field-init shorthand.
        let output_format = NumericOutputFormat::default();
        Self {
            description: None,
            stored: true,
            indexed: true,
            fast: false,
            coerce: true,
            output_format,
        }
    }
}

// Options accepted for a `bool` field mapping. `deserialize_mapping_type` builds this
// struct from the mapping entry's JSON in its `Type::Bool` arm.
//
// `deny_unknown_fields` makes deserialization fail on any key other than the four
// fields declared below.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct QuickwitBoolOptions {
// Optional description; `None` when the key is absent, and left out of the
// serialized output when `None`.
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
// When the key is absent the value comes from `default_as_true`.
// NOTE(review): presumably `true` — the helper is defined outside this view; confirm.
#[serde(default = "default_as_true")]
pub stored: bool,
// Same defaulting rule as `stored` (via `default_as_true`).
#[serde(default = "default_as_true")]
pub indexed: bool,
// Falls back to `bool::default()`, i.e. `false`, when the key is absent.
#[serde(default)]
pub fast: bool,
}

impl Default for QuickwitBoolOptions {
fn default() -> Self {
Self {
description: None,
Expand Down Expand Up @@ -150,15 +181,15 @@ pub enum BinaryFormat {
impl BinaryFormat {
pub fn as_str(&self) -> &str {
match self {
BinaryFormat::Base64 => "base64",
BinaryFormat::Hex => "hex",
Self::Base64 => "base64",
Self::Hex => "hex",
}
}

pub fn format_to_json(&self, value: &[u8]) -> JsonValue {
match self {
BinaryFormat::Base64 => BASE64_STANDARD.encode(value).into(),
BinaryFormat::Hex => hex::encode(value).into(),
Self::Base64 => BASE64_STANDARD.encode(value).into(),
Self::Hex => hex::encode(value).into(),
}
}

Expand All @@ -172,21 +203,27 @@ impl BinaryFormat {
));
};
let payload = match self {
BinaryFormat::Base64 => {
BASE64_STANDARD
.decode(&byte_str)
.map_err(|base64_decode_err| {
format!("Expected base64 string, got `{byte_str}`: {base64_decode_err}")
})?
}
BinaryFormat::Hex => hex::decode(&byte_str).map_err(|hex_decode_err| {
Self::Base64 => BASE64_STANDARD
.decode(&byte_str)
.map_err(|base64_decode_err| {
format!("Expected base64 string, got `{byte_str}`: {base64_decode_err}")
})?,
Self::Hex => hex::decode(&byte_str).map_err(|hex_decode_err| {
format!("Expected hex string, got `{byte_str}`: {hex_decode_err}")
})?,
};
Ok(TantivyValue::Bytes(payload))
}
}

/// Value type of the `output_format` option on numeric field mappings.
///
/// Variants are (de)serialized in snake case, i.e. as `"number"` and `"string"`.
/// `Number` is the variant returned by `Default`.
///
/// NOTE(review): presumably selects whether numeric values are rendered as JSON
/// numbers or as strings — the code consuming this enum is not visible here; confirm.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NumericOutputFormat {
#[default]
Number,
String,
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct QuickwitIpAddrOptions {
Expand Down Expand Up @@ -515,8 +552,8 @@ fn deserialize_mapping_type(
Ok(FieldMappingType::F64(numeric_options, cardinality))
}
Type::Bool => {
let numeric_options: QuickwitNumericOptions = serde_json::from_value(json)?;
Ok(FieldMappingType::Bool(numeric_options, cardinality))
let bool_options: QuickwitBoolOptions = serde_json::from_value(json)?;
Ok(FieldMappingType::Bool(bool_options, cardinality))
}
Type::IpAddr => {
let ip_addr_options: QuickwitIpAddrOptions = serde_json::from_value(json)?;
Expand Down Expand Up @@ -590,8 +627,8 @@ fn typed_mapping_to_json_params(
FieldMappingType::Text(text_options, _) => serialize_to_map(&text_options),
FieldMappingType::U64(options, _)
| FieldMappingType::I64(options, _)
| FieldMappingType::F64(options, _)
| FieldMappingType::Bool(options, _) => serialize_to_map(&options),
| FieldMappingType::F64(options, _) => serialize_to_map(&options),
FieldMappingType::Bool(options, _) => serialize_to_map(&options),
FieldMappingType::Bytes(options, _) => serialize_to_map(&options),
FieldMappingType::IpAddr(options, _) => serialize_to_map(&options),
FieldMappingType::DateTime(date_time_options, _) => serialize_to_map(&date_time_options),
Expand Down Expand Up @@ -969,20 +1006,21 @@ mod tests {

#[test]
fn test_deserialize_i64_parsing_error_with_text_options() {
let result = serde_json::from_str::<FieldMappingEntry>(
let error = serde_json::from_str::<FieldMappingEntry>(
r#"
{
"name": "my_field_name",
"type": "i64",
"tokenizer": "basic"
}
"#,
);
let error = result.unwrap_err();
)
.unwrap_err();

assert_eq!(
error.to_string(),
"Error while parsing field `my_field_name`: unknown field `tokenizer`, expected one \
of `description`, `stored`, `indexed`, `fast`"
of `description`, `stored`, `indexed`, `fast`, `coerce`, `output_format`"
);
}

Expand Down Expand Up @@ -1053,6 +1091,8 @@ mod tests {
"stored": true,
"fast": false,
"indexed": true,
"coerce": true,
"output_format": "number"
})
);
Ok(())
Expand All @@ -1072,7 +1112,7 @@ mod tests {
.unwrap_err()
.to_string(),
"Error while parsing field `my_field_name`: unknown field `tokenizer`, expected one \
of `description`, `stored`, `indexed`, `fast`"
of `description`, `stored`, `indexed`, `fast`, `coerce`, `output_format`"
);
}

Expand Down Expand Up @@ -1139,6 +1179,8 @@ mod tests {
"stored": true,
"fast": false,
"indexed": true,
"coerce": true,
"output_format": "number"
})
);
}
Expand All @@ -1163,6 +1205,8 @@ mod tests {
"stored": true,
"fast": false,
"indexed": true,
"coerce": true,
"output_format": "number"
})
);
}
Expand Down Expand Up @@ -1514,6 +1558,8 @@ mod tests {
"stored": true,
"fast": false,
"indexed": true,
"coerce": true,
"output_format": "number"
})
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use tantivy::schema::Type;

use super::date_time_type::QuickwitDateTimeOptions;
use super::field_mapping_entry::QuickwitBoolOptions;
use crate::default_doc_mapper::field_mapping_entry::{
QuickwitBytesOptions, QuickwitIpAddrOptions, QuickwitJsonOptions, QuickwitNumericOptions,
QuickwitObjectOptions, QuickwitTextOptions,
Expand All @@ -41,7 +42,7 @@ pub(crate) enum FieldMappingType {
/// 64-bit float mapping type configuration.
F64(QuickwitNumericOptions, Cardinality),
/// Bool mapping type configuration.
Bool(QuickwitNumericOptions, Cardinality),
Bool(QuickwitBoolOptions, Cardinality),
/// IP Address mapping type configuration.
IpAddr(QuickwitIpAddrOptions, Cardinality),
/// Bytes mapping type configuration.
Expand Down
Loading

0 comments on commit 3b7aec6

Please sign in to comment.