diff --git a/quickwit/quickwit-query/Cargo.toml b/quickwit/quickwit-query/Cargo.toml
index e1229da8e8b..00a6e598d6e 100644
--- a/quickwit/quickwit-query/Cargo.toml
+++ b/quickwit/quickwit-query/Cargo.toml
@@ -18,6 +18,7 @@ lindera-core = { workspace = true, optional = true }
 lindera-dictionary = { workspace = true, optional = true }
 lindera-tokenizer = { workspace = true, optional = true }
 once_cell = { workspace = true }
+regex = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 serde_with = { workspace = true }
diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs
index 273496bbdeb..0302cf0892b 100644
--- a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs
+++ b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs
@@ -17,10 +17,13 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 use std::ops::Bound;
 use std::str::FromStr;
 
+use anyhow::Error;
+use once_cell::sync::Lazy;
 use quickwit_datetime::StrptimeParser;
+use regex::RegexSet;
 use serde::Deserialize;
 
 use crate::elastic_query_dsl::one_field_map::OneFieldMap;
@@ -29,6 +32,28 @@ use crate::not_nan_f32::NotNaNf32;
 use crate::query_ast::QueryAst;
 use crate::JsonLiteral;
 
+/// Elasticsearch/OpenSearch date formats supported by range queries, as pairs of
+/// `(anchored regex matching the format, equivalent strptime format)`.
+///
+/// Elasticsearch/OpenSearch uses a set of preconfigured formats, more information can be found here:
+/// https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html
+const ELASTICSEARCH_FORMAT_TO_STRFTIME: [(&str, &str); 5] = [
+    (r"^yyyy-MM-dd'T'HH:mm:ss\.SSSZ$", "%Y-%m-%dT%H:%M:%S.%3f%:z"),
+    // NOTE(review): the `*date_optional_time` formats are mapped to a full timestamp pattern;
+    // presumably inputs that omit the time part are rejected -- confirm this is intended.
+    (r"^date_optional_time$", "%Y-%m-%dT%H:%M:%S.%3f%:z"),
+    (r"^strict_date_optional_time$", "%Y-%m-%dT%H:%M:%S.%3f%:z"),
+    (r"^yyyy-MM-dd$", "%Y-%m-%d"),
+    (r"^yyyyMMdd$", "%Y%m%d"),
+];
+
+/// Regex set built from the patterns of `ELASTICSEARCH_FORMAT_TO_STRFTIME`, in table order, so
+/// the index of a match is also the index of the matching table entry.
+static ELASTICSEARCH_FORMAT_REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {
+    RegexSet::new(ELASTICSEARCH_FORMAT_TO_STRFTIME.iter().map(|(pattern, _)| pattern))
+        .expect("hard-coded Elasticsearch format patterns must be valid regexes")
+});
+
 #[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)]
 #[serde(deny_unknown_fields)]
 pub struct RangeQueryParams {
@@ -60,9 +85,7 @@ impl ConvertableToQueryAst for RangeQuery {
             format,
         } = self.value;
         let (gt, gte, lt, lte) = if let Some(JsonLiteral::String(fmt)) = format {
-            let parser = StrptimeParser::from_str(&fmt).map_err(|reason| {
-                anyhow::anyhow!("failed to create parser from : {}; reason: {}", fmt, reason)
-            })?;
+            let parser = create_strptime_parser(&fmt)?;
             (
                 gt.map(|v| parse_and_convert(v, &parser)).transpose()?,
                 gte.map(|v| parse_and_convert(v, &parser)).transpose()?,
@@ -108,6 +131,29 @@ fn parse_and_convert(literal: JsonLiteral, parser: &StrptimeParser) -> anyhow::R
     }
 }
 
+/// Creates the `StrptimeParser` for the Elasticsearch/OpenSearch date format `fmt`.
+///
+/// Returns an error if `fmt` is not a supported format or if the parser cannot be built
+/// from the corresponding strptime format.
+fn create_strptime_parser(fmt: &str) -> Result<StrptimeParser, Error> {
+    let strptime_format = convert_format_to_strpformat(fmt)?;
+    StrptimeParser::from_str(&strptime_format).map_err(|reason| {
+        anyhow::anyhow!("failed to create parser from : {}; reason: {}", fmt, reason)
+    })
+}
+
+/// Converts the Elasticsearch/OpenSearch date format `format` to its strptime equivalent.
+///
+/// Returns an error if `format` is not one of the supported formats.
+fn convert_format_to_strpformat(format: &str) -> Result<String, Error> {
+    ELASTICSEARCH_FORMAT_REGEX_SET
+        .matches(format)
+        .iter()
+        .next()
+        .map(|entry_idx| ELASTICSEARCH_FORMAT_TO_STRFTIME[entry_idx].1.to_string())
+        .ok_or_else(|| anyhow::anyhow!("unsupported format {}", format))
+}
+
 #[cfg(test)]
 mod tests {
     use std::str::FromStr;