diff --git a/quickwit/quickwit-datetime/src/date_time_format.rs b/quickwit/quickwit-datetime/src/date_time_format.rs index 400a6c993a4..81b297f3f9a 100644 --- a/quickwit/quickwit-datetime/src/date_time_format.rs +++ b/quickwit/quickwit-datetime/src/date_time_format.rs @@ -27,8 +27,8 @@ use serde::{Deserialize, Deserializer, Serialize}; use serde_json::Value as JsonValue; use time::error::{Format, TryFromParsed}; use time::format_description::modifier::{ - Day, Hour, Minute, Month as MonthModifier, Padding, Second, Subsecond, SubsecondDigits, Year, - YearRepr, + Day, Hour, Minute, Month as MonthModifier, Padding, Second, Subsecond, SubsecondDigits, + WeekNumber, WeekNumberRepr, Weekday, WeekdayRepr, Year, YearRepr, }; use time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339}; use time::format_description::{Component, OwnedFormatItem}; @@ -44,6 +44,15 @@ fn literal(s: &[u8]) -> OwnedFormatItem { OwnedFormatItem::Literal(boxed_slice) } +#[inline] +fn get_padding(ptn: &str) -> Padding { + if ptn.len() == 2 { + Padding::Zero + } else { + Padding::None + } +} + fn build_optional_item(java_datetime_format: &str) -> Option { assert!(java_datetime_format.len() >= 2); let optional_date_format = &java_datetime_format[1..java_datetime_format.len() - 1]; @@ -65,64 +74,65 @@ fn build_zone_offset(_: &str) -> Option { } fn build_year_item(ptn: &str) -> Option { + let mut year = Year::default(); let year_repr = if ptn.len() == 4 { YearRepr::Full } else { YearRepr::LastTwo }; - let mut year = Year::default(); year.repr = year_repr; Some(OwnedFormatItem::Component(Component::Year(year))) } +fn build_week_year_item(ptn: &str) -> Option { + // TODO no `Component` for that + build_year_item(ptn) +} + fn build_month_item(ptn: &str) -> Option { let mut month: MonthModifier = Default::default(); - if ptn.len() == 2 { - month.padding = Padding::Zero; - } else { - month.padding = Padding::None; - } + month.padding = get_padding(ptn); Some(OwnedFormatItem::Component(Component::Month(month))) } fn build_day_item(ptn: &str) -> Option { let mut day = Day::default(); - if ptn.len() == 2 { - day.padding = Padding::Zero; - } else { - day.padding = Padding::None; - }; + day.padding = get_padding(ptn); Some(OwnedFormatItem::Component(Component::Day(day))) } +fn build_weekday_item(_: &str) -> Option { + let mut weekday = Weekday::default(); + weekday.repr = WeekdayRepr::Monday; + weekday.one_indexed = false; + Some(OwnedFormatItem::Component(Component::Weekday(weekday))) +} + +fn build_week_number_item(ptn: &str) -> Option { + let mut week_number = WeekNumber::default(); + week_number.repr = WeekNumberRepr::Monday; + week_number.padding = get_padding(ptn); + Some(OwnedFormatItem::Component(Component::WeekNumber( + week_number, + ))) +} + fn build_hour_item(ptn: &str) -> Option { let mut hour = Hour::default(); - if ptn.len() == 2 { - hour.padding = Padding::Zero; - } else { - hour.padding = Padding::None; - }; + hour.padding = get_padding(ptn); hour.is_12_hour_clock = false; Some(OwnedFormatItem::Component(Component::Hour(hour))) } fn build_minute_item(ptn: &str) -> Option { let mut minute: Minute = Default::default(); - if ptn.len() == 2 { - minute.padding = Padding::Zero; - } else { - minute.padding = Padding::None; - } + minute.padding = get_padding(ptn); Some(OwnedFormatItem::Component(Component::Minute(minute))) } fn build_second_item(ptn: &str) -> Option { let mut second: Second = Default::default(); - if ptn.len() == 2 { - second.padding = Padding::Zero; - } else { - second.padding = Padding::None; - } + second.padding = get_padding(ptn); Some(OwnedFormatItem::Component(Component::Second(second))) } @@ -152,6 +162,9 @@ fn java_date_format_tokenizer() -> &'static RegexTokenizer { }), (r#"[^\w\[\]{}]"#, |s| Some(literal(s.as_bytes()))), (r#"\[.*\]"#, build_optional_item), + (r#"xx(xx)?"#, build_week_year_item), + (r#"ww?"#, build_week_number_item), + (r#"e?"#, build_weekday_item), ]) .unwrap() }) @@ -166,13 +179,28 @@ fn resolve_java_datetime_format_alias(java_datetime_format: &str) -> &str { OnceLock::new(); let java_datetime_format_map = JAVA_DATE_FORMAT_ALIASES.get_or_init(|| { let mut m = HashMap::new(); - m.insert("date_optional_time", "yyyy-MM-dd['T'HH:mm:ss.SSSZ]"); - m.insert("strict_date_optional_time", "yyyy-MM-dd'T['HH:mm:ss.SSSZ]"); + m.insert("date_optional_time", "yyyy-MM-dd['T'HH:mm:ss.SSSZ]"); + m.insert("strict_date_optional_time", "yyyy-MM-dd['T'HH:mm:ss.SSSZ]"); m.insert( "strict_date_optional_time_nanos", "yyyy-MM-dd['T'HH:mm:ss.SSSSSSZ]", ); m.insert("basic_date", "yyyyMMdd"); + + m.insert("strict_basic_week_date", "xxxx'W'wwe"); + m.insert("basic_week_date", "xx[xx]'W'wwe"); + + m.insert("strict_basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSZ"); + m.insert("basic_week_date_time", "xx[xx]'W'wwe'T'HHmmss.SSSZ"); + + m.insert( + "strict_basic_week_date_time_no_millis", + "xxxx'W'wwe'T'HHmmssZ", + ); + m.insert("basic_week_date_time_no_millis", "xx[xx]'W'wwe'T'HHmmssZ"); + + m.insert("strict_week_date", "xxxx-'W'ww-e"); + m.insert("week_date", "xxxx-'W'w[w]-e"); m }); java_datetime_format_map @@ -211,7 +239,7 @@ impl StrptimeParser { self.parse_date_time_with_default_timezone(date_time_str, UtcOffset::UTC) } - /// Parse a date. If no timezone is specificied we will assume the timezone passed as + /// Parse a date. If no timezone is specified we will assume the timezone passed as /// `default_offset`. If the date is missing, it will be automatically set to 00:00:00. pub fn parse_date_time_with_default_timezone( &self, @@ -736,11 +764,116 @@ mod tests { ); test_parse_java_datetime_aux( "date_optional_time", - "2021-01-21T03:01:22.312+01:00", + "2021W313", datetime!(2021-01-21 03:01:22.312 +1), ); } + #[test] + fn test_parse_java_week_formats() { + test_parse_java_datetime_aux( + "basic_week_date", + "2024W313", + datetime!(2024-08-01 0:00:00.0 +00:00:00), + ); + test_parse_java_datetime_aux( + "basic_week_date", + "24W313", + datetime!(2024-08-01 0:00:00.0 +00:00:00), + ); + // // ❌ 'the 'year' component could not be parsed' + // test_parse_java_datetime_aux( + // "basic_week_date", + // "1W313", + // datetime!(2018-08-02 0:00:00.0 +00:00:00), + // ); + // // ❌ 'the 'offset hour' component could not be parsed' + // test_parse_java_datetime_aux( + // "basic_week_date_time", + // "2018W313T121212.1Z", + // datetime!(2024-08-01 0:00:00.0 +00:00:00), + // ); + // // ❌ 'the 'offset hour' component could not be parsed' + // test_parse_java_datetime_aux( + // "basic_week_date_time", + // "2018W313T121212.123Z", + // datetime!(2024-08-01 0:00:00.0 +00:00:00), + // ); + // // ❌ 'the 'offset hour' component could not be parsed' + // test_parse_java_datetime_aux( + // "basic_week_date_time", + // "2018W313T121212.123456789Z", + // datetime!(2024-08-01 0:00:00.0 +00:00:00), + // ); + // // ❌ 'a character literal was not valid' + // test_parse_java_datetime_aux( + // "basic_week_date_time", + // "2018W313T121212.123+0100", + // datetime!(2024-08-01 0:00:00.0 +00:00:00), + // ); + // test_parse_java_datetime_aux( + // "basic_week_date_time", + // "2018W313T121212.123+01:00", + // datetime!(2024-08-01 12:12:12.123 +01:00:00), + // ); + // // ❌ 'the 'offset hour' component could not be parsed' + // test_parse_java_datetime_aux( + // "basic_week_date_time_no_millis", + // "2018W313T121212Z", + // datetime!(2024-08-01 12:12:12.123 +01:00:00), + // ); + // // ❌ 'a character literal was not valid' + // test_parse_java_datetime_aux( + // "basic_week_date_time_no_millis", + // "2018W313T121212+0100", + // datetime!(2024-08-01 12:12:12.123 +01:00:00), + // ); + test_parse_java_datetime_aux( + "basic_week_date_time_no_millis", + "2018W313T121212+01:00", + datetime!(2024-08-01 12:12:12.0 +01:00:00), + ); + + test_parse_java_datetime_aux( + "week_date", + "2012-W48-6", + datetime!(2012-12-02 0:00:00.0 +00:00:00), + ); + + test_parse_java_datetime_aux( + "week_date", + "2012-W01-6", + datetime!(2012-01-08 0:00:00.0 +00:00:00), + ); + + test_parse_java_datetime_aux( + "week_date", + "2012-W1-6", + datetime!(2012-01-08 0:00:00.0 +00:00:00), + ); + } + + #[test] + fn test_parse_java_strict_week_formats() { + test_parse_java_datetime_aux( + "strict_basic_week_date", + "2024W313", + datetime!(2024-08-01 0:00:00.0 +00:00:00), + ); + + test_parse_java_datetime_aux( + "strict_week_date", + "2012-W48-6", + datetime!(2012-12-02 0:00:00.0 +00:00:00), + ); + + test_parse_java_datetime_aux( + "strict_week_date", + "2012-W01-6", + datetime!(2012-01-08 0:00:00.0 +00:00:00), + ); + } + #[test] fn test_infer_year() { let inferred_year = infer_year(None, Month::January, 2024); diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs index 46f41461104..7fab2a910a4 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs @@ -127,7 +127,7 @@ mod tests { boost: None, format: JsonLiteral::String("yyyy-MM-dd['T'HH:mm:ss]".to_string()).into(), }; - let range_query = ElasticRangeQuery { + let range_query: ElasticRangeQuery = ElasticRangeQuery { field: "date".to_string(), value: range_query_params, };