Skip to content

Commit

Permalink
fix: add week patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
etolbakov authored and kuzaxak committed Sep 28, 2024
1 parent f15a6f2 commit 15859e9
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 33 deletions.
197 changes: 165 additions & 32 deletions quickwit/quickwit-datetime/src/date_time_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ use serde::{Deserialize, Deserializer, Serialize};
use serde_json::Value as JsonValue;
use time::error::{Format, TryFromParsed};
use time::format_description::modifier::{
Day, Hour, Minute, Month as MonthModifier, Padding, Second, Subsecond, SubsecondDigits, Year,
YearRepr,
Day, Hour, Minute, Month as MonthModifier, Padding, Second, Subsecond, SubsecondDigits,
WeekNumber, WeekNumberRepr, Weekday, WeekdayRepr, Year, YearRepr,
};
use time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339};
use time::format_description::{Component, OwnedFormatItem};
Expand All @@ -44,6 +44,15 @@ fn literal(s: &[u8]) -> OwnedFormatItem {
OwnedFormatItem::Literal(boxed_slice)
}

#[inline]
fn get_padding(ptn: &str) -> Padding {
if ptn.len() == 2 {
Padding::Zero
} else {
Padding::None
}
}

fn build_optional_item(java_datetime_format: &str) -> Option<OwnedFormatItem> {
assert!(java_datetime_format.len() >= 2);
let optional_date_format = &java_datetime_format[1..java_datetime_format.len() - 1];
Expand All @@ -65,64 +74,65 @@ fn build_zone_offset(_: &str) -> Option<OwnedFormatItem> {
}

fn build_year_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut year = Year::default();
let year_repr = if ptn.len() == 4 {
YearRepr::Full
} else {
YearRepr::LastTwo
};
let mut year = Year::default();
year.repr = year_repr;
Some(OwnedFormatItem::Component(Component::Year(year)))
}

fn build_week_year_item(ptn: &str) -> Option<OwnedFormatItem> {
// TODO no `Component` for that
build_year_item(ptn)
}

fn build_month_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut month: MonthModifier = Default::default();
if ptn.len() == 2 {
month.padding = Padding::Zero;
} else {
month.padding = Padding::None;
}
month.padding = get_padding(ptn);
Some(OwnedFormatItem::Component(Component::Month(month)))
}

fn build_day_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut day = Day::default();
if ptn.len() == 2 {
day.padding = Padding::Zero;
} else {
day.padding = Padding::None;
};
day.padding = get_padding(ptn);
Some(OwnedFormatItem::Component(Component::Day(day)))
}

fn build_weekday_item(_: &str) -> Option<OwnedFormatItem> {
let mut weekday = Weekday::default();
weekday.repr = WeekdayRepr::Monday;
weekday.one_indexed = false;
Some(OwnedFormatItem::Component(Component::Weekday(weekday)))
}

fn build_week_number_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut week_number = WeekNumber::default();
week_number.repr = WeekNumberRepr::Monday;
week_number.padding = get_padding(ptn);
Some(OwnedFormatItem::Component(Component::WeekNumber(
week_number,
)))
}

fn build_hour_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut hour = Hour::default();
if ptn.len() == 2 {
hour.padding = Padding::Zero;
} else {
hour.padding = Padding::None;
};
hour.padding = get_padding(ptn);
hour.is_12_hour_clock = false;
Some(OwnedFormatItem::Component(Component::Hour(hour)))
}

fn build_minute_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut minute: Minute = Default::default();
if ptn.len() == 2 {
minute.padding = Padding::Zero;
} else {
minute.padding = Padding::None;
}
minute.padding = get_padding(ptn);
Some(OwnedFormatItem::Component(Component::Minute(minute)))
}

fn build_second_item(ptn: &str) -> Option<OwnedFormatItem> {
let mut second: Second = Default::default();
if ptn.len() == 2 {
second.padding = Padding::Zero;
} else {
second.padding = Padding::None;
}
second.padding = get_padding(ptn);
Some(OwnedFormatItem::Component(Component::Second(second)))
}

Expand Down Expand Up @@ -152,6 +162,9 @@ fn java_date_format_tokenizer() -> &'static RegexTokenizer<OwnedFormatItem> {
}),
(r#"[^\w\[\]{}]"#, |s| Some(literal(s.as_bytes()))),
(r#"\[.*\]"#, build_optional_item),
(r#"xx(xx)?"#, build_week_year_item),
(r#"ww?"#, build_week_number_item),
(r#"e?"#, build_weekday_item),
])
.unwrap()
})
Expand All @@ -166,13 +179,28 @@ fn resolve_java_datetime_format_alias(java_datetime_format: &str) -> &str {
OnceLock::new();
let java_datetime_format_map = JAVA_DATE_FORMAT_ALIASES.get_or_init(|| {
let mut m = HashMap::new();
m.insert("date_optional_time", "yyyy-MM-dd['T'HH:mm:ss.SSSZ]");
m.insert("strict_date_optional_time", "yyyy-MM-dd'T['HH:mm:ss.SSSZ]");
m.insert("date_optional_time", "yyyy-MM-dd['T'HH:mm:ss.SSSZ]");
m.insert("strict_date_optional_time", "yyyy-MM-dd['T'HH:mm:ss.SSSZ]");
m.insert(
"strict_date_optional_time_nanos",
"yyyy-MM-dd['T'HH:mm:ss.SSSSSSZ]",
);
m.insert("basic_date", "yyyyMMdd");

m.insert("strict_basic_week_date", "xxxx'W'wwe");
m.insert("basic_week_date", "xx[xx]'W'wwe");

m.insert("strict_basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSZ");
m.insert("basic_week_date_time", "xx[xx]'W'wwe'T'HHmmss.SSSZ");

m.insert(
"strict_basic_week_date_time_no_millis",
"xxxx'W'wwe'T'HHmmssZ",
);
m.insert("basic_week_date_time_no_millis", "xx[xx]'W'wwe'T'HHmmssZ");

m.insert("strict_week_date", "xxxx-'W'ww-e");
m.insert("week_date", "xxxx-'W'w[w]-e");
m
});
java_datetime_format_map
Expand Down Expand Up @@ -211,7 +239,7 @@ impl StrptimeParser {
self.parse_date_time_with_default_timezone(date_time_str, UtcOffset::UTC)
}

/// Parse a date. If no timezone is specificied we will assume the timezone passed as
/// Parse a date. If no timezone is specified we will assume the timezone passed as
/// `default_offset`. If the date is missing, it will be automatically set to 00:00:00.
pub fn parse_date_time_with_default_timezone(
&self,
Expand Down Expand Up @@ -736,11 +764,116 @@ mod tests {
);
test_parse_java_datetime_aux(
"date_optional_time",
"2021-01-21T03:01:22.312+01:00",
"2021W313",
datetime!(2021-01-21 03:01:22.312 +1),
);
}

#[test]
fn test_parse_java_week_formats() {
test_parse_java_datetime_aux(
"basic_week_date",
"2024W313",
datetime!(2024-08-01 0:00:00.0 +00:00:00),
);
test_parse_java_datetime_aux(
"basic_week_date",
"24W313",
datetime!(2024-08-01 0:00:00.0 +00:00:00),
);
// // ❌ 'the 'year' component could not be parsed'
// test_parse_java_datetime_aux(
// "basic_week_date",
// "1W313",
// datetime!(2018-08-02 0:00:00.0 +00:00:00),
// );
// // ❌ 'the 'offset hour' component could not be parsed'
// test_parse_java_datetime_aux(
// "basic_week_date_time",
// "2018W313T121212.1Z",
// datetime!(2024-08-01 0:00:00.0 +00:00:00),
// );
// // ❌ 'the 'offset hour' component could not be parsed'
// test_parse_java_datetime_aux(
// "basic_week_date_time",
// "2018W313T121212.123Z",
// datetime!(2024-08-01 0:00:00.0 +00:00:00),
// );
// // ❌ 'the 'offset hour' component could not be parsed'
// test_parse_java_datetime_aux(
// "basic_week_date_time",
// "2018W313T121212.123456789Z",
// datetime!(2024-08-01 0:00:00.0 +00:00:00),
// );
// // ❌ 'a character literal was not valid'
// test_parse_java_datetime_aux(
// "basic_week_date_time",
// "2018W313T121212.123+0100",
// datetime!(2024-08-01 0:00:00.0 +00:00:00),
// );
// test_parse_java_datetime_aux(
// "basic_week_date_time",
// "2018W313T121212.123+01:00",
// datetime!(2024-08-01 12:12:12.123 +01:00:00),
// );
// // ❌ 'the 'offset hour' component could not be parsed'
// test_parse_java_datetime_aux(
// "basic_week_date_time_no_millis",
// "2018W313T121212Z",
// datetime!(2024-08-01 12:12:12.123 +01:00:00),
// );
// // ❌ 'a character literal was not valid'
// test_parse_java_datetime_aux(
// "basic_week_date_time_no_millis",
// "2018W313T121212+0100",
// datetime!(2024-08-01 12:12:12.123 +01:00:00),
// );
test_parse_java_datetime_aux(
"basic_week_date_time_no_millis",
"2018W313T121212+01:00",
datetime!(2024-08-01 12:12:12.0 +01:00:00),
);

test_parse_java_datetime_aux(
"week_date",
"2012-W48-6",
datetime!(2012-12-02 0:00:00.0 +00:00:00),
);

test_parse_java_datetime_aux(
"week_date",
"2012-W01-6",
datetime!(2012-01-08 0:00:00.0 +00:00:00),
);

test_parse_java_datetime_aux(
"week_date",
"2012-W1-6",
datetime!(2012-01-08 0:00:00.0 +00:00:00),
);
}

#[test]
fn test_parse_java_strict_week_formats() {
test_parse_java_datetime_aux(
"strict_basic_week_date",
"2024W313",
datetime!(2024-08-01 0:00:00.0 +00:00:00),
);

test_parse_java_datetime_aux(
"strict_week_date",
"2012-W48-6",
datetime!(2012-12-02 0:00:00.0 +00:00:00),
);

test_parse_java_datetime_aux(
"strict_week_date",
"2012-W01-6",
datetime!(2012-01-08 0:00:00.0 +00:00:00),
);
}

#[test]
fn test_infer_year() {
let inferred_year = infer_year(None, Month::January, 2024);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ mod tests {
boost: None,
format: JsonLiteral::String("yyyy-MM-dd['T'HH:mm:ss]".to_string()).into(),
};
let range_query = ElasticRangeQuery {
let range_query: ElasticRangeQuery = ElasticRangeQuery {
field: "date".to_string(),
value: range_query_params,
};
Expand Down

0 comments on commit 15859e9

Please sign in to comment.