diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index d8883878e..833f17fc5 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -245,14 +245,14 @@ impl PrimitiveType { | (PrimitiveType::Long, PrimitiveLiteral::Long(_)) | (PrimitiveType::Float, PrimitiveLiteral::Float(_)) | (PrimitiveType::Double, PrimitiveLiteral::Double(_)) - | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Decimal(_)) - | (PrimitiveType::Date, PrimitiveLiteral::Date(_)) - | (PrimitiveType::Time, PrimitiveLiteral::Time(_)) - | (PrimitiveType::Timestamp, PrimitiveLiteral::Timestamp(_)) - | (PrimitiveType::Timestamptz, PrimitiveLiteral::Timestamptz(_)) + | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(_)) + | (PrimitiveType::Date, PrimitiveLiteral::Int(_)) + | (PrimitiveType::Time, PrimitiveLiteral::Long(_)) + | (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_)) + | (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_)) | (PrimitiveType::String, PrimitiveLiteral::String(_)) - | (PrimitiveType::Uuid, PrimitiveLiteral::Uuid(_)) - | (PrimitiveType::Fixed(_), PrimitiveLiteral::Fixed(_)) + | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_)) + | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_)) | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_)) ) } @@ -933,11 +933,15 @@ mod tests { Type::Struct(StructType { fields: vec![ NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid)) - .with_initial_default(Literal::Primitive(PrimitiveLiteral::Uuid( - Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb").unwrap(), + .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128( + Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb") + .unwrap() + .as_u128(), ))) - .with_write_default(Literal::Primitive(PrimitiveLiteral::Uuid( - Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae").unwrap(), + .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128( + Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae") + .unwrap() + .as_u128(), ))) .into(), NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(), @@ -1002,11 +1006,15 @@ mod tests { let struct_type = Type::Struct(StructType::new(vec![ NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid)) - .with_initial_default(Literal::Primitive(PrimitiveLiteral::Uuid( - Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb").unwrap(), + .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128( + Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb") + .unwrap() + .as_u128(), ))) - .with_write_default(Literal::Primitive(PrimitiveLiteral::Uuid( - Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae").unwrap(), + .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128( + Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae") + .unwrap() + .as_u128(), ))) .into(), NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(), @@ -1127,45 +1135,32 @@ mod tests { #[test] fn test_primitive_type_compatitable() { - let types = vec![ - PrimitiveType::Boolean, - PrimitiveType::Int, - PrimitiveType::Long, - PrimitiveType::Float, - PrimitiveType::Double, - PrimitiveType::Decimal { - precision: 9, - scale: 2, - }, - PrimitiveType::Date, - PrimitiveType::Time, - PrimitiveType::Timestamp, - PrimitiveType::Timestamptz, - PrimitiveType::String, - PrimitiveType::Uuid, - PrimitiveType::Fixed(8), - PrimitiveType::Binary, - ]; - let literals = vec![ - PrimitiveLiteral::Boolean(true), - PrimitiveLiteral::Int(1), - PrimitiveLiteral::Long(1), - PrimitiveLiteral::Float(1.0.into()), - PrimitiveLiteral::Double(1.0.into()), - PrimitiveLiteral::Decimal(1), - PrimitiveLiteral::Date(1), - PrimitiveLiteral::Time(1), - PrimitiveLiteral::Timestamp(1), - PrimitiveLiteral::Timestamptz(1), - PrimitiveLiteral::String("1".to_string()), - PrimitiveLiteral::Uuid(Uuid::new_v4()), - PrimitiveLiteral::Fixed(vec![1]), - PrimitiveLiteral::Binary(vec![1]), + let pairs = vec![ + (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(true)), + (PrimitiveType::Int, PrimitiveLiteral::Int(1)), + (PrimitiveType::Long, PrimitiveLiteral::Long(1)), + (PrimitiveType::Float, PrimitiveLiteral::Float(1.0.into())), + (PrimitiveType::Double, PrimitiveLiteral::Double(1.0.into())), + ( + PrimitiveType::Decimal { + precision: 9, + scale: 2, + }, + PrimitiveLiteral::Int128(1), + ), + (PrimitiveType::Date, PrimitiveLiteral::Int(1)), + (PrimitiveType::Time, PrimitiveLiteral::Long(1)), + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)), + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)), + ( + PrimitiveType::Uuid, + PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()), + ), + (PrimitiveType::Fixed(8), PrimitiveLiteral::Binary(vec![1])), + (PrimitiveType::Binary, PrimitiveLiteral::Binary(vec![1])), ]; - for (i, t) in types.iter().enumerate() { - for (j, l) in literals.iter().enumerate() { - assert_eq!(i == j, t.compatible(l)); - } + for (ty, literal) in pairs { + assert!(ty.compatible(&literal)); } } } diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 54e2105ff..9148844cc 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -464,23 +464,29 @@ impl Transform { /// `StartsWith`, `NotStartsWith`), the original datum is returned /// unmodified. fn adjust_boundary(op: &PredicateOperator, datum: &Datum) -> Result> { - let literal = datum.literal(); - let adjusted_boundary = match op { - PredicateOperator::LessThan => match literal { - PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), - PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), - PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), - PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - 1)), + PredicateOperator::LessThan => match (datum.data_type(), datum.literal()) { + (PrimitiveType::Int, PrimitiveLiteral::Int(v)) => Some(Datum::int(v - 1)), + (PrimitiveType::Long, PrimitiveLiteral::Long(v)) => Some(Datum::long(v - 1)), + (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => { + Some(Datum::decimal(v - 1)?) + } + (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => Some(Datum::date(v - 1)), + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => { + Some(Datum::timestamp_micros(v - 1)) + } _ => Some(datum.to_owned()), }, - PredicateOperator::GreaterThan => match literal { - PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)), - PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), - PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), - PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + 1)), + PredicateOperator::GreaterThan => match (datum.data_type(), datum.literal()) { + (PrimitiveType::Int, PrimitiveLiteral::Int(v)) => Some(Datum::int(v + 1)), + (PrimitiveType::Long, PrimitiveLiteral::Long(v)) => Some(Datum::long(v + 1)), + (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => { + Some(Datum::decimal(v + 1)?) + } + (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => Some(Datum::date(v + 1)), + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => { + Some(Datum::timestamp_micros(v + 1)) + } _ => Some(datum.to_owned()), }, PredicateOperator::Eq @@ -555,7 +561,7 @@ impl Transform { transformed: &Datum, ) -> Option { let should_adjust = match self { - Transform::Day => matches!(original.literal(), PrimitiveLiteral::Timestamp(_)), + Transform::Day => matches!(original.data_type(), PrimitiveType::Timestamp), Transform::Year | Transform::Month => true, _ => false, }; diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 8c2e4abe3..03fd1ec38 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -65,24 +65,14 @@ pub enum PrimitiveLiteral { Float(OrderedFloat), /// Stored as 8-byte little-endian Double(OrderedFloat), - /// Stores days from the 1970-01-01 in an 4-byte little-endian int - Date(i32), - /// Stores microseconds from midnight in an 8-byte little-endian long - Time(i64), - /// Timestamp without timezone - Timestamp(i64), - /// Timestamp with timezone - Timestamptz(i64), /// UTF-8 bytes (without length) String(String), - /// 16-byte big-endian value - Uuid(Uuid), - /// Binary value - Fixed(Vec), /// Binary value (without length) Binary(Vec), - /// Stores unscaled value as big int. According to iceberg spec, the precision must less than 38(`MAX_DECIMAL_PRECISION`) , so i128 is suit here. - Decimal(i128), + /// Stored as 16-byte little-endian + Int128(i128), + /// Stored as 16-byte little-endian + UInt128(u128), } impl PrimitiveLiteral { @@ -248,26 +238,26 @@ impl PartialOrd for Datum { PrimitiveType::Double, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::Date(val), - PrimitiveLiteral::Date(other_val), + PrimitiveLiteral::Int(val), + PrimitiveLiteral::Int(other_val), PrimitiveType::Date, PrimitiveType::Date, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::Time(val), - PrimitiveLiteral::Time(other_val), + PrimitiveLiteral::Long(val), + PrimitiveLiteral::Long(other_val), PrimitiveType::Time, PrimitiveType::Time, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::Timestamp(val), - PrimitiveLiteral::Timestamp(other_val), + PrimitiveLiteral::Long(val), + PrimitiveLiteral::Long(other_val), PrimitiveType::Timestamp, PrimitiveType::Timestamp, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::Timestamptz(val), - PrimitiveLiteral::Timestamptz(other_val), + PrimitiveLiteral::Long(val), + PrimitiveLiteral::Long(other_val), PrimitiveType::Timestamptz, PrimitiveType::Timestamptz, ) => val.partial_cmp(other_val), @@ -278,14 +268,14 @@ impl PartialOrd for Datum { PrimitiveType::String, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::Uuid(val), - PrimitiveLiteral::Uuid(other_val), + PrimitiveLiteral::UInt128(val), + PrimitiveLiteral::UInt128(other_val), PrimitiveType::Uuid, PrimitiveType::Uuid, - ) => val.partial_cmp(other_val), + ) => Uuid::from_u128(*val).partial_cmp(&Uuid::from_u128(*other_val)), ( - PrimitiveLiteral::Fixed(val), - PrimitiveLiteral::Fixed(other_val), + PrimitiveLiteral::Binary(val), + PrimitiveLiteral::Binary(other_val), PrimitiveType::Fixed(_), PrimitiveType::Fixed(_), ) => val.partial_cmp(other_val), @@ -296,8 +286,8 @@ impl PartialOrd for Datum { PrimitiveType::Binary, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::Decimal(val), - PrimitiveLiteral::Decimal(other_val), + PrimitiveLiteral::Int128(val), + PrimitiveLiteral::Int128(other_val), PrimitiveType::Decimal { precision: _, scale, @@ -320,28 +310,33 @@ impl Display for Datum { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match (&self.r#type, &self.literal) { (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{}", val), - (_, PrimitiveLiteral::Int(val)) => write!(f, "{}", val), - (_, PrimitiveLiteral::Long(val)) => write!(f, "{}", val), + (PrimitiveType::Int, PrimitiveLiteral::Int(val)) => write!(f, "{}", val), + (PrimitiveType::Long, PrimitiveLiteral::Long(val)) => write!(f, "{}", val), (_, PrimitiveLiteral::Float(val)) => write!(f, "{}", val), (_, PrimitiveLiteral::Double(val)) => write!(f, "{}", val), - (_, PrimitiveLiteral::Date(val)) => write!(f, "{}", days_to_date(*val)), - (_, PrimitiveLiteral::Time(val)) => write!(f, "{}", microseconds_to_time(*val)), - (_, PrimitiveLiteral::Timestamp(val)) => { + (PrimitiveType::Date, PrimitiveLiteral::Int(val)) => { + write!(f, "{}", days_to_date(*val)) + } + (PrimitiveType::Time, PrimitiveLiteral::Long(val)) => { + write!(f, "{}", microseconds_to_time(*val)) + } + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(val)) => { write!(f, "{}", microseconds_to_datetime(*val)) } - (_, PrimitiveLiteral::Timestamptz(val)) => { + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => { write!(f, "{}", microseconds_to_datetimetz(*val)) } (_, PrimitiveLiteral::String(val)) => write!(f, r#""{}""#, val), - (_, PrimitiveLiteral::Uuid(val)) => write!(f, "{}", val), - (_, PrimitiveLiteral::Fixed(val)) => display_bytes(val, f), + (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => { + write!(f, "{}", Uuid::from_u128(*val)) + } (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f), ( PrimitiveType::Decimal { precision: _, scale, }, - PrimitiveLiteral::Decimal(val), + PrimitiveLiteral::Int128(val), ) => { write!(f, "{}", Decimal::from_i128_with_scale(*val, *scale)) } @@ -398,21 +393,21 @@ impl Datum { PrimitiveType::Double => { PrimitiveLiteral::Double(OrderedFloat(f64::from_le_bytes(bytes.try_into()?))) } - PrimitiveType::Date => PrimitiveLiteral::Date(i32::from_le_bytes(bytes.try_into()?)), - PrimitiveType::Time => PrimitiveLiteral::Time(i64::from_le_bytes(bytes.try_into()?)), + PrimitiveType::Date => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)), + PrimitiveType::Time => PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)), PrimitiveType::Timestamp => { - PrimitiveLiteral::Timestamp(i64::from_le_bytes(bytes.try_into()?)) + PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)) } PrimitiveType::Timestamptz => { - PrimitiveLiteral::Timestamptz(i64::from_le_bytes(bytes.try_into()?)) + PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)) } PrimitiveType::String => { PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string()) } PrimitiveType::Uuid => { - PrimitiveLiteral::Uuid(Uuid::from_u128(u128::from_be_bytes(bytes.try_into()?))) + PrimitiveLiteral::UInt128(u128::from_be_bytes(bytes.try_into()?)) } - PrimitiveType::Fixed(_) => PrimitiveLiteral::Fixed(Vec::from(bytes)), + PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Decimal { precision: _, @@ -438,15 +433,10 @@ impl Datum { PrimitiveLiteral::Long(val) => ByteBuf::from(val.to_le_bytes()), PrimitiveLiteral::Float(val) => ByteBuf::from(val.to_le_bytes()), PrimitiveLiteral::Double(val) => ByteBuf::from(val.to_le_bytes()), - PrimitiveLiteral::Date(val) => ByteBuf::from(val.to_le_bytes()), - PrimitiveLiteral::Time(val) => ByteBuf::from(val.to_le_bytes()), - PrimitiveLiteral::Timestamp(val) => ByteBuf::from(val.to_le_bytes()), - PrimitiveLiteral::Timestamptz(val) => ByteBuf::from(val.to_le_bytes()), PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()), - PrimitiveLiteral::Uuid(val) => ByteBuf::from(val.as_u128().to_be_bytes()), - PrimitiveLiteral::Fixed(val) => ByteBuf::from(val.as_slice()), + PrimitiveLiteral::UInt128(val) => ByteBuf::from(val.to_be_bytes()), PrimitiveLiteral::Binary(val) => ByteBuf::from(val.as_slice()), - PrimitiveLiteral::Decimal(_) => todo!(), + PrimitiveLiteral::Int128(_) => todo!(), } } @@ -576,12 +566,12 @@ impl Datum { /// let t = Datum::date(2); /// /// assert_eq!(&format!("{t}"), "1970-01-03"); - /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Date(2)), t.into()); + /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(2)), t.into()); /// ``` pub fn date(days: i32) -> Self { Self { r#type: PrimitiveType::Date, - literal: PrimitiveLiteral::Date(days), + literal: PrimitiveLiteral::Int(days), } } @@ -668,7 +658,7 @@ impl Datum { Ok(Self { r#type: PrimitiveType::Time, - literal: PrimitiveLiteral::Time(value), + literal: PrimitiveLiteral::Long(value), }) } @@ -680,7 +670,7 @@ impl Datum { Self { r#type: PrimitiveType::Time, - literal: PrimitiveLiteral::Time(micro_secs), + literal: PrimitiveLiteral::Long(micro_secs), } } @@ -740,7 +730,7 @@ impl Datum { pub fn timestamp_micros(value: i64) -> Self { Self { r#type: PrimitiveType::Timestamp, - literal: PrimitiveLiteral::Timestamp(value), + literal: PrimitiveLiteral::Long(value), } } @@ -798,7 +788,7 @@ impl Datum { pub fn timestamptz_micros(value: i64) -> Self { Self { r#type: PrimitiveType::Timestamptz, - literal: PrimitiveLiteral::Timestamptz(value), + literal: PrimitiveLiteral::Long(value), } } @@ -868,7 +858,7 @@ impl Datum { pub fn uuid(uuid: Uuid) -> Self { Self { r#type: PrimitiveType::Uuid, - literal: PrimitiveLiteral::Uuid(uuid), + literal: PrimitiveLiteral::UInt128(uuid.as_u128()), } } @@ -907,7 +897,7 @@ impl Datum { let value: Vec = input.into_iter().collect(); Self { r#type: PrimitiveType::Fixed(value.len() as u64), - literal: PrimitiveLiteral::Fixed(value), + literal: PrimitiveLiteral::Binary(value), } } @@ -968,7 +958,7 @@ impl Datum { if let Type::Primitive(p) = r#type { Ok(Self { r#type: p, - literal: PrimitiveLiteral::Decimal(decimal.mantissa()), + literal: PrimitiveLiteral::Int128(decimal.mantissa()), }) } else { unreachable!("Decimal type must be primitive.") @@ -980,7 +970,7 @@ impl Datum { match target_type { Type::Primitive(target_primitive_type) => { match (&self.literal, &self.r#type, target_primitive_type) { - (PrimitiveLiteral::Date(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)), + (PrimitiveLiteral::Int(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)), (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)), // TODO: implement more type conversions (_, self_type, target_type) if self_type == target_type => Ok(self), @@ -1242,7 +1232,7 @@ impl Literal { /// Creates date literal from number of days from unix epoch directly. pub fn date(days: i32) -> Self { - Self::Primitive(PrimitiveLiteral::Date(days)) + Self::Primitive(PrimitiveLiteral::Int(days)) } /// Creates a date in `%Y-%m-%d` format, assume in utc timezone. @@ -1293,7 +1283,7 @@ impl Literal { /// Creates time in microseconds directly pub fn time(value: i64) -> Self { - Self::Primitive(PrimitiveLiteral::Time(value)) + Self::Primitive(PrimitiveLiteral::Long(value)) } /// Creates time literal from [`chrono::NaiveTime`]. @@ -1356,12 +1346,12 @@ impl Literal { /// Creates a timestamp from unix epoch in microseconds. pub fn timestamp(value: i64) -> Self { - Self::Primitive(PrimitiveLiteral::Timestamp(value)) + Self::Primitive(PrimitiveLiteral::Long(value)) } /// Creates a timestamp with timezone from unix epoch in microseconds. pub fn timestamptz(value: i64) -> Self { - Self::Primitive(PrimitiveLiteral::Timestamptz(value)) + Self::Primitive(PrimitiveLiteral::Long(value)) } /// Creates a timestamp from [`DateTime`]. @@ -1421,7 +1411,7 @@ impl Literal { /// Creates uuid literal. pub fn uuid(uuid: Uuid) -> Self { - Self::Primitive(PrimitiveLiteral::Uuid(uuid)) + Self::Primitive(PrimitiveLiteral::UInt128(uuid.as_u128())) } /// Creates uuid from str. See [`Uuid::parse_str`]. @@ -1454,12 +1444,12 @@ impl Literal { /// ```rust /// use iceberg::spec::{Literal, PrimitiveLiteral}; /// let t1 = Literal::fixed(vec![1u8, 2u8]); - /// let t2 = Literal::Primitive(PrimitiveLiteral::Fixed(vec![1u8, 2u8])); + /// let t2 = Literal::Primitive(PrimitiveLiteral::Binary(vec![1u8, 2u8])); /// /// assert_eq!(t1, t2); /// ``` pub fn fixed>(input: I) -> Self { - Literal::Primitive(PrimitiveLiteral::Fixed(input.into_iter().collect())) + Literal::Primitive(PrimitiveLiteral::Binary(input.into_iter().collect())) } /// Creates a binary literal from bytes. @@ -1479,7 +1469,7 @@ impl Literal { /// Creates a decimal literal. pub fn decimal(decimal: i128) -> Self { - Self::Primitive(PrimitiveLiteral::Decimal(decimal)) + Self::Primitive(PrimitiveLiteral::Int128(decimal)) } /// Creates decimal literal from string. See [`Decimal::from_str_exact`]. @@ -1643,22 +1633,22 @@ impl Literal { ))?)), ))), (PrimitiveType::Date, JsonValue::String(s)) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Date( + Ok(Some(Literal::Primitive(PrimitiveLiteral::Int( date::date_to_days(&NaiveDate::parse_from_str(&s, "%Y-%m-%d")?), )))) } (PrimitiveType::Time, JsonValue::String(s)) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Time( + Ok(Some(Literal::Primitive(PrimitiveLiteral::Long( time::time_to_microseconds(&NaiveTime::parse_from_str(&s, "%H:%M:%S%.f")?), )))) } (PrimitiveType::Timestamp, JsonValue::String(s)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Timestamp(timestamp::datetime_to_microseconds( + PrimitiveLiteral::Long(timestamp::datetime_to_microseconds( &NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.f")?, )), ))), (PrimitiveType::Timestamptz, JsonValue::String(s)) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Timestamptz( + Ok(Some(Literal::Primitive(PrimitiveLiteral::Long( timestamptz::datetimetz_to_microseconds(&Utc.from_utc_datetime( &NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.f+00:00")?, )), @@ -1668,7 +1658,7 @@ impl Literal { Ok(Some(Literal::Primitive(PrimitiveLiteral::String(s)))) } (PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Uuid(Uuid::parse_str(&s)?), + PrimitiveLiteral::UInt128(Uuid::parse_str(&s)?.as_u128()), ))), (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(), (PrimitiveType::Binary, JsonValue::String(_)) => todo!(), @@ -1681,7 +1671,7 @@ impl Literal { ) => { let mut decimal = Decimal::from_str_exact(&s)?; decimal.rescale(*scale); - Ok(Some(Literal::Primitive(PrimitiveLiteral::Decimal( + Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128( decimal.mantissa(), )))) } @@ -1777,51 +1767,58 @@ impl Literal { /// See [this spec](https://iceberg.apache.org/spec/#json-single-value-serialization) for reference. pub fn try_into_json(self, r#type: &Type) -> Result { match (self, r#type) { - (Literal::Primitive(prim), _) => match prim { - PrimitiveLiteral::Boolean(val) => Ok(JsonValue::Bool(val)), - PrimitiveLiteral::Int(val) => Ok(JsonValue::Number((val).into())), - PrimitiveLiteral::Long(val) => Ok(JsonValue::Number((val).into())), - PrimitiveLiteral::Float(val) => match Number::from_f64(val.0 as f64) { - Some(number) => Ok(JsonValue::Number(number)), - None => Ok(JsonValue::Null), - }, - PrimitiveLiteral::Double(val) => match Number::from_f64(val.0) { - Some(number) => Ok(JsonValue::Number(number)), - None => Ok(JsonValue::Null), - }, - PrimitiveLiteral::Date(val) => { + (Literal::Primitive(prim), Type::Primitive(prim_type)) => match (prim_type, prim) { + (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(val)) => { + Ok(JsonValue::Bool(val)) + } + (PrimitiveType::Int, PrimitiveLiteral::Int(val)) => { + Ok(JsonValue::Number((val).into())) + } + (PrimitiveType::Long, PrimitiveLiteral::Long(val)) => { + Ok(JsonValue::Number((val).into())) + } + (PrimitiveType::Float, PrimitiveLiteral::Float(val)) => { + match Number::from_f64(val.0 as f64) { + Some(number) => Ok(JsonValue::Number(number)), + None => Ok(JsonValue::Null), + } + } + (PrimitiveType::Double, PrimitiveLiteral::Double(val)) => { + match Number::from_f64(val.0) { + Some(number) => Ok(JsonValue::Number(number)), + None => Ok(JsonValue::Null), + } + } + (PrimitiveType::Date, PrimitiveLiteral::Int(val)) => { Ok(JsonValue::String(date::days_to_date(val).to_string())) } - PrimitiveLiteral::Time(val) => Ok(JsonValue::String( + (PrimitiveType::Time, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String( time::microseconds_to_time(val).to_string(), )), - PrimitiveLiteral::Timestamp(val) => Ok(JsonValue::String( + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String( timestamp::microseconds_to_datetime(val) .format("%Y-%m-%dT%H:%M:%S%.f") .to_string(), )), - PrimitiveLiteral::Timestamptz(val) => Ok(JsonValue::String( + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String( timestamptz::microseconds_to_datetimetz(val) .format("%Y-%m-%dT%H:%M:%S%.f+00:00") .to_string(), )), - PrimitiveLiteral::String(val) => Ok(JsonValue::String(val.clone())), - PrimitiveLiteral::Uuid(val) => Ok(JsonValue::String(val.to_string())), - PrimitiveLiteral::Fixed(val) => Ok(JsonValue::String(val.iter().fold( - String::new(), - |mut acc, x| { - acc.push_str(&format!("{:x}", x)); - acc - }, - ))), - PrimitiveLiteral::Binary(val) => Ok(JsonValue::String(val.iter().fold( + (PrimitiveType::String, PrimitiveLiteral::String(val)) => { + Ok(JsonValue::String(val.clone())) + } + (_, PrimitiveLiteral::UInt128(val)) => { + Ok(JsonValue::String(Uuid::from_u128(val).to_string())) + } + (_, PrimitiveLiteral::Binary(val)) => Ok(JsonValue::String(val.iter().fold( String::new(), |mut acc, x| { acc.push_str(&format!("{:x}", x)); acc }, ))), - PrimitiveLiteral::Decimal(val) => match r#type { + (_, PrimitiveLiteral::Int128(val)) => match r#type { Type::Primitive(PrimitiveType::Decimal { precision: _precision, scale, @@ -1834,6 +1831,10 @@ impl Literal { "The iceberg type for decimal literal must be decimal.", ))?, }, + _ => Err(Error::new( + ErrorKind::DataInvalid, + "The iceberg value doesn't fit to the iceberg type.", + )), }, (Literal::Struct(s), Type::Struct(struct_type)) => { let mut id_and_value = Vec::with_capacity(struct_type.fields().len()); @@ -1888,15 +1889,10 @@ impl Literal { PrimitiveLiteral::Long(any) => Box::new(any), PrimitiveLiteral::Float(any) => Box::new(any), PrimitiveLiteral::Double(any) => Box::new(any), - PrimitiveLiteral::Date(any) => Box::new(any), - PrimitiveLiteral::Time(any) => Box::new(any), - PrimitiveLiteral::Timestamp(any) => Box::new(any), - PrimitiveLiteral::Timestamptz(any) => Box::new(any), - PrimitiveLiteral::Fixed(any) => Box::new(any), PrimitiveLiteral::Binary(any) => Box::new(any), PrimitiveLiteral::String(any) => Box::new(any), - PrimitiveLiteral::Uuid(any) => Box::new(any), - PrimitiveLiteral::Decimal(any) => Box::new(any), + PrimitiveLiteral::UInt128(any) => Box::new(any), + PrimitiveLiteral::Int128(any) => Box::new(any), }, _ => unimplemented!(), } @@ -2197,17 +2193,12 @@ mod _serde { super::PrimitiveLiteral::Long(v) => RawLiteralEnum::Long(v), super::PrimitiveLiteral::Float(v) => RawLiteralEnum::Float(v.0), super::PrimitiveLiteral::Double(v) => RawLiteralEnum::Double(v.0), - super::PrimitiveLiteral::Date(v) => RawLiteralEnum::Int(v), - super::PrimitiveLiteral::Time(v) => RawLiteralEnum::Long(v), - super::PrimitiveLiteral::Timestamp(v) => RawLiteralEnum::Long(v), - super::PrimitiveLiteral::Timestamptz(v) => RawLiteralEnum::Long(v), super::PrimitiveLiteral::String(v) => RawLiteralEnum::String(v), - super::PrimitiveLiteral::Uuid(v) => { - RawLiteralEnum::Bytes(ByteBuf::from(v.as_u128().to_be_bytes())) + super::PrimitiveLiteral::UInt128(v) => { + RawLiteralEnum::Bytes(ByteBuf::from(v.to_be_bytes())) } - super::PrimitiveLiteral::Fixed(v) => RawLiteralEnum::Bytes(ByteBuf::from(v)), super::PrimitiveLiteral::Binary(v) => RawLiteralEnum::Bytes(ByteBuf::from(v)), - super::PrimitiveLiteral::Decimal(v) => { + super::PrimitiveLiteral::Int128(v) => { RawLiteralEnum::Bytes(ByteBuf::from(v.to_be_bytes())) } }, @@ -2777,7 +2768,7 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::Date(17486)), + Literal::Primitive(PrimitiveLiteral::Int(17486)), &Type::Primitive(PrimitiveType::Date), ); } @@ -2788,7 +2779,7 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::Time(81068123456)), + Literal::Primitive(PrimitiveLiteral::Long(81068123456)), &Type::Primitive(PrimitiveType::Time), ); } @@ -2799,7 +2790,7 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::Timestamp(1510871468123456)), + Literal::Primitive(PrimitiveLiteral::Long(1510871468123456)), &Type::Primitive(PrimitiveType::Timestamp), ); } @@ -2810,7 +2801,7 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::Timestamptz(1510871468123456)), + Literal::Primitive(PrimitiveLiteral::Long(1510871468123456)), &Type::Primitive(PrimitiveType::Timestamptz), ); } @@ -2832,8 +2823,10 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::Uuid( - Uuid::parse_str("f79c3e09-677c-4bbd-a479-3f349cb785e7").unwrap(), + Literal::Primitive(PrimitiveLiteral::UInt128( + Uuid::parse_str("f79c3e09-677c-4bbd-a479-3f349cb785e7") + .unwrap() + .as_u128(), )), &Type::Primitive(PrimitiveType::Uuid), ); @@ -2845,7 +2838,7 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::Decimal(1420)), + Literal::Primitive(PrimitiveLiteral::Int128(1420)), &Type::decimal(28, 2).unwrap(), ); } @@ -3012,7 +3005,7 @@ mod tests { #[test] fn avro_convert_test_date() { check_convert_with_avro( - Literal::Primitive(PrimitiveLiteral::Date(17486)), + Literal::Primitive(PrimitiveLiteral::Int(17486)), &Type::Primitive(PrimitiveType::Date), ); } @@ -3020,7 +3013,7 @@ mod tests { #[test] fn avro_convert_test_time() { check_convert_with_avro( - Literal::Primitive(PrimitiveLiteral::Time(81068123456)), + Literal::Primitive(PrimitiveLiteral::Long(81068123456)), &Type::Primitive(PrimitiveType::Time), ); } @@ -3028,7 +3021,7 @@ mod tests { #[test] fn avro_convert_test_timestamp() { check_convert_with_avro( - Literal::Primitive(PrimitiveLiteral::Timestamp(1510871468123456)), + Literal::Primitive(PrimitiveLiteral::Long(1510871468123456)), &Type::Primitive(PrimitiveType::Timestamp), ); } @@ -3036,7 +3029,7 @@ mod tests { #[test] fn avro_convert_test_timestamptz() { check_convert_with_avro( - Literal::Primitive(PrimitiveLiteral::Timestamptz(1510871468123456)), + Literal::Primitive(PrimitiveLiteral::Long(1510871468123456)), &Type::Primitive(PrimitiveType::Timestamptz), ); } diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index e19e5b841..83cbbd833 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -21,7 +21,7 @@ use arrow_array::ArrayRef; use arrow_schema::{DataType, TimeUnit}; use super::TransformFunction; -use crate::spec::{Datum, PrimitiveLiteral}; +use crate::spec::{Datum, PrimitiveLiteral, PrimitiveType}; #[derive(Debug)] pub struct Bucket { @@ -221,17 +221,19 @@ impl TransformFunction for Bucket { } fn transform_literal(&self, input: &Datum) -> crate::Result> { - let val = match input.literal() { - PrimitiveLiteral::Int(v) => self.bucket_int(*v), - PrimitiveLiteral::Long(v) => self.bucket_long(*v), - PrimitiveLiteral::Decimal(v) => self.bucket_decimal(*v), - PrimitiveLiteral::Date(v) => self.bucket_date(*v), - PrimitiveLiteral::Time(v) => self.bucket_time(*v), - PrimitiveLiteral::Timestamp(v) => self.bucket_timestamp(*v), - PrimitiveLiteral::String(v) => self.bucket_str(v.as_str()), - PrimitiveLiteral::Uuid(v) => self.bucket_bytes(v.as_ref()), - PrimitiveLiteral::Binary(v) => self.bucket_bytes(v.as_ref()), - PrimitiveLiteral::Fixed(v) => self.bucket_bytes(v.as_ref()), + let val = match (input.data_type(), input.literal()) { + (PrimitiveType::Int, PrimitiveLiteral::Int(v)) => self.bucket_int(*v), + (PrimitiveType::Long, PrimitiveLiteral::Long(v)) => self.bucket_long(*v), + (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => self.bucket_decimal(*v), + (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => self.bucket_date(*v), + (PrimitiveType::Time, PrimitiveLiteral::Long(v)) => self.bucket_time(*v), + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => self.bucket_timestamp(*v), + (PrimitiveType::String, PrimitiveLiteral::String(v)) => self.bucket_str(v.as_str()), + (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(v)) => { + self.bucket_bytes(uuid::Uuid::from_u128(*v).as_ref()) + } + (PrimitiveType::Binary, PrimitiveLiteral::Binary(v)) => self.bucket_bytes(v.as_ref()), + (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(v)) => self.bucket_bytes(v.as_ref()), _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -561,7 +563,7 @@ mod test { Datum::decimal_from_str(curr)?, Datum::decimal_from_str(prev)?, ]), - Some("name IN (6, 2)"), + Some("name IN (2, 6)"), )?; fixture.assert_projection( diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 44e96af94..86ff9269b 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -25,7 +25,7 @@ use arrow_schema::{DataType, TimeUnit}; use chrono::{DateTime, Datelike, Duration}; use super::TransformFunction; -use crate::spec::{Datum, PrimitiveLiteral}; +use crate::spec::{Datum, PrimitiveLiteral, PrimitiveType}; use crate::{Error, ErrorKind, Result}; /// Hour in one second. @@ -68,10 +68,12 @@ impl TransformFunction for Year { } fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { - let val = match input.literal() { - PrimitiveLiteral::Date(v) => Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR, - PrimitiveLiteral::Timestamp(v) => Self::timestamp_to_year(*v)?, - PrimitiveLiteral::Timestamptz(v) => Self::timestamp_to_year(*v)?, + let val = match (input.data_type(), input.literal()) { + (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => { + Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR + } + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => Self::timestamp_to_year(*v)?, + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => Self::timestamp_to_year(*v)?, _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -137,13 +139,15 @@ impl TransformFunction for Month { } fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { - let val = match input.literal() { - PrimitiveLiteral::Date(v) => { + let val = match (input.data_type(), input.literal()) { + (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => { (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12 + Date32Type::to_naive_date(*v).month0() as i32 } - PrimitiveLiteral::Timestamp(v) => Self::timestamp_to_month(*v)?, - PrimitiveLiteral::Timestamptz(v) => Self::timestamp_to_month(*v)?, + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => Self::timestamp_to_month(*v)?, + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { + Self::timestamp_to_month(*v)? + } _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -221,10 +225,12 @@ impl TransformFunction for Day { } fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { - let val = match input.literal() { - PrimitiveLiteral::Date(v) => *v, - PrimitiveLiteral::Timestamp(v) => Self::day_timestamp_micro(*v)?, - PrimitiveLiteral::Timestamptz(v) => Self::day_timestamp_micro(*v)?, + let val = match (input.data_type(), input.literal()) { + (PrimitiveType::Date, PrimitiveLiteral::Int(v)) => *v, + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => Self::day_timestamp_micro(*v)?, + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { + Self::day_timestamp_micro(*v)? + } _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -272,9 +278,11 @@ impl TransformFunction for Hour { } fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { - let val = match input.literal() { - PrimitiveLiteral::Timestamp(v) => Self::hour_timestamp_micro(*v), - PrimitiveLiteral::Timestamptz(v) => Self::hour_timestamp_micro(*v), + let val = match (input.data_type(), input.literal()) { + (PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => Self::hour_timestamp_micro(*v), + (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(v)) => { + Self::hour_timestamp_micro(*v) + } _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, @@ -1294,7 +1302,7 @@ mod test { Datum::timestamp_from_str(value)?, Datum::timestamp_from_str(another)?, ]), - Some("name IN (-363, -364, -365)"), + Some("name IN (-363, -365, -364)"), )?; fixture.assert_projection( diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index 04fe8445a..cba5409bb 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -144,7 +144,7 @@ impl TransformFunction for Truncate { let width = self.width as i64; Datum::long(Self::truncate_i64(*v, width)) })), - PrimitiveLiteral::Decimal(v) => Ok(Some({ + PrimitiveLiteral::Int128(v) => Ok(Some({ let width = self.width as i128; Datum::decimal(Self::truncate_decimal_i128(*v, width))? })), @@ -378,7 +378,7 @@ mod test { Datum::decimal_from_str(curr)?, Datum::decimal_from_str(next)?, ]), - Some("name IN (10090, 9990, 9890)"), + Some("name IN (9890, 9990, 10090)"), )?; fixture.assert_projection( @@ -448,7 +448,7 @@ mod test { Datum::decimal_from_str(curr)?, Datum::decimal_from_str(next)?, ]), - Some("name IN (9900, 10000, 10100)"), + Some("name IN (10000, 10100, 9900)"), )?; fixture.assert_projection( diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index ef21f9d33..d41714b9e 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -347,9 +347,7 @@ impl MinMaxColAggregator { let convert_func = |v: ByteArray| -> Result { Result::::Ok(Datum::new( ty.clone(), - PrimitiveLiteral::Decimal(i128::from_le_bytes( - v.data().try_into().unwrap(), - )), + PrimitiveLiteral::Int128(i128::from_le_bytes(v.data().try_into().unwrap())), )) }; self.update_state::(field_id, &stat, convert_func) @@ -364,7 +362,7 @@ impl MinMaxColAggregator { let convert_func = |v: i32| { Result::::Ok(Datum::new( ty.clone(), - PrimitiveLiteral::Decimal(i128::from(v)), + PrimitiveLiteral::Int128(i128::from(v)), )) }; self.update_state::(field_id, &stat, convert_func) @@ -379,7 +377,7 @@ impl MinMaxColAggregator { let convert_func = |v: i64| { Result::::Ok(Datum::new( ty.clone(), - PrimitiveLiteral::Decimal(i128::from(v)), + PrimitiveLiteral::Int128(i128::from(v)), )) }; self.update_state::(field_id, &stat, convert_func) @@ -1180,7 +1178,7 @@ mod tests { precision: 10, scale: 5 }, - PrimitiveLiteral::Decimal(1) + PrimitiveLiteral::Int128(1) ) ), (12, Datum::uuid(Uuid::from_u128(0))), @@ -1210,7 +1208,7 @@ mod tests { precision: 10, scale: 5 }, - PrimitiveLiteral::Decimal(100) + PrimitiveLiteral::Int128(100) ) ), (12, Datum::uuid(Uuid::from_u128(3))),