diff --git a/Cargo.toml b/Cargo.toml index dccc6bdf1..697317c1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ chrono = "0.4" derive_builder = "0.20.0" either = "1" env_logger = "0.11.0" +fnv = "1" futures = "0.3" iceberg = { version = "0.2.0", path = "./crates/iceberg" } iceberg-catalog-rest = { version = "0.2.0", path = "./crates/catalog/rest" } diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 32288ee81..ee6b2ff4f 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -40,6 +40,7 @@ bitvec = { workspace = true } chrono = { workspace = true } derive_builder = { workspace = true } either = { workspace = true } +fnv = { workspace = true } futures = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } diff --git a/crates/iceberg/src/expr/mod.rs b/crates/iceberg/src/expr/mod.rs index bbfae2003..92bb01fc2 100644 --- a/crates/iceberg/src/expr/mod.rs +++ b/crates/iceberg/src/expr/mod.rs @@ -118,7 +118,9 @@ impl PredicateOperator { /// Bind expression to a schema. pub trait Bind { + /// The type of the bounded result. type Bound; + /// Bind an expression to a schema. fn bind(self, schema: SchemaRef, case_sensitive: bool) -> crate::Result; } #[cfg(test)] diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index f6f1fe353..9c702fdd8 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -23,7 +23,8 @@ use crate::error::Result; use crate::expr::{Bind, BoundReference, PredicateOperator, Reference}; use crate::spec::{Datum, SchemaRef}; use crate::{Error, ErrorKind}; -use std::collections::HashSet; +use fnv::FnvHashSet; + use std::fmt::{Debug, Display, Formatter}; use std::mem::MaybeUninit; use std::ops::Not; @@ -167,7 +168,7 @@ pub struct SetExpression { /// Term of this predicate, for example, `a` in `a in (1, 2, 3)`. term: T, /// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2, 3)`. - literals: HashSet, + literals: FnvHashSet, } impl Debug for SetExpression { @@ -181,7 +182,7 @@ impl Debug for SetExpression { } impl SetExpression { - pub(crate) fn new(op: PredicateOperator, term: T, literals: HashSet) -> Self { + pub(crate) fn new(op: PredicateOperator, term: T, literals: FnvHashSet) -> Self { debug_assert!(op.is_set()); Self { op, term, literals } } @@ -308,7 +309,7 @@ impl Bind for Predicate { .literals .into_iter() .map(|l| l.to(&bound_expr.term.field().field_type)) - .collect::>>()?; + .collect::>>()?; match &bound_expr.op { &PredicateOperator::In => { @@ -366,7 +367,7 @@ impl Display for Predicate { write!(f, "NOT ({})", expr.inputs()[0]) } Predicate::Unary(expr) => { - write!(f, "{}", expr.term) + write!(f, "{} {}", expr.term, expr.op) } Predicate::Binary(expr) => { write!(f, "{} {} {}", expr.term, expr.op, expr.literal) @@ -379,7 +380,7 @@ impl Display for Predicate { expr.op, expr.literals .iter() - .map(|l| format!("{:?}", l)) + .map(|l| format!("{}", l)) .collect::>() .join(", ") ) diff --git a/crates/iceberg/src/expr/term.rs b/crates/iceberg/src/expr/term.rs index 12e23866f..9681e0813 100644 --- a/crates/iceberg/src/expr/term.rs +++ b/crates/iceberg/src/expr/term.rs @@ -17,8 +17,8 @@ //! Term definition. -use crate::expr::Bind; -use crate::expr::{BinaryExpression, Predicate, PredicateOperator}; +use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression}; +use crate::expr::{Bind, UnaryExpression}; use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef}; use crate::{Error, ErrorKind}; use std::fmt::{Display, Formatter}; @@ -65,6 +65,41 @@ impl Reference { datum, )) } + + /// Creates an is null expression. For example, `a IS NULL`. + /// + /// # Example + /// + /// ```rust + /// + /// use iceberg::expr::Reference; + /// let expr = Reference::new("a").is_null(); + /// + /// assert_eq!(&format!("{expr}"), "a IS NULL"); + /// ``` + pub fn is_null(self) -> Predicate { + Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNull, self)) + } + + /// Creates an in expression. For example, `a IN (1, 2, 3)`. + /// + /// # Example + /// + /// ```rust + /// + /// use iceberg::expr::Reference; + /// use iceberg::spec::Datum; + /// let expr = Reference::new("a").r#in(vec![Datum::long(1), Datum::long(2), Datum::long(3)]); + /// + /// assert_eq!(&format!("{expr}"), "a IN (1, 3, 2)"); + /// ``` + pub fn r#in(self, values: impl IntoIterator) -> Predicate { + Predicate::Set(SetExpression::new( + PredicateOperator::In, + self, + values.into_iter().collect(), + )) + } } impl Display for Reference { @@ -94,7 +129,7 @@ impl Bind for Reference { } /// A named reference in a bound expression after binding to a schema. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct BoundReference { // This maybe different from [`name`] filed in [`NestedField`] since this contains full path. // For example, if the field is `a.b.c`, then `field.name` is `c`, but `original_name` is `a.b.c`. @@ -128,6 +163,63 @@ pub type BoundTerm = BoundReference; #[cfg(test)] mod tests { + use crate::expr::{Bind, BoundReference, Reference}; + use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type}; + use std::sync::Arc; + + fn table_schema_simple() -> SchemaRef { + Arc::new( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![2]) + .with_fields(vec![ + NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), + ]) + .build() + .unwrap(), + ) + } + + #[test] + fn test_bind_reference() { + let schema = table_schema_simple(); + let reference = Reference::new("bar").bind(schema, true).unwrap(); + + let expected_ref = BoundReference::new( + "bar", + NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), + ); + + assert_eq!(expected_ref, reference); + } + #[test] - fn test_bind_reference() {} + fn test_bind_reference_case_insensitive() { + let schema = table_schema_simple(); + let reference = Reference::new("BaR").bind(schema, false).unwrap(); + + let expected_ref = BoundReference::new( + "BaR", + NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), + ); + + assert_eq!(expected_ref, reference); + } + + #[test] + fn test_bind_reference_failure() { + let schema = table_schema_simple(); + let result = Reference::new("bar_not_eix").bind(schema, true); + + assert!(result.is_err()); + } + + #[test] + fn test_bind_reference_case_insensitive_failure() { + let schema = table_schema_simple(); + let result = Reference::new("BaR_non").bind(schema, false); + assert!(result.is_err()); + } } diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index fe1d13457..d0730ea3c 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -661,7 +661,17 @@ impl Datum { /// Convert the datum to `target_type`. pub fn to(self, target_type: &Type) -> Result { - todo!() + // TODO: We should allow more type conversions + match target_type { + Type::Primitive(typ) if typ == &self.r#type => Ok(self), + _ => Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Can't convert datum from {} type to {} type.", + self.r#type, target_type + ), + )), + } } }