From 09b6ea3435c19fa4f990cb1f7787ed8457e46ae1 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Thu, 7 Mar 2024 16:56:59 +0800 Subject: [PATCH] Add tests --- crates/iceberg/src/expr/predicate.rs | 271 +++++++++++++++++++++++++-- crates/iceberg/src/expr/term.rs | 80 +++----- crates/iceberg/src/spec/schema.rs | 4 +- crates/iceberg/src/spec/values.rs | 2 +- 4 files changed, 283 insertions(+), 74 deletions(-) diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index 45be5d6c4..fb443a87e 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -19,19 +19,17 @@ //! Predicate expressions are used to filter data, and evaluates to a boolean value. For example, //! `a > 10` is a predicate expression, and it evaluates to `true` if `a` is greater than `10`, -use crate::expr::{BoundReference, PredicateOperator, Reference}; -use crate::spec::Datum; +use std::fmt::{Debug, Display, Formatter}; +use std::mem::MaybeUninit; +use std::ops::Not; + +use fnv::FnvHashSet; use itertools::Itertools; -use std::collections::HashSet; + use crate::error::Result; use crate::expr::{Bind, BoundReference, PredicateOperator, Reference}; use crate::spec::{Datum, SchemaRef}; use crate::{Error, ErrorKind}; -use fnv::FnvHashSet; - -use std::fmt::{Debug, Display, Formatter}; -use std::mem::MaybeUninit; -use std::ops::Not; /// Logical expression, such as `AND`, `OR`, `NOT`. #[derive(PartialEq)] @@ -490,7 +488,11 @@ impl Predicate { impl Not for Predicate { type Output = Predicate; - /// Create a predicate which is the reverse of this predicate. For example: `NOT (a > 10)` + /// Create a predicate which is the reverse of this predicate. For example: `NOT (a > 10)`. + /// + /// This is different from [`Predicate::negate()`] since it doesn't rewrite expression, but + /// just adds a `NOT` operator. + /// /// # Example /// ///```rust @@ -530,12 +532,46 @@ pub enum BoundPredicate { Set(SetExpression), } +impl Display for BoundPredicate { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + BoundPredicate::AlwaysTrue => { + write!(f, "True") + } + BoundPredicate::AlwaysFalse => { + write!(f, "False") + } + BoundPredicate::And(expr) => { + write!(f, "({}) AND ({})", expr.inputs()[0], expr.inputs()[1]) + } + BoundPredicate::Or(expr) => { + write!(f, "({}) OR ({})", expr.inputs()[0], expr.inputs()[1]) + } + BoundPredicate::Not(expr) => { + write!(f, "NOT ({})", expr.inputs()[0]) + } + BoundPredicate::Unary(expr) => { + write!(f, "{}", expr) + } + BoundPredicate::Binary(expr) => { + write!(f, "{}", expr) + } + BoundPredicate::Set(expr) => { + write!(f, "{}", expr) + } + } + } +} + #[cfg(test)] mod tests { + use std::ops::Not; + use std::sync::Arc; + + use crate::expr::Bind; use crate::expr::Reference; use crate::spec::Datum; - use std::collections::HashSet; - use std::ops::Not; + use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type}; #[test] fn test_predicate_negate_and() { @@ -604,20 +640,15 @@ mod tests { #[test] fn test_predicate_negate_set() { - let expression = Reference::new("a").is_in(HashSet::from([Datum::long(5), Datum::long(6)])); + let expression = Reference::new("a").is_in([Datum::long(5), Datum::long(6)]); - let expected = - Reference::new("a").is_not_in(HashSet::from([Datum::long(5), Datum::long(6)])); + let expected = Reference::new("a").is_not_in([Datum::long(5), Datum::long(6)]); let result = expression.negate(); assert_eq!(result, expected); } - use crate::expr::{Bind, Reference}; - use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type}; - use std::sync::Arc; - fn table_schema_simple() -> SchemaRef { Arc::new( Schema::builder() @@ -640,4 +671,208 @@ mod tests { let bound_expr = expr.bind(schema, true).unwrap(); assert_eq!(&format!("{bound_expr}"), "foo IS NULL"); } + + #[test] + fn test_bind_is_null_required() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_null(); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "False"); + } + + #[test] + fn test_bind_is_not_null() { + let schema = table_schema_simple(); + let expr = Reference::new("foo").is_not_null(); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "foo IS NOT NULL"); + } + + #[test] + fn test_bind_is_not_null_required() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_not_null(); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "True"); + } + + #[test] + fn test_bind_less_than() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").less_than(Datum::int(10)); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "bar < 10"); + } + + #[test] + fn test_bind_less_than_wrong_type() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").less_than(Datum::string("abcd")); + let bound_expr = expr.bind(schema, true); + assert!(bound_expr.is_err()); + } + + #[test] + fn test_bind_greater_than_or_eq() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").greater_than_or_equal_to(Datum::int(10)); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "bar >= 10"); + } + + #[test] + fn test_bind_greater_than_or_eq_wrong_type() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").greater_than_or_equal_to(Datum::string("abcd")); + let bound_expr = expr.bind(schema, true); + assert!(bound_expr.is_err()); + } + + #[test] + fn test_bind_in() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_in([Datum::int(10), Datum::int(20)]); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "bar IN (20, 10)"); + } + + #[test] + fn test_bind_in_empty() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_in(vec![]); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "False"); + } + + #[test] + fn test_bind_in_one_literal() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_in(vec![Datum::int(10)]); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "bar = 10"); + } + + #[test] + fn test_bind_in_wrong_type() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_in(vec![Datum::int(10), Datum::string("abcd")]); + let bound_expr = expr.bind(schema, true); + assert!(bound_expr.is_err()); + } + + #[test] + fn test_bind_not_in() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_not_in([Datum::int(10), Datum::int(20)]); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "bar NOT IN (20, 10)"); + } + + #[test] + fn test_bind_not_in_empty() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_not_in(vec![]); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "True"); + } + + #[test] + fn test_bind_not_in_one_literal() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_not_in(vec![Datum::int(10)]); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "bar != 10"); + } + + #[test] + fn test_bind_not_in_wrong_type() { + let schema = table_schema_simple(); + let expr = Reference::new("bar").is_not_in([Datum::int(10), Datum::string("abcd")]); + let bound_expr = expr.bind(schema, true); + assert!(bound_expr.is_err()); + } + + #[test] + fn test_bind_and() { + let schema = table_schema_simple(); + let expr = Reference::new("bar") + .less_than(Datum::int(10)) + .and(Reference::new("foo").is_null()); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "(bar < 10) AND (foo IS NULL)"); + } + + #[test] + fn test_bind_and_always_false() { + let schema = table_schema_simple(); + let expr = Reference::new("foo") + .less_than(Datum::string("abcd")) + .and(Reference::new("bar").is_null()); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "False"); + } + + #[test] + fn test_bind_and_always_true() { + let schema = table_schema_simple(); + let expr = Reference::new("foo") + .less_than(Datum::string("abcd")) + .and(Reference::new("bar").is_not_null()); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), r#"foo < "abcd""#); + } + + #[test] + fn test_bind_or() { + let schema = table_schema_simple(); + let expr = Reference::new("bar") + .less_than(Datum::int(10)) + .or(Reference::new("foo").is_null()); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "(bar < 10) OR (foo IS NULL)"); + } + + #[test] + fn test_bind_or_always_true() { + let schema = table_schema_simple(); + let expr = Reference::new("foo") + .less_than(Datum::string("abcd")) + .or(Reference::new("bar").is_not_null()); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "True"); + } + + #[test] + fn test_bind_or_always_false() { + let schema = table_schema_simple(); + let expr = Reference::new("foo") + .less_than(Datum::string("abcd")) + .or(Reference::new("bar").is_null()); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), r#"foo < "abcd""#); + } + + #[test] + fn test_bind_not() { + let schema = table_schema_simple(); + let expr = !Reference::new("bar").less_than(Datum::int(10)); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "NOT (bar < 10)"); + } + + #[test] + fn test_bind_not_always_true() { + let schema = table_schema_simple(); + let expr = !Reference::new("bar").is_not_null(); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), "False"); + } + + #[test] + fn test_bind_not_always_false() { + let schema = table_schema_simple(); + let expr = !Reference::new("bar").is_null(); + let bound_expr = expr.bind(schema, true).unwrap(); + assert_eq!(&format!("{bound_expr}"), r#"True"#); + } } diff --git a/crates/iceberg/src/expr/term.rs b/crates/iceberg/src/expr/term.rs index 522c34715..e39c97e0f 100644 --- a/crates/iceberg/src/expr/term.rs +++ b/crates/iceberg/src/expr/term.rs @@ -17,14 +17,14 @@ //! Term definition. -use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression}; -use crate::expr::{Bind, UnaryExpression}; +use std::fmt::{Display, Formatter}; + +use fnv::FnvHashSet; + +use crate::expr::Bind; +use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression, UnaryExpression}; use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef}; use crate::{Error, ErrorKind}; -use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression, UnaryExpression}; -use crate::spec::{Datum, NestedField, NestedFieldRef}; -use std::collections::HashSet; -use std::fmt::{Display, Formatter}; /// Unbound term before binding to a schema. pub type Term = Reference; @@ -69,41 +69,6 @@ impl Reference { )) } - /// Creates an is null expression. For example, `a IS NULL`. - /// - /// # Example - /// - /// ```rust - /// - /// use iceberg::expr::Reference; - /// let expr = Reference::new("a").is_null(); - /// - /// assert_eq!(&format!("{expr}"), "a IS NULL"); - /// ``` - pub fn is_null(self) -> Predicate { - Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNull, self)) - } - - /// Creates an in expression. For example, `a IN (1, 2, 3)`. - /// - /// # Example - /// - /// ```rust - /// - /// use iceberg::expr::Reference; - /// use iceberg::spec::Datum; - /// let expr = Reference::new("a").r#in(vec![Datum::long(1), Datum::long(2), Datum::long(3)]); - /// - /// assert_eq!(&format!("{expr}"), "a IN (1, 3, 2)"); - /// ``` - pub fn r#in(self, values: impl IntoIterator) -> Predicate { - Predicate::Set(SetExpression::new( - PredicateOperator::In, - self, - values.into_iter().collect(), - )) - } - /// Creates a greater-than-or-equal-to than expression. For example, `a >= 10`. /// /// # Example @@ -162,16 +127,20 @@ impl Reference { /// /// ```rust /// - /// use std::collections::HashSet; + /// use fnv::FnvHashSet; /// use iceberg::expr::Reference; /// use iceberg::spec::Datum; - /// let expr = Reference::new("a").is_in(HashSet::from([Datum::long(5), Datum::long(6)])); + /// let expr = Reference::new("a").is_in([Datum::long(5), Datum::long(6)]); /// /// let as_string = format!("{expr}"); /// assert!(&as_string == "a IN (5, 6)" || &as_string == "a IN (6, 5)"); /// ``` - pub fn is_in(self, literals: HashSet) -> Predicate { - Predicate::Set(SetExpression::new(PredicateOperator::In, self, literals)) + pub fn is_in(self, literals: impl IntoIterator) -> Predicate { + Predicate::Set(SetExpression::new( + PredicateOperator::In, + self, + FnvHashSet::from_iter(literals), + )) } /// Creates an is-not-in expression. For example, `a IS NOT IN (5, 6)`. @@ -180,16 +149,20 @@ impl Reference { /// /// ```rust /// - /// use std::collections::HashSet; + /// use fnv::FnvHashSet; /// use iceberg::expr::Reference; /// use iceberg::spec::Datum; - /// let expr = Reference::new("a").is_not_in(HashSet::from([Datum::long(5), Datum::long(6)])); + /// let expr = Reference::new("a").is_not_in([Datum::long(5), Datum::long(6)]); /// /// let as_string = format!("{expr}"); /// assert!(&as_string == "a NOT IN (5, 6)" || &as_string == "a NOT IN (6, 5)"); /// ``` - pub fn is_not_in(self, literals: HashSet) -> Predicate { - Predicate::Set(SetExpression::new(PredicateOperator::NotIn, self, literals)) + pub fn is_not_in(self, literals: impl IntoIterator) -> Predicate { + Predicate::Set(SetExpression::new( + PredicateOperator::NotIn, + self, + FnvHashSet::from_iter(literals), + )) } } @@ -254,9 +227,10 @@ pub type BoundTerm = BoundReference; #[cfg(test)] mod tests { + use std::sync::Arc; + use crate::expr::{Bind, BoundReference, Reference}; use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type}; - use std::sync::Arc; fn table_schema_simple() -> SchemaRef { Arc::new( @@ -289,10 +263,10 @@ mod tests { #[test] fn test_bind_reference_case_insensitive() { let schema = table_schema_simple(); - let reference = Reference::new("BaR").bind(schema, false).unwrap(); + let reference = Reference::new("BAR").bind(schema, false).unwrap(); let expected_ref = BoundReference::new( - "BaR", + "BAR", NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), ); @@ -310,7 +284,7 @@ mod tests { #[test] fn test_bind_reference_case_insensitive_failure() { let schema = table_schema_simple(); - let result = Reference::new("BaR_non").bind(schema, false); + let result = Reference::new("bar_non_exist").bind(schema, false); assert!(result.is_err()); } } diff --git a/crates/iceberg/src/spec/schema.rs b/crates/iceberg/src/spec/schema.rs index ecd7c702d..975a2a9ef 100644 --- a/crates/iceberg/src/spec/schema.rs +++ b/crates/iceberg/src/spec/schema.rs @@ -1060,8 +1060,8 @@ table { ("qUUx", 6), ("QUUX.KEY", 7), ("QUUX.Value", 8), - ("qUUX.valUE.Key", 9), - ("qUux.vALUE.Value", 10), + ("qUUX.VALUE.Key", 9), + ("qUux.VaLue.Value", 10), ("lOCAtION", 11), ("LOCAtioN.ELeMENt", 12), ("LoCATion.element.LATitude", 13), diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index c0d3f09bd..0595773eb 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -106,7 +106,7 @@ impl Display for Datum { (_, PrimitiveLiteral::TimestampTZ(val)) => { write!(f, "{}", microseconds_to_datetimetz(*val)) } - (_, PrimitiveLiteral::String(val)) => write!(f, "{}", val), + (_, PrimitiveLiteral::String(val)) => write!(f, r#""{}""#, val), (_, PrimitiveLiteral::UUID(val)) => write!(f, "{}", val), (_, PrimitiveLiteral::Fixed(val)) => display_bytes(val, f), (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),