Skip to content

Commit

Permalink
Add some
Browse files Browse the repository at this point in the history
  • Loading branch information
liurenjie1024 committed Mar 6, 2024
1 parent 3c1554c commit c2ec8ab
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 11 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ chrono = "0.4"
derive_builder = "0.20.0"
either = "1"
env_logger = "0.11.0"
fnv = "1"
futures = "0.3"
iceberg = { version = "0.2.0", path = "./crates/iceberg" }
iceberg-catalog-rest = { version = "0.2.0", path = "./crates/catalog/rest" }
Expand Down
1 change: 1 addition & 0 deletions crates/iceberg/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ bitvec = { workspace = true }
chrono = { workspace = true }
derive_builder = { workspace = true }
either = { workspace = true }
fnv = { workspace = true }
futures = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
Expand Down
2 changes: 2 additions & 0 deletions crates/iceberg/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ impl PredicateOperator {

/// Bind expression to a schema.
pub trait Bind {
/// The type of the bounded result.
type Bound;
/// Bind an expression to a schema.
fn bind(self, schema: SchemaRef, case_sensitive: bool) -> crate::Result<Self::Bound>;
}
#[cfg(test)]
Expand Down
13 changes: 7 additions & 6 deletions crates/iceberg/src/expr/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ use crate::error::Result;
use crate::expr::{Bind, BoundReference, PredicateOperator, Reference};
use crate::spec::{Datum, SchemaRef};
use crate::{Error, ErrorKind};
use std::collections::HashSet;
use fnv::FnvHashSet;

use std::fmt::{Debug, Display, Formatter};
use std::mem::MaybeUninit;
use std::ops::Not;
Expand Down Expand Up @@ -167,7 +168,7 @@ pub struct SetExpression<T> {
/// Term of this predicate, for example, `a` in `a in (1, 2, 3)`.
term: T,
/// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2, 3)`.
literals: HashSet<Datum>,
literals: FnvHashSet<Datum>,
}

impl<T: Debug> Debug for SetExpression<T> {
Expand All @@ -181,7 +182,7 @@ impl<T: Debug> Debug for SetExpression<T> {
}

impl<T> SetExpression<T> {
pub(crate) fn new(op: PredicateOperator, term: T, literals: HashSet<Datum>) -> Self {
pub(crate) fn new(op: PredicateOperator, term: T, literals: FnvHashSet<Datum>) -> Self {
debug_assert!(op.is_set());
Self { op, term, literals }
}
Expand Down Expand Up @@ -308,7 +309,7 @@ impl Bind for Predicate {
.literals
.into_iter()
.map(|l| l.to(&bound_expr.term.field().field_type))
.collect::<Result<HashSet<Datum>>>()?;
.collect::<Result<FnvHashSet<Datum>>>()?;

match &bound_expr.op {
&PredicateOperator::In => {
Expand Down Expand Up @@ -366,7 +367,7 @@ impl Display for Predicate {
write!(f, "NOT ({})", expr.inputs()[0])
}
Predicate::Unary(expr) => {
write!(f, "{}", expr.term)
write!(f, "{} {}", expr.term, expr.op)
}
Predicate::Binary(expr) => {
write!(f, "{} {} {}", expr.term, expr.op, expr.literal)
Expand All @@ -379,7 +380,7 @@ impl Display for Predicate {
expr.op,
expr.literals
.iter()
.map(|l| format!("{:?}", l))
.map(|l| format!("{}", l))
.collect::<Vec<String>>()
.join(", ")
)
Expand Down
100 changes: 96 additions & 4 deletions crates/iceberg/src/expr/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

//! Term definition.

use crate::expr::Bind;
use crate::expr::{BinaryExpression, Predicate, PredicateOperator};
use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression};
use crate::expr::{Bind, UnaryExpression};
use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef};
use crate::{Error, ErrorKind};
use std::fmt::{Display, Formatter};
Expand Down Expand Up @@ -65,6 +65,41 @@ impl Reference {
datum,
))
}

/// Creates an is null expression. For example, `a IS NULL`.
///
/// # Example
///
/// ```rust
///
/// use iceberg::expr::Reference;
/// let expr = Reference::new("a").is_null();
///
/// assert_eq!(&format!("{expr}"), "a IS NULL");
/// ```
pub fn is_null(self) -> Predicate {
Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNull, self))
}

/// Creates an in expression. For example, `a IN (1, 2, 3)`.
///
/// # Example
///
/// ```rust
///
/// use iceberg::expr::Reference;
/// use iceberg::spec::Datum;
/// let expr = Reference::new("a").r#in(vec![Datum::long(1), Datum::long(2), Datum::long(3)]);
///
/// assert_eq!(&format!("{expr}"), "a IN (1, 3, 2)");
/// ```
pub fn r#in(self, values: impl IntoIterator<Item = Datum>) -> Predicate {
Predicate::Set(SetExpression::new(
PredicateOperator::In,
self,
values.into_iter().collect(),
))
}
}

impl Display for Reference {
Expand Down Expand Up @@ -94,7 +129,7 @@ impl Bind for Reference {
}

/// A named reference in a bound expression after binding to a schema.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BoundReference {
// This maybe different from [`name`] filed in [`NestedField`] since this contains full path.
// For example, if the field is `a.b.c`, then `field.name` is `c`, but `original_name` is `a.b.c`.
Expand Down Expand Up @@ -128,6 +163,63 @@ pub type BoundTerm = BoundReference;

#[cfg(test)]
mod tests {
use crate::expr::{Bind, BoundReference, Reference};
use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};
use std::sync::Arc;

fn table_schema_simple() -> SchemaRef {
Arc::new(
Schema::builder()
.with_schema_id(1)
.with_identifier_field_ids(vec![2])
.with_fields(vec![
NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
])
.build()
.unwrap(),
)
}

#[test]
fn test_bind_reference() {
let schema = table_schema_simple();
let reference = Reference::new("bar").bind(schema, true).unwrap();

let expected_ref = BoundReference::new(
"bar",
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
);

assert_eq!(expected_ref, reference);
}

#[test]
fn test_bind_reference() {}
fn test_bind_reference_case_insensitive() {
let schema = table_schema_simple();
let reference = Reference::new("BaR").bind(schema, false).unwrap();

let expected_ref = BoundReference::new(
"BaR",
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
);

assert_eq!(expected_ref, reference);
}

#[test]
fn test_bind_reference_failure() {
let schema = table_schema_simple();
let result = Reference::new("bar_not_eix").bind(schema, true);

assert!(result.is_err());
}

#[test]
fn test_bind_reference_case_insensitive_failure() {
let schema = table_schema_simple();
let result = Reference::new("BaR_non").bind(schema, false);
assert!(result.is_err());
}
}
12 changes: 11 additions & 1 deletion crates/iceberg/src/spec/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,17 @@ impl Datum {

/// Convert the datum to `target_type`.
pub fn to(self, target_type: &Type) -> Result<Datum> {
todo!()
// TODO: We should allow more type conversions
match target_type {
Type::Primitive(typ) if typ == &self.r#type => Ok(self),
_ => Err(Error::new(
ErrorKind::DataInvalid,
format!(
"Can't convert datum from {} type to {} type.",
self.r#type, target_type
),
)),
}
}
}

Expand Down

0 comments on commit c2ec8ab

Please sign in to comment.