Skip to content

Commit

Permalink
[CHORE] Implement growables for array types (#1287)
Browse files Browse the repository at this point in the history
Growables are generally useful for abstracting "physical" operations
such as:

1. Take
2. Broadcast
3. Filter

This will become much more important as we add new Array types
(FixedSizeListArray, StructArray, etc.). Each of these arrays can simply
implement its own Growable, and the physical kernels then become easy to
write on top of it.

## Changes

1. Adds a new `Growable<Arr>` trait for incrementally building an array of
the specified type `Arr`
2. Adds a new `GrowableArray` trait which is implemented by `DataArray`
and `LogicalArray`: these types can now create a growable using their
associated `::make_growable` functions
3. Refactors `if_else` to use the new growables, reducing quite a bit of
code and nasty macro usage

---------

Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
Co-authored-by: Sammy Sidhu <[email protected]>
  • Loading branch information
3 people authored Aug 24, 2023
1 parent faaebb6 commit e0b988c
Show file tree
Hide file tree
Showing 13 changed files with 802 additions and 323 deletions.
140 changes: 140 additions & 0 deletions src/daft-core/src/array/growable/arrow_growable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
use std::marker::PhantomData;

use common_error::DaftResult;

use crate::{
array::{ops::from_arrow::FromArrow, DataArray},
datatypes::{
BinaryType, BooleanType, DaftArrowBackedType, DaftDataType, ExtensionArray, ExtensionType,
Field, FixedSizeListType, Float32Type, Float64Type, Int128Type, Int16Type, Int32Type,
Int64Type, Int8Type, ListType, NullType, StructType, UInt16Type, UInt32Type, UInt64Type,
UInt8Type, Utf8Type,
},
DataType, IntoSeries,
};

use super::Growable;

/// Wraps an arrow2 growable `G` so it can be driven through this crate's
/// [`Growable`] trait to produce a `DataArray<T>`.
pub struct ArrowGrowable<'a, T, G>
where
    T: DaftDataType + DaftArrowBackedType,
    G: arrow2::array::growable::Growable<'a>,
    DataArray<T>: IntoSeries,
{
    /// Name given to the `Field` of the built array.
    name: String,
    /// Data type given to the `Field` of the built array.
    dtype: DataType,
    /// The arrow2 growable that every operation is forwarded to.
    arrow2_growable: G,
    /// Marks the otherwise-unstored `'a` and `T` parameters as used.
    _phantom: PhantomData<&'a T>,
}

impl<'a, T: DaftDataType, G: arrow2::array::growable::Growable<'a>> ArrowGrowable<'a, T, G>
where
T: DaftArrowBackedType,
DataArray<T>: IntoSeries,
{
pub fn new(name: String, dtype: &DataType, arrow2_growable: G) -> Self {
Self {
name,
dtype: dtype.clone(),
arrow2_growable,
_phantom: PhantomData,
}
}
}

impl<'a, T, G> Growable<DataArray<T>> for ArrowGrowable<'a, T, G>
where
    T: DaftDataType + DaftArrowBackedType,
    G: arrow2::array::growable::Growable<'a>,
    DataArray<T>: IntoSeries,
{
    /// Forwards `index`, `start` and `len` unchanged to the wrapped arrow2
    /// growable's `extend`.
    #[inline]
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        self.arrow2_growable.extend(index, start, len);
    }

    /// Forwards `additional` to the wrapped arrow2 growable's
    /// `extend_validity`.
    #[inline]
    fn add_nulls(&mut self, additional: usize) {
        self.arrow2_growable.extend_validity(additional);
    }

    /// Takes the boxed arrow array out of the wrapped growable and converts
    /// it with `DataArray::<T>::from_arrow`, using a `Field` built from the
    /// stored name and dtype.
    ///
    /// Returns whatever `from_arrow` returns, including its error.
    #[inline]
    fn build(&mut self) -> DaftResult<DataArray<T>> {
        let boxed = self.arrow2_growable.as_box();
        // `build` only borrows `self`, so the name and dtype must be cloned.
        let name = self.name.clone();
        let dtype = self.dtype.clone();
        let field = Field::new(name, dtype);
        DataArray::<T>::from_arrow(&field, boxed)
    }
}

/// Growable for extension-typed arrays that forwards every operation to a
/// boxed, dynamically-dispatched arrow2 growable.
pub struct ArrowExtensionGrowable<'a> {
    /// Name given to the `Field` of the built array.
    name: String,
    /// Data type given to the `Field` of the built array; checked at
    /// construction to be a `DataType::Extension`.
    dtype: DataType,
    /// The arrow2 growable that every operation is forwarded to.
    child_growable: Box<dyn arrow2::array::growable::Growable<'a> + 'a>,
}

impl<'a> ArrowExtensionGrowable<'a> {
    /// Creates a growable that forwards to `child_growable`.
    ///
    /// `dtype` is cloned into the returned value.
    ///
    /// # Panics
    ///
    /// Panics if `dtype` is not a `DataType::Extension(..)`.
    pub fn new(
        name: String,
        dtype: &DataType,
        child_growable: Box<dyn arrow2::array::growable::Growable<'a> + 'a>,
    ) -> Self {
        assert!(matches!(dtype, DataType::Extension(..)));
        let dtype = dtype.clone();
        ArrowExtensionGrowable {
            name,
            dtype,
            child_growable,
        }
    }
}

impl<'a> Growable<DataArray<ExtensionType>> for ArrowExtensionGrowable<'a> {
    /// Forwards `index`, `start` and `len` unchanged to the child growable's
    /// `extend`.
    #[inline]
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        self.child_growable.extend(index, start, len);
    }

    /// Forwards `additional` to the child growable's `extend_validity`.
    #[inline]
    fn add_nulls(&mut self, additional: usize) {
        self.child_growable.extend_validity(additional);
    }

    /// Takes the boxed arrow array out of the child growable and converts it
    /// with `ExtensionArray::from_arrow`, using a `Field` built from the
    /// stored name and dtype.
    ///
    /// Returns whatever `from_arrow` returns, including its error.
    #[inline]
    fn build(&mut self) -> DaftResult<DataArray<ExtensionType>> {
        let boxed = self.child_growable.as_box();
        // `build` only borrows `self`, so the name and dtype must be cloned.
        let name = self.name.clone();
        let dtype = self.dtype.clone();
        let field = Field::new(name, dtype);
        ExtensionArray::from_arrow(&field, boxed)
    }
}

// Concrete `ArrowGrowable` aliases: each pairs one Daft datatype marker with
// the arrow2 growable type used to build arrays of that datatype.

/// `ArrowGrowable` for `NullType`, built on arrow2's `GrowableNull`.
pub type ArrowNullGrowable<'a> = ArrowGrowable<'a, NullType, arrow2::array::growable::GrowableNull>;
/// `ArrowGrowable` for `BooleanType`, built on arrow2's `GrowableBoolean`.
pub type ArrowBooleanGrowable<'a> =
ArrowGrowable<'a, BooleanType, arrow2::array::growable::GrowableBoolean<'a>>;

// Signed integer types, each built on `GrowablePrimitive` over the matching
// Rust integer type.
pub type ArrowInt8Growable<'a> =
ArrowGrowable<'a, Int8Type, arrow2::array::growable::GrowablePrimitive<'a, i8>>;
pub type ArrowInt16Growable<'a> =
ArrowGrowable<'a, Int16Type, arrow2::array::growable::GrowablePrimitive<'a, i16>>;
pub type ArrowInt32Growable<'a> =
ArrowGrowable<'a, Int32Type, arrow2::array::growable::GrowablePrimitive<'a, i32>>;
pub type ArrowInt64Growable<'a> =
ArrowGrowable<'a, Int64Type, arrow2::array::growable::GrowablePrimitive<'a, i64>>;
pub type ArrowInt128Growable<'a> =
ArrowGrowable<'a, Int128Type, arrow2::array::growable::GrowablePrimitive<'a, i128>>;

// Unsigned integer types, each built on `GrowablePrimitive` over the matching
// Rust integer type.
pub type ArrowUInt8Growable<'a> =
ArrowGrowable<'a, UInt8Type, arrow2::array::growable::GrowablePrimitive<'a, u8>>;
pub type ArrowUInt16Growable<'a> =
ArrowGrowable<'a, UInt16Type, arrow2::array::growable::GrowablePrimitive<'a, u16>>;
pub type ArrowUInt32Growable<'a> =
ArrowGrowable<'a, UInt32Type, arrow2::array::growable::GrowablePrimitive<'a, u32>>;
pub type ArrowUInt64Growable<'a> =
ArrowGrowable<'a, UInt64Type, arrow2::array::growable::GrowablePrimitive<'a, u64>>;

// Floating-point types, built on `GrowablePrimitive` over `f32` / `f64`.
pub type ArrowFloat32Growable<'a> =
ArrowGrowable<'a, Float32Type, arrow2::array::growable::GrowablePrimitive<'a, f32>>;
pub type ArrowFloat64Growable<'a> =
ArrowGrowable<'a, Float64Type, arrow2::array::growable::GrowablePrimitive<'a, f64>>;

// Binary, UTF-8 and list types; the arrow2 growables here are all
// instantiated with `i64` as their second type parameter.
pub type ArrowBinaryGrowable<'a> =
ArrowGrowable<'a, BinaryType, arrow2::array::growable::GrowableBinary<'a, i64>>;
pub type ArrowUtf8Growable<'a> =
ArrowGrowable<'a, Utf8Type, arrow2::array::growable::GrowableUtf8<'a, i64>>;
pub type ArrowListGrowable<'a> =
ArrowGrowable<'a, ListType, arrow2::array::growable::GrowableList<'a, i64>>;

// Nested types: fixed-size lists and structs.
pub type ArrowFixedSizeListGrowable<'a> =
ArrowGrowable<'a, FixedSizeListType, arrow2::array::growable::GrowableFixedSizeList<'a>>;
pub type ArrowStructGrowable<'a> =
ArrowGrowable<'a, StructType, arrow2::array::growable::GrowableStruct<'a>>;
61 changes: 61 additions & 0 deletions src/daft-core/src/array/growable/logical_growable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use std::marker::PhantomData;

use common_error::DaftResult;

use crate::{
datatypes::{logical::LogicalArray, DaftDataType, DaftLogicalType, Field},
DataType, IntoSeries,
};

use super::Growable;

/// Growable for `LogicalArray<L>` that delegates all growth to a boxed
/// growable over `L`'s physical array type and wraps the result on `build`.
pub struct LogicalGrowable<'a, L>
where
    L: DaftLogicalType,
    LogicalArray<L>: IntoSeries,
{
    /// Name given to the `Field` of the built logical array.
    name: String,
    /// Data type given to the `Field` of the built logical array.
    dtype: DataType,
    /// Growable that builds the underlying physical array.
    physical_growable: Box<dyn Growable<<L::PhysicalType as DaftDataType>::ArrayType> + 'a>,
    /// Marks the logical type `L`, which is not stored directly, as used.
    _phantom: PhantomData<L>,
}

impl<'a, L: DaftLogicalType> LogicalGrowable<'a, L>
where
LogicalArray<L>: IntoSeries,
{
pub fn new(
name: String,
dtype: &DataType,
physical_growable: Box<dyn Growable<<L::PhysicalType as DaftDataType>::ArrayType> + 'a>,
) -> Self {
Self {
name,
dtype: dtype.clone(),
physical_growable,
_phantom: PhantomData,
}
}
}

impl<'a, L> Growable<LogicalArray<L>> for LogicalGrowable<'a, L>
where
    L: DaftLogicalType,
    LogicalArray<L>: IntoSeries,
{
    /// Forwards `index`, `start` and `len` unchanged to the physical
    /// growable's `extend`.
    #[inline]
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        self.physical_growable.extend(index, start, len);
    }

    /// Forwards `additional` to the physical growable's `add_nulls`.
    #[inline]
    fn add_nulls(&mut self, additional: usize) {
        self.physical_growable.add_nulls(additional);
    }

    /// Builds the physical array, then wraps it in a `LogicalArray<L>` whose
    /// `Field` is made from the stored name and dtype.
    ///
    /// An error from the physical growable's `build` is returned as-is.
    #[inline]
    fn build(&mut self) -> DaftResult<LogicalArray<L>> {
        let physical = self.physical_growable.build()?;
        // `build` only borrows `self`, so the name and dtype must be cloned.
        let field = Field::new(self.name.clone(), self.dtype.clone());
        Ok(LogicalArray::<L>::new(field, physical))
    }
}
Loading

0 comments on commit e0b988c

Please sign in to comment.