-
Notifications
You must be signed in to change notification settings - Fork 163
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CHORE] Implement growables for array types (#1287)
Growables are generally useful for abstracting "physical" operations such as:

1. Take
2. Broadcast
3. Filter

This will become much more important as we add new Array types (`FixedSizeListArray`, `StructArray`, etc.). These arrays can just implement their own `Growable` classes, and the physical kernels will then be easy to implement.

## Changes

1. Adds a new `Growable<Arr>` trait: a growable that builds the specified `Arr`.
2. Adds a new `GrowableArray` trait, implemented by `DataArray` and `LogicalArray`; these types can now create a growable using their associated `::make_growable` functions.
3. Refactors `if_else` to use the new growables, removing quite a bit of code and nasty macro usage.

---------

Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
Co-authored-by: Sammy Sidhu <[email protected]>
- Loading branch information
1 parent
faaebb6
commit e0b988c
Showing
13 changed files
with
802 additions
and
323 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
use std::marker::PhantomData; | ||
|
||
use common_error::DaftResult; | ||
|
||
use crate::{ | ||
array::{ops::from_arrow::FromArrow, DataArray}, | ||
datatypes::{ | ||
BinaryType, BooleanType, DaftArrowBackedType, DaftDataType, ExtensionArray, ExtensionType, | ||
Field, FixedSizeListType, Float32Type, Float64Type, Int128Type, Int16Type, Int32Type, | ||
Int64Type, Int8Type, ListType, NullType, StructType, UInt16Type, UInt32Type, UInt64Type, | ||
UInt8Type, Utf8Type, | ||
}, | ||
DataType, IntoSeries, | ||
}; | ||
|
||
use super::Growable; | ||
|
||
pub struct ArrowGrowable<'a, T: DaftDataType, G: arrow2::array::growable::Growable<'a>> | ||
where | ||
T: DaftArrowBackedType, | ||
DataArray<T>: IntoSeries, | ||
{ | ||
name: String, | ||
dtype: DataType, | ||
arrow2_growable: G, | ||
_phantom: PhantomData<&'a T>, | ||
} | ||
|
||
impl<'a, T: DaftDataType, G: arrow2::array::growable::Growable<'a>> ArrowGrowable<'a, T, G> | ||
where | ||
T: DaftArrowBackedType, | ||
DataArray<T>: IntoSeries, | ||
{ | ||
pub fn new(name: String, dtype: &DataType, arrow2_growable: G) -> Self { | ||
Self { | ||
name, | ||
dtype: dtype.clone(), | ||
arrow2_growable, | ||
_phantom: PhantomData, | ||
} | ||
} | ||
} | ||
|
||
impl<'a, T: DaftDataType, G: arrow2::array::growable::Growable<'a>> Growable<DataArray<T>> | ||
for ArrowGrowable<'a, T, G> | ||
where | ||
T: DaftArrowBackedType, | ||
DataArray<T>: IntoSeries, | ||
{ | ||
#[inline] | ||
fn extend(&mut self, index: usize, start: usize, len: usize) { | ||
self.arrow2_growable.extend(index, start, len); | ||
} | ||
|
||
#[inline] | ||
fn add_nulls(&mut self, additional: usize) { | ||
self.arrow2_growable.extend_validity(additional) | ||
} | ||
|
||
#[inline] | ||
fn build(&mut self) -> DaftResult<DataArray<T>> { | ||
let arrow_array = self.arrow2_growable.as_box(); | ||
let field = Field::new(self.name.clone(), self.dtype.clone()); | ||
DataArray::<T>::from_arrow(&field, arrow_array) | ||
} | ||
} | ||
|
||
pub struct ArrowExtensionGrowable<'a> { | ||
name: String, | ||
dtype: DataType, | ||
child_growable: Box<dyn arrow2::array::growable::Growable<'a> + 'a>, | ||
} | ||
|
||
impl<'a> ArrowExtensionGrowable<'a> { | ||
pub fn new( | ||
name: String, | ||
dtype: &DataType, | ||
child_growable: Box<dyn arrow2::array::growable::Growable<'a> + 'a>, | ||
) -> Self { | ||
assert!(matches!(dtype, DataType::Extension(..))); | ||
Self { | ||
name, | ||
dtype: dtype.clone(), | ||
child_growable, | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Growable<DataArray<ExtensionType>> for ArrowExtensionGrowable<'a> { | ||
#[inline] | ||
fn extend(&mut self, index: usize, start: usize, len: usize) { | ||
self.child_growable.extend(index, start, len) | ||
} | ||
#[inline] | ||
fn add_nulls(&mut self, additional: usize) { | ||
self.child_growable.extend_validity(additional) | ||
} | ||
#[inline] | ||
fn build(&mut self) -> DaftResult<DataArray<ExtensionType>> { | ||
let arr = self.child_growable.as_box(); | ||
let field = Field::new(self.name.clone(), self.dtype.clone()); | ||
ExtensionArray::from_arrow(&field, arr) | ||
} | ||
} | ||
|
||
pub type ArrowNullGrowable<'a> = ArrowGrowable<'a, NullType, arrow2::array::growable::GrowableNull>; | ||
pub type ArrowBooleanGrowable<'a> = | ||
ArrowGrowable<'a, BooleanType, arrow2::array::growable::GrowableBoolean<'a>>; | ||
pub type ArrowInt8Growable<'a> = | ||
ArrowGrowable<'a, Int8Type, arrow2::array::growable::GrowablePrimitive<'a, i8>>; | ||
pub type ArrowInt16Growable<'a> = | ||
ArrowGrowable<'a, Int16Type, arrow2::array::growable::GrowablePrimitive<'a, i16>>; | ||
pub type ArrowInt32Growable<'a> = | ||
ArrowGrowable<'a, Int32Type, arrow2::array::growable::GrowablePrimitive<'a, i32>>; | ||
pub type ArrowInt64Growable<'a> = | ||
ArrowGrowable<'a, Int64Type, arrow2::array::growable::GrowablePrimitive<'a, i64>>; | ||
pub type ArrowInt128Growable<'a> = | ||
ArrowGrowable<'a, Int128Type, arrow2::array::growable::GrowablePrimitive<'a, i128>>; | ||
pub type ArrowUInt8Growable<'a> = | ||
ArrowGrowable<'a, UInt8Type, arrow2::array::growable::GrowablePrimitive<'a, u8>>; | ||
pub type ArrowUInt16Growable<'a> = | ||
ArrowGrowable<'a, UInt16Type, arrow2::array::growable::GrowablePrimitive<'a, u16>>; | ||
pub type ArrowUInt32Growable<'a> = | ||
ArrowGrowable<'a, UInt32Type, arrow2::array::growable::GrowablePrimitive<'a, u32>>; | ||
pub type ArrowUInt64Growable<'a> = | ||
ArrowGrowable<'a, UInt64Type, arrow2::array::growable::GrowablePrimitive<'a, u64>>; | ||
pub type ArrowFloat32Growable<'a> = | ||
ArrowGrowable<'a, Float32Type, arrow2::array::growable::GrowablePrimitive<'a, f32>>; | ||
pub type ArrowFloat64Growable<'a> = | ||
ArrowGrowable<'a, Float64Type, arrow2::array::growable::GrowablePrimitive<'a, f64>>; | ||
pub type ArrowBinaryGrowable<'a> = | ||
ArrowGrowable<'a, BinaryType, arrow2::array::growable::GrowableBinary<'a, i64>>; | ||
pub type ArrowUtf8Growable<'a> = | ||
ArrowGrowable<'a, Utf8Type, arrow2::array::growable::GrowableUtf8<'a, i64>>; | ||
pub type ArrowListGrowable<'a> = | ||
ArrowGrowable<'a, ListType, arrow2::array::growable::GrowableList<'a, i64>>; | ||
pub type ArrowFixedSizeListGrowable<'a> = | ||
ArrowGrowable<'a, FixedSizeListType, arrow2::array::growable::GrowableFixedSizeList<'a>>; | ||
pub type ArrowStructGrowable<'a> = | ||
ArrowGrowable<'a, StructType, arrow2::array::growable::GrowableStruct<'a>>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
use std::marker::PhantomData; | ||
|
||
use common_error::DaftResult; | ||
|
||
use crate::{ | ||
datatypes::{logical::LogicalArray, DaftDataType, DaftLogicalType, Field}, | ||
DataType, IntoSeries, | ||
}; | ||
|
||
use super::Growable; | ||
|
||
pub struct LogicalGrowable<'a, L: DaftLogicalType> | ||
where | ||
LogicalArray<L>: IntoSeries, | ||
{ | ||
name: String, | ||
dtype: DataType, | ||
physical_growable: Box<dyn Growable<<L::PhysicalType as DaftDataType>::ArrayType> + 'a>, | ||
_phantom: PhantomData<L>, | ||
} | ||
|
||
impl<'a, L: DaftLogicalType> LogicalGrowable<'a, L> | ||
where | ||
LogicalArray<L>: IntoSeries, | ||
{ | ||
pub fn new( | ||
name: String, | ||
dtype: &DataType, | ||
physical_growable: Box<dyn Growable<<L::PhysicalType as DaftDataType>::ArrayType> + 'a>, | ||
) -> Self { | ||
Self { | ||
name, | ||
dtype: dtype.clone(), | ||
physical_growable, | ||
_phantom: PhantomData, | ||
} | ||
} | ||
} | ||
|
||
impl<'a, L: DaftLogicalType> Growable<LogicalArray<L>> for LogicalGrowable<'a, L> | ||
where | ||
LogicalArray<L>: IntoSeries, | ||
{ | ||
#[inline] | ||
fn extend(&mut self, index: usize, start: usize, len: usize) { | ||
self.physical_growable.extend(index, start, len); | ||
} | ||
#[inline] | ||
fn add_nulls(&mut self, additional: usize) { | ||
self.physical_growable.add_nulls(additional) | ||
} | ||
#[inline] | ||
fn build(&mut self) -> DaftResult<LogicalArray<L>> { | ||
let physical_arr = self.physical_growable.build()?; | ||
let arr = LogicalArray::<L>::new( | ||
Field::new(self.name.clone(), self.dtype.clone()), | ||
physical_arr, | ||
); | ||
Ok(arr) | ||
} | ||
} |
Oops, something went wrong.