Skip to content

Commit

Permalink
[CHORE] Refactor arrays to share a FromArrow constructor trait (#1276)
Browse files Browse the repository at this point in the history
This eliminates some code duplication by having each Array
implementation implement a `FromArrow` trait.

Will be useful when we add more arrays (e.g. StructArray,
FixedSizeListArray, LogicalStructArray etc)

Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
  • Loading branch information
jaychia and Jay Chia authored Aug 16, 2023
1 parent 0664353 commit 2b82afc
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 11 deletions.
12 changes: 5 additions & 7 deletions src/daft-core/src/array/ops/cast.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::as_arrow::AsArrow;
use crate::{
array::{ops::image::ImageArraySidecarData, DataArray},
array::{ops::from_arrow::FromArrow, ops::image::ImageArraySidecarData, DataArray},
datatypes::{
logical::{
DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray,
Expand Down Expand Up @@ -132,12 +132,11 @@ where

if dtype.is_logical() {
with_match_daft_logical_types!(dtype, |$T| {
let physical = DataArray::try_from((Field::new(to_cast.name(), dtype.to_physical()), result_arrow_physical_array))?;
return Ok(LogicalArray::<$T>::new(new_field.clone(), physical).into_series());
return Ok(LogicalArray::<$T>::from_arrow(new_field.as_ref(), result_arrow_physical_array)?.into_series())
})
}
with_match_arrow_daft_types!(dtype, |$T| {
Ok(DataArray::<$T>::try_from((new_field.clone(), result_arrow_physical_array))?.into_series())
Ok(DataArray::<$T>::from_arrow(new_field.as_ref(), result_arrow_physical_array)?.into_series())
})
}

Expand Down Expand Up @@ -224,12 +223,11 @@ where

if dtype.is_logical() {
with_match_daft_logical_types!(dtype, |$T| {
let physical = DataArray::try_from((Field::new(to_cast.name(), target_physical_type), result_array))?;
return Ok(LogicalArray::<$T>::new(new_field.clone(), physical).into_series());
return Ok(LogicalArray::<$T>::from_arrow(new_field.as_ref(), result_array)?.into_series());
})
}
with_match_arrow_daft_types!(dtype, |$T| {
Ok(DataArray::<$T>::try_from((new_field.clone(), result_array))?.into_series())
return Ok(DataArray::<$T>::from_arrow(new_field.as_ref(), result_array)?.into_series());
})
}

Expand Down
28 changes: 28 additions & 0 deletions src/daft-core/src/array/ops/from_arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use common_error::DaftResult;

use crate::{
array::DataArray,
datatypes::{logical::LogicalArray, DaftLogicalType, DaftPhysicalType, Field},
};

/// Arrays that implement [`FromArrow`] can be instantiated from a Box<dyn arrow2::array::Array>
pub trait FromArrow
where
Self: Sized,
{
fn from_arrow(field: &Field, arrow_arr: Box<dyn arrow2::array::Array>) -> DaftResult<Self>;
}

impl<T: DaftPhysicalType> FromArrow for DataArray<T> {
fn from_arrow(field: &Field, arrow_arr: Box<dyn arrow2::array::Array>) -> DaftResult<Self> {
DataArray::<T>::try_from((field.clone(), arrow_arr))
}
}

impl<L: DaftLogicalType> FromArrow for LogicalArray<L> {
fn from_arrow(field: &Field, arrow_arr: Box<dyn arrow2::array::Array>) -> DaftResult<Self> {
let data_array_field = Field::new(field.name.clone(), field.dtype.to_physical());
let physical = DataArray::try_from((data_array_field, arrow_arr))?;
Ok(LogicalArray::<L>::new(field.clone(), physical))
}
}
1 change: 1 addition & 0 deletions src/daft-core/src/array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod count;
mod date;
mod filter;
mod float;
pub mod from_arrow;
mod full;
mod get;
pub(crate) mod groups;
Expand Down
8 changes: 4 additions & 4 deletions src/daft-core/src/series/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use common_error::{DaftError, DaftResult};

use super::Series;

use crate::array::ops::from_arrow::FromArrow;
use crate::series::array_impl::IntoSeries;

impl TryFrom<(&str, Box<dyn arrow2::array::Array>)> for Series {
Expand Down Expand Up @@ -52,8 +53,7 @@ impl TryFrom<(&str, Box<dyn arrow2::array::Array>)> for Series {
};

let res = with_match_daft_logical_types!(dtype, |$T| {
let physical = DataArray::try_from((Field::new(name, physical_type), physical_arrow_array))?;
LogicalArray::<$T>::new(field, physical).into_series()
LogicalArray::<$T>::from_arrow(field.as_ref(), physical_arrow_array)?.into_series()
});
return Ok(res);
}
Expand All @@ -70,12 +70,12 @@ impl TryFrom<(&str, Box<dyn arrow2::array::Array>)> for Series {
},
)?;
return Ok(
with_match_physical_daft_types!(physical_type, |$T| DataArray::<$T>::new(field, casted_array)?.into_series()),
with_match_physical_daft_types!(physical_type, |$T| DataArray::<$T>::from_arrow(field.as_ref(), casted_array)?.into_series()),
);
}

Ok(
with_match_physical_daft_types!(dtype, |$T| DataArray::<$T>::new(field, array.into())?.into_series()),
with_match_physical_daft_types!(dtype, |$T| DataArray::<$T>::from_arrow(field.as_ref(), array.into())?.into_series()),
)
}
}

0 comments on commit 2b82afc

Please sign in to comment.