From 02a340445f77e8799b83b7ac4bd590ee9072a682 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Fri, 27 Sep 2024 16:05:13 -0700 Subject: [PATCH] sammy/to-arrow-docs --- src/daft-core/src/array/ops/as_arrow.rs | 5 ++++- src/daft-core/src/series/mod.rs | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/daft-core/src/array/ops/as_arrow.rs b/src/daft-core/src/array/ops/as_arrow.rs index 51ba52dd2c..26b5eaec0c 100644 --- a/src/daft-core/src/array/ops/as_arrow.rs +++ b/src/daft-core/src/array/ops/as_arrow.rs @@ -15,7 +15,10 @@ use crate::{ pub trait AsArrow { type Output; - // Retrieve the underlying concrete Arrow2 array. + /// Retrieve the underlying internal Arrow2 array. + /// This does not correct for the logical types and will just yield the physical type of the array. + /// For example, a TimestampArray will yield an Arrow Int64Array rather than an Arrow Timestamp array. + /// To get a corrected Arrow type, see `.to_arrow()`. fn as_arrow(&self) -> &Self::Output; } diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 128b1bd344..276fdfde87 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -38,6 +38,12 @@ impl PartialEq for Series { } impl Series { + /// Exports this Series into an Arrow array that is corrected for the Arrow type system. + /// For example, Daft's TimestampArray is a logical type that is backed by an Int64Array physical array. + /// If we were to call `.as_arrow()` or `.physical` on the TimestampArray, we would get an Int64Array that represented the time units. + /// However, if we want to export our Timestamp array to another Arrow system (such as arrow2 kernels, Python, or DuckDB), + /// we should convert it back to the canonical Arrow dtype of Timestamp rather than Int64. + /// To get the internal physical type without conversion, see `.as_arrow()`. pub fn to_arrow(&self) -> Box { self.inner.to_arrow() }