diff --git a/Cargo.lock b/Cargo.lock index 26b264a3d7..c48d36b3f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1258,6 +1258,13 @@ dependencies = [ "serde_json", ] +[[package]] +name = "common-hashable-float-wrapper" +version = "0.2.0-dev0" +dependencies = [ + "serde", +] + [[package]] name = "common-io-config" version = "0.2.0-dev0" @@ -1280,6 +1287,15 @@ dependencies = [ "serde", ] +[[package]] +name = "common-resource-request" +version = "0.2.0-dev0" +dependencies = [ + "common-hashable-float-wrapper", + "pyo3", + "serde", +] + [[package]] name = "common-system-info" version = "0.2.0-dev0" @@ -1567,6 +1583,8 @@ version = "0.2.0-dev0" dependencies = [ "common-daft-config", "common-display", + "common-hashable-float-wrapper", + "common-resource-request", "common-system-info", "common-tracing", "daft-compression", @@ -1618,6 +1636,7 @@ dependencies = [ "comfy-table 7.1.1", "common-daft-config", "common-error", + "common-hashable-float-wrapper", "daft-minhash", "daft-sketch", "fastrand 2.1.0", @@ -1690,6 +1709,7 @@ name = "daft-dsl" version = "0.2.0-dev0" dependencies = [ "common-error", + "common-hashable-float-wrapper", "common-py-serde", "common-treenode", "daft-core", @@ -1889,6 +1909,7 @@ name = "daft-physical-plan" version = "0.2.0-dev0" dependencies = [ "common-error", + "common-resource-request", "daft-core", "daft-dsl", "daft-plan", @@ -1907,6 +1928,7 @@ dependencies = [ "common-error", "common-io-config", "common-py-serde", + "common-resource-request", "common-treenode", "daft-core", "daft-dsl", diff --git a/Cargo.toml b/Cargo.toml index 6da2a63380..bb6d14af80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [dependencies] common-daft-config = {path = "src/common/daft-config", default-features = false} common-display = {path = "src/common/display", default-features = false} +common-hashable-float-wrapper = {path = "src/common/hashable-float-wrapper", default-features = false} +common-resource-request = {path = "src/common/resource-request", default-features = false} common-system-info = {path = "src/common/system-info", default-features = false} common-tracing = {path = "src/common/tracing", default-features = false} daft-compression = {path = "src/daft-compression", default-features = false} @@ -49,7 +51,8 @@ python = [ "daft-functions/python", "common-daft-config/python", "common-system-info/python", - "common-display/python" + "common-display/python", + "common-resource-request/python" ] [lib] diff --git a/src/common/hashable-float-wrapper/Cargo.toml b/src/common/hashable-float-wrapper/Cargo.toml new file mode 100644 index 0000000000..ce370ed965 --- /dev/null +++ b/src/common/hashable-float-wrapper/Cargo.toml @@ -0,0 +1,7 @@ +[dependencies] +serde = {workspace = true} + +[package] +edition = {workspace = true} +name = "common-hashable-float-wrapper" +version = {workspace = true} diff --git a/src/daft-core/src/utils/hashable_float_wrapper.rs b/src/common/hashable-float-wrapper/src/lib.rs similarity index 100% rename from src/daft-core/src/utils/hashable_float_wrapper.rs rename to src/common/hashable-float-wrapper/src/lib.rs diff --git a/src/common/resource-request/Cargo.toml b/src/common/resource-request/Cargo.toml new file mode 100644 index 0000000000..12244b9283 --- /dev/null +++ b/src/common/resource-request/Cargo.toml @@ -0,0 +1,12 @@ +[dependencies] +common-hashable-float-wrapper = {path = "../hashable-float-wrapper"} +pyo3 = {workspace = true, optional = true} +serde = {workspace = true} + +[features] +python = ["dep:pyo3"] + +[package] +edition = {workspace = true} +name = "common-resource-request" +version = {workspace = true} diff --git a/src/daft-plan/src/resource_request.rs b/src/common/resource-request/src/lib.rs similarity index 95% rename from src/daft-plan/src/resource_request.rs rename to src/common/resource-request/src/lib.rs index 9994230617..3cbab9aa66 100644 --- a/src/daft-plan/src/resource_request.rs +++ b/src/common/resource-request/src/lib.rs @@ -1,9 +1,6 @@ -use daft_core::{impl_bincode_py_state_serialization, utils::hashable_float_wrapper::FloatWrapper}; +use common_hashable_float_wrapper::FloatWrapper; #[cfg(feature = "python")] -use pyo3::{ - pyclass, pyclass::CompareOp, pymethods, types::PyBytes, PyObject, PyResult, PyTypeInfo, Python, - ToPyObject, -}; +use pyo3::{pyclass, pyclass::CompareOp, pymethods, types::PyModule, PyResult, Python}; use std::hash::{Hash, Hasher}; use std::ops::Add; @@ -215,4 +212,8 @@ impl ResourceRequest { } } -impl_bincode_py_state_serialization!(ResourceRequest); +#[cfg(feature = "python")] +pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> { + parent.add_class::()?; + Ok(()) +} diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index efd4b89784..edc5a8c4fd 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -22,6 +22,7 @@ chrono-tz = {workspace = true} comfy-table = {workspace = true} common-daft-config = {path = "../common/daft-config", default-features = false} common-error = {path = "../common/error", default-features = false} +common-hashable-float-wrapper = {path = "../common/hashable-float-wrapper"} daft-minhash = {path = "../daft-minhash", default-features = false} daft-sketch = {path = "../daft-sketch", default-features = false} fastrand = "2.1.0" diff --git a/src/daft-core/src/array/ops/is_in.rs b/src/daft-core/src/array/ops/is_in.rs index 6a5c9bfec9..d70c4035ba 100644 --- a/src/daft-core/src/array/ops/is_in.rs +++ b/src/daft-core/src/array/ops/is_in.rs @@ -9,8 +9,8 @@ use crate::{ use super::as_arrow::AsArrow; use super::{full::FullNull, DaftIsIn}; -use crate::utils::hashable_float_wrapper::FloatWrapper; use common_error::DaftResult; +use common_hashable_float_wrapper::FloatWrapper; use std::collections::{BTreeSet, HashSet}; macro_rules! collect_to_set_and_check_membership { diff --git a/src/daft-core/src/utils/mod.rs b/src/daft-core/src/utils/mod.rs index 957867366c..239f30af79 100644 --- a/src/daft-core/src/utils/mod.rs +++ b/src/daft-core/src/utils/mod.rs @@ -1,7 +1,6 @@ pub mod arrow; pub mod display_table; pub mod dyn_compare; -pub mod hashable_float_wrapper; pub mod supertype; pub use bincode; diff --git a/src/daft-dsl/Cargo.toml b/src/daft-dsl/Cargo.toml index ce45026f53..048b52cb9e 100644 --- a/src/daft-dsl/Cargo.toml +++ b/src/daft-dsl/Cargo.toml @@ -1,5 +1,6 @@ [dependencies] common-error = {path = "../common/error", default-features = false} +common-hashable-float-wrapper = {path = "../common/hashable-float-wrapper"} common-py-serde = {path = "../common/py-serde", default-features = false} common-treenode = {path = "../common/treenode", default-features = false} daft-core = {path = "../daft-core", default-features = false} diff --git a/src/daft-dsl/src/expr.rs b/src/daft-dsl/src/expr.rs index e96bd001ad..fb4ea04339 100644 --- a/src/daft-dsl/src/expr.rs +++ b/src/daft-dsl/src/expr.rs @@ -1,3 +1,4 @@ +use common_hashable_float_wrapper::FloatWrapper; use daft_core::{ count_mode::CountMode, datatypes::{try_mean_supertype, try_sum_supertype, DataType, Field, FieldID}, @@ -65,7 +66,7 @@ pub enum Expr { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Hash, Eq)] pub struct ApproxPercentileParams { pub child: ExprRef, - pub percentiles: Vec>, + pub percentiles: Vec>, pub force_list_output: bool, } @@ -402,10 +403,7 @@ impl Expr { ) -> ExprRef { Expr::Agg(AggExpr::ApproxPercentile(ApproxPercentileParams { child: self, - percentiles: percentiles - .iter() - .map(|f| daft_core::utils::hashable_float_wrapper::FloatWrapper(*f)) - .collect(), + percentiles: percentiles.iter().map(|f| FloatWrapper(*f)).collect(), force_list_output, })) .into() diff --git a/src/daft-dsl/src/functions/numeric/mod.rs b/src/daft-dsl/src/functions/numeric/mod.rs index 9b5b744d5c..9ca51fce17 100644 --- a/src/daft-dsl/src/functions/numeric/mod.rs +++ b/src/daft-dsl/src/functions/numeric/mod.rs @@ -10,6 +10,7 @@ mod trigonometry; use abs::AbsEvaluator; use ceil::CeilEvaluator; +use common_hashable_float_wrapper::FloatWrapper; use floor::FloorEvaluator; use log::LogEvaluator; use round::RoundEvaluator; @@ -22,7 +23,6 @@ use trigonometry::Atan2Evaluator; use crate::functions::numeric::exp::ExpEvaluator; use crate::functions::numeric::trigonometry::{TrigonometricFunction, TrigonometryEvaluator}; use crate::{Expr, ExprRef}; -use daft_core::utils::hashable_float_wrapper::FloatWrapper; use super::FunctionEvaluator; diff --git a/src/daft-dsl/src/lit.rs b/src/daft-dsl/src/lit.rs index 3d40569b0b..80223050d8 100644 --- a/src/daft-dsl/src/lit.rs +++ b/src/daft-dsl/src/lit.rs @@ -1,9 +1,9 @@ use crate::expr::Expr; use crate::ExprRef; +use common_hashable_float_wrapper::FloatWrapper; use daft_core::datatypes::logical::{Decimal128Array, TimeArray}; use daft_core::utils::display_table::{display_decimal128, display_time64}; -use daft_core::utils::hashable_float_wrapper::FloatWrapper; use daft_core::{array::ops::full::FullNull, datatypes::DataType}; use daft_core::{ datatypes::{ diff --git a/src/daft-physical-plan/Cargo.toml b/src/daft-physical-plan/Cargo.toml index 02d8275e9f..9ba603ab52 100644 --- a/src/daft-physical-plan/Cargo.toml +++ b/src/daft-physical-plan/Cargo.toml @@ -1,5 +1,6 @@ [dependencies] common-error = {path = "../common/error", default-features = false} +common-resource-request = {path = "../common/resource-request", default-features = false} daft-core = {path = "../daft-core", default-features = false} daft-dsl = {path = "../daft-dsl", default-features = false} daft-plan = {path = "../daft-plan", default-features = false} diff --git a/src/daft-physical-plan/src/local_plan.rs b/src/daft-physical-plan/src/local_plan.rs index 7b0e7e58c0..556a552c24 100644 --- a/src/daft-physical-plan/src/local_plan.rs +++ b/src/daft-physical-plan/src/local_plan.rs @@ -1,8 +1,9 @@ use std::sync::Arc; +use common_resource_request::ResourceRequest; use daft_core::{schema::SchemaRef, JoinType}; use daft_dsl::{AggExpr, ExprRef}; -use daft_plan::{InMemoryInfo, ResourceRequest}; +use daft_plan::InMemoryInfo; use daft_scan::{ScanTask, ScanTaskRef}; pub type LocalPhysicalPlanRef = Arc; diff --git a/src/daft-plan/Cargo.toml b/src/daft-plan/Cargo.toml index f4d9534cee..d7cb88fecc 100644 --- a/src/daft-plan/Cargo.toml +++ b/src/daft-plan/Cargo.toml @@ -18,6 +18,7 @@ common-display = {path = "../common/display", default-features = false} common-error = {path = "../common/error", default-features = false} common-io-config = {path = "../common/io-config", default-features = false} common-py-serde = {path = "../common/py-serde", default-features = false} +common-resource-request = {path = "../common/resource-request", default-features = false} common-treenode = {path = "../common/treenode", default-features = false} daft-core = {path = "../daft-core", default-features = false} daft-dsl = {path = "../daft-dsl", default-features = false} @@ -41,6 +42,7 @@ python = [ "common-error/python", "common-io-config/python", "common-daft-config/python", + "common-resource-request/python", "daft-core/python", "daft-dsl/python", "daft-functions/python", diff --git a/src/daft-plan/src/builder.rs b/src/daft-plan/src/builder.rs index 4309ac75ba..fde9e78bd5 100644 --- a/src/daft-plan/src/builder.rs +++ b/src/daft-plan/src/builder.rs @@ -12,11 +12,12 @@ use crate::{ }, sink_info::{OutputFileInfo, SinkInfo}, source_info::SourceInfo, - LogicalPlanRef, ResourceRequest, + LogicalPlanRef, }; use common_display::DisplayFormat; use common_error::DaftResult; use common_io_config::IOConfig; +use common_resource_request::ResourceRequest; use daft_core::{ join::{JoinStrategy, JoinType}, schema::{Schema, SchemaRef}, diff --git a/src/daft-plan/src/lib.rs b/src/daft-plan/src/lib.rs index 483fbe99ce..34d270f431 100644 --- a/src/daft-plan/src/lib.rs +++ b/src/daft-plan/src/lib.rs @@ -13,7 +13,6 @@ pub mod physical_ops; mod physical_optimization; mod physical_plan; mod physical_planner; -mod resource_request; mod sink_info; pub mod source_info; #[cfg(test)] @@ -30,7 +29,6 @@ pub use physical_planner::{ logical_to_physical, populate_aggregation_stages, AdaptivePlanner, MaterializedResults, QueryStageOutput, }; -pub use resource_request::ResourceRequest; pub use sink_info::{OutputFileInfo, SinkInfo}; pub use source_info::{FileInfo, FileInfos, InMemoryInfo, SourceInfo}; @@ -57,7 +55,6 @@ pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> { parent.add_class::()?; parent.add_class::()?; parent.add_class::()?; - parent.add_class::()?; parent.add_class::()?; parent.add_class::()?; parent.add_class::()?; diff --git a/src/daft-plan/src/logical_ops/actor_pool_project.rs b/src/daft-plan/src/logical_ops/actor_pool_project.rs index e2971a9543..cbe67f42ef 100644 --- a/src/daft-plan/src/logical_ops/actor_pool_project.rs +++ b/src/daft-plan/src/logical_ops/actor_pool_project.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use common_resource_request::ResourceRequest; use common_treenode::TreeNode; use daft_core::schema::{Schema, SchemaRef}; use daft_dsl::{ @@ -14,7 +15,7 @@ use snafu::ResultExt; use crate::{ logical_plan::{CreationSnafu, Result}, - LogicalPlan, ResourceRequest, + LogicalPlan, }; #[derive(Clone, Debug, PartialEq, Eq, Hash)] diff --git a/src/daft-plan/src/logical_ops/project.rs b/src/daft-plan/src/logical_ops/project.rs index 1a5cca0712..302a10e3de 100644 --- a/src/daft-plan/src/logical_ops/project.rs +++ b/src/daft-plan/src/logical_ops/project.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use common_resource_request::ResourceRequest; use daft_core::datatypes::FieldID; use daft_core::schema::{Schema, SchemaRef}; use daft_dsl::{optimization, resolve_exprs, AggExpr, ApproxPercentileParams, Expr, ExprRef}; @@ -9,7 +10,7 @@ use snafu::ResultExt; use crate::logical_optimization::Transformed; use crate::logical_plan::{CreationSnafu, Result}; -use crate::{LogicalPlan, ResourceRequest}; +use crate::LogicalPlan; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Project { diff --git a/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs b/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs index 9fe76f37a0..6077bb6660 100644 --- a/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs +++ b/src/daft-plan/src/logical_optimization/rules/push_down_projection.rs @@ -2,6 +2,7 @@ use std::{collections::HashMap, sync::Arc}; use common_error::DaftResult; +use common_resource_request::ResourceRequest; use common_treenode::TreeNode; use daft_core::{schema::Schema, JoinType}; use daft_dsl::{col, optimization::replace_columns_with_expressions, Expr, ExprRef}; @@ -10,7 +11,7 @@ use indexmap::IndexSet; use crate::{ logical_ops::{ActorPoolProject, Aggregate, Join, Pivot, Project, Source}, source_info::SourceInfo, - LogicalPlan, ResourceRequest, + LogicalPlan, }; use super::{ApplyOrder, OptimizerRule, Transformed}; diff --git a/src/daft-plan/src/physical_ops/actor_pool_project.rs b/src/daft-plan/src/physical_ops/actor_pool_project.rs index 2b99a50d24..28647fd60e 100644 --- a/src/daft-plan/src/physical_ops/actor_pool_project.rs +++ b/src/daft-plan/src/physical_ops/actor_pool_project.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use common_error::{DaftError, DaftResult}; +use common_resource_request::ResourceRequest; use common_treenode::TreeNode; use daft_dsl::{ functions::{ @@ -12,9 +13,7 @@ use daft_dsl::{ use itertools::Itertools; use serde::{Deserialize, Serialize}; -use crate::{ - partitioning::translate_clustering_spec, ClusteringSpec, PhysicalPlanRef, ResourceRequest, -}; +use crate::{partitioning::translate_clustering_spec, ClusteringSpec, PhysicalPlanRef}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct ActorPoolProject { diff --git a/src/daft-plan/src/physical_ops/project.rs b/src/daft-plan/src/physical_ops/project.rs index 864785722d..356fc7c3f4 100644 --- a/src/daft-plan/src/physical_ops/project.rs +++ b/src/daft-plan/src/physical_ops/project.rs @@ -1,12 +1,12 @@ use std::sync::Arc; use common_error::DaftResult; +use common_resource_request::ResourceRequest; use daft_dsl::ExprRef; use itertools::Itertools; use crate::{ partitioning::translate_clustering_spec, physical_plan::PhysicalPlanRef, ClusteringSpec, - ResourceRequest, }; use serde::{Deserialize, Serialize}; diff --git a/src/lib.rs b/src/lib.rs index bf1f2996a5..2dd40b1324 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,6 +74,7 @@ pub mod pylib { common_daft_config::register_modules(_py, m)?; common_system_info::register_modules(_py, m)?; + common_resource_request::register_modules(_py, m)?; daft_core::register_modules(_py, m)?; daft_core::python::register_modules(_py, m)?; daft_local_execution::register_modules(_py, m)?;