Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add business_day_count function #15512

Merged
merged 3 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/licenses/NUMPY_LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Copyright (c) 2005-2023, NumPy Developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could lead to problems, as we want to distribute under MIT/Apache. But I also don't believe this is required. You have interpreted their source code and rewritten it in Rust. I think a link with credits should suffice.

modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

* Neither the name of the NumPy Developers nor the names of any
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1 change: 1 addition & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ is_between = ["polars-plan/is_between"]
is_unique = ["polars-plan/is_unique"]
cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"]
asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"]
business = ["polars-plan/business"]
concat_str = ["polars-plan/concat_str"]
range = ["polars-plan/range"]
mode = ["polars-plan/mode"]
Expand Down
1 change: 1 addition & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ is_unique = []
unique_counts = []
is_between = []
approx_unique = []
business = ["dtype-date"]
fused = []
cutqcut = ["dtype-categorical", "dtype-struct"]
rle = ["dtype-struct"]
Expand Down
97 changes: 97 additions & 0 deletions crates/polars-ops/src/series/ops/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use polars_core::prelude::arity::binary_elementwise_values;
use polars_core::prelude::*;

/// Count the number of business days between `start` and `end`, excluding `end`.
///
/// Both series must be of `Date` dtype; otherwise the error from the `date()`
/// downcast is returned. If either side has length 1, its single value is
/// paired with every element of the other side. If that single value is null,
/// the result is entirely null and carries the name of `start`.
///
/// Monday through Friday are currently the only business days.
pub fn business_day_count(start: &Series, end: &Series) -> PolarsResult<Series> {
    let lhs = start.date()?;
    let rhs = end.date()?;

    // TODO: support customising weekdays
    let week_mask: [bool; 7] = [true, true, true, true, true, false, false];
    let business_days_per_week = week_mask
        .iter()
        .filter(|&&is_business_day| is_business_day)
        .count() as i32;

    // Single place where the scalar kernel is invoked; every branch below goes through it.
    let count = |from: i32, to: i32| -> i32 {
        business_day_count_impl(from, to, &week_mask, business_days_per_week)
    };

    let counts = match (lhs.len(), rhs.len()) {
        // One `end` value paired with every `start`.
        (_, 1) => match rhs.get(0) {
            Some(to) => lhs.apply_values(|from| count(from, to)),
            None => Int32Chunked::full_null(lhs.name(), lhs.len()),
        },
        // One `start` value paired with every `end`.
        (1, _) => match lhs.get(0) {
            Some(from) => rhs.apply_values(|to| count(from, to)),
            None => Int32Chunked::full_null(lhs.name(), rhs.len()),
        },
        _ => binary_elementwise_values(lhs, rhs, |from, to| count(from, to)),
    };
    Ok(counts.into_series())
}

/// Count the business days in the half-open day range `[start_date, end_date)`.
///
/// `week_mask[i]` tells whether weekday `i` (as numbered by `weekday`) is a
/// business day, and `n_business_days_in_week_mask` is the number of `true`
/// entries in it. When `start_date > end_date`, the days in
/// `(end_date, start_date]` are counted and the result is negated.
///
/// Ported from:
/// https://github.com/numpy/numpy/blob/e59c074842e3f73483afa5ddef031e856b9fd313/numpy/_core/src/multiarray/datetime_busday.c#L355-L433
fn business_day_count_impl(
    start_date: i32,
    end_date: i32,
    week_mask: &[bool; 7],
    n_business_days_in_week_mask: i32,
) -> i32 {
    let reversed = start_date > end_date;
    // Normalise to an ascending range; a reversed range is shifted by one day
    // so that it excludes the earlier date and includes the later one.
    let (mut day, stop) = if reversed {
        (end_date + 1, start_date + 1)
    } else {
        (start_date, end_date)
    };

    let mut day_of_week = weekday(day);

    // Whole weeks contribute a fixed amount each; skip over them in one step.
    let full_weeks = (stop - day) / 7;
    let mut total = full_weeks * n_business_days_in_week_mask;
    day += full_weeks * 7;

    // At most six days remain; walk them one by one.
    while day < stop {
        // SAFETY: `day_of_week` starts as the result of `weekday` (0..=6) and is
        // reset to 0 below before it can reach 7, so it is a valid index.
        if unsafe { *week_mask.get_unchecked(day_of_week) } {
            total += 1;
        }
        day += 1;
        day_of_week = if day_of_week + 1 >= 7 {
            0
        } else {
            day_of_week + 1
        };
    }

    if reversed {
        -total
    } else {
        total
    }
}

/// Map a day number to its weekday index in `0..=6`, where 0 is Monday and
/// 6 is Sunday.
///
/// Day 0 maps to 3 (Thursday), which matches a day count that starts at
/// 1970-01-01.
fn weekday(x: i32) -> usize {
    // Widen before subtracting: `x - 4` overflows `i32` for the four smallest
    // inputs. `rem_euclid` always returns a non-negative remainder, so the
    // "+ 7, modulo again" fix-up for negative values is not needed.
    (i64::from(x) - 4).rem_euclid(7) as usize
}
4 changes: 4 additions & 0 deletions crates/polars-ops/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ mod approx_algo;
#[cfg(feature = "approx_unique")]
mod approx_unique;
mod arg_min_max;
#[cfg(feature = "business")]
mod business;
mod clip;
#[cfg(feature = "cum_agg")]
mod cum_agg;
Expand Down Expand Up @@ -65,6 +67,8 @@ pub use approx_algo::*;
#[cfg(feature = "approx_unique")]
pub use approx_unique::*;
pub use arg_min_max::ArgAgg;
#[cfg(feature = "business")]
pub use business::*;
pub use clip::*;
#[cfg(feature = "cum_agg")]
pub use cum_agg::*;
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ is_between = ["polars-ops/is_between"]
cross_join = ["polars-ops/cross_join"]
asof_join = ["polars-time", "polars-ops/asof_join"]
concat_str = []
business = ["polars-ops/business"]
range = []
mode = ["polars-ops/mode"]
cum_agg = ["polars-ops/cum_agg"]
Expand Down Expand Up @@ -252,6 +253,7 @@ features = [
"ciborium",
"dtype-decimal",
"arg_where",
"business",
"range",
"meta",
"hive_partitions",
Expand Down
45 changes: 45 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::fmt::{Display, Formatter};

use polars_core::prelude::*;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::dsl::SpecialEq;
use crate::map_as_slice;
use crate::prelude::SeriesUdf;

/// Functions of the `business` feature that can be carried by a `FunctionExpr`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub enum BusinessFunction {
    /// Count the business days between a start and an end date series;
    /// displayed as `business_day_count`.
    #[cfg(feature = "business")]
    BusinessDayCount,
}

impl Display for BusinessFunction {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use BusinessFunction::*;
let s = match self {
#[cfg(feature = "business")]
&BusinessDayCount => "business_day_count",
};
write!(f, "{s}")
}
}
impl From<BusinessFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: BusinessFunction) -> Self {
use BusinessFunction::*;
match func {
#[cfg(feature = "business")]
BusinessDayCount => {
map_as_slice!(business_day_count)
},
}
}
}

/// Forward to the `polars_ops` kernel, using the first input series as
/// `start` and the second as `end`.
///
/// Panics (slice indexing) if fewer than two series are supplied.
#[cfg(feature = "business")]
pub(super) fn business_day_count(s: &[Series]) -> PolarsResult<Series> {
    polars_ops::prelude::business_day_count(&s[0], &s[1])
}
12 changes: 12 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ mod array;
mod binary;
mod boolean;
mod bounds;
#[cfg(feature = "business")]
mod business;
#[cfg(feature = "dtype-categorical")]
mod cat;
#[cfg(feature = "round_series")]
Expand Down Expand Up @@ -81,6 +83,8 @@ use serde::{Deserialize, Serialize};

pub(crate) use self::binary::BinaryFunction;
pub use self::boolean::BooleanFunction;
#[cfg(feature = "business")]
pub(super) use self::business::BusinessFunction;
#[cfg(feature = "dtype-categorical")]
pub(crate) use self::cat::CategoricalFunction;
#[cfg(feature = "temporal")]
Expand Down Expand Up @@ -117,6 +121,8 @@ pub enum FunctionExpr {

// Other expressions
Boolean(BooleanFunction),
#[cfg(feature = "business")]
Business(BusinessFunction),
#[cfg(feature = "abs")]
Abs,
Negate,
Expand Down Expand Up @@ -349,6 +355,8 @@ impl Hash for FunctionExpr {

// Other expressions
Boolean(f) => f.hash(state),
#[cfg(feature = "business")]
Business(f) => f.hash(state),
Pow(f) => f.hash(state),
#[cfg(feature = "search_sorted")]
SearchSorted(f) => f.hash(state),
Expand Down Expand Up @@ -557,6 +565,8 @@ impl Display for FunctionExpr {

// Other expressions
Boolean(func) => return write!(f, "{func}"),
#[cfg(feature = "business")]
Business(func) => return write!(f, "{func}"),
#[cfg(feature = "abs")]
Abs => "abs",
Negate => "negate",
Expand Down Expand Up @@ -815,6 +825,8 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {

// Other expressions
Boolean(func) => func.into(),
#[cfg(feature = "business")]
Business(func) => func.into(),
#[cfg(feature = "abs")]
Abs => map!(abs::abs),
Negate => map!(dispatch::negate),
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ impl FunctionExpr {

// Other expressions
Boolean(func) => func.get_field(mapper),
#[cfg(feature = "business")]
Business(_) => mapper.with_dtype(DataType::Int32),
#[cfg(feature = "abs")]
Abs => mapper.with_same_dtype(),
Negate => mapper.with_same_dtype(),
Expand Down
15 changes: 15 additions & 0 deletions crates/polars-plan/src/dsl/functions/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
use super::*;

/// Build an expression that applies `BusinessFunction::BusinessDayCount` to
/// `start` and `end`, in that order.
#[cfg(feature = "dtype-date")]
pub fn business_day_count(start: Expr, end: Expr) -> Expr {
    let options = FunctionOptions {
        allow_rename: true,
        ..Default::default()
    };

    Expr::Function {
        input: vec![start, end],
        function: FunctionExpr::Business(BusinessFunction::BusinessDayCount),
        options,
    }
}
4 changes: 4 additions & 0 deletions crates/polars-plan/src/dsl/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! Functions on expressions that might be useful.
mod arity;
#[cfg(feature = "business")]
mod business;
#[cfg(feature = "dtype-struct")]
mod coerce;
mod concat;
Expand All @@ -18,6 +20,8 @@ mod syntactic_sugar;
mod temporal;

pub use arity::*;
#[cfg(all(feature = "business", feature = "dtype-date"))]
pub use business::*;
#[cfg(feature = "dtype-struct")]
pub use coerce::*;
pub use concat::*;
Expand Down
1 change: 1 addition & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ array_any_all = ["polars-lazy?/array_any_all", "dtype-array"]
asof_join = ["polars-lazy?/asof_join", "polars-ops/asof_join"]
bigidx = ["polars-core/bigidx", "polars-lazy?/bigidx", "polars-ops/big_idx"]
binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding", "polars-sql?/binary_encoding"]
business = ["polars-lazy?/business", "polars-ops/business"]
checked_arithmetic = ["polars-core/checked_arithmetic"]
chunked_ids = ["polars-ops?/chunked_ids"]
coalesce = ["polars-lazy?/coalesce"]
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ features = [
"abs",
"approx_unique",
"arg_where",
"business",
"concat_str",
"cum_agg",
"cumulative_eval",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ These functions are available from the Polars module root and can be used as exp
arctan2d
arg_sort_by
arg_where
business_day_count
coalesce
concat_list
concat_str
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
arctan2d,
arg_sort_by,
arg_where,
business_day_count,
coalesce,
col,
collect_all,
Expand Down Expand Up @@ -330,6 +331,7 @@
# polars.functions
"align_frames",
"arg_where",
"business_day_count",
"concat",
"date_range",
"date_ranges",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from polars.functions.as_datatype import date_ as date
from polars.functions.as_datatype import datetime_ as datetime
from polars.functions.as_datatype import time_ as time
from polars.functions.business import business_day_count
from polars.functions.col import col
from polars.functions.eager import align_frames, concat
from polars.functions.lazy import (
Expand Down Expand Up @@ -124,6 +125,7 @@
"arctan2",
"arctan2d",
"arg_sort_by",
"business_day_count",
"coalesce",
"col",
"collect_all",
Expand Down
Loading
Loading