Skip to content

Commit

Permalink
feat: add business_day_count function
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Apr 6, 2024
1 parent cc6c642 commit 083bc31
Show file tree
Hide file tree
Showing 18 changed files with 333 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ is_between = ["polars-plan/is_between"]
is_unique = ["polars-plan/is_unique"]
cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"]
asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"]
business = ["polars-plan/business"]
concat_str = ["polars-plan/concat_str"]
range = ["polars-plan/range"]
mode = ["polars-plan/mode"]
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ is_between = ["polars-ops/is_between"]
cross_join = ["polars-ops/cross_join"]
asof_join = ["polars-time", "polars-ops/asof_join"]
concat_str = []
business = []
range = []
mode = ["polars-ops/mode"]
cum_agg = ["polars-ops/cum_agg"]
Expand Down Expand Up @@ -252,6 +253,7 @@ features = [
"ciborium",
"dtype-decimal",
"arg_where",
"business",
"range",
"meta",
"hive_partitions",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use polars_core::chunked_array::ops::arity::binary_elementwise_values;
use polars_core::prelude::*;

/// Count the business days between a start-date column and an end-date column.
///
/// `s[0]` supplies the start dates and `s[1]` the end dates; both are read
/// through `Series::date`, whose error is propagated if either input is not a
/// date column. The result is an `Int32` series.
///
/// Length handling:
/// * if `end` has length 1, that single end date is applied to every start date;
/// * if `start` has length 1, that single start date is applied to every end date;
/// * otherwise the two columns are combined with `binary_elementwise_values`.
///
/// When the length-1 side is null, the output is an all-null column with the
/// length of the *other* side.
pub(super) fn business_day_count(s: &[Series]) -> PolarsResult<Series> {
    let start = &s[0];
    let end = &s[1];

    let start_dates = start.date()?;
    let end_dates = end.date()?;

    // TODO: support customising weekdays
    // Slot 0 is the first weekday slot used by `impl_business_day_count`;
    // the last two slots are the excluded (weekend) days.
    let week_mask: [bool; 7] = [true, true, true, true, true, false, false];
    let n_weekdays = week_mask.iter().filter(|&&is_counted| is_counted).count() as i32;

    let out = match (start_dates.len(), end_dates.len()) {
        (_, 1) => {
            if let Some(end_date) = end_dates.get(0) {
                start_dates.apply_values(|start_date| {
                    impl_business_day_count(start_date, end_date, &week_mask, n_weekdays)
                })
            } else {
                Int32Chunked::full_null(start_dates.name(), start_dates.len())
            }
        },
        (1, _) => {
            if let Some(start_date) = start_dates.get(0) {
                end_dates.apply_values(|end_date| {
                    impl_business_day_count(start_date, end_date, &week_mask, n_weekdays)
                })
            } else {
                // `start` is the broadcast scalar here, so the output must be
                // as long as `end`, not as long as `start` (which is 1).
                Int32Chunked::full_null(start_dates.name(), end_dates.len())
            }
        },
        _ => binary_elementwise_values(start_dates, end_dates, |start_date, end_date| {
            impl_business_day_count(start_date, end_date, &week_mask, n_weekdays)
        }),
    };
    Ok(out.into_series())
}

/// Count the days in the half-open range `[start_date, end_date)` whose
/// weekday slot is enabled in `weekmask`.
///
/// Ported from:
/// https://github.com/numpy/numpy/blob/e59c074842e3f73483afa5ddef031e856b9fd313/numpy/_core/src/multiarray/datetime_busday.c#L355-L433
///
/// * `start_date`, `end_date` - integer day numbers. Day 4 maps to
///   `weekmask[0]`, day 5 to `weekmask[1]`, and so on modulo 7 (if the day
///   numbers count days since 1970-01-01, slot 0 is Monday).
/// * `weekmask` - which of the seven weekday slots are counted.
/// * `n_weekdays` - number of `true` entries in `weekmask`; passed in so it is
///   not recomputed for every pair of dates.
///
/// If `start_date > end_date` the range `(end_date, start_date]` is counted
/// instead and the result is negated.
fn impl_business_day_count(
    mut start_date: i32,
    mut end_date: i32,
    weekmask: &[bool; 7],
    n_weekdays: i32,
) -> i32 {
    let swapped = start_date > end_date;
    if swapped {
        // Swap the endpoints and shift both by one day so that the original
        // start is included and the original end is excluded.
        (start_date, end_date) = (end_date + 1, start_date + 1);
    }

    // 0-based weekday slot of the first day. `rem_euclid` keeps it in 0..7 for
    // negative day numbers too, so plain indexing below can never go out of
    // bounds and no `unsafe` is required.
    let first_slot = (start_date - 4).rem_euclid(7) as usize;

    // `span` is non-negative here because of the swap above.
    let span = end_date - start_date;
    let whole_weeks = span / 7;
    let leftover_days = (span % 7) as usize;

    // Every whole week contributes exactly `n_weekdays`; only the trailing
    // partial week (at most six days) has to be inspected day by day.
    let partial_week = (0..leftover_days)
        .filter(|offset| weekmask[(first_slot + offset) % 7])
        .count() as i32;
    let count = whole_weeks * n_weekdays + partial_week;

    if swapped {
        -count
    } else {
        count
    }
}

/// Map a day number to a weekday in `1..=7`, where day 4 maps to 1.
pub(crate) fn weekday(x: i32) -> i32 {
    // `rem_euclid` always yields a value in 0..7, even when `x - 4` is
    // negative, so a single step replaces the "mod, add 7, mod again" dance.
    (x - 4).rem_euclid(7) + 1
}
41 changes: 41 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/business/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
use std::fmt::{Display, Formatter};

use polars_core::prelude::*;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::dsl::SpecialEq;
use crate::map_as_slice;
use crate::prelude::SeriesUdf;

#[cfg(feature = "dtype-date")]
mod business_day_count;

/// Business-calendar functions that can appear in an expression.
///
/// Each variant is rendered by the `Display` impl in this file and turned into
/// an executable `SeriesUdf` by the `From<BusinessFunction>` impl.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub enum BusinessFunction {
/// Business-day count; displayed as `business_day_count` and dispatched to
/// `business_day_count::business_day_count`.
#[cfg(feature = "business")]
BusinessDayCount {},
}

impl Display for BusinessFunction {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use BusinessFunction::*;
let s = match self {
#[cfg(feature = "business")]
&BusinessDayCount { .. } => "business_day_count",
};
write!(f, "{s}")
}
}
impl From<BusinessFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: BusinessFunction) -> Self {
use BusinessFunction::*;
match func {
#[cfg(feature = "business")]
BusinessDayCount {} => {
map_as_slice!(business_day_count::business_day_count)
},
}
}
}
12 changes: 12 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ mod array;
mod binary;
mod boolean;
mod bounds;
#[cfg(feature = "business")]
mod business;
#[cfg(feature = "dtype-categorical")]
mod cat;
#[cfg(feature = "round_series")]
Expand Down Expand Up @@ -81,6 +83,8 @@ use serde::{Deserialize, Serialize};

pub(crate) use self::binary::BinaryFunction;
pub use self::boolean::BooleanFunction;
#[cfg(feature = "business")]
pub(super) use self::business::BusinessFunction;
#[cfg(feature = "dtype-categorical")]
pub(crate) use self::cat::CategoricalFunction;
#[cfg(feature = "temporal")]
Expand Down Expand Up @@ -117,6 +121,8 @@ pub enum FunctionExpr {

// Other expressions
Boolean(BooleanFunction),
#[cfg(feature = "business")]
Business(BusinessFunction),
#[cfg(feature = "abs")]
Abs,
Negate,
Expand Down Expand Up @@ -349,6 +355,8 @@ impl Hash for FunctionExpr {

// Other expressions
Boolean(f) => f.hash(state),
#[cfg(feature = "business")]
Business(f) => f.hash(state),
Pow(f) => f.hash(state),
#[cfg(feature = "search_sorted")]
SearchSorted(f) => f.hash(state),
Expand Down Expand Up @@ -557,6 +565,8 @@ impl Display for FunctionExpr {

// Other expressions
Boolean(func) => return write!(f, "{func}"),
#[cfg(feature = "business")]
Business(func) => return write!(f, "{func}"),
#[cfg(feature = "abs")]
Abs => "abs",
Negate => "negate",
Expand Down Expand Up @@ -815,6 +825,8 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {

// Other expressions
Boolean(func) => func.into(),
#[cfg(feature = "business")]
Business(func) => func.into(),
#[cfg(feature = "abs")]
Abs => map!(abs::abs),
Negate => map!(dispatch::negate),
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ impl FunctionExpr {

// Other expressions
Boolean(func) => func.get_field(mapper),
#[cfg(feature = "business")]
Business(_) => mapper.with_dtype(DataType::Int32),
#[cfg(feature = "abs")]
Abs => mapper.with_same_dtype(),
Negate => mapper.with_same_dtype(),
Expand Down
15 changes: 15 additions & 0 deletions crates/polars-plan/src/dsl/functions/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
use super::*;

/// Build an expression that counts the business days between `start` and `end`.
///
/// The expression is a `FunctionExpr::Business(BusinessDayCount)` call whose
/// inputs are `start` followed by `end`, with `allow_rename` enabled and all
/// other function options left at their defaults.
#[cfg(feature = "dtype-date")]
pub fn business_day_count(start: Expr, end: Expr) -> Expr {
    let options = FunctionOptions {
        allow_rename: true,
        ..Default::default()
    };

    Expr::Function {
        input: vec![start, end],
        function: FunctionExpr::Business(BusinessFunction::BusinessDayCount {}),
        options,
    }
}
4 changes: 4 additions & 0 deletions crates/polars-plan/src/dsl/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! Functions on expressions that might be useful.
mod arity;
#[cfg(feature = "business")]
mod business;
#[cfg(feature = "dtype-struct")]
mod coerce;
mod concat;
Expand All @@ -18,6 +20,8 @@ mod syntactic_sugar;
mod temporal;

pub use arity::*;
#[cfg(all(feature = "business", feature = "dtype-date"))]
pub use business::*;
#[cfg(feature = "dtype-struct")]
pub use coerce::*;
pub use concat::*;
Expand Down
1 change: 1 addition & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ array_any_all = ["polars-lazy?/array_any_all", "dtype-array"]
asof_join = ["polars-lazy?/asof_join", "polars-ops/asof_join"]
bigidx = ["polars-core/bigidx", "polars-lazy?/bigidx", "polars-ops/big_idx"]
binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding", "polars-sql?/binary_encoding"]
business = ["polars-lazy?/business"]
checked_arithmetic = ["polars-core/checked_arithmetic"]
chunked_ids = ["polars-ops?/chunked_ids"]
coalesce = ["polars-lazy?/coalesce"]
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ features = [
"abs",
"approx_unique",
"arg_where",
"business",
"concat_str",
"cum_agg",
"cumulative_eval",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
arctan2d,
arg_sort_by,
arg_where,
business_day_count,
coalesce,
col,
collect_all,
Expand Down Expand Up @@ -330,6 +331,7 @@
# polars.functions
"align_frames",
"arg_where",
"business_day_count",
"concat",
"date_range",
"date_ranges",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from polars.functions.as_datatype import date_ as date
from polars.functions.as_datatype import datetime_ as datetime
from polars.functions.as_datatype import time_ as time
from polars.functions.business import business_day_count
from polars.functions.col import col
from polars.functions.eager import align_frames, concat
from polars.functions.lazy import (
Expand Down Expand Up @@ -124,6 +125,7 @@
"arctan2",
"arctan2d",
"arg_sort_by",
"business_day_count",
"coalesce",
"col",
"collect_all",
Expand Down
66 changes: 66 additions & 0 deletions py-polars/polars/functions/business.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from __future__ import annotations

import contextlib
from typing import TYPE_CHECKING

from polars._utils.parse_expr_input import parse_as_expression
from polars._utils.wrap import wrap_expr

with contextlib.suppress(ImportError): # Module not available when building docs
import polars.polars as plr

if TYPE_CHECKING:
from datetime import date

from polars import Expr
from polars.type_aliases import IntoExprColumn


def business_day_count(
    start: date | IntoExprColumn,
    end: date | IntoExprColumn,
) -> Expr:
    """
    Count the number of business days between `start` and `end`.

    Saturday and Sunday are excluded from the count. Customising the week mask
    and holidays is not yet implemented.

    Parameters
    ----------
    start
        Start dates.
    end
        End dates.

    Returns
    -------
    Expr

    Examples
    --------
    >>> from datetime import date
    >>> df = pl.DataFrame(
    ...     {
    ...         "start": [date(2020, 1, 1), date(2020, 1, 2)],
    ...         "end": [date(2020, 1, 2), date(2020, 1, 10)],
    ...     }
    ... )
    >>> df.with_columns(
    ...     total_day_count=(pl.col("end") - pl.col("start")).dt.total_days(),
    ...     business_day_count=pl.business_day_count("start", "end"),
    ... )
    shape: (2, 4)
    ┌────────────┬────────────┬─────────────────┬────────────────────┐
    │ start      ┆ end        ┆ total_day_count ┆ business_day_count │
    │ ---        ┆ ---        ┆ ---             ┆ ---                │
    │ date       ┆ date       ┆ i64             ┆ i32                │
    ╞════════════╪════════════╪═════════════════╪════════════════════╡
    │ 2020-01-01 ┆ 2020-01-02 ┆ 1               ┆ 1                  │
    │ 2020-01-02 ┆ 2020-01-10 ┆ 8               ┆ 6                  │
    └────────────┴────────────┴─────────────────┴────────────────────┘

    Note how the two "count" columns differ due to the weekend
    (2020-01-04 - 2020-01-05) not being counted by `business_day_count`.
    """
    # Both arguments go through the same expression parser before being handed
    # to the native implementation; `start` is parsed first, then `end`.
    return wrap_expr(
        plr.business_day_count(parse_as_expression(start), parse_as_expression(end))
    )
11 changes: 11 additions & 0 deletions py-polars/src/functions/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use polars::lazy::dsl;
use pyo3::prelude::*;

use crate::PyExpr;

/// Python binding: unwraps both `PyExpr` arguments and forwards them to
/// `dsl::business_day_count`, wrapping the resulting expression again.
#[pyfunction]
pub fn business_day_count(start: PyExpr, end: PyExpr) -> PyExpr {
    dsl::business_day_count(start.inner, end.inner).into()
}
2 changes: 2 additions & 0 deletions py-polars/src/functions/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod aggregation;
mod business;
mod eager;
mod io;
mod lazy;
Expand All @@ -10,6 +11,7 @@ mod string_cache;
mod whenthen;

pub use aggregation::*;
pub use business::*;
pub use eager::*;
pub use io::*;
pub use lazy::*;
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(functions::time_ranges))
.unwrap();

// Functions - business
m.add_wrapped(wrap_pyfunction!(functions::business_day_count))
.unwrap();

// Functions - aggregation
m.add_wrapped(wrap_pyfunction!(functions::all_horizontal))
.unwrap();
Expand Down
Loading

0 comments on commit 083bc31

Please sign in to comment.