Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for some operations with decimals #988

Merged
merged 14 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ defmodule Explorer.Backend.Series do
| Time.t()
| NaiveDateTime.t()
| Explorer.Duration.t()
| Decimal.t()

@type non_finite :: Explorer.Series.non_finite()
@type option(type) :: type | nil
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ defmodule Explorer.PolarsBackend.Native do
def s_fill_missing_with_atom(_s, _value), do: err()
def s_fill_missing_with_date(_s, _value), do: err()
def s_fill_missing_with_datetime(_s, _value), do: err()
def s_fill_missing_with_decimal(_s, _value), do: err()
def s_greater(_s, _rhs), do: err()
def s_greater_equal(_s, _rhs), do: err()
def s_head(_s, _length), do: err()
Expand Down
3 changes: 3 additions & 0 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ defmodule Explorer.PolarsBackend.Series do
@impl true
def from_list(data, type) when is_list(data) do
  # Convert the list with `Shared.from_list/2`, then wrap the result in a
  # backend series carrying the same `type`.
  data
  |> Shared.from_list(type)
  |> Explorer.Backend.Series.new(type)
end

Expand Down Expand Up @@ -645,6 +646,8 @@ defmodule Explorer.PolarsBackend.Series do
is_boolean(value) -> :s_fill_missing_with_boolean
is_struct(value, Date) -> :s_fill_missing_with_date
is_struct(value, NaiveDateTime) -> :s_fill_missing_with_datetime
is_struct(value, Decimal) -> :s_fill_missing_with_decimal
true -> raise "cannot fill missing with value: #{inspect(value)}"
end

Shared.apply_series(series, operation, [value])
Expand Down
2 changes: 1 addition & 1 deletion lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ defmodule Explorer.PolarsBackend.Shared do
{:duration, precision} -> apply(:s_from_list_duration, [name, list, precision])
:binary -> Native.s_from_list_binary(name, list)
:null -> Native.s_from_list_null(name, length(list))
{:decimal, precision, scale} -> Native.s_from_list_decimal(name, list, precision, scale)
{:decimal, precision, scale} -> apply(:s_from_list_decimal, [name, list, precision, scale])
end
end

Expand Down
83 changes: 75 additions & 8 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ defmodule Explorer.Series do
* `{:f, size}` - a 64-bit or 32-bit floating point number
* `{:s, size}` - a 8-bit or 16-bit or 32-bit or 64-bit signed integer number.
* `{:u, size}` - a 8-bit or 16-bit or 32-bit or 64-bit unsigned integer number.
* `{:decimal, precision, scale}` - a 128-bit signed integer number representing a decimal,
with a scale and precision. This unwraps to `Decimal`, using the `:decimal` package.
philss marked this conversation as resolved.
Show resolved Hide resolved
* `:null` - `nil`s exclusively
* `:string` - UTF-8 encoded binary
* `:time` - Time type that unwraps to `Elixir.Time`
Expand All @@ -38,10 +40,11 @@ defmodule Explorer.Series do
When passing a dtype as argument, aliases are supported for convenience
and compatibility with the Elixir ecosystem:

* All numeric dtypes (signed integer, unsigned integer, and floats) can
be specified as an atom in the form of `:s32`, `:u8`, `:f32` and so on
* All numeric dtypes (signed integer, unsigned integer, floats and decimals) can
be specified as an atom in the form of `:s32`, `:u8`, `:f32` and so on.
* The atom `:float` as an alias for `{:f, 64}` to mirror Elixir's floats
* The atom `:integer` as an alias for `{:s, 64}` to mirror Elixir's integers
* The atom `:decimal` as an alias for `{:decimal, 38, 0}`

A series must consist of a single data type only. Series may have `nil` values in them.
The series `dtype` can be retrieved via the `dtype/1` function or directly accessed as
Expand Down Expand Up @@ -140,7 +143,14 @@ defmodule Explorer.Series do
@numeric_dtypes Explorer.Shared.numeric_types()
@numeric_or_temporal_dtypes @numeric_dtypes ++ @temporal_dtypes

@io_dtypes Shared.dtypes() -- [:binary, :string, {:list, :any}, {:struct, :any}]
@io_dtypes Shared.dtypes() --
[
:binary,
:string,
{:list, :any},
{:struct, :any},
{:decimal, :nil_or_pos_integer, :pos_integer}
philss marked this conversation as resolved.
Show resolved Hide resolved
]

@type dtype ::
:null
Expand All @@ -150,11 +160,12 @@ defmodule Explorer.Series do
| :date
| :time
| :string
| naive_datetime_dtype
| datetime_dtype
| decimal_dtype
| duration_dtype
| float_dtype
| list_dtype
| naive_datetime_dtype
| signed_integer_dtype
| struct_dtype
| unsigned_integer_dtype
Expand All @@ -170,10 +181,12 @@ defmodule Explorer.Series do
@type signed_integer_dtype :: {:s, 8} | {:s, 16} | {:s, 32} | {:s, 64}
@type unsigned_integer_dtype :: {:u, 8} | {:u, 16} | {:u, 32} | {:u, 64}
@type float_dtype :: {:f, 32} | {:f, 64}
# Scale is the number of fractional digits and may be zero (the `:decimal`
# alias expands to `{:decimal, 38, 0}`), so it is typed as non-negative.
@type decimal_dtype :: {:decimal, nil | pos_integer(), non_neg_integer()}
philss marked this conversation as resolved.
Show resolved Hide resolved

@type dtype_alias :: integer_dtype_alias | float_dtype_alias
@type dtype_alias :: integer_dtype_alias | float_dtype_alias | decimal_dtype_alias
@type float_dtype_alias :: :float | :f32 | :f64
@type integer_dtype_alias :: :integer | :u8 | :u16 | :u32 | :u64 | :s8 | :s16 | :s32 | :s64
@type decimal_dtype_alias :: :decimal | :d0 | :d1 | :d2 | :d3 | :d4 | :d5
philss marked this conversation as resolved.
Show resolved Hide resolved

@type t :: %Series{data: Explorer.Backend.Series.t(), dtype: dtype()}
@type lazy_t :: %Series{data: Explorer.Backend.LazySeries.t(), dtype: dtype()}
Expand All @@ -197,14 +210,24 @@ defmodule Explorer.Series do
@behaviour Access
@compile {:no_warn_undefined, Nx}

defguardp is_numeric(n) when K.or(is_number(n), K.in(n, [:nan, :infinity, :neg_infinity]))
# Matches values treated as numeric: Elixir numbers, the non-finite float
# atoms (`:nan`, `:infinity`, `:neg_infinity`) and `Decimal` structs.
defguardp is_numeric(n)
          when K.or(
                 K.or(is_number(n), K.in(n, [:nan, :infinity, :neg_infinity])),
                 is_struct(n, Decimal)
               )

defguardp is_io_dtype(dtype) when K.in(dtype, @io_dtypes)

defguardp is_numeric_dtype(dtype) when K.in(dtype, @numeric_dtypes)
# Matches a 3-element tuple tagged `:decimal` whose last element (the scale)
# is an integer. The middle element (the precision) is not inspected.
defguardp is_decimal_dtype(dtype)
          when K.and(
                 K.and(
                   K.and(is_tuple(dtype), tuple_size(dtype) == 3),
                   elem(dtype, 0) == :decimal
                 ),
                 K.is_integer(elem(dtype, 2))
               )

# A dtype is numeric when it is listed in `@numeric_dtypes` or is a decimal
# dtype tuple.
defguardp is_numeric_dtype(dtype)
          when dtype |> K.in(@numeric_dtypes) |> K.or(is_decimal_dtype(dtype))

defguardp is_numeric_or_bool_dtype(dtype)
when K.in(dtype, [:boolean | @numeric_dtypes])
when K.or(dtype == :boolean, is_numeric_dtype(dtype))

defguardp is_precision(precision)
when K.in(precision, [:millisecond, :microsecond, :nanosecond])
Expand Down Expand Up @@ -1260,6 +1283,7 @@ defmodule Explorer.Series do
# Categories are reported as `{:u, 32}` and decimals as `{:s, 128}`; every
# other dtype is delegated to `Shared.dtype_to_iotype/1`.
def iotype(%Series{dtype: :category}), do: {:u, 32}
def iotype(%Series{dtype: {:decimal, _, _}}), do: {:s, 128}
def iotype(%Series{dtype: dtype}), do: Shared.dtype_to_iotype(dtype)
Expand Down Expand Up @@ -2577,6 +2601,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, there is a typo here, it should be backticks but we fix it later. :D

* decimals: the result will be a float

## Examples

Expand Down Expand Up @@ -2650,6 +2675,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a float

## Examples

Expand Down Expand Up @@ -2719,6 +2745,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a float

## Examples

Expand Down Expand Up @@ -2789,6 +2816,7 @@ defmodule Explorer.Series do
* `:time`
* `:datetime`
* `:duration`
* `:decimal`
philss marked this conversation as resolved.
Show resolved Hide resolved

## Examples

Expand Down Expand Up @@ -2889,6 +2917,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a float

## Examples

Expand Down Expand Up @@ -2920,6 +2949,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a float

## Examples

Expand Down Expand Up @@ -3262,6 +3292,7 @@ defmodule Explorer.Series do
* `:time`
* `:datetime`
* `:duration`
* `:decimal`

## Examples

Expand Down Expand Up @@ -3344,6 +3375,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* `:decimal`

## Examples

Expand Down Expand Up @@ -3395,8 +3427,18 @@ defmodule Explorer.Series do
defp cast_to_add({:datetime, p, tz}, {:duration, p}), do: {:datetime, p, tz}
defp cast_to_add({:duration, p}, {:datetime, p, tz}), do: {:datetime, p, tz}
defp cast_to_add({:duration, p}, {:duration, p}), do: {:duration, p}

defp cast_to_add({:decimal, p1, s1}, {:decimal, p2, s2}),
do: {:decimal, maybe_max(p1, p2), maybe_max(s1, s2)}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some rule we need to follow here? For example, is there a maximum value for precision and scale?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to ChatGPT:


Yes, in Apache Arrow's decimal128 type, the precision and scale are constrained as follows:

Precision: This represents the total number of digits that can be stored, both before and after the decimal point. For decimal128, the maximum precision is 38 digits. This means that it can store up to 38 significant digits.

Scale: This defines how many of the digits are allocated to the fractional part (i.e., after the decimal point). The scale can be any value between 0 and the precision value. For example, if you have a precision of 38 and set a scale of 10, then 28 digits can be used before the decimal point and 10 digits after.

Thus, the maximum precision is 38, and the scale can be anywhere from 0 to 38, depending on the application needs.


So I think we are good, but I'd encapsulate this logic in a function. :)


defp cast_to_add(left, right), do: Shared.merge_numeric_dtype(left, right)

# Returns the larger of two integers. When one side is `nil`, the other side
# is returned unchanged, so two `nil`s yield `nil`.
defp maybe_max(nil, right), do: right
defp maybe_max(left, nil), do: left

defp maybe_max(left, right) when K.and(is_integer(left), is_integer(right)) do
  K.max(left, right)
end

@doc """
Subtracts right from left, element-wise.

Expand All @@ -3412,6 +3454,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals

## Examples

Expand Down Expand Up @@ -3463,6 +3506,10 @@ defmodule Explorer.Series do
defp cast_to_subtract({:datetime, p, tz}, {:datetime, p, tz}), do: {:duration, p}
defp cast_to_subtract({:datetime, p, tz}, {:duration, p}), do: {:datetime, p, tz}
defp cast_to_subtract({:duration, p}, {:duration, p}), do: {:duration, p}

defp cast_to_subtract({:decimal, p1, s1}, {:decimal, p2, s2}),
do: {:decimal, maybe_max(p1, p2), maybe_max(s1, s2)}

defp cast_to_subtract(left, right), do: Shared.merge_numeric_dtype(left, right)

@doc """
Expand All @@ -3478,6 +3525,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a decimal series

## Examples

Expand Down Expand Up @@ -3515,6 +3563,10 @@ defmodule Explorer.Series do
defp cast_to_multiply({:duration, p}, {:s, _}), do: {:duration, p}
defp cast_to_multiply({:f, _}, {:duration, p}), do: {:duration, p}
defp cast_to_multiply({:duration, p}, {:f, _}), do: {:duration, p}

defp cast_to_multiply({:decimal, p1, s1}, {:decimal, p2, s2}),
do: {:decimal, maybe_max(p1, p2), maybe_max(s1, s2)}

defp cast_to_multiply(left, right), do: Shared.merge_numeric_dtype(left, right)

@doc """
Expand All @@ -3530,6 +3582,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a float series

## Examples

Expand Down Expand Up @@ -3590,6 +3643,8 @@ defmodule Explorer.Series do
defp cast_to_divide({:f, left}, {:f, right}), do: {:f, max(left, right)}
defp cast_to_divide({:duration, p}, {:s, _}), do: {:duration, p}
defp cast_to_divide({:duration, p}, {:f, _}), do: {:duration, p}
# This is due to limitations of Polars. Ideally the result would be a decimal here.
defp cast_to_divide({:decimal, _, _}, {:decimal, _, _}), do: {:f, 64}
defp cast_to_divide(_, _), do: nil

@doc """
Expand All @@ -3607,6 +3662,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* decimals: the result will be a float series

## Examples

Expand Down Expand Up @@ -3663,6 +3719,9 @@ defmodule Explorer.Series do
defp cast_to_pow({:f, l}, {n, _}) when K.in(n, [:u, :s]), do: {:f, l}
defp cast_to_pow({n, _}, {:f, r}) when K.in(n, [:u, :s]), do: {:f, r}
defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:s, 64}
# Due to a limitation in Polars, it's not possible to use decimals only here.
defp cast_to_pow({:decimal, _, _}, {:decimal, _, _}), do: {:f, 64}
defp cast_to_pow({:decimal, _, _}, {:s, _}), do: {:f, 64}
defp cast_to_pow(_, _), do: nil

@doc """
Expand All @@ -3675,6 +3734,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* `:decimal` - returns f64 series.

## Examples

Expand All @@ -3699,6 +3759,7 @@ defmodule Explorer.Series do

* floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
* integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
* `:decimal`.

## Examples

Expand Down Expand Up @@ -4182,6 +4243,7 @@ defmodule Explorer.Series do
* `:time`
* `:datetime`
* `:duration`
* `:decimal`

## Examples

Expand Down Expand Up @@ -4221,6 +4283,7 @@ defmodule Explorer.Series do
* `:time`
* `:datetime`
* `:duration`
* `:decimal`

## Examples

Expand Down Expand Up @@ -4260,6 +4323,7 @@ defmodule Explorer.Series do
* `:time`
* `:datetime`
* `:duration`
* `:decimal`

## Examples

Expand Down Expand Up @@ -4299,6 +4363,7 @@ defmodule Explorer.Series do
* `:time`
* `:datetime`
* `:duration`
* `:decimal`

## Examples

Expand Down Expand Up @@ -4468,6 +4533,8 @@ defmodule Explorer.Series do
defp cast_to_ordered_series({:duration, _}, %Explorer.Duration{}),
do: :duration

defp cast_to_ordered_series({:decimal, _precision, _scale} = decimal, %Decimal{}), do: decimal

defp cast_to_ordered_series(_dtype, _value),
do: nil

Expand Down
Loading
Loading