elixir-explorer · philss · Oct 16, 2024 · Sep 18, 2024 · Sep 19, 2024 · Sep 20, 2024
diff --git a/lib/explorer/backend/series.ex b/lib/explorer/backend/series.ex
@@ -18,6 +18,7 @@ defmodule Explorer.Backend.Series do
           | Time.t()
           | NaiveDateTime.t()
           | Explorer.Duration.t()
+          | Decimal.t()
 
   @type non_finite :: Explorer.Series.non_finite()
   @type option(type) :: type | nil

diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex
@@ -319,6 +319,7 @@ defmodule Explorer.PolarsBackend.Native do
   def s_fill_missing_with_atom(_s, _value), do: err()
   def s_fill_missing_with_date(_s, _value), do: err()
   def s_fill_missing_with_datetime(_s, _value), do: err()
+  def s_fill_missing_with_decimal(_s, _value), do: err()
   def s_greater(_s, _rhs), do: err()
   def s_greater_equal(_s, _rhs), do: err()
   def s_head(_s, _length), do: err()

diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex
@@ -22,6 +22,7 @@ defmodule Explorer.PolarsBackend.Series do
   @impl true
   def from_list(data, type) when is_list(data) do
     series = Shared.from_list(data, type)
+
     Explorer.Backend.Series.new(series, type)
   end
 
@@ -645,6 +646,8 @@ defmodule Explorer.PolarsBackend.Series do
         is_boolean(value) -> :s_fill_missing_with_boolean
         is_struct(value, Date) -> :s_fill_missing_with_date
         is_struct(value, NaiveDateTime) -> :s_fill_missing_with_datetime
+        is_struct(value, Decimal) -> :s_fill_missing_with_decimal
+        true -> raise "cannot fill missing with value: #{inspect(value)}"
       end
 
     Shared.apply_series(series, operation, [value])

diff --git a/lib/explorer/polars_backend/shared.ex b/lib/explorer/polars_backend/shared.ex
@@ -189,7 +189,7 @@ defmodule Explorer.PolarsBackend.Shared do
       {:duration, precision} -> apply(:s_from_list_duration, [name, list, precision])
       :binary -> Native.s_from_list_binary(name, list)
       :null -> Native.s_from_list_null(name, length(list))
-      {:decimal, precision, scale} -> Native.s_from_list_decimal(name, list, precision, scale)
+      {:decimal, precision, scale} -> apply(:s_from_list_decimal, [name, list, precision, scale])
     end
   end
 

diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex
@@ -26,6 +26,8 @@ defmodule Explorer.Series do
     * `{:f, size}` - a 64-bit or 32-bit floating point number
     * `{:s, size}` - a 8-bit or 16-bit or 32-bit or 64-bit signed integer number.
     * `{:u, size}` - a 8-bit or 16-bit or 32-bit or 64-bit unsigned integer number.
+    * `{:decimal, precision, scale}` - a 128-bit signed integer number representing a decimal,
+      with a scale and precision. This unwraps to `Decimal`, using the `:decimal` package.
     * `:null` - `nil`s exclusively
     * `:string` - UTF-8 encoded binary
     * `:time` - Time type that unwraps to `Elixir.Time`
@@ -38,10 +40,11 @@ defmodule Explorer.Series do
   When passing a dtype as argument, aliases are supported for convenience
   and compatibility with the Elixir ecosystem:
 
-    * All numeric dtypes (signed integer, unsigned integer, and floats) can
-      be specified as an atom in the form of `:s32`, `:u8`, `:f32` and so on
+    * All numeric dtypes (signed integer, unsigned integer, floats and decimals) can
+      be specified as an atom in the form of `:s32`, `:u8`, `:f32` and so on.
     * The atom `:float` as an alias for `{:f, 64}` to mirror Elixir's floats
     * The atom `:integer` as an alias for `{:s, 64}` to mirror Elixir's integers
+    * The atom `:decimal` as an alias for `{:decimal, 38, 0}`
 
   A series must consist of a single data type only. Series may have `nil` values in them.
   The series `dtype` can be retrieved via the `dtype/1` function or directly accessed as
@@ -140,7 +143,14 @@ defmodule Explorer.Series do
   @numeric_dtypes Explorer.Shared.numeric_types()
   @numeric_or_temporal_dtypes @numeric_dtypes ++ @temporal_dtypes
 
-  @io_dtypes Shared.dtypes() -- [:binary, :string, {:list, :any}, {:struct, :any}]
+  @io_dtypes Shared.dtypes() --
+               [
+                 :binary,
+                 :string,
+                 {:list, :any},
+                 {:struct, :any},
+                 {:decimal, :nil_or_pos_integer, :pos_integer}
+               ]
 
   @type dtype ::
           :null
@@ -150,11 +160,12 @@ defmodule Explorer.Series do
           | :date
           | :time
           | :string
-          | naive_datetime_dtype
           | datetime_dtype
+          | decimal_dtype
           | duration_dtype
           | float_dtype
           | list_dtype
+          | naive_datetime_dtype
           | signed_integer_dtype
           | struct_dtype
           | unsigned_integer_dtype
@@ -170,10 +181,12 @@ defmodule Explorer.Series do
   @type signed_integer_dtype :: {:s, 8} | {:s, 16} | {:s, 32} | {:s, 64}
   @type unsigned_integer_dtype :: {:u, 8} | {:u, 16} | {:u, 32} | {:u, 64}
   @type float_dtype :: {:f, 32} | {:f, 64}
+  @type decimal_dtype :: {:decimal, nil | pos_integer(), non_neg_integer()}
 
-  @type dtype_alias :: integer_dtype_alias | float_dtype_alias
+  @type dtype_alias :: integer_dtype_alias | float_dtype_alias | decimal_dtype_alias
   @type float_dtype_alias :: :float | :f32 | :f64
   @type integer_dtype_alias :: :integer | :u8 | :u16 | :u32 | :u64 | :s8 | :s16 | :s32 | :s64
+  @type decimal_dtype_alias :: :decimal | :d0 | :d1 | :d2 | :d3 | :d4 | :d5
 
   @type t :: %Series{data: Explorer.Backend.Series.t(), dtype: dtype()}
   @type lazy_t :: %Series{data: Explorer.Backend.LazySeries.t(), dtype: dtype()}
@@ -197,14 +210,24 @@ defmodule Explorer.Series do
   @behaviour Access
   @compile {:no_warn_undefined, Nx}
 
-  defguardp is_numeric(n) when K.or(is_number(n), K.in(n, [:nan, :infinity, :neg_infinity]))
+  defguardp is_numeric(n)
+            when is_number(n)
+                 |> K.or(K.in(n, [:nan, :infinity, :neg_infinity]))
+                 |> K.or(is_struct(n, Decimal))
 
   defguardp is_io_dtype(dtype) when K.in(dtype, @io_dtypes)
 
-  defguardp is_numeric_dtype(dtype) when K.in(dtype, @numeric_dtypes)
+  defguardp is_decimal_dtype(dtype)
+            when is_tuple(dtype)
+                 |> K.and(tuple_size(dtype) == 3)
+                 |> K.and(elem(dtype, 0) == :decimal)
+                 |> K.and(elem(dtype, 2) |> K.is_integer())
+
+  defguardp is_numeric_dtype(dtype)
+            when K.or(K.in(dtype, @numeric_dtypes), is_decimal_dtype(dtype))
 
   defguardp is_numeric_or_bool_dtype(dtype)
-            when K.in(dtype, [:boolean | @numeric_dtypes])
+            when K.or(dtype == :boolean, is_numeric_dtype(dtype))
 
   defguardp is_precision(precision)
             when K.in(precision, [:millisecond, :microsecond, :nanosecond])
@@ -1260,6 +1283,7 @@ defmodule Explorer.Series do
   def iotype(%Series{dtype: dtype}) do
     case dtype do
       :category -> {:u, 32}
+      {:decimal, _, _} -> {:s, 128}
       other -> Shared.dtype_to_iotype(other)
     end
   end
@@ -2577,6 +2601,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float
 
   ## Examples
 
@@ -2650,6 +2675,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float
 
   ## Examples
 
@@ -2719,6 +2745,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float
 
   ## Examples
 
@@ -2789,6 +2816,7 @@ defmodule Explorer.Series do
     * `:time`
     * `:datetime`
     * `:duration`
+    * `:decimal`
 
   ## Examples
 
@@ -2889,6 +2917,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float
 
   ## Examples
 
@@ -2920,6 +2949,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float
 
   ## Examples
 
@@ -3262,6 +3292,7 @@ defmodule Explorer.Series do
     * `:time`
     * `:datetime`
     * `:duration`
+    * `:decimal`
 
   ## Examples
 
@@ -3344,6 +3375,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * `:decimal`
 
   ## Examples
 
@@ -3395,8 +3427,18 @@ defmodule Explorer.Series do
   defp cast_to_add({:datetime, p, tz}, {:duration, p}), do: {:datetime, p, tz}
   defp cast_to_add({:duration, p}, {:datetime, p, tz}), do: {:datetime, p, tz}
   defp cast_to_add({:duration, p}, {:duration, p}), do: {:duration, p}
+
+  defp cast_to_add({:decimal, p1, s1}, {:decimal, p2, s2}),
+    do: {:decimal, maybe_max(p1, p2), maybe_max(s1, s2)}
+
   defp cast_to_add(left, right), do: Shared.merge_numeric_dtype(left, right)
 
+  defp maybe_max(left, right) when K.and(is_integer(left), is_integer(right)),
+    do: K.max(left, right)
+
+  defp maybe_max(left, nil), do: left
+  defp maybe_max(nil, right), do: right
+
   @doc """
   Subtracts right from left, element-wise.
 
@@ -3412,6 +3454,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals
 
   ## Examples
 
@@ -3463,6 +3506,10 @@ defmodule Explorer.Series do
   defp cast_to_subtract({:datetime, p, tz}, {:datetime, p, tz}), do: {:duration, p}
   defp cast_to_subtract({:datetime, p, tz}, {:duration, p}), do: {:datetime, p, tz}
   defp cast_to_subtract({:duration, p}, {:duration, p}), do: {:duration, p}
+
+  defp cast_to_subtract({:decimal, p1, s1}, {:decimal, p2, s2}),
+    do: {:decimal, maybe_max(p1, p2), maybe_max(s1, s2)}
+
   defp cast_to_subtract(left, right), do: Shared.merge_numeric_dtype(left, right)
 
   @doc """
@@ -3478,6 +3525,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a decimal series
 
   ## Examples
 
@@ -3515,6 +3563,10 @@ defmodule Explorer.Series do
   defp cast_to_multiply({:duration, p}, {:s, _}), do: {:duration, p}
   defp cast_to_multiply({:f, _}, {:duration, p}), do: {:duration, p}
   defp cast_to_multiply({:duration, p}, {:f, _}), do: {:duration, p}
+
+  defp cast_to_multiply({:decimal, p1, s1}, {:decimal, p2, s2}),
+    do: {:decimal, maybe_max(p1, p2), maybe_max(s1, s2)}
+
   defp cast_to_multiply(left, right), do: Shared.merge_numeric_dtype(left, right)
 
   @doc """
@@ -3530,6 +3582,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float series
 
   ## Examples
 
@@ -3590,6 +3643,8 @@ defmodule Explorer.Series do
   defp cast_to_divide({:f, left}, {:f, right}), do: {:f, max(left, right)}
   defp cast_to_divide({:duration, p}, {:s, _}), do: {:duration, p}
   defp cast_to_divide({:duration, p}, {:f, _}), do: {:duration, p}
+  # This is due to limitations of Polars. Ideally it should be decimal here.
+  defp cast_to_divide({:decimal, _, _}, {:decimal, _, _}), do: {:f, 64}
   defp cast_to_divide(_, _), do: nil
 
   @doc """
@@ -3607,6 +3662,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * decimals: the result will be a float series
 
   ## Examples
 
@@ -3663,6 +3719,9 @@ defmodule Explorer.Series do
   defp cast_to_pow({:f, l}, {n, _}) when K.in(n, [:u, :s]), do: {:f, l}
   defp cast_to_pow({n, _}, {:f, r}) when K.in(n, [:u, :s]), do: {:f, r}
   defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:s, 64}
+  # Due to a limitation in Polars, it's not possible to use decimals only here.
+  defp cast_to_pow({:decimal, _, _}, {:decimal, _, _}), do: {:f, 64}
+  defp cast_to_pow({:decimal, _, _}, {:s, _}), do: {:f, 64}
   defp cast_to_pow(_, _), do: nil
 
   @doc """
@@ -3675,6 +3734,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * `:decimal` - returns f64 series.
 
   ## Examples
 
@@ -3699,6 +3759,7 @@ defmodule Explorer.Series do
 
     * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)}
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
+    * `:decimal`.
 
   ## Examples
 
@@ -4182,6 +4243,7 @@ defmodule Explorer.Series do
     * `:time`
     * `:datetime`
     * `:duration`
+    * `:decimal`
 
   ## Examples
 
@@ -4221,6 +4283,7 @@ defmodule Explorer.Series do
     * `:time`
     * `:datetime`
     * `:duration`
+    * `:decimal`
 
   ## Examples
 
@@ -4260,6 +4323,7 @@ defmodule Explorer.Series do
     * `:time`
     * `:datetime`
     * `:duration`
+    * `:decimal`
 
   ## Examples
 
@@ -4299,6 +4363,7 @@ defmodule Explorer.Series do
     * `:time`
     * `:datetime`
     * `:duration`
+    * `:decimal`
 
   ## Examples
 
@@ -4468,6 +4533,8 @@ defmodule Explorer.Series do
   defp cast_to_ordered_series({:duration, _}, %Explorer.Duration{}),
     do: :duration
 
+  defp cast_to_ordered_series({:decimal, _precision, _scale} = decimal, %Decimal{}), do: decimal
+
   defp cast_to_ordered_series(_dtype, _value),
     do: nil