From 4f0f29b8efa898d386c37246fd07fa9936816dc1 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Wed, 30 Mar 2022 21:20:11 -0300 Subject: [PATCH] Provide column name in error for DF.from_columns/2 (#146) Closes https://github.com/elixir-nx/explorer/issues/69 --- lib/explorer/data_frame.ex | 7 ++-- lib/explorer/polars_backend/data_frame.ex | 23 +++++++++--- lib/explorer/polars_backend/series.ex | 4 +-- lib/explorer/series.ex | 11 ++++-- lib/explorer/shared.ex | 43 ++++++++++++++--------- 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 595c21b6d..bac0f76f7 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -171,8 +171,8 @@ defmodule Explorer.DataFrame do @doc """ Creates a new dataframe from a map or keyword of lists or series. - Lists and series must be the same length. This function calls `Explorer.Series.from_list/2` - for lists, so they must conform to the requirements for making a series. + Lists and series must be the same length. This function has the same validations from + `Explorer.Series.from_list/2` for lists, so they must conform to the requirements for making a series. ## Options @@ -200,6 +200,9 @@ defmodule Explorer.DataFrame do floats float [1.0, 2.0] ints integer [1, nil] > + + iex> Explorer.DataFrame.from_columns(%{floats: [1.0, 2.0], ints: [1, "wrong"]}) + ** (ArgumentError) cannot create series "ints": cannot make a series from mismatched types - the value "wrong" does not match inferred dtype integer """ @spec from_columns(series :: map() | Keyword.t(), opts :: Keyword.t()) :: DataFrame.t() def from_columns(series, opts \\ []) do diff --git a/lib/explorer/polars_backend/data_frame.ex b/lib/explorer/polars_backend/data_frame.ex index 755b91609..144dda425 100644 --- a/lib/explorer/polars_backend/data_frame.ex +++ b/lib/explorer/polars_backend/data_frame.ex @@ -150,8 +150,10 @@ defmodule Explorer.PolarsBackend.DataFrame do def from_columns(map) do series_list = Enum.map(map, &from_columns_handler/1) - {:ok, df} = Native.df_new(series_list) - Shared.to_dataframe(df) + case Native.df_new(series_list) do + {:ok, df} -> Shared.to_dataframe(df) + {:error, error} -> raise ArgumentError, error + end end defp from_columns_handler({key, value}) when is_atom(key) do @@ -160,7 +162,7 @@ defmodule Explorer.PolarsBackend.DataFrame do end defp from_columns_handler({colname, value}) when is_list(value) do - series = Series.from_list(value) + series = series_from_list!(colname, value) from_columns_handler({colname, series}) end @@ -168,6 +170,19 @@ defmodule Explorer.PolarsBackend.DataFrame do series |> PolarsSeries.rename(colname) |> Shared.to_polars_s() end + # Like `Explorer.Series.from_list/2`, but gives a better error message with the series name. + defp series_from_list!(name, list) do + case Explorer.Shared.check_types(list) do + {:ok, type} -> + {list, type} = Explorer.Shared.cast_numerics(list, type) + PolarsSeries.from_list(list, type, name) + + {:error, error} -> + message = "cannot create series #{inspect(name)}: " <> error + raise ArgumentError, message + end + end + @impl true def to_map(%DataFrame{data: df}, convert_series?) do Enum.reduce(df, %{}, &to_map_reducer(&1, &2, convert_series?)) @@ -267,7 +282,7 @@ defmodule Explorer.PolarsBackend.DataFrame do do: mutate_reducer({colname, callback.(df)}, df) defp mutate_reducer({colname, values}, df) when is_list(values), - do: mutate_reducer({colname, Series.from_list(values)}, df) + do: mutate_reducer({colname, series_from_list!(colname, values)}, df) defp mutate_reducer({colname, value}, %DataFrame{} = df) when is_binary(colname), diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 6175d5fb6..3299fbc33 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -2,7 +2,7 @@ defmodule Explorer.PolarsBackend.Series do @moduledoc false import Kernel, except: [length: 1] - import Explorer.Shared, only: [check_types: 1, cast_numerics: 2] + import Explorer.Shared, only: [check_types!: 1, cast_numerics: 2] alias Explorer.DataFrame alias Explorer.PolarsBackend.Native @@ -322,7 +322,7 @@ defmodule Explorer.PolarsBackend.Series do @impl true def transform(series, fun) do list = series |> Series.to_list() |> Enum.map(fun) - type = check_types(list) + type = check_types!(list) {list, type} = cast_numerics(list, type) from_list(list, type) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 7a9d0057a..36c284609 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -22,7 +22,7 @@ defmodule Explorer.Series do alias __MODULE__, as: Series alias Kernel, as: K - import Explorer.Shared, only: [impl!: 1, check_types: 1, cast_numerics: 2] + import Explorer.Shared, only: [impl!: 1, check_types!: 1, cast_numerics: 2] import Nx.Defn.Kernel, only: [keyword!: 2] import Kernel, except: [length: 1, and: 2] @@ -111,12 +111,17 @@ defmodule Explorer.Series do Mixing non-numeric data types will raise an ArgumentError. iex> Explorer.Series.from_list([1, "a"]) - ** (ArgumentError) cannot make a series from mismatched types: type of "a" does not match inferred dtype integer + ** (ArgumentError) cannot make a series from mismatched types - the value "a" does not match inferred dtype integer + + Trying to create a "nil" series will result in an ArgumentError exception. + + iex> Explorer.Series.from_list([nil, nil]) + ** (ArgumentError) cannot make a series from a list of all nils """ @spec from_list(list :: list(), opts :: Keyword.t()) :: Series.t() def from_list(list, opts \\ []) do backend = backend_from_options!(opts) - type = check_types(list) + type = check_types!(list) {list, type} = cast_numerics(list, type) backend.from_list(list, type) end diff --git a/lib/explorer/shared.ex b/lib/explorer/shared.ex index b47a46f1d..6a35131c9 100644 --- a/lib/explorer/shared.ex +++ b/lib/explorer/shared.ex @@ -50,27 +50,38 @@ defmodule Explorer.Shared do """ def check_types(list) do type = - for el <- list, reduce: nil do - type -> - new_type = type(el, type) || type + Enum.reduce_while(list, nil, fn el, type -> + new_type = type(el, type) || type - cond do - new_type == :numeric and type in [:float, :integer] -> - new_type + cond do + new_type == :numeric and type in [:float, :integer] -> + {:cont, new_type} - new_type != type and !is_nil(type) -> - raise ArgumentError, - "cannot make a series from mismatched types: type of #{inspect(el)} does not match inferred dtype #{type}" + new_type != type and !is_nil(type) -> + {:halt, + {:error, + "cannot make a series from mismatched types - the value #{inspect(el)} does not match inferred dtype #{type}"}} - true -> - new_type - end - end + true -> + {:cont, new_type} + end + end) - if is_nil(type), - do: raise(ArgumentError, "cannot make a series from a list of all nils") + case type do + nil -> {:error, "cannot make a series from a list of all nils"} + {:error, _} = error -> error + valid -> {:ok, valid} + end + end - type + @doc """ + Gets the `dtype` of a list or raise error if not possible. + """ + def check_types!(list) do + case check_types(list) do + {:ok, dtype} -> dtype + {:error, error} -> raise ArgumentError, error + end end defp type(item, type) when is_integer(item) and type == :float, do: :numeric