Skip to content

Commit

Permalink
Provide column name in error for DF.from_columns/2 (#146)
Browse files Browse the repository at this point in the history
Closes #69
  • Loading branch information
philss authored Mar 31, 2022
1 parent 69f421c commit 4f0f29b
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 27 deletions.
7 changes: 5 additions & 2 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ defmodule Explorer.DataFrame do
@doc """
Creates a new dataframe from a map or keyword of lists or series.
Lists and series must be the same length. This function calls `Explorer.Series.from_list/2`
for lists, so they must conform to the requirements for making a series.
Lists and series must be the same length. This function applies the same validations as
`Explorer.Series.from_list/2` for lists, so they must conform to the requirements for making a series.
## Options
Expand Down Expand Up @@ -200,6 +200,9 @@ defmodule Explorer.DataFrame do
floats float [1.0, 2.0]
ints integer [1, nil]
>
iex> Explorer.DataFrame.from_columns(%{floats: [1.0, 2.0], ints: [1, "wrong"]})
** (ArgumentError) cannot create series "ints": cannot make a series from mismatched types - the value "wrong" does not match inferred dtype integer
"""
@spec from_columns(series :: map() | Keyword.t(), opts :: Keyword.t()) :: DataFrame.t()
def from_columns(series, opts \\ []) do
Expand Down
23 changes: 19 additions & 4 deletions lib/explorer/polars_backend/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,10 @@ defmodule Explorer.PolarsBackend.DataFrame do
def from_columns(map) do
series_list = Enum.map(map, &from_columns_handler/1)

{:ok, df} = Native.df_new(series_list)
Shared.to_dataframe(df)
case Native.df_new(series_list) do
{:ok, df} -> Shared.to_dataframe(df)
{:error, error} -> raise ArgumentError, error
end
end

defp from_columns_handler({key, value}) when is_atom(key) do
Expand All @@ -160,14 +162,27 @@ defmodule Explorer.PolarsBackend.DataFrame do
end

defp from_columns_handler({colname, value}) when is_list(value) do
series = Series.from_list(value)
series = series_from_list!(colname, value)
from_columns_handler({colname, series})
end

# Terminal clause: the column name is already a binary and the value is already a
# series, so rename the series to the column name and convert it for Polars.
defp from_columns_handler({colname, %Series{} = series}) when is_binary(colname) do
  renamed = PolarsSeries.rename(series, colname)
  Shared.to_polars_s(renamed)
end

# Builds a series called `name` from `list`, running the same type check and numeric
# cast as `Explorer.Series.from_list/2`. When the type check fails, the raised
# ArgumentError is prefixed with the series name so the offending column is identifiable.
defp series_from_list!(name, list) do
  list
  |> Explorer.Shared.check_types()
  |> case do
    {:error, error} ->
      raise ArgumentError, "cannot create series #{inspect(name)}: " <> error

    {:ok, inferred} ->
      {values, dtype} = Explorer.Shared.cast_numerics(list, inferred)
      PolarsSeries.from_list(values, dtype, name)
  end
end

@impl true
def to_map(%DataFrame{data: df}, convert_series?) do
Enum.reduce(df, %{}, &to_map_reducer(&1, &2, convert_series?))
Expand Down Expand Up @@ -267,7 +282,7 @@ defmodule Explorer.PolarsBackend.DataFrame do
do: mutate_reducer({colname, callback.(df)}, df)

defp mutate_reducer({colname, values}, df) when is_list(values),
do: mutate_reducer({colname, Series.from_list(values)}, df)
do: mutate_reducer({colname, series_from_list!(colname, values)}, df)

defp mutate_reducer({colname, value}, %DataFrame{} = df)
when is_binary(colname),
Expand Down
4 changes: 2 additions & 2 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ defmodule Explorer.PolarsBackend.Series do
@moduledoc false

import Kernel, except: [length: 1]
import Explorer.Shared, only: [check_types: 1, cast_numerics: 2]
import Explorer.Shared, only: [check_types!: 1, cast_numerics: 2]

alias Explorer.DataFrame
alias Explorer.PolarsBackend.Native
Expand Down Expand Up @@ -322,7 +322,7 @@ defmodule Explorer.PolarsBackend.Series do
@impl true
def transform(series, fun) do
list = series |> Series.to_list() |> Enum.map(fun)
type = check_types(list)
type = check_types!(list)
{list, type} = cast_numerics(list, type)

from_list(list, type)
Expand Down
11 changes: 8 additions & 3 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ defmodule Explorer.Series do
alias __MODULE__, as: Series
alias Kernel, as: K

import Explorer.Shared, only: [impl!: 1, check_types: 1, cast_numerics: 2]
import Explorer.Shared, only: [impl!: 1, check_types!: 1, cast_numerics: 2]
import Nx.Defn.Kernel, only: [keyword!: 2]
import Kernel, except: [length: 1, and: 2]

Expand Down Expand Up @@ -111,12 +111,17 @@ defmodule Explorer.Series do
Mixing non-numeric data types will raise an ArgumentError.
iex> Explorer.Series.from_list([1, "a"])
** (ArgumentError) cannot make a series from mismatched types: type of "a" does not match inferred dtype integer
** (ArgumentError) cannot make a series from mismatched types - the value "a" does not match inferred dtype integer
Trying to create a "nil" series will result in an ArgumentError exception.
iex> Explorer.Series.from_list([nil, nil])
** (ArgumentError) cannot make a series from a list of all nils
"""
@spec from_list(list :: list(), opts :: Keyword.t()) :: Series.t()
def from_list(list, opts \\ []) do
backend = backend_from_options!(opts)
type = check_types(list)
type = check_types!(list)
{list, type} = cast_numerics(list, type)
backend.from_list(list, type)
end
Expand Down
43 changes: 27 additions & 16 deletions lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -50,27 +50,38 @@ defmodule Explorer.Shared do
"""
def check_types(list) do
type =
for el <- list, reduce: nil do
type ->
new_type = type(el, type) || type
Enum.reduce_while(list, nil, fn el, type ->
new_type = type(el, type) || type

cond do
new_type == :numeric and type in [:float, :integer] ->
new_type
cond do
new_type == :numeric and type in [:float, :integer] ->
{:cont, new_type}

new_type != type and !is_nil(type) ->
raise ArgumentError,
"cannot make a series from mismatched types: type of #{inspect(el)} does not match inferred dtype #{type}"
new_type != type and !is_nil(type) ->
{:halt,
{:error,
"cannot make a series from mismatched types - the value #{inspect(el)} does not match inferred dtype #{type}"}}

true ->
new_type
end
end
true ->
{:cont, new_type}
end
end)

if is_nil(type),
do: raise(ArgumentError, "cannot make a series from a list of all nils")
case type do
nil -> {:error, "cannot make a series from a list of all nils"}
{:error, _} = error -> error
valid -> {:ok, valid}
end
end

type
@doc """
Returns the `dtype` inferred by `check_types/1` for `list`.

Raises `ArgumentError` with the reported message when `check_types/1` returns an error.
"""
def check_types!(list) do
  list
  |> check_types()
  |> case do
    {:error, reason} -> raise ArgumentError, reason
    {:ok, inferred} -> inferred
  end
end

defp type(item, type) when is_integer(item) and type == :float, do: :numeric
Expand Down

0 comments on commit 4f0f29b

Please sign in to comment.