Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow push!/pushfirst!/append!/prepend! with multiple values #3372

Merged
merged 17 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## New functionalities

* Allow passing multiple values to add in `push!`, `pushfirst!`,
`append!`, and `prepend!`
([#3372](https://github.com/JuliaData/DataFrames.jl/pull/3372))
* `rename` and `rename!` now allow to apply a function transforming
column names only to a subset of the columns specified by the `cols`
keyword argument
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "DataFrames"
uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
version = "1.6.1"
version = "1.7.0"

[deps]
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
Expand Down
189 changes: 170 additions & 19 deletions src/dataframe/insertion.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
"""
append!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
append!(df::DataFrame, table; cols::Symbol=:setequal,
append!(df::DataFrame, tables...; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))

Add the rows of `df2` to the end of `df`. If the second argument `table` is not
an `AbstractDataFrame` then it is converted using `DataFrame(table,
copycols=false)` before being appended.
Add the rows of tables passed as `tables` to the end of `df`. If the table is not
an `AbstractDataFrame` then it is converted using
`DataFrame(table, copycols=false)` before being appended.

The exact behavior of `append!` depends on the `cols` argument:
* If `cols == :setequal` (this is the default) then `df2` must contain exactly
Expand Down Expand Up @@ -78,18 +76,53 @@ julia> df1
4 │ 4 4
5 │ 5 5
6 │ 6 6

julia> append!(df2, DataFrame(A=1), (; C=1:2), cols=:union)
6×3 DataFrame
Row │ A B C
│ Float64? Int64? Int64?
─────┼─────────────────────────────
1 │ 4.0 4 missing
2 │ 5.0 5 missing
3 │ 6.0 6 missing
4 │ 1.0 missing missing
5 │ missing missing 1
6 │ missing missing 2
```
"""
Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset])) =
_append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true)

function Base.append!(df::DataFrame, table; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if table isa Dict && cols == :orderequal
throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
"`:orderequal` is not allowed as it is unordered"))
end
append!(df, DataFrame(table, copycols=false), cols=cols, promote=promote)
end

function Base.append!(df::DataFrame, @nospecialize tables...;
cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
throw(ArgumentError("`cols` keyword argument must be " *
":orderequal, :setequal, :intersect, :subset or :union)"))
end

return foldl((df, table) -> append!(df, table, cols=cols, promote=promote),
collect(Any, tables), init=df)
end

"""
prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
prepend!(df::DataFrame, table; cols::Symbol=:setequal,
prepend!(df::DataFrame, tables...; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))

Add the rows of tables passed as `tables` to the beginning of `df`. If the table is not
an `AbstractDataFrame` then it is converted using
`DataFrame(table, copycols=false)` before being appended.

Add the rows of `df2` to the beginning of `df`. If the second argument `table`
is not an `AbstractDataFrame` then it is converted using `DataFrame(table,
copycols=false)` before being prepended.
Expand Down Expand Up @@ -164,12 +197,45 @@ julia> df1
4 │ 1 1
5 │ 2 2
6 │ 3 3

julia> prepend!(df2, DataFrame(A=1), (; C=1:2), cols=:union)
6×3 DataFrame
Row │ A B C
│ Float64? Int64? Int64?
─────┼─────────────────────────────
1 │ 1.0 missing missing
2 │ missing missing 1
3 │ missing missing 2
4 │ 4.0 4 missing
5 │ 5.0 5 missing
6 │ 6.0 6 missing
```
"""
Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset])) =
_append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false)

function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if table isa Dict && cols == :orderequal
throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
"`:orderequal` is not allowed as it is unordered"))
end
prepend!(df, DataFrame(table, copycols=false), cols=cols, promote=promote)
end

function Base.prepend!(df::DataFrame, @nospecialize tables...;
cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
throw(ArgumentError("`cols` keyword argument must be " *
":orderequal, :setequal, :intersect, :subset or :union)"))
end

return foldr((table, df) -> prepend!(df, table, cols=cols, promote=promote),
collect(Any, tables), init=df)
end

function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol,
promote::Bool, atend::Bool)
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
Expand Down Expand Up @@ -355,6 +421,10 @@ following way:
added to `df` (using `missing` for existing rows) and a `missing` value is
pushed to columns missing in `row` that are present in `df`.

If `row` is not a `DataFrameRow`, `NamedTuple`, `AbstractDict`, or `Tables.AbstractRow`
the `cols` keyword argument must be `:setequal` (the default),
because such rows do not provide column name information.

If `promote=true` and element type of a column present in `df` does not allow
the type of a pushed argument then a new column with a promoted element type
allowing it is freshly allocated and stored in `df`. If `promote=false` an error
Expand All @@ -371,12 +441,14 @@ $METADATA_FIXED
"""

"""
push!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false)
push!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
cols::Symbol=:setequal, promote::Bool=false)
push!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,
Tables.AbstractRow};
Tables.AbstractRow}...;
cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))

Add one row at the end of `df` in-place, taking the values from `row`.
Several rows can be added by passing them as separate arguments.

$INSERTION_COMMON

Expand Down Expand Up @@ -452,18 +524,36 @@ julia> push!(df, NamedTuple(), cols=:subset)
6 │ 11 12 missing
7 │ 1.0 missing 1.0
8 │ missing missing missing

julia> push!(DataFrame(a=1, b=2), (3, 4), (5, 6))
3×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 1 2
2 │ 3 4
3 │ 5 6
```
"""
Base.push!(df::DataFrame, row::Any; promote::Bool=false) =
_row_inserter!(df, -1, row, Val{:push}(), promote)
function Base.push!(df::DataFrame, row::Any;
cols=:setequal, promote::Bool=false)
if cols !== :setequal
throw(ArgumentError("`cols` can only be `:setequal` when `row` is a `$(typeof(row))` " *
"as this type does not provide column names"))
end

return _row_inserter!(df, -1, row, Val{:push}(), promote)
end

"""
pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false)
pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
cols::Symbol=:setequal, promote::Bool=false)
pushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,
Tables.AbstractRow};
Tables.AbstractRow}...;
cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))

Add one row at the beginning of `df` in-place, taking the values from `row`.
bkamins marked this conversation as resolved.
Show resolved Hide resolved
Several rows can be added by passing them as separate arguments.

$INSERTION_COMMON

Expand Down Expand Up @@ -539,13 +629,30 @@ julia> pushfirst!(df, NamedTuple(), cols=:subset)
6 │ a 1 missing
7 │ b 2 missing
8 │ c 3 missing

julia> pushfirst!(DataFrame(a=1, b=2), (3, 4), (5, 6))
3×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 3 4
2 │ 5 6
3 │ 1 2
```
"""
Base.pushfirst!(df::DataFrame, row::Any; promote::Bool=false) =
_row_inserter!(df, -1, row, Val{:pushfirst}(), promote)
function Base.pushfirst!(df::DataFrame, row::Any;
cols=:setequal, promote::Bool=false)
if cols !== :setequal
throw(ArgumentError("`cols` can only be `:setequal` when `row` is a `$(typeof(row))` " *
"as this type does not provide column names"))
end

return _row_inserter!(df, -1, row, Val{:pushfirst}(), promote)
end

"""
insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray}; promote::Bool=false)
insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray};
cols::Symbol=:setequal, promote::Bool=false)
insert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple,
AbstractDict, Tables.AbstractRow};
cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))
Expand Down Expand Up @@ -629,7 +736,13 @@ julia> insert!(df, 3, NamedTuple(), cols=:subset)
8 │ 1.0 missing 1.0
```
"""
function Base.insert!(df::DataFrame, index::Integer, row::Any; promote::Bool=false)
function Base.insert!(df::DataFrame, index::Integer, row::Any;
cols=:setequal, promote::Bool=false)
if cols !== :setequal
throw(ArgumentError("`cols` can only be `:setequal` when `row` is a `$(typeof(row))` " *
"as this type does not provide column names"))
end

index isa Bool && throw(ArgumentError("invalid index: $index of type Bool"))
1 <= index <= nrow(df)+1 ||
throw(ArgumentError("invalid index: $index for data frame with $(nrow(df)) rows"))
Expand Down Expand Up @@ -986,3 +1099,41 @@ function _row_inserter!(df::DataFrame, loc::Integer,
_drop_all_nonnote_metadata!(df)
return df
end

function Base.push!(df::DataFrame, @nospecialize rows...;
cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if isempty(rows)
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
throw(ArgumentError("`cols` keyword argument must be " *
":orderequal, :setequal, :intersect, :subset or :union)"))
end
end
bkamins marked this conversation as resolved.
Show resolved Hide resolved
with_names_count = count(rows) do row
row isa Union{DataFrameRow,AbstractDict,NamedTuple,Tables.AbstractRow}
end
if 0 < with_names_count < length(rows)
throw(ArgumentError("Mixing rows with column names and without column names " *
"in a single `push!` call is not allowed"))
end
return foldl((df, row) -> push!(df, row, cols=cols, promote=promote), rows, init=df)
end

function Base.pushfirst!(df::DataFrame, @nospecialize rows...;
cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if isempty(rows)
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
throw(ArgumentError("`cols` keyword argument must be " *
":orderequal, :setequal, :intersect, :subset or :union)"))
end
end
bkamins marked this conversation as resolved.
Show resolved Hide resolved
with_names_count = count(rows) do row
row isa Union{DataFrameRow,AbstractDict,NamedTuple,Tables.AbstractRow}
end
if 0 < with_names_count < length(rows)
throw(ArgumentError("Mixing rows with column names and without column names " *
"in a single `push!` call is not allowed"))
end
return foldr((row, df) -> pushfirst!(df, row, cols=cols, promote=promote), rows, init=df)
end
20 changes: 1 addition & 19 deletions src/other/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,31 +63,13 @@ function DataFrame(x; copycols::Union{Nothing, Bool}=nothing)
end

# the logic here relies on the fact that Tables.CopiedColumns
# is the only exception for default copycols value
# is the only exception for default copycols value
DataFrame(x, cnames::AbstractVector; makeunique::Bool=false,
copycols::Union{Nothing, Bool}=nothing) =
rename!(DataFrame(x, copycols=something(copycols, !(x isa Tables.CopiedColumns))),
_name2symbol(cnames),
makeunique=makeunique)

function Base.append!(df::DataFrame, table; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if table isa Dict && cols == :orderequal
throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
"`:orderequal` is not allowed as it is unordered"))
end
append!(df, DataFrame(table, copycols=false), cols=cols, promote=promote)
end

function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
if table isa Dict && cols == :orderequal
throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
"`:orderequal` is not allowed as it is unordered"))
end
prepend!(df, DataFrame(table, copycols=false), cols=cols, promote=promote)
end

# This supports the Tables.RowTable type; needed to avoid ambiguities w/ another constructor
DataFrame(x::AbstractVector{NamedTuple{names, T}}; copycols::Bool=true) where {names, T} =
fromcolumns(Tables.columns(Tables.IteratorWrapper(x)), collect(names), copycols=false)
Expand Down
Loading
Loading