From bb633619d0e393e392386e8adc905c40d2124568 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <84105208+topolarity@users.noreply.github.com>
Date: Sat, 7 Sep 2024 06:35:00 -0400
Subject: [PATCH 1/4] Remove REPL dependency (#3459)

---
 Project.toml       |  1 -
 src/DataFrames.jl  |  2 +-
 src/other/index.jl | 22 +++++++++++++++++++++-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index f76100bdf..5ba3ac209 100644
--- a/Project.toml
+++ b/Project.toml
@@ -18,7 +18,6 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/src/DataFrames.jl b/src/DataFrames.jl
index debd309f5..c85f16a70 100644
--- a/src/DataFrames.jl
+++ b/src/DataFrames.jl
@@ -1,6 +1,6 @@
 module DataFrames
 
-using Statistics, Printf, REPL
+using Statistics, Printf
 using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays
 @reexport using Missings, InvertedIndices
 using Base.Sort, Base.Order, Base.Iterators, Base.Threads
diff --git a/src/other/index.jl b/src/other/index.jl
index ae9358d38..61341b0c2 100644
--- a/src/other/index.jl
+++ b/src/other/index.jl
@@ -294,6 +294,26 @@ end
 @inline Base.getindex(x::AbstractIndex, rx::Regex) =
     getindex(x, filter(name -> occursin(rx, String(name)), _names(x)))
 
+# Levenshtein (edit) distance between the element sequences of `s1` and `s2`, returned as an `Int`
+# taken from https://github.com/JuliaLang/julia/blob/b5af119a6c608de43d6591a6c4129e9369239898/stdlib/REPL/src/docview.jl#L760-L776
+function _levenshtein(s1, s2)
+    a, b = collect(s1), collect(s2)  # materialize both inputs so they can be indexed by position
+    m = length(a)
+    n = length(b)
+    d = Matrix{Int}(undef, m+1, n+1)  # d[i+1, j+1]: distance between first i of `a` and first j of `b`
+
+    d[1:m+1, 1] = 0:m  # against an empty prefix of `b` the distance is the prefix length of `a`
+    d[1, 1:n+1] = 0:n  # and symmetrically against an empty prefix of `a`
+
+    for i = 1:m, j = 1:n
+        d[i+1,j+1] = min(d[i  , j+1] + 1,  # drop a[i]
+                         d[i+1, j  ] + 1,  # insert b[j]
+                         d[i  , j  ] + (a[i] != b[j]))  # substitute; costs 0 when elements match
+    end
+
+    return d[m+1, n+1]
+end
+
 # Fuzzy matching rules:
 # 1. ignore case
 # 2. maximum Levenshtein distance is 2
@@ -302,7 +322,7 @@ end
 # Returns candidates ordered by (distance, name) pair
 function fuzzymatch(l::Dict{Symbol, Int}, idx::Symbol)
         idxs = uppercase(string(idx))
-        dist = [(REPL.levenshtein(uppercase(string(x)), idxs), x) for x in keys(l)]
+        dist = [(_levenshtein(uppercase(string(x)), idxs), x) for x in keys(l)]
         sort!(dist)
         c = [count(x -> x[1] <= i, dist) for i in 0:2]
         maxd = max(0, searchsortedlast(c, 8) - 1)

From 97bbb40a9aa74a1517a420c0f906cb5b98d8cff3 Mon Sep 17 00:00:00 2001
From: sprig <kosta@slumpy.org>
Date: Sat, 7 Sep 2024 03:38:25 -0700
Subject: [PATCH 2/4] Update filter docs, Fixes #3460 (#3461)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update filter docs

* Apply suggestions from code review

---------

Co-authored-by: Bogumił Kamiński <bkamins@sgh.waw.pl>
---
 src/abstractdataframe/abstractdataframe.jl | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
index e8f4e32ed..f549a07e8 100644
--- a/src/abstractdataframe/abstractdataframe.jl
+++ b/src/abstractdataframe/abstractdataframe.jl
@@ -1151,6 +1151,11 @@ data frames.
     function instead as it is consistent with other DataFrames.jl functions
     (as opposed to `filter`).
 
+!!! note
+
+    The `filter(cols => fun, df::AbstractDataFrame; view::Bool=false)` call is type stable
+    and is therefore preferred in performance-critical applications.
+
 $METADATA_FIXED
 
 See also: [`filter!`](@ref)
@@ -1281,6 +1286,11 @@ data frames.
     function instead as it is consistent with other DataFrames.jl functions
     (as opposed to `filter!`).
 
+!!! note
+
+    The `filter!(cols => fun, df::AbstractDataFrame)` call is type stable
+    and is therefore preferred in performance-critical applications.
+
 $METADATA_FIXED
 
 See also: [`filter`](@ref)

From 1761261e432ec923f8750e929d986d398bb60d31 Mon Sep 17 00:00:00 2001
From: Daniel Rizk <124117406+drizk1@users.noreply.github.com>
Date: Sat, 7 Sep 2024 06:48:34 -0400
Subject: [PATCH 3/4] Add TidierData to frameworks docs page (#3447)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add tidierdata to frameworks

* adds TidierData to docs toml

* change from begin end block

* add @kdpsingh edits

* Apply suggestions from code review

---------

Co-authored-by: Bogumił Kamiński <bkamins@sgh.waw.pl>
---
 docs/Project.toml                   |   1 +
 docs/src/man/querying_frameworks.md | 139 ++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)

diff --git a/docs/Project.toml b/docs/Project.toml
index f6a9f940e..d821a4f08 100755
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -9,6 +9,7 @@ Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 Query = "1a8c2f83-1ff3-5112-b086-8aa67b057ba1"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+TidierData = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
 
 [compat]
 Documenter = "1"
diff --git a/docs/src/man/querying_frameworks.md b/docs/src/man/querying_frameworks.md
index abda7ec6f..dad7471b2 100644
--- a/docs/src/man/querying_frameworks.md
+++ b/docs/src/man/querying_frameworks.md
@@ -8,6 +8,145 @@ DataFramesMeta.jl, DataFrameMacros.jl and Query.jl. They implement a functionali
 These frameworks are designed both to make it easier for new users to start working with data frames in Julia
 and to allow advanced users to write more compact code.
 
+## TidierData.jl
+[TidierData.jl](https://tidierorg.github.io/TidierData.jl/latest/), part of 
+the [Tidier](https://tidierorg.github.io/Tidier.jl/dev/) ecosystem, is a macro-based 
+data analysis interface that wraps DataFrames.jl.  The instructions below are for version 
+0.16.0 of TidierData.jl.
+
+First, install the TidierData.jl package:
+
+```julia
+using Pkg
+Pkg.add("TidierData")
+```
+
+TidierData.jl enables clean, readable, and fast code for all major data transformation 
+functions including 
+[aggregating](https://tidierorg.github.io/TidierData.jl/latest/examples/generated/UserGuide/summarize/), 
+[pivoting](https://tidierorg.github.io/TidierData.jl/latest/examples/generated/UserGuide/pivots/), 
+[nesting](https://tidierorg.github.io/TidierData.jl/latest/examples/generated/UserGuide/nesting/), 
+and [joining](https://tidierorg.github.io/TidierData.jl/latest/examples/generated/UserGuide/joins/) 
+data frames. TidierData re-exports `DataFrame` from DataFrames.jl, `@chain` from Chain.jl, and 
+Statistics.jl to streamline data operations. 
+
+TidierData.jl is heavily inspired by the `dplyr` and `tidyr` R packages (part of the R 
+`tidyverse`), which it aims to implement using pure Julia by wrapping DataFrames.jl. While
+TidierData.jl borrows conventions from the `tidyverse`, it is important to note that the 
+`tidyverse` itself is often not considered idiomatic R code. TidierData.jl brings 
+data analysis conventions from `tidyverse` into Julia to have the best of both worlds: 
+tidy syntax and the speed and flexibility of the Julia language.
+
+TidierData.jl has two major differences from other macro-based packages. First, TidierData.jl 
+uses tidy expressions. An example of a tidy expression is `a = mean(b)`, where `b` refers 
+to an existing column in the data frame, and `a` refers to either a new or existing column. 
+Referring to variables outside of the data frame requires prefixing variables with `!!`. 
+For example, `a = mean(!!b)` refers to a variable `b` outside the data frame. Second, 
+TidierData.jl aims to make broadcasting mostly invisible through 
+[auto-vectorization](https://tidierorg.github.io/TidierData.jl/latest/examples/generated/UserGuide/autovec/). TidierData.jl currently uses a lookup table to decide which functions not to 
+vectorize; all other functions are automatically vectorized. This allows for 
+writing of concise expressions: `@mutate(df, a = a - mean(a))` transforms the `a` column 
+by subtracting the mean of the column from each value. Behind the scenes, the right-hand
+expression is converted to `a .- mean(a)` because `mean()` is in the lookup table as a 
+function that should not be vectorized. Take a look at the 
+[auto-vectorization](https://tidierorg.github.io/TidierData.jl/latest/examples/generated/UserGuide/autovec/) documentation for details.
+
+One major benefit of combining tidy expressions with auto-vectorization is that 
+TidierData.jl code (which uses DataFrames.jl as its backend) can work directly on 
+databases using [TidierDB.jl](https://github.com/TidierOrg/TidierDB.jl), 
+which converts tidy expressions into SQL, supporting DuckDB and several other backends.
+
+```jldoctest tidierdata
+julia> using TidierData
+
+julia> df = DataFrame(
+                name = ["John", "Sally", "Roger"],
+                age = [54.0, 34.0, 79.0],
+                children = [0, 2, 4]
+            )
+3×3 DataFrame
+ Row │ name    age      children
+     │ String  Float64  Int64
+─────┼───────────────────────────
+   1 │ John       54.0         0
+   2 │ Sally      34.0         2
+   3 │ Roger      79.0         4
+
+julia> @chain df begin
+           @filter(children != 2)
+           @select(name, num_children = children)
+       end
+2×2 DataFrame
+ Row │ name    num_children 
+     │ String  Int64        
+─────┼──────────────────────
+   1 │ John               0
+   2 │ Roger              4
+```
+
+Below are examples showcasing `@group_by` with `@summarize` or `@mutate`, analogous to the split-apply-combine pattern.
+
+```jldoctest tidierdata
+julia> df = DataFrame(
+                groups = repeat('a':'e', inner = 2), 
+                b_col = 1:10, 
+                c_col = 11:20, 
+                d_col = 111:120
+            )
+10×4 DataFrame
+ Row │ groups  b_col  c_col  d_col 
+     │ Char    Int64  Int64  Int64 
+─────┼─────────────────────────────
+   1 │ a           1     11    111
+   2 │ a           2     12    112
+   3 │ b           3     13    113
+   4 │ b           4     14    114
+   5 │ c           5     15    115
+   6 │ c           6     16    116
+   7 │ d           7     17    117
+   8 │ d           8     18    118
+   9 │ e           9     19    119
+  10 │ e          10     20    120
+
+julia> @chain df begin
+           @filter(b_col > 2)
+           @group_by(groups)
+           @summarise(median_b = median(b_col), 
+                      across((b_col:d_col), mean))   
+       end
+4×5 DataFrame
+ Row │ groups  median_b  b_col_mean  c_col_mean  d_col_mean 
+     │ Char    Float64   Float64     Float64     Float64    
+─────┼──────────────────────────────────────────────────────
+   1 │ b            3.5         3.5        13.5       113.5
+   2 │ c            5.5         5.5        15.5       115.5
+   3 │ d            7.5         7.5        17.5       117.5
+   4 │ e            9.5         9.5        19.5       119.5
+
+julia> @chain df begin
+           @filter(b_col > 4 && c_col <= 18)
+           @group_by(groups)
+           @mutate(
+               new_col = b_col + maximum(d_col),
+               new_col2 = c_col - maximum(d_col),
+               new_col3 = case_when(c_col >= 18  => "high",
+                                    c_col > 15   => "medium",
+                                    true         => "low"))
+           @select(starts_with("new"))
+           @ungroup # required because `@mutate` does not ungroup
+       end
+4×4 DataFrame
+ Row │ groups  new_col  new_col2  new_col3 
+     │ Char    Int64    Int64     String   
+─────┼─────────────────────────────────────
+   1 │ c           121      -101  low
+   2 │ c           122      -100  medium
+   3 │ d           125      -101  medium
+   4 │ d           126      -100  high
+```
+
+For more examples, please visit the [TidierData.jl](https://tidierorg.github.io/TidierData.jl/latest/) documentation.
+
 ## DataFramesMeta.jl
 
 The [DataFramesMeta.jl](https://github.com/JuliaStats/DataFramesMeta.jl) package

From 96839313f523e98b894459f4bba959c21febd7f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 8 Sep 2024 10:50:16 +0200
Subject: [PATCH 4/4] fix tests on nightly and 32-bit (#3463)

---
 src/groupeddataframe/complextransforms.jl | 8 ++++++--
 test/io.jl                                | 6 +++---
 test/select.jl                            | 2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl
index 0f7fea661..1a907de6f 100644
--- a/src/groupeddataframe/complextransforms.jl
+++ b/src/groupeddataframe/complextransforms.jl
@@ -269,8 +269,12 @@ function _combine_rows_with_first!((firstrow,)::Ref{Any},
     # Create up to one task per thread
     # This has lower overhead than creating one task per group,
     # but is optimal only if operations take roughly the same time for all groups
-    basesize = max(1, cld(len - 1, Threads.nthreads()))
-    partitions = Iterators.partition(2:len, basesize)
+    if isthreadsafe(outcols, incols)
+        basesize = max(1, cld(len - 1, Threads.nthreads()))
+        partitions = Iterators.partition(2:len, basesize)
+    else
+        partitions = (2:len,)
+    end
     widen_type_lock = ReentrantLock()
     outcolsref = Ref{NTuple{<:Any, AbstractVector}}(outcols)
     type_widened = fill(false, length(partitions))
diff --git a/test/io.jl b/test/io.jl
index dee15ea7c..b566c655e 100644
--- a/test/io.jl
+++ b/test/io.jl
@@ -760,7 +760,7 @@ end
     df = DataFrame(
         A=Int64[1,4,9,16,25,36,49,64],
         B = [
-            md"[DataFrames.jl](http://juliadata.github.io/DataFrames.jl)",
+            md"ABC",
             md"``\frac{x^2}{x^2+y^2}``",
             md"`Header`",
             md"This is *very*, **very**, very, very, very, very, very, very, very long line" ,
@@ -781,7 +781,7 @@ end
          Row │ A      B
              │ Int64  MD
         ─────┼──────────────────────────────────────────
-           1 │     1    DataFrames.jl (http://juliadat…
+           1 │     1    ABC
            2 │     4    \\frac{x^2}{x^2+y^2}
            3 │     9    Header
            4 │    16    This is very, very, very, very…
@@ -793,7 +793,7 @@ end
     @test sprint(show, "text/csv", df) ==
         """
         \"A\",\"B\"
-        1,\"[DataFrames.jl](http://juliadata.github.io/DataFrames.jl)\"
+        1,\"ABC\"
         4,\"\$\\\\frac{x^2}{x^2+y^2}\$\"
         9,\"`Header`\"
         16,\"This is *very*, **very**, very, very, very, very, very, very, very long line\"
diff --git a/test/select.jl b/test/select.jl
index 3a8ad3b23..02d84d4b9 100644
--- a/test/select.jl
+++ b/test/select.jl
@@ -3039,7 +3039,7 @@ end
     @test size(combine(df, :a => (x -> Any[]) => AsTable)) == (0, 0)
     df2 = combine(df, :a => (x -> NamedTuple{(:x,),Tuple{Int64}}[]) => AsTable)
     @test size(df2) == (0, 1)
-    @test eltype(df2.x) === Int
+    @test eltype(df2.x) === Int64
 end
 
 end # module