From c57f2e9d7e15a13e057e669efabc12876c187b8a Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Thu, 22 Feb 2024 22:37:38 +0100 Subject: [PATCH] contributors --- docs/Project.toml | 1 + docs/src/.vitepress/config.mts | 1 + docs/src/UserGuide/broadcast.jl | 6 +- docs/src/UserGuide/contributors.jl | 20 +++ docs/src/UserGuide/group_by.md | 193 +++++++++++++++++++++++++---- docs/src/contributors.md | 56 +++++++-- docs/src/index.md | 1 + 7 files changed, 240 insertions(+), 38 deletions(-) create mode 100644 docs/src/UserGuide/contributors.jl diff --git a/docs/Project.toml b/docs/Project.toml index a482e44d..3100a285 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -13,6 +13,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" +GitHub = "bc5e4493-9b4d-5f90-b8aa-2b2bcaad7a26" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts index 8da3c15b..96dfc3f9 100644 --- a/docs/src/.vitepress/config.mts +++ b/docs/src/.vitepress/config.mts @@ -90,6 +90,7 @@ export default defineConfig({ { text: 'How do I ...', link: '/HowdoI/howdoi' }, { text: 'Contribute to docs', link: '/HowdoI/contribute' } ]}, + { text: 'Contributors', link: '/contributors' }, { text: 'API', items: [ { text: 'API Reference', link: 'api' }, diff --git a/docs/src/UserGuide/broadcast.jl b/docs/src/UserGuide/broadcast.jl index f7d51614..133f8ff1 100644 --- a/docs/src/UserGuide/broadcast.jl +++ b/docs/src/UserGuide/broadcast.jl @@ -5,6 +5,7 @@ using Downloads using Dates using Statistics using GLMakie + # url_path = "https://github.com/pydata/xarray-data/blob/master/" # filename = Downloads.download(url_path, "rasm.nc") #ds = Cube(filename) @@ -38,15 +39,14 @@ weights = map(./, g_tempo, sum_days) # verify that the sum per season is 1. sum.(weights) -# None of this work, they hang forever. # reading a million times the file, related to the othe issue? about indexing. g_dsW = broadcast_dims.(*, weights, g_ds) # -weighted_g = sum.(g_dsW, dims = :Ti) +weighted_g = sum.(g_dsW, dims = :Ti) # and lets drop the Time dimension weighted_g = dropdims.(weighted_g, dims=:Ti) ds_diff = map(.-, weighted_g, mean_g) -# plot arguments/attributes +# define plot arguments/attributes colorrange = (-30,30) colormap = Reverse(:Spectral) highclip = :red diff --git a/docs/src/UserGuide/contributors.jl b/docs/src/UserGuide/contributors.jl new file mode 100644 index 00000000..61effd54 --- /dev/null +++ b/docs/src/UserGuide/contributors.jl @@ -0,0 +1,20 @@ +using GitHub + +function getavatars(; n = 90) + contri = GitHub.contributors("JuliaDataCubes/YAXArrays.jl")[1] + avatars = [] + contributions = [] + urls = [] + for i in eachindex(contri) + push!(avatars, contri[i]["contributor"].avatar_url.uri) + push!(contributions, contri[i]["contributions"]) + push!(urls, contri[i]["contributor"].html_url.uri) + end + p = sortperm(contributions, rev=true) + return avatars[p], urls[p] +end + +avatars, urls = getavatars(; n = 90) +for (i,a) in enumerate(avatars) + println("""""") +end \ No newline at end of file diff --git a/docs/src/UserGuide/group_by.md b/docs/src/UserGuide/group_by.md index 872029e7..ba14a6af 100644 --- a/docs/src/UserGuide/group_by.md +++ b/docs/src/UserGuide/group_by.md @@ -1,57 +1,204 @@ # GroupBy -In the following example we will use the `groupby` function to calculate temporal and spatial averages. -````@example groupby +The following examples will use the `groupby` function to calculate temporal and spatial averages. + +````@example compareXarray using YAXArrays, DimensionalData using NetCDF using Downloads +using Dates +using Statistics ```` -````@example compare_with_xarray -url_path = "https://github.com/pydata/xarray-data/blob/master/" +### Seasonal Averages from Time Series of Monthly Means + +The following reproduces the example in [xarray](https://docs.xarray.dev/en/stable/examples/monthly-means.html) by [Joe Hamman](https://github.com/jhamman/). + +Where the goal is to calculate the seasonal average. And in order to do this properly, is necessary to calculate the weighted average considering that each month has a different number of days. + +### Download the data + +````@example compareXarray +url_path = "https://github.com/pydata/xarray-data/raw/master/rasm.nc" filename = Downloads.download(url_path, "rasm.nc") -ds = Cube(filename) +ds_o = Cube(filename) +nothing # hide ```` -````@example compare_with_xarray -g = groupby(ds, Ti => season(; start=December)) -m_g = mean.(g, dims=:Ti) -dropdims.(m_g, dims=:Ti) -```` +::: warning -# Create a named array for the month length +The following rebuild should not be necessary in the next release, plus is unpractical to use for large data sets. +Related to https://github.com/rafaqz/DimensionalData.jl/issues/642 -````@example compare_with_xarray -tempo = dims(ds, :Ti) -month_length = YAXArray((tempo,), daysinmonth.(tempo)) +::: + +````@example compareXarray + +axs = dims(ds_o) # get the dimensions +data = ds_o.data[:,:,:] # read the data +_FillValue = ds_o.properties["_FillValue"] +data = replace(data, _FillValue => NaN) +# create new YAXArray +ds = YAXArray(axs, data) +nothing # hide ```` -::: info +## GroupBy: seasons -The same is possible with a pure DimArray, namely +::: details function weighted_season(ds) ... end ````julia -month_length = DimArray(daysinmonth.(tempo), (tempo)) +function weighted_season(ds) + # calculate weights + tempo = dims(ds, :Ti) + month_length = YAXArray((tempo,), daysinmonth.(tempo)) + g_tempo = groupby(month_length, Ti => season(; start=December)) + sum_days = sum.(g_tempo, dims=:Ti) + weights = map(./, g_tempo, sum_days) + # unweighted seasons + g_ds = groupby(ds, Ti => season(; start=December)) + mean_g = mean.(g_ds, dims=:Ti) + mean_g = dropdims.(mean_g, dims=:Ti) + # weighted seasons + g_dsW = broadcast_dims.(*, weights, g_ds) + weighted_g = sum.(g_dsW, dims = :Ti); + weighted_g = dropdims.(weighted_g, dims=:Ti) + # differences + diff_g = map(.-, weighted_g, mean_g) + seasons = lookup(mean_g, :Ti) + return mean_g, weighted_g, diff_g, seasons +end ```` - ::: +Now, we continue with the `groupby` operations as usual -g_tempo = groupby(month_length, Ti => season(; start=December)) +````@ansi compareXarray +g_ds = groupby(ds, Ti => season(; start=December)) +```` -sum_days = sum.(g_tempo, dims=:Ti) +And the mean per season is calculated as follows + +````@ansi compareXarray +mean_g = mean.(g_ds, dims=:Ti) +```` + +### dropdims + +Note that now the time dimension has length one, we can use `dropdims` to remove it + +````@ansi compareXarray +mean_g = dropdims.(mean_g, dims=:Ti) +```` + +### seasons + +Due to the `groupby` function we will obtain new grouping names, in this case in the time dimension: + +````@example compareXarray +seasons = lookup(mean_g, :Ti) +```` + +Next, we will weight this grouping by days/month in each group. + +## GroupBy: weight + +Create a `YAXArray` for the month length -```julia +````@example compareXarray +tempo = dims(ds, :Ti) month_length = YAXArray((tempo,), daysinmonth.(tempo)) +```` + +Now group it by season + +````@ansi compareXarray g_tempo = groupby(month_length, Ti => season(; start=December)) +```` + +Get the number of days per season + +````@ansi compareXarray sum_days = sum.(g_tempo, dims=:Ti) +```` + +### weights + +Weight the seasonal groups by `sum_days` + +````@ansi compareXarray weights = map(./, g_tempo, sum_days) +```` + +Verify that the sum per season is 1 + +````@ansi compareXarray sum.(weights) +```` +### weighted seasons -g_ds = groupby(ds, Ti => season(; start=December)) +Now, let's weight the seasons + +````@ansi compareXarray +g_dsW = broadcast_dims.(*, weights, g_ds) +```` + +apply a `sum` over the time dimension and drop it + +````@ansi compareXarray +weighted_g = sum.(g_dsW, dims = :Ti); +weighted_g = dropdims.(weighted_g, dims=:Ti) +```` + +Calculate the differences +````@ansi compareXarray +diff_g = map(.-, weighted_g, mean_g) ```` -g_ds .* weights +All the previous steps are equivalent to calling the function defined at the top: +````julia +mean_g, weighted_g, diff_g, seasons = weighted_season(ds) +```` + +Once all calculations are done we can plot the results with `Makie.jl` as follows: + +````@example compareXarray +using CairoMakie +# define plot arguments/attributes +colorrange = (-30,30) +colormap = Reverse(:Spectral) +highclip = :red +lowclip = :grey15 +cb_label = ds_o.properties["long_name"] +```` + +````@example compareXarray +with_theme(theme_ggplot2()) do + hm_o, hm_d, hm_w = nothing, nothing, nothing + # the figure + fig = Figure(; size = (850,500)) + axs = [Axis(fig[i,j], aspect=DataAspect()) for i in 1:3, j in 1:4] + for (j, s) in enumerate(seasons) + hm_o = heatmap!(axs[1,j], mean_g[Ti=At(s)]; colorrange, lowclip, highclip, colormap) + hm_w = heatmap!(axs[2,j], weighted_g[Ti=At(s)]; colorrange, lowclip, highclip, colormap) + hm_d = heatmap!(axs[3,j], diff_g[Ti=At(s)]; colorrange=(-0.1,0.1), lowclip, highclip, + colormap=:diverging_bwr_20_95_c54_n256) + end + Colorbar(fig[1:2,5], hm_o, label=cb_label) + Colorbar(fig[3,5], hm_d, label="Tair") + hidedecorations!.(axs, grid=false, ticks=false, label=false) + # some labels + [axs[1,j].title = string.(s) for (j,s) in enumerate(seasons)] + Label(fig[0,1:5], "Seasonal Surface Air Temperature", fontsize=18, font=:bold) + axs[1,1].ylabel = "Unweighted" + axs[2,1].ylabel = "Weighted" + axs[3,1].ylabel = "Difference" + colgap!(fig.layout, 5) + rowgap!(fig.layout, 5) + fig +end +```` +which shows a good agreement with the results first published by [Joe Hamman](https://github.com/jhamman/). \ No newline at end of file diff --git a/docs/src/contributors.md b/docs/src/contributors.md index d012f049..409bb8c2 100644 --- a/docs/src/contributors.md +++ b/docs/src/contributors.md @@ -48,17 +48,14 @@ const coreMembers = [ } ] -const partners =[ - { - avatar: 'https://www.github.com/yyx990803.png', - name: 'John', - title: 'Creator' - }, - { - avatar: 'https://www.github.com/yyx990803.png', - name: 'Doe' - }, - ] +// const partners =[ +// { +// avatar: 'https://www.github.com/yyx990803.png', +// }, +// { +// avatar: 'https://www.github.com/yyx990803.png', +// }, +// ] @@ -76,11 +73,46 @@ const partners =[ + + ``` \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index 35906a4e..fb3c45d5 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -29,6 +29,7 @@ features: details: Well integrated with Julia's ecosystem, i.e., distributed operations are native. And plotting with Makie.jl is well supported. - title: Named dimensions and GroupBy details: Apply operations over named dimensions, select values by labels and integers as well as efficient split-apply-combine operations with groupby via DimensionalData.jl. + link: /UserGuide/group_by - title: Efficiency details: Efficient mapslices(x) and mapCube operations on huge multiple arrays, optimized for high-latency data access (object storage, compressed datasets). ``` \ No newline at end of file