diff --git a/Project.toml b/Project.toml index adca4bcc..708060e7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ClimateBase" uuid = "35604d93-0fb8-4872-9436-495b01d137e2" authors = ["Datseris ", "Philippe Roy "] -version = "0.9.0" +version = "0.9.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/docs/src/index.md b/docs/src/index.md index 7aede08f..85d9d413 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -165,18 +165,23 @@ time_in_days Most of the time the spatial information of your data is in the form of a Longitude × Latitude grid. This is simply achieved via the existence of two dimensions (`Lon, Lat`) in your dimensional data array. Height, although representing physical space as well, is not considered part of the "spatial dimensions", and is treated as any other additional dimension. This type of space is called `Grid`. It is assumed throughout that longitude and latitude are measured in **degrees**. -Another type of spatial coordinates is supported, and that is of **equal-area**, called `EqArea`. -There, the spatial dimension is instead given by a single `Vector` of coordinate locations, i.e. 2-element `SVector(longitude, latitude)`. The dimension of this vector is `Coord`. +Another type of spatial coordinates is supported, and that is of **equal-area**, called `EqArea`. In `EqArea` the spatial dimension is instead given by a single `Vector` of coordinate locations, i.e. 2-element `SVector(longitude, latitude)`. The dimension of this vector is `Coord`. Each point in this vector corresponds to a polygon (typically triangle or trapezoid) that covers equal amount of spatial area as any other point. The actual limits of each polygon are not included in the dimension. Typical examples of such equal area grids are reduced (or thinned) Gaussian grids or icosahedral-based grids. +!!! warn + + This `EqArea` type is currently in an **experimental phase** and in addition some functions assume that the underlying points are formulated in a Gaussian equal area grid, where the points are sorted by their latitude. + Within ClimateBase.jl aims to work with either type of spatial coordinate system. Therefore, physically inspired averaging functions, like [`spacemean`](@ref) or [`zonalmean`](@ref), work for both types of spatial coordinates. In addition, the function `spatialidxs` returns an iterator over the spatial coordinates of the data, and works for both types (grid or equal-area): ```@docs spatialidxs ``` + + ### Spatial aggregation ```@docs zonalmean @@ -195,6 +200,27 @@ dropagg collapse ``` +### Equal area creation + +At the moment, support for auto-loading equal area space types from `.nc` data does not exist. +You can make them yourself using the following approach: +```julia +file = NCDataset("some_file_with_eqarea.nc") +# the following lines make the coordinates of the equal area, which depends on +# how your .nc file is structured +lons = Array(file["lon"]) +lats = Array(file["lat"]) +coords = [SVector(lo, la) for (lo, la) in zip(lons, lats)] +# Sort points by their latitude (important!) +si = sortperm(coords, by = reverse) +# Load some remaining dimensions and create the proper `Dimension` tuple: +t = Array(file["time"]) +dimensions = (Coord(coords), Time(t)) +# Finally load the array data and make a ClimArray +data = Array(file["actual_data_like_radiation"]) +A = ClimArray(data, dimensions) +``` + ## Timeseries Analysis ```@docs sinusoidal_continuation diff --git a/src/ClimateBase.jl b/src/ClimateBase.jl index 9f8d4ac3..290de8ea 100644 --- a/src/ClimateBase.jl +++ b/src/ClimateBase.jl @@ -6,6 +6,7 @@ include("core/nc_io.jl") include("core/aggregation.jl") include("physical_dimensions/spatial.jl") +include("physical_dimensions/spatial_equalarea.jl") include("physical_dimensions/temporal.jl") include("climate/solar.jl") diff --git a/src/core/nc_io.jl b/src/core/nc_io.jl index c27524fd..4b65b7c1 100644 --- a/src/core/nc_io.jl +++ b/src/core/nc_io.jl @@ -62,20 +62,6 @@ We do two performance improvements while loading the data: 2. Dimensions that are ranges (i.e. sampled with constant step size) are automatically transformed to a standard Julia `Range` type (which makes sub-selecting faster). - -At the moment, support for auto-loading equal area space types does not exist, -see [Types of spatial coordinates](@ref). -But can transform them yourself into a `ClimArray` by doing e.g.: -```julia -file = NCDataset("some_file_with_eqarea.nc") -lons = file["lon"] -lats = file["lat"] -coords = [SVector(lo, la) for (lo, la) in zip(lons, lats)] -t = file["time"] -dimensions = (Coord(coords), Time(t)) -data = file["actual_data_like_radiation"] -A = ClimArray(data, dimensions) -``` """ function ClimArray(path::Union{String, Vector{String}}, args...; kwargs...) NCDataset(path) do ds @@ -90,40 +76,16 @@ end # must be sure to load the correct ranges of dimensions as well though! function ClimArray(ds::NCDatasets.AbstractDataset, var::String, name = var; eqarea = false) + eqarea && return ClimArray_eqarea(ds, var, name) svar = string(var) cfvar = ds[svar] attrib = Dict(cfvar.attrib) A = cfvar |> Array - if eqarea - # TODO: This piece of code is specific to CDO output... - if haskey(ds, "ncells") # this is the equal area grid, so we make a Coord dimension - lon = ds["lon"] |> Array .|> wrap_lon - lat = ds["lat"] |> Array - time = ds["time"] |> Array - lonlat = [SVector(lon[i], lat[i]) for i in 1:length(lon)] - # here we sort lonlat and A in ascending latitude order, - # because the CDO output has reverse or even totally unsorted order - si = sortperm(lonlat, by = reverse) - data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); - attrib = attrib, name = svar) - elseif haskey(ds, "reduced_points") - # TODO: This can be easily upgraded to arbitary dimensions via a simple - # dimension replacement / permutation at the end - lonlat = reduced_grid_to_points(ds["lat"], ds["reduced_points"]) - si = sortperm(lonlat, by = reverse) - time = ds["time"] |> Array - data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); - name = svar, attrib = attrib) - else - error("Don't know how to handle this equal area grid!") - end - else # standard variables - dnames = Tuple(NCDatasets.dimnames(cfvar)) - data = ClimArray(A, create_dims(ds, dnames); name = Symbol(name), attrib = attrib) - end - if !any(ismissing, data) - data = nomissing(data) + dnames = Tuple(NCDatasets.dimnames(cfvar)) + if !any(ismissing, A) + A = nomissing(A) end + data = ClimArray(A, create_dims(ds, dnames); name = Symbol(name), attrib = attrib) return data end @@ -193,22 +155,6 @@ function vector2range(t::Vector{<:Date}) return r end - -######################################################################### -# Equal area related -######################################################################### -function reduced_grid_to_points(lat, reduced_points) - lonlat = SVector{2, Float32}[] - for (i, θ) in enumerate(lat) - n = reduced_points[i] - dλ = Float32(360/n) - for j in 0:n-1 - push!(lonlat, SVector(0 + dλ*j, θ)) - end - end - return lonlat -end - ######################################################################### # Saving to .nc files ######################################################################### diff --git a/src/physical_dimensions/spatial.jl b/src/physical_dimensions/spatial.jl index c260a2a7..8653abe0 100644 --- a/src/physical_dimensions/spatial.jl +++ b/src/physical_dimensions/spatial.jl @@ -27,10 +27,6 @@ function spatialidxs(::Grid, A) return Iterators.product(lons, lats) end -function spatialidxs(::EqArea, A) - return ((Coord(i),) for i in 1:size(A, Coord)) -end - """ lonlatfirst(A::ClimArray, args...) → B Permute the dimensions of `A` to make a new array `B` that has first dimension longitude, @@ -75,9 +71,6 @@ using StatsBase export latmean, spacemean, zonalmean, spaceagg, uniquelats -# TODO: Document what it means to be Coord space. -# We expect that the coordinates are sorted by latitude - """ zonalmean(A::ClimArray) Return the zonal mean of `A`. @@ -89,54 +82,6 @@ Works for both grid and equal area space. zonalmean(A::AbDimArray, args...) = zonalmean(spacestructure(A), A, args...) zonalmean(::Grid, A::AbDimArray) = dropagg(mean, A, Lon) -function zonalmean(::EqArea, A::AbDimArray) - idxs, lats = uniquelats(A) - other = otherdims(A, Coord()) - r = zeros(eltype(A), (length(lats), size.(Ref(A), other)...), other) - R = ClimArray(r, (Lat(lats), other...), A.name) - for (i, r) in enumerate(idxs) - for j in otheridxs(A, Coord()) - R[Lat(i), j...] = mean(view(A, Coord(r), j...)) - end - end - return R -end -function zonalmean(::EqArea, A::AbDimArray{T, 1}) where {T} - idxs, lats = uniquelats(A) - res = zeros(T, length(lats)) - for (i, r) in enumerate(idxs) - res[i] = mean(view(A.data, r)) - end - return ClimArray(res, (Lat(lats),)) -end - -""" - uniquelats(A::AbDimArray) → idxs, lats - uniquelats(c::Vector{<:AbstractVector}) → idxs, lats -Find the unique latitudes of `A`. Return the indices (vector of ranges) that each latitude -in `lats` covers, as well as the latitudes themselves. -""" -uniquelats(A::AbDimArray) = uniquelats(dims(A, Coord)) -function uniquelats(c) - @assert issorted(c; by = x -> x[2]) - idxs = Vector{UnitRange{Int}}() - lats = eltype(eltype(c))[] - sizehint!(lats, round(Int, sqrt(length(c)))) - iprev = 1 - for i in 2:length(c) - if c[i][2] != c[i-1][2] - push!(lats, c[i-1][2]) - push!(idxs, iprev:(i-1)) - iprev = i - end - end - push!(lats, c[end][2]) - push!(idxs, iprev:length(c)) - sizehint!(lats, length(lats)) - return idxs, lats -end - - """ latmean(A::ClimArray [, r]) Return the latitude-mean `A` (mean across dimension `Lat`). @@ -225,14 +170,6 @@ function spaceweightassert(A, w) return wtype end -spaceagg(::EqArea, f, A, ::Nothing) = dropagg(f, A, Coord) -# I think the best scenario is to modify `dropagg` to take in weights. -function spaceagg(::EqArea, f, A, exw) - error("TODO") - w = pweights(Array(exw)) - dropagg(f, A, Coord) -end - ######################################################################### # Hemispheric sum/difference ######################################################################### @@ -257,23 +194,6 @@ function hemispheric_functions(::Grid, A) return nh, sh end -function hemispheric_functions(::EqArea, A) - c = dims(A, Coord).val - @assert issorted(c; by = x -> x[2]) - shi, nhi = hemisphere_indices(c) - nh = A[Coord(nhi)] - sh = A[Coord(shi)] - oldc = dims(sh, Coord).val - si = sortperm(oldc, by = x -> x[2], rev = true) - newc = [SVector(x[1], abs(x[2])) for x in oldc[si]] - di = dimindex(sh, Coord) - newdims = Any[dims(sh)...] - newdims[di] = Coord(newc) - data = reverse(Array(sh); dims = di) - sh = ClimArray(data, (newdims...,)) - return nh, sh -end - """ hemispheric_means(A) → nh, sh Return the (proper) averages of `A` over the northern and southern hemispheres. @@ -293,13 +213,5 @@ function hemispheric_means(::Grid, A::AbDimArray) return nh, sh end -function hemispheric_means(::EqArea, A::AbDimArray) - shi, nhi = hemisphere_indices(c) - nh = dropagg(mean, A[Coord(nhi)], Coord) - sh = dropagg(mean, A[Coord(shi)], Coord) - return nh, sh -end - latitudes(A) = latitudes(spacestructure(A), A) latitudes(::Grid, A) = dims(A, Lat).val -latitudes(::EqArea, A) = unique!([x[2] for x in dims(A, Coord)]) diff --git a/src/physical_dimensions/spatial_equalarea.jl b/src/physical_dimensions/spatial_equalarea.jl new file mode 100644 index 00000000..00e8e5fc --- /dev/null +++ b/src/physical_dimensions/spatial_equalarea.jl @@ -0,0 +1,186 @@ +######################################################################### +# Loading equal area +######################################################################### + +function ClimArray_eqarea(ds::NCDatasets.AbstractDataset, var::String, name = var) + svar = string(var) + cfvar = ds[svar] + attrib = Dict(cfvar.attrib) + A = cfvar |> Array + # TODO: This code has not yet been generalized to arbitrary dimensions + if haskey(ds, "ncells") # this is the equal area grid, so we make a Coord dimension + lon = ds["lon"] |> Array .|> wrap_lon + lat = ds["lat"] |> Array + time = ds["time"] |> Array + lonlat = [SVector(lon[i], lat[i]) for i in 1:length(lon)] + # here we sort lonlat and A in ascending latitude order, + # because the CDO output has reverse or even totally unsorted order + si = sortperm(lonlat, by = reverse) + data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); + attrib = attrib, name = svar) + elseif haskey(ds, "reduced_points") + # TODO: I've noticed that this converts integer dimension (like pressure) + # into Float64, but I'm not sure why... + alldims = [NCDatasets.dimnames(cfvar)...] + @assert "rgrid" ∈ alldims + i = findfirst(x -> x == "rgrid", alldims) + remainingdims = deleteat!(copy(alldims), i) + actualdims = Any[create_dims(ds, remainingdims)...] + + lonlat = reduced_grid_to_points(ds["lat"], ds["reduced_points"]) + si = sortperm(lonlat, by = reverse) + coords = Coord(lonlat; metadata = Dict("grid" => "Gaussian equal area.")) + + insert!(actualdims, i, coords) + + X = ClimArray(A, Tuple(actualdims)) + X = X[Coord(si)] + return ClimArray(X; name = Symbol(name), attrib) + else + error("Don't know how to handle this equal area grid!") + end + if !any(ismissing, data) + data = nomissing(data) + end + return data +end + +function reduced_grid_to_points(lat, reduced_points) + lonlat = SVector{2, Float32}[] + for (i, θ) in enumerate(lat) + n = reduced_points[i] + dλ = Float32(360/n) + for j in 0:n-1 + push!(lonlat, SVector(0 + dλ*j, θ)) + end + end + return lonlat +end + +######################################################################### +# Spatial functions +######################################################################### +function spatialidxs(::EqArea, A) + return ((Coord(i),) for i in 1:size(A, Coord)) +end + +function zonalmean(::EqArea, A::AbDimArray) + idxs, lats = uniquelats(A) + other = otherdims(A, Coord()) + r = zeros(eltype(A), (length(lats), size.(Ref(A), other)...), other) + R = ClimArray(r, (Lat(lats), other...), A.name) + for (i, r) in enumerate(idxs) + for j in otheridxs(A, Coord()) + R[Lat(i), j...] = mean(view(A, Coord(r), j...)) + end + end + return R +end +function zonalmean(::EqArea, A::AbDimArray{T, 1}) where {T} + idxs, lats = uniquelats(A) + res = zeros(T, length(lats)) + for (i, r) in enumerate(idxs) + res[i] = mean(view(A.data, r)) + end + return ClimArray(res, (Lat(lats),)) +end + + +""" + uniquelats(A::AbDimArray) → idxs, lats + uniquelats(c::Vector{<:AbstractVector}) → idxs, lats +Find the unique latitudes of `A`. Return the indices (vector of ranges) that each latitude +in `lats` covers, as well as the latitudes themselves. +""" +uniquelats(A::AbDimArray) = uniquelats(dims(A, Coord).val) +function uniquelats(c) + @assert issorted(c; by = x -> x[2]) + idxs = Vector{UnitRange{Int}}() + lats = eltype(eltype(c))[] + sizehint!(lats, round(Int, sqrt(length(c)))) + iprev = 1 + for i in 2:length(c) + if c[i][2] != c[i-1][2] + push!(lats, c[i-1][2]) + push!(idxs, iprev:(i-1)) + iprev = i + end + end + push!(lats, c[end][2]) + push!(idxs, iprev:length(c)) + sizehint!(lats, length(lats)) + return idxs, lats +end + +spaceagg(::EqArea, f, A, ::Nothing) = dropagg(f, A, Coord) +# I think the best scenario is to modify `dropagg` to take in weights. +function spaceagg(::EqArea, f, A, exw) + error("TODO") + w = pweights(Array(exw)) + dropagg(f, A, Coord) +end + +function hemispheric_functions(::EqArea, A) + c = dims(A, Coord).val + @assert issorted(c; by = x -> x[2]) + shi, nhi = hemisphere_indices(c) + nh = A[Coord(nhi)] + sh = A[Coord(shi)] + oldc = dims(sh, Coord).val + si = sortperm(oldc, by = x -> x[2], rev = true) + newc = [SVector(x[1], abs(x[2])) for x in oldc[si]] + di = dimindex(sh, Coord) + newdims = Any[dims(sh)...] + newdims[di] = Coord(newc) + data = reverse(Array(sh); dims = di) + sh = ClimArray(data, (newdims...,)) + return nh, sh +end + +function hemispheric_means(::EqArea, A::AbDimArray) + shi, nhi = hemisphere_indices(c) + nh = dropagg(mean, A[Coord(nhi)], Coord) + sh = dropagg(mean, A[Coord(shi)], Coord) + return nh, sh +end + +function hemisphere_indices(c) + idxs, lats = uniquelats(c) + i = findfirst(l → l > 0, lats) + shi = idxs[1][1]:idxs[i][end] + nhi = idxs[i+1][1]:idxs[end][end] + return nhi, shi +end + +latitudes(::EqArea, A) = unique!([x[2] for x in dims(A, Coord)]) + +######################################################################### +# Extention of convenience indexing of `Coord` +######################################################################### +# TODO: this section can be made much more general, but with much more +# effort: https://github.com/rafaqz/DimensionalData.jl/issues/207 +export coord_latitudes_between +function coord_latitudes_between(A::ClimArray, l1, l2) + coord_latitudes_between(dims(A, Coord).val, l1, l2) +end + +function coord_latitudes_between(c, l1, l2) + idxs, lats = uniquelats(c) + # Notice that lats is guaranteed sorted for gaussian equal area + i1 = searchsortedfirst(lats, l1) + i2 = searchsortedlast(lats, l2) + i1 > i2 && ((i1, i2) = (i2, i1)) # in case bounds are given in reverse order + return idxs[i1][1]:idxs[i2][end] +end + +# This method is necessary so that I can do A[Coord(Lat(...))] +function DimensionalData._dims2indices(c::Coord, sel::Coord{ <: Lat{ <: Between}}, emptyval = Colon()) + l1, l2 = sel.val.val.val + return coord_latitudes_between(c, l1, l2) # this is Vector{Int} +end + +# This allows Between to access Coord directly and be translated to latitude +function DimensionalData.sel2indices(c::Coord, sel::Between{Tuple{X,Y}}) where {X<:Real, Y<:Real} + l1, l2 = sel.val + return coord_latitudes_between(c, l1, l2) # this is Vector{Int} +end