Skip to content

Commit

Permalink
Automatic Coord loading when lon/lat are matrices (#80)
Browse files Browse the repository at this point in the history
* add age to common names

* minor Vector clarity

* allow lading matrix lonlat data into coord data

* up version
  • Loading branch information
Datseris authored Nov 18, 2021
1 parent 6755245 commit 97fe2aa
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ClimateBase"
uuid = "35604d93-0fb8-4872-9436-495b01d137e2"
authors = ["Datseris <[email protected]>", "Philippe Roy <[email protected]>"]
version = "0.13.9"
version = "0.13.10"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
1 change: 1 addition & 0 deletions src/core/coredefs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const COMMONNAMES = Dict(
"longitude" => Lon,
"time" => Time,
"t" => Time,
"age" => Time,
"height" => Hei,
"altitude" => Hei,
"pressure" => Pre,
Expand Down
71 changes: 50 additions & 21 deletions src/core/nc_io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ end
# Reading
#########################################################################
"""
ncread(file, var; name, kwargs...) → A
ncread(file, var; kwargs...) → A
Load the variable `var` from the `file` and convert it into a [`ClimArray`](@ref)
with proper dimension mapping and also containing the variable attributes as a dictionary.
Dimension attributes are also given to the dimensions of `A`, if any exist.
Expand All @@ -64,7 +64,7 @@ See keywords below for specifications for unstructured grids.
`.nc` data (typically split by time), e.g.
```julia
using Glob # for getting all files
alldata = glob("toa_fluxes_2020_*.nc")
alldata = glob("toa_fluxes_*.nc")
file = NCDataset(alldata; aggdim = "time")
A = ClimArray(file, "tow_sw_all")
```
Expand All @@ -77,12 +77,17 @@ the created [`ClimArray`](@ref).
See also [`ncdetails`](@ref), [`nckeys`](@ref) and [`ncwrite`](@ref).
We do two performance improvements while loading the data:
## Smart loading
The following things make loading data with `ncread` smarter than directly trying to use
NCDatasets.jl and then convert to some kind of dimensional container.
1. Data are directly transformed into `ClimArray`, conserving metadata and dimension names.
1. If there are no missing values in the data (according to CF standards), the
returned array is automatically converted to a concrete type (i.e. `Union{Float32, Missing}`
becomes `Float32`).
2. Dimensions that are ranges (i.e. sampled with constant step size) are automatically
1. Dimensions that are ranges (i.e. sampled with constant step size) are automatically
transformed to a standard Julia `Range` type (which makes sub-selecting faster).
1. Automatically deducing whether the spatial information is in an orthogonal
grid or not, and creating a single `Coord` dimension if not.
## Keywords
* `name` optionally rename loaded array.
Expand Down Expand Up @@ -137,6 +142,8 @@ end
function autodetect_grid(ds)
if haskey(ds, "reduced_points") || haskey(ds.dim, "ncells") || haskey(ds, "clon")
return UnstructuredGrid()
elseif haskey(ds, "lat") && length(size(ds["lat"])) > 1
return UnstructuredGrid()
else
return LonLatGrid()
end
Expand Down Expand Up @@ -204,6 +211,7 @@ function to_proper_dimensions(dnames)
else
@warn """
Dimension name "$n" not in common names.
Please consider opening an issue/PR on GitHub proposing "$n" in common names!
Making a generic dimension named `Dim{:$n}` for now...
"""
push!(r, Dim{Symbol(n)})
Expand Down Expand Up @@ -270,23 +278,37 @@ function ncread_unstructured(ds::NCDatasets.AbstractDataset, var::String, name,
# reduced points or not, and obtain the name of the coord dimension in the `ds`
if isnothing(lon)
lonlat, original_grid_dim = load_coordinate_points(ds)
lonlat = convert_to_degrees(lonlat, ds)
else
lonlat = [SVector(lo, la) for (lo, la) in zip(lon, lat)]
original_grid_dim = intersect(NCDatasets.dimnames(cfvar), POSSIBLE_CELL_NAMES)[1]
lonlat = convert_to_degrees(lonlat)
end

# TODO: I've noticed that this converts integer dimension (like pressure)
# into Float64, but I'm not sure why...
alldims = [NCDatasets.dimnames(cfvar)...]

# Set up the remaining dimensions of the dataset
@assert original_grid_dim alldims
i = findfirst(x -> x == original_grid_dim, alldims)
remainingdims = deleteat!(copy(alldims), i)
A = Array(cfvar)
actualdims = Any[create_dims(ds, remainingdims, A)...]
if original_grid_dim isa Tuple
# stupid case where `lon` data are `Matrix`
A = Array(cfvar)
sizes = [size(A)...]
# First, find where lon, lat dimensions are positioned
is = findall(x -> x original_grid_dim, alldims)
@assert length(is) == 2
remainingdims = deleteat!(copy(alldims), is)
actualdims = Any[create_dims(ds, remainingdims, A)...]
# Then, reshape A accordingly
sizes[is[1]] = sizes[is[1]]*sizes[is[2]]
deleteat!(sizes, is[2])
A = reshape(A, sizes...)
i = is[1]
else
@assert original_grid_dim alldims
i = findfirst(x -> x == original_grid_dim, alldims)
remainingdims = deleteat!(copy(alldims), i)
A = Array(cfvar)
actualdims = Any[create_dims(ds, remainingdims, A)...]
end

# Make coordinate dimension
si = sortperm(lonlat, by = reverse)
Expand All @@ -312,31 +334,38 @@ end
function load_coordinate_points(ds)
if haskey(ds, "reduced_points")
lonlat = reduced_grid_to_points(ds["lat"], ds["reduced_points"])
original_grid_dim = "rgrid" # TODO: Specific to CDO Gaussian grid
original_grid_dim = "rgrid" # Specific to CDO Gaussian grid
elseif haskey(ds, "lon") && length(size(ds["lon"])) == 2
# This is the case of having a non-orthogonal grid, but
# still saving the lon/lat information as matrices whose dimensions are lon/lat
# (this is a rather stupid way to format things, unfortunately)
lons = ds["lon"] |> Matrix .|> wrap_lon |> vec
lats = ds["lat"] |> Matrix |> vec
original_grid_dim = ("lon", "lat")
elseif has_unstructured_key(ds)
if haskey(ds, "lon")
lons = ds["lon"] |> Array .|> wrap_lon
lats = ds["lat"] |> Array
lons = ds["lon"] |> Vector .|> wrap_lon
lats = ds["lat"] |> Vector
original_grid_dim = NCDatasets.dimnames(ds["lon"])[1]
elseif haskey(ds, "clon")
lons = ds["clon"] |> Array .|> wrap_lon
lats = ds["clat"] |> Array
lons = ds["clon"] |> Vector .|> wrap_lon
lats = ds["clat"] |> Vector
original_grid_dim = NCDatasets.dimnames(ds["clon"])[1]
else
error("""
We didn't find key `"lon"` or `"clon"` that represents the longitude of each
polygon in an unstructured grid.
polygon in a non-orthogonal grid.
""")
end
lonlat = [SVector(lo, la) for (lo, la) in zip(lons, lats)]
else
error("""
We didn't find any of the following keys: `"ncells", "cells", "reduced_points",
"clon", "lon"`, at least one of which is mandatory for unstructured grid.
If your data stores the "cell" information with a different name, please open
an issue and let us know!
We couldn't automatically identify the lon/lat values of cell centers.
Please provide explicitly keywords `lon, lat` in `ncread`.
""")
end
lonlat = [SVector(lo, la) for (lo, la) in zip(lons, lats)]
lonlat = convert_to_degrees(lonlat, ds)
return lonlat, original_grid_dim
end

Expand Down

0 comments on commit 97fe2aa

Please sign in to comment.