From 5d22b8dcbfdf276bdf04f4f33b4b27209c57d558 Mon Sep 17 00:00:00 2001 From: Datseris Date: Tue, 4 Jan 2022 12:21:46 +0100 Subject: [PATCH] add function `ncsize` --- docs/make.jl | 1 + docs/src/index.md | 97 ++-------------------------------------------- docs/src/netcdf.md | 95 +++++++++++++++++++++++++++++++++++++++++++++ src/core/nc_io.jl | 18 +++++++-- 4 files changed, 114 insertions(+), 97 deletions(-) create mode 100644 docs/src/netcdf.md diff --git a/docs/make.jl b/docs/make.jl index 8faca02e..8440d850 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -37,6 +37,7 @@ makedocs(modules = [ClimateBase, DimensionalData], ), pages = [ "Introduction" => "index.md", + "NetCDF IO" => "netcdf.md", "Statistics" => "statistics.md", "Plotting" => "plotting.md", "Advanced functionality" => "advanced.md", diff --git a/docs/src/index.md b/docs/src/index.md index 2e0db3a1..3badd160 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -66,101 +66,10 @@ end ``` We explicitly assume that `Lon, Lat` are measured in degrees and not radians or meters (extremely important for spatial averaging processes). -## NetCDF IO - -ClimateBase.jl has support for `file.nc ⇆ ClimArray`. -Usually this is done using NCDatasets.jl, but see below for a function that translates a loaded `xarray` (from Python) into `ClimArray`. - -### Read - -To load a `ClimArray` directly from an `.nc` file do: -```@docs -ncread -``` - -Notice that (at the moment) we use a pre-defined mapping of common names to proper dimensions - please feel free to extend the following via a Pull Request: -```@example main -using ClimateBase # hide -ClimateBase.COMMONNAMES -``` - -Also, the following convenience functions are provided for examining the content of on-disk `.nc` files without loading all data on memory. 
-```@docs -nckeys -ncdetails -globalattr -``` - -### Write -You can also write a bunch of `ClimArray`s directly into an `.nc` file with -```@docs -ncwrite -``` - -### xarray -You can use the following functions (which are not defined and exported in `ClimateBase` to avoid dependency on PyCall.jl) to load data using Python's `xarray`. -```julia -using ClimateBase, Dates -# This needs to numpy, xarray and dask installed from Conda -using PyCall -xr = pyimport("xarray") -np = pyimport("numpy") - -function climarray_from_xarray(xa, fieldname, name = fieldname) - w = getproperty(xa, Symbol(fieldname)) - raw_data = Array(w.values) - dnames = collect(w.dims) # dimensions in string name - dim_values, dim_attrs = extract_dimension_values_xarray(xa, dnames) - @assert collect(size(raw_data)) == length.(dim_values) - actual_dims = create_dims_xarray(dnames, dim_values, dim_attrs) - ca = ClimArray(raw_data, actual_dims, name; attrib = w.attrs) -end - -function extract_dimension_values_xarray(xa, dnames = collect(xa.dims)) - dim_values = [] - dim_attrs = Vector{Any}(fill(nothing, length(dnames))) - for (i, d) in enumerate(dnames) - dim_attrs[i] = getproperty(xa, d).attrs - x = getproperty(xa, d).values - if d ≠ "time" - push!(dim_values, x) - else - dates = [np.datetime_as_string(y)[1:19] for y in x] - dates = DateTime.(dates) - push!(dim_values, dates) - end - end - return dim_values, dim_attrs -end - -function create_dims_xarray(dnames, dim_values, dim_attrs) - true_dims = ClimateBase.to_proper_dimensions(dnames) - optimal_values = ClimateBase.vector2range.(dim_values) - out = [] - for i in 1:length(true_dims) - push!(out, true_dims[i](optimal_values[i]; metadata = dim_attrs[i])) - end - return (out...,) -end - -# Load some data -xa = xr.open_mfdataset(ERA5_files_path) -X = climarray_from_xarray(xa, "w", "optional name") -``` -## Ensemble types -A dedicated type representing ensembles has no reason to exist in ClimateBase.jl. 
-As the package takes advantage of standard Julia datastructures and syntax, those can be used to represent "ensembles". For example to do an "ensemble global mean" you can just do: -```julia -# load all data -E = [ClimArray("ensemble_$i.nc", "x") for i in 1:10] -# mean from all data -global_mean = mean(spacemean(X) for X in E) -``` -where you see that the "ensemble" was represented just as a `Vector{ClimArray}`. -Of course, this requires that all data can fit into memory, but this is so far the only way ClimateBase.jl operates anyways. - - ## Crash-course to DimensionalData.jl ```@docs DimensionalData ``` + +## Available selectors +**TODO** \ No newline at end of file diff --git a/docs/src/netcdf.md b/docs/src/netcdf.md new file mode 100644 index 00000000..a8fa6e6f --- /dev/null +++ b/docs/src/netcdf.md @@ -0,0 +1,95 @@ +# NetCDF IO + +ClimateBase.jl has support for `"file.nc" ⇆ ClimArray`. +Usually this is done using NCDatasets.jl, but see below for a function that translates a loaded `xarray` (from Python) into `ClimArray`. + +## Read + +To load a `ClimArray` directly from an `.nc` file do: +```@docs +ncread +``` + +Notice that (at the moment) we use a pre-defined mapping of common names to proper dimensions - please feel free to extend the following via a Pull Request: +```@example main +using ClimateBase # hide +ClimateBase.COMMONNAMES +``` + +Also, the following convenience functions are provided for examining the content of on-disk `.nc` files without loading all the data into memory. +```@docs +nckeys +ncdetails +ncsize +globalattr +``` + +## Write +You can also write a bunch of `ClimArray`s directly into an `.nc` file with +```@docs +ncwrite +``` + +## xarray +You can use the following functions (which are neither defined nor exported in `ClimateBase`, to avoid a dependency on PyCall.jl) to load data using Python's `xarray`.
+```julia +using ClimateBase, Dates +# This needs numpy, xarray and dask to be installed from Conda +using PyCall +xr = pyimport("xarray") +np = pyimport("numpy") + +function climarray_from_xarray(xa, fieldname, name = fieldname) + w = getproperty(xa, Symbol(fieldname)) + raw_data = Array(w.values) + dnames = collect(w.dims) # dimension names as strings + dim_values, dim_attrs = extract_dimension_values_xarray(xa, dnames) + @assert collect(size(raw_data)) == length.(dim_values) + actual_dims = create_dims_xarray(dnames, dim_values, dim_attrs) + ca = ClimArray(raw_data, actual_dims, name; attrib = w.attrs) +end + +function extract_dimension_values_xarray(xa, dnames = collect(xa.dims)) + dim_values = [] + dim_attrs = Vector{Any}(fill(nothing, length(dnames))) + for (i, d) in enumerate(dnames) + dim_attrs[i] = getproperty(xa, d).attrs + x = getproperty(xa, d).values + if d ≠ "time" + push!(dim_values, x) + else + # Dates need special handling to be transformed into `DateTime`. + dates = [np.datetime_as_string(y)[1:19] for y in x] + dates = DateTime.(dates) + push!(dim_values, dates) + end + end + return dim_values, dim_attrs +end + +function create_dims_xarray(dnames, dim_values, dim_attrs) + true_dims = ClimateBase.to_proper_dimensions(dnames) + optimal_values = ClimateBase.vector2range.(dim_values) + out = [] + for i in 1:length(true_dims) + push!(out, true_dims[i](optimal_values[i]; metadata = dim_attrs[i])) + end + return (out...,) +end + +# Load some data +xa = xr.open_mfdataset(ERA5_files_path) +X = climarray_from_xarray(xa, "w", "optional name") +``` + +## Ensemble types +A dedicated type representing ensembles has no reason to exist in ClimateBase.jl. +As the package takes advantage of standard Julia data structures and syntax, those can be used to represent "ensembles".
For example, to do an "ensemble global mean" you can just do: +```julia +# load all data +E = [ClimArray("ensemble_$i.nc", "x") for i in 1:10] +# mean from all data +global_mean = mean(spacemean(X) for X in E) +``` +where you see that the "ensemble" was represented just as a `Vector{ClimArray}`. +Of course, this requires that all data can fit into memory, but this is so far the only way ClimateBase.jl operates anyway. diff --git a/src/core/nc_io.jl b/src/core/nc_io.jl index 21225c45..60278459 100644 --- a/src/core/nc_io.jl +++ b/src/core/nc_io.jl @@ -5,7 +5,7 @@ https://github.com/rafaqz/GeoData.jl =# using NCDatasets: NCDatasets, NCDataset export NCDataset -export nckeys, ncdetails, globalattr +export nckeys, ncdetails, globalattr, ncsize export ncread, ncwrite dim_to_commonname(::Lat) = "lat" @@ -29,16 +29,27 @@ end nckeys(a::NCDataset) = keys(a) """ - ncdetails(file::String, io = stdout) + ncdetails(file, io = stdout) Print details about the `.nc` file in `file` on `io`. """ -function ncdetails(file::String, io = stdout) +function ncdetails(file, io = stdout) NCDataset(file) do ds show(io, MIME"text/plain"(), ds) end end ncdetails(ds::NCDataset, io = stdout) = show(io, MIME"text/plain"(), ds) +""" + ncsize(file, var) +Return the size of the variable `var` of the `.nc` file `file` without actually loading any data. +""" +function ncsize(file, var) + NCDataset(file) do ds + return size(ds[var]) + end +end + + """ globalattr(file::String) → Dict Return the global attributes of the .nc file. @@ -84,6 +95,7 @@ to an array with three dimensions, such syntaxes are possible: (:, :, 1:3) (1:5:100, 1:1, [1,5,6]) ``` +The function [`ncsize`](@ref) can be useful for choosing a `selection`, since it gives the size of a variable without loading it.