From 6a1d4a640a01eca0df23416ea203294c4ce6721f Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 31 May 2023 14:19:45 -0400 Subject: [PATCH] NetCDF output documented --- docs/src/output.md | 136 ++++++++++++++++++++++++++++++++++++++++++- src/output/output.jl | 61 +++++++++++++------ 2 files changed, 178 insertions(+), 19 deletions(-) diff --git a/docs/src/output.md b/docs/src/output.md index 04e7d6da0..71395578f 100644 --- a/docs/src/output.md +++ b/docs/src/output.md @@ -2,4 +2,138 @@ SpeedyWeather.jl uses NetCDF to output the data of a simulation. The following describes the details of this and how to change the way in which the NetCDF output is written. -There are many options to this available. \ No newline at end of file +There are many options to this available. + +## Accessing the NetCDF output writer + +The output writer is a component of every Model, i.e. `BarotropicModel`, `ShallowWaterModel`, `PrimitiveDryModel` and `PrimitiveWetModel`, hence a non-default output writer can be passed on as a keyword argument to the model constructor + +```julia +julia> spectral_grid = SpectralGrid() +julia> my_output_writer = OutputWriter(spectral_grid, PrimitiveDry) +julia> model = PrimitiveDryModel(;spectral_grid, output=my_output_writer) +``` + +So after we have defined the grid through the `SpectralGrid` object we can use and change +the implemented `OutputWriter` by passing on the following arguments +```julia +julia> my_output_writer = OutputWriter(spectral_grid, PrimitiveDry, kwargs...) +``` +the `spectral_grid` has to be the first argument then the model type +(`Barotropic`, `ShallowWater`, `PrimitiveDry`, `PrimitiveWet`) +which helps the output writer to make default choices on which variables to output. However, we can +also pass on further keyword arguments. So let's start with an example. + +## Example 1: NetCDF output every hour + +If we want to increase the frequency of the output we can choose `output_dt` (default `=6` in hours) like so +```julia +julia> my_output_writer = OutputWriter(spectral_grid, PrimitiveDry, output_dt=1) +julia> model = PrimitiveDryModel(;spectral_grid, output=my_output_writer) +``` +which will now output every hour. It is important to pass on the new output writer `my_output_writer` to the +model constructor, otherwise it will not be part of your model and the default is used instead. +Note that `output_dt` has to be understood as the minimum frequency or maximum output time step. +Example, we run the model at a resolution of T85 and the time step is going to be 670s +```julia +julia> spectral_grid = SpectralGrid(trunc=85) +julia> time_stepper = Leapfrog(spectral_grid) +Leapfrog{Float32}: +... + Δt_sec::Int64 = 670 +... +``` +This means that after 32 time steps 5h 57min and 20s will have passed where output will happen as +the next time step would be >6h. The time axis of the NetCDF output will look like +```julia +julia> using NCDatasets +julia> ds = NCDataset("run_0001/output.nc"); +julia> ds["time"][:] +5-element Vector{Dates.DateTime}: + 2000-01-01T00:00:00 + 2000-01-01T05:57:20 + 2000-01-01T11:54:40 + 2000-01-01T17:52:00 + 2000-01-01T23:49:20 + +julia> diff(ds["time"][:]) +4-element Vector{Dates.Millisecond}: + 21440000 milliseconds + 21440000 milliseconds + 21440000 milliseconds + 21440000 milliseconds +``` +This is so that we don't interpolate in time during output to hit exactly every 6 hours, but at +the same time have a constant spacing in time between output time steps. + +## Example 2: Output onto a higher/lower resolution grid + +Say we want to run the model at a given horizontal resolution but want to output on another resolution, +the `OutputWriter` takes as argument `output_Grid<:AbstractFullGrid` and `nlat_half::Int`. +So for example `output_Grid=FullClenshawGrid` and `nlat_half=48` will always interpolate onto a +regular 192x95 longitude-latitude grid of 1.875˚ resolution, regardless the grid and resolution used +for the model integration. +```julia +julia> my_output_writer = OutputWriter(spectral_grid, PrimitiveDry, output_Grid=FullClenshawGrid, nlat_half=48) +``` +Note that by default the output is on the corresponding full of the grid used in the dynamical core +so that interpolation only happens at most in the zonal direction as they share the location of the +latitude rings. You can check this by +```julia +julia> RingGrids.full_grid(OctahedralGaussianGrid) +FullGaussianGrid +``` +So the corresponding full grid of an `OctahedralGaussianGrid` is the `FullGaussiangrid` and the same resolution +`nlat_half` is chosen by default in the output writer (which you can change though as shown above). +Overview of the corresponding full grids + +| Grid | Corresponding full grid | +| --- | ----------------------- | +| FullGaussianGrid | FullGaussianGrid | +| FullClenshawGrid | FullClenshawGrid | +| OctahadralGaussianGrid | FullGaussianGrid | +| OctahedralClensawhGrid | FullClenshawGrid | +| HEALPixGrid | FullHEALPixGrid | +| OctaHEALPixGrid | FullOctaHEALPixGrid | + +The grids `FullHEALPixGrid`, `FullOctaHEALPixGrid` share the same latitude rings as their reduced grids, +but have always as many longitude points as they are at most around the equator. These grids are not +tested in the dynamical core (but you may use them experimentally) and mostly designed for output purposes. + +## Example 3: Changing the output path or identification + +That's easy by passing on `path="/my/favourite/path/"` and the folder `run_*` with `*` the identification +of the run (that's the `id` keyword, which can be manually set but is also automatically determined as a +number counting up depending on which folders already exist) will be created within. +```julia +julia> path = pwd() +"/Users/milan" +julia> my_output_writer = OutputWriter(spectral_grid, PrimitiveDry, path=path) +``` +This folder must already exist. If you want to give your run a name/identification you can pass on `id` +``` +julia> my_output_writer = OutputWriter(spectral_grid,PrimitiveDry,id="diffusion_test"); +``` +which will be used instead of a 4 digit number like 0001, 0002 which is automatically determined if +`id` is not provided. You will see the id of the run in the progress bar +``` +Weather is speedy: run diffusion_test 100%|███████████████████████| Time: 0:00:12 (19.20 years/day) +``` +and the run folder, here `run_diffusion_test`, is also named accordingly +```julia +shell> ls +... +run_diffusion_test +... +``` + +## Further options + +Further options are described in the `OutputWriter` docstring, (also accessible via `julia>?OutputWriter` for example). +Note that some fields are actual options, but others are derived from the options you provided or are +arrays/objects the output writer needs, but shouldn't be passed on by the user. +The actual options are declared as `[OPTION]` in the following + +```@docs +OutputWriter +``` \ No newline at end of file diff --git a/src/output/output.jl b/src/output/output.jl index 4bf08eda8..6de15a451 100644 --- a/src/output/output.jl +++ b/src/output/output.jl @@ -27,31 +27,48 @@ const DEFAULT_OUTPUT_NF = Float32 """ $(TYPEDSIGNATURES) NetCDF output writer. Contains all output options and auxiliary fields for output interpolation. -To be initialised with `OutputWriter(::SpectralGrid,::TimeStepper,kwargs...)` to pass on the -resolution/time stepping information from those structs. Options include +To be initialised with `OutputWriter(::SpectralGrid,::Type{<:ModelSetup},kwargs...)` to pass on the +resolution information and the model type which chooses which variables to output. Options include $(TYPEDFIELDS)""" Base.@kwdef mutable struct OutputWriter{NF<:Union{Float32,Float64},Model<:ModelSetup} <: AbstractOutputWriter spectral_grid::SpectralGrid # FILE OPTIONS - output::Bool = false # output to netCDF? - path::String = pwd() # path to output folder - id::Union{String,Int} = "" # run identification number/string + output::Bool = false + + "[OPTION] path to output folder, run_???? will be created within" + path::String = pwd() + + "[OPTION] run identification number/string" + id::Union{String,Int} = "0001" run_path::String = "" # will be determined in initalize! - filename::String = "output.nc" # name of the output netcdf file - write_restart::Bool = true # also write restart file if output==true? + + "[OPTION] name of the output netcdf file" + filename::String = "output.nc" + + "[OPTION] also write restart file if output==true?" + write_restart::Bool = true pkg_version::VersionNumber = pkgversion(SpeedyWeather) # WHAT/WHEN OPTIONS startdate::DateTime = DateTime(2000,1,1) - output_dt::Float64 = 6 # output time step [hours] - output_vars::Vector{Symbol} = default_output_vars(Model) # vector of output variables as Symbols - missing_value::NF = NaN # missing value to be used in netcdf output + + "[OPTION] output frequency, time step [hrs]" + output_dt::Float64 = 6 + + "[OPTION] which variables to output, u, v, vor, div, pres, temp, humid" + output_vars::Vector{Symbol} = default_output_vars(Model) + + "[OPTION] missing value to be used in netcdf output" + missing_value::NF = NaN # COMPRESSION OPTIONS - compression_level::Int = 3 # compression level; 1=low but fast, 9=high but slow - keepbits::Keepbits = Keepbits() # mantissa bits to keep for every variable + "[OPTION] lossless compression level; 1=low but fast, 9=high but slow" + compression_level::Int = 3 + + "[OPTION] mantissa bits to keep for every variable" + keepbits::Keepbits = Keepbits() # TIME STEPS AND COUNTERS (initialize later) output_every_n_steps::Int = 0 # output frequency @@ -65,15 +82,19 @@ Base.@kwdef mutable struct OutputWriter{NF<:Union{Float32,Float64},Model<:ModelS input_Grid::Type{<:AbstractGrid} = spectral_grid.Grid # Output as matrix (particularly for reduced grids) - as_matrix::Bool = false # if true sort grid points into a matrix (interpolation-free) - # full grid for output if output_matrix == false + "[OPTION] sort grid points into a matrix (interpolation-free), for OctahedralClenshawGrid, OctaHEALPixGrid only" + as_matrix::Bool = false + quadrant_rotation::NTuple{4,Int} = (0,1,2,3) # rotation of output quadrant # matrix of output quadrant matrix_quadrant::NTuple{4,Tuple{Int,Int}} = ((2,2),(1,2),(1,1),(2,1)) # OUTPUT GRID + "[OPTION] the grid used for output, full grids only" output_Grid::Type{<:AbstractFullGrid} = RingGrids.full_grid(input_Grid) - nlat_half::Int = spectral_grid.nlat_half # default: same nlat_half for in/output + + "[OPTION] the resolution of the output grid, default: same nlat_half as in the dynamical core" + nlat_half::Int = spectral_grid.nlat_half nlon::Int = as_matrix ? RingGrids.matrix_size(input_Grid,spectral_grid.nlat_half)[1] : RingGrids.get_nlon(output_Grid,nlat_half) nlat::Int = as_matrix ? RingGrids.matrix_size(input_Grid,spectral_grid.nlat_half)[2] : @@ -202,7 +223,7 @@ function initialize!( # GET RUN ID, CREATE FOLDER # get new id only if not already specified - output.id = output.id == "" ? get_run_id(output.path) : output.id + output.id = get_run_id(output.path,output.id) output.run_path = create_output_folder(output.path,output.id) feedback.id = output.id # synchronize with feedback struct @@ -251,8 +272,12 @@ Checks existing `run_????` folders in `path` to determine a 4-digit `id` number by counting up. E.g. if folder run_0001 exists it will return the string "0002". Does not create a folder for the returned run id. """ -function get_run_id(path::String) - # pull list of existing run_???? folders via readdir +function get_run_id(path::String,id::String) + # if run_???? folder doesn't exist yet don't change the id + run_id = string("run_",run_id_to_string(id)) + !(run_id in readdir(path)) && return id + + # otherwise pull list of existing run_???? folders via readdir pattern = r"run_\d\d\d\d" # run_???? in regex runlist = filter(x->startswith(x,pattern),readdir(path)) runlist = filter(x->endswith( x,pattern),runlist)