Skip to content

Commit

Permalink
implement alternative to missing values
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Barth committed Jan 10, 2024
1 parent 61b20b5 commit 4a3a9fc
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 36 deletions.
109 changes: 79 additions & 30 deletions src/cfvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ function cfvariable(ds,
# look also at parent if defined
units = _getattrib(ds,_v,_parentname,"units",nothing),
calendar = _getattrib(ds,_v,_parentname,"calendar",nothing),
_experimental_missing_value = missing,
)

v = _v
Expand Down Expand Up @@ -154,6 +155,15 @@ function cfvariable(ds,
end
end

__experimental_missing_value =
# use NaN32 rather than NaN to avoid unnecessary promotion
# to double precision
if scaledtype == Float32 && _experimental_missing_value === NaN
NaN32
end
__experimental_missing_value = _experimental_missing_value


storage_attrib = (
fillvalue = fillvalue,
missing_values = (missing_value...,),
Expand All @@ -162,17 +172,19 @@ function cfvariable(ds,
calendar = calendar,
time_origin = time_origin,
time_factor = time_factor,
_experimental_missing_value = __experimental_missing_value,
)

rettype = _get_rettype(ds, calendar, fillvalue, missing_value, scaledtype)
rettype = _get_rettype(ds, calendar, fillvalue, missing_value,
scaledtype,__experimental_missing_value)

return CFVariable{rettype,ndims(v),typeof(v),typeof(attrib),typeof(storage_attrib)}(
v,attrib,storage_attrib)

end


function _get_rettype(ds, calendar, fillvalue, missing_value, rettype)
function _get_rettype(ds, calendar, fillvalue, missing_value, rettype, _experimental_missing_value)
# rettype can be a date if calendar is different from nothing
if calendar !== nothing
DT = nothing
Expand All @@ -193,7 +205,8 @@ function _get_rettype(ds, calendar, fillvalue, missing_value, rettype)
end

if (fillvalue !== nothing) || (!isempty(missing_value))
rettype = Union{Missing,rettype}
#rettype = Union{Missing,rettype}
rettype = promote_type(typeof(_experimental_missing_value),rettype)
end
return rettype
end
Expand Down Expand Up @@ -224,9 +237,9 @@ calendar(v::CFVariable) = v._storage_attrib.calendar
The time unit in milliseconds. E.g. seconds would be 1000., days would be 86400000.
The result can also be `nothing` if the variable has no time units.
"""
time_factor(v::CFVariable) = v._storage_attrib[:time_factor]

time_factor(v::CFVariable) = v._storage_attrib.time_factor

_experimental_missing_value(v::CFVariable) = v._storage_attrib._experimental_missing_value

# fillvalue can be NaN (unfortunately)
@inline isfillvalue(data,fillvalue) = data == fillvalue
Expand Down Expand Up @@ -270,36 +283,67 @@ end
@inline asdate(data::Float32,time_origin,time_factor,DTcast) =
convert(DTcast,time_origin + Dates.Millisecond(round(Int64,time_factor * Float64(data))))


@inline fromdate(data::TimeType,time_origin,inv_time_factor) =
Dates.value(data - time_origin) * inv_time_factor
@inline fromdate(data,time_origin,time_factor) = data

@inline function CFtransform(data,fv,scale_factor,add_offset,time_origin,time_factor,DTcast)
return asdate(

@inline CFtransform_experimental_missing_value(data,_experimental_missing_value) = data
@inline CFtransform_experimental_missing_value(data::Missing,_experimental_missing_value) = _experimental_missing_value

@inline CFinvtransform_experimental_missing_value(data::Missing,_experimental_missing_value::Missing) = missing
@inline CFinvtransform_experimental_missing_value(data,_experimental_missing_value::Missing) = data

# fall-back if _experimental_missing_value is not missing
@inline function CFinvtransform_experimental_missing_value(data,_experimental_missing_value)
# note NaN === NaN is true
if data === _experimental_missing_value
return missing
else
data
end
end

# Transformation pipelne
#
# fillvalue to missing -> scale -> add offset -> transform to dates -> missing to alternative sentinel value
#
# Inverse transformation pipleine
#
# alternative sentinel value to missing -> round float if should be ints -> encode dates -> remove offset -> inverse scalling -> missing to fillvalue
#
# All steps are optional and can be skipped if not applicable


@inline function CFtransform(data,fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DTcast)
return CFtransform_experimental_missing_value(
asdate(
CFtransform_offset(
CFtransform_scale(
CFtransform_missing(data,fv),
scale_factor),
add_offset),
time_origin,time_factor,DTcast)
time_origin,time_factor,DTcast),
_experimental_missing_value)
end

# round float to integers
_approximate(::Type{T},data) where T <: Integer = round(T,data)
_approximate(::Type,data) = data


@inline function CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,DT)
return _approximate(
@inline function CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,_experimental_missing_value,DT)
return CFtransform_experimental_missing_value(
_approximate(
DT,
CFtransform_replace_missing(
CFtransform_scale(
CFtransform_offset(
fromdate(data,time_origin,inv_time_factor),
minus_offset),
inv_scale_factor),
fv))
fv)),
_experimental_missing_value)
end


Expand All @@ -310,27 +354,27 @@ end
# CFtransform.(data,fv,scale_factor,add_offset,time_origin,time_factor,DTcast)

# for scalars
@inline CFtransformdata(data,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) =
CFtransform(data,fv,scale_factor,add_offset,time_origin,time_factor,DTcast)
@inline CFtransformdata(data,fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DTcast) =
CFtransform(data,fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DTcast)

# in-place version
@inline function CFtransformdata!(out,data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor) where {T,N}
@inline function CFtransformdata!(out,data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value) where {T,N}
DTcast = eltype(out)
@inbounds @simd for i in eachindex(data)
out[i] = CFtransform(data[i],fv,scale_factor,add_offset,time_origin,time_factor,DTcast)
out[i] = CFtransform(data[i],fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DTcast)
end
return out
end

# for arrays
@inline function CFtransformdata(data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,DTcast) where {T,N}
@inline function CFtransformdata(data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DTcast) where {T,N}
out = Array{DTcast,N}(undef,size(data))
return CFtransformdata!(out,data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor)
return CFtransformdata!(out,data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value)
end

@inline function CFtransformdata(
data::AbstractArray{T,N},fv::Tuple{},scale_factor::Nothing,
add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,::Type{T}) where {T,N}
add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,_experimental_missing_value,::Type{T}) where {T,N}
# no transformation necessary (avoid allocation)
return data
end
Expand All @@ -350,46 +394,46 @@ end
# end

# for arrays
@inline function CFinvtransformdata(data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,DT) where {T,N}
@inline function CFinvtransformdata(data::AbstractArray{T,N},fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DT) where {T,N}
inv_scale_factor = _inv(scale_factor)
minus_offset = _minus(add_offset)
inv_time_factor = _inv(time_factor)

out = Array{DT,N}(undef,size(data))
@inbounds @simd for i in eachindex(data)
out[i] = CFinvtransform(data[i],fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,DT)
out[i] = CFinvtransform(data[i],fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,_experimental_missing_value,DT)
end
return out
end

@inline function CFinvtransformdata(
data::AbstractArray{T,N},fv::Tuple{},scale_factor::Nothing,
add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,::Type{T}) where {T,N}
add_offset::Nothing,time_origin::Nothing,time_factor::Nothing,_experimental_missing_value,::Type{T}) where {T,N}
# no transformation necessary (avoid allocation)
return data
end

# for scalar
@inline function CFinvtransformdata(data,fv,scale_factor,add_offset,time_origin,time_factor,DT)
@inline function CFinvtransformdata(data,fv,scale_factor,add_offset,time_origin,time_factor,_experimental_missing_value,DT)
inv_scale_factor = _inv(scale_factor)
minus_offset = _minus(add_offset)
inv_time_factor = _inv(time_factor)

return CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,DT)
return CFinvtransform(data,fv,inv_scale_factor,minus_offset,time_origin,inv_time_factor,_experimental_missing_value,DT)
end



# this function is necessary to avoid "iterating" over a single character in Julia 1.0 (fixed Julia 1.3)
# https://discourse.julialang.org/t/broadcasting-and-single-characters/16836
@inline CFtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) = CFtransform_missing(data,fv)
@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)
#@inline CFtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DTcast) = CFtransform_missing(data,fv)
#@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)


function Base.getindex(v::CFVariable, indexes::Union{Integer,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
data = v.var[indexes...]
return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),eltype(v))
time_origin(v),time_factor(v),_experimental_missing_value(v),eltype(v))
end

function Base.setindex!(v::CFVariable,data::Array{Missing,N},indexes::Union{Int,Colon,AbstractRange{<:Integer}}...) where N
Expand All @@ -407,7 +451,9 @@ function Base.setindex!(v::CFVariable,data::Union{T,Array{T,N}},indexes::Union{I
# is incompatible with the provided data
v.var[indexes...] = CFinvtransformdata(
data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),eltype(v.var))
time_origin(v),time_factor(v),
_experimental_missing_value(v),
eltype(v.var))
return data
end

Expand All @@ -419,7 +465,9 @@ function Base.setindex!(v::CFVariable,data,indexes::Union{Int,Colon,AbstractRang
v.var[indexes...] = CFinvtransformdata(
data,fill_and_missing_values(v),
scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),eltype(v.var))
time_origin(v),time_factor(v),
_experimental_missing_value(v),
eltype(v.var))

return data
end
Expand Down Expand Up @@ -550,6 +598,7 @@ close(ds)
load!(v.var,buffer,indices...)
fmv = fill_and_missing_values(v)
return CFtransformdata!(data,buffer,fmv,scale_factor(v),add_offset(v),
time_origin(v),time_factor(v))
time_origin(v),time_factor(v),
_experimental_missing_value(v))
end
end
9 changes: 7 additions & 2 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,11 @@ function Base.show(io::IO,ds::AbstractDataset)
end



_experimental_missing_value(ds::AbstractDataset) = missing

"""
v = getindex(ds::NCDataset, varname::SymbolOrString)
v = getindex(ds::AbstractDataset, varname::SymbolOrString)
Return the variable `varname` in the dataset `ds` as a
`CFVariable`. The following CF convention are honored when the
Expand Down Expand Up @@ -157,7 +160,9 @@ because both variables are related thought the bounds attribute following the CF
See also [`cfvariable(ds, varname)`](@ref).
"""
function Base.getindex(ds::AbstractDataset,varname::SymbolOrString)
return cfvariable(ds, varname)
return cfvariable(
ds, varname,
_experimental_missing_value = _experimental_missing_value(ds))
end


Expand Down
35 changes: 31 additions & 4 deletions test/test_scaling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@ using Test
import CommonDataModel as CDM
using DataStructures
using Dates
import CommonDataModel: AbstractDataset, AbstractVariable, Attributes, Dimensions, fillvalue
import CommonDataModel:
AbstractDataset,
AbstractVariable,
Attributes,
Dimensions,
defDim,
defVar,
fillvalue

fname = tempname()
ds = MemoryDataset(fname,"c")
Expand Down Expand Up @@ -140,11 +147,8 @@ md["data"][1,2] = DateTime(2000,2,1)

@test CDM.dataset(md["data"]) == md



md.attrib["history"] == "lala"


@test haskey(md.attrib,"history")

@test get(md.attrib,"foooo","bar") == "bar"
Expand All @@ -163,3 +167,26 @@ str = String(take!(io))
@test_logs (:warn, r".*analysis.*") CDM.defVar(md,"data2",eltype(data),("lon","lat"), attrib = OrderedDict{String,Any}( "units" => "days since analysis"));

close(md)



# Alternative to Missing for NetCDF fillvalue

fname = tempname()
data = randn(3,4)
fv = 9999
data[2,2] = fv

ds = TDS(fname,"c")
defDim(ds,"lon",size(data,1))
defDim(ds,"lat",size(data,2))
ncv = defVar(ds,"data",Float64,("lon","lat"),fillvalue = fv)
ncv.var[:,:] = data

ds["data"]
ncv = CDM.cfvariable(ds,"data",_experimental_missing_value = NaN)
@test eltype(ncv) == Float64
@test ncv[1,1] == data[1,1]
@test isnan(ncv[2,2])

close(ds)

0 comments on commit 4a3a9fc

Please sign in to comment.