Skip to content

Commit

Permalink
Merge pull request #453 from JuliaDataCubes/fc/showlazyloaded
Browse files Browse the repository at this point in the history
Hint to data loading in show of a data cube
  • Loading branch information
lazarusA authored Oct 17, 2024
2 parents 44a9d70 + 1b3245c commit 4635df4
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 13 deletions.
9 changes: 3 additions & 6 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.9'
- 'lts'
- '1'
os:
- ubuntu-latest
Expand All @@ -23,7 +23,7 @@ jobs:
- x64
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
Expand All @@ -44,8 +44,5 @@ jobs:
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN}}
file: lcov.info
- uses: coverallsapp/github-action@master
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
path-to-lcov: ./lcov.info
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@
[ci-img]: https://github.com/JuliaDataCubes/YAXArrays.jl/workflows/CI/badge.svg
[ci-url]: https://github.com/JuliaDataCubes/YAXArrays.jl/actions?query=workflow%3ACI

[coveralls-img]: https://coveralls.io/repos/github/JuliaDataCubes/YAXArrays.jl/badge.svg?branch=master
[coveralls-url]: https://coveralls.io/github/JuliaDataCubes/YAXArrays.jl?branch=master

[zenodo-url]: https://doi.org/10.5281/zenodo.7505394
[zenodo-img]: https://zenodo.org/badge/DOI/10.5281/zenodo.7505394.svg

Expand Down
13 changes: 10 additions & 3 deletions src/Cubes/Cubes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,10 @@ Cubes.caxes(x::DD.Dimension) = (x,)
Given any array implementing the YAXArray interface it returns an in-memory [`YAXArray`](@ref) from it.
"""
function readcubedata(x)
    # Size reported by `cubesize`; it is only used to decide whether to warn.
    nbytes = cubesize(x)
    # Loading proceeds either way: exceeding `YAXDefaults.max_cache[]` merely
    # emits a warning so the caller can see how much data is being read.
    exceeds_cache = nbytes > YAXDefaults.max_cache[]
    exceeds_cache && @warn "Loading a Cube of size $(formatbytes(nbytes))."
    return YAXArray(caxes(x), getindex_all(x), getattributes(x))
end

Expand Down Expand Up @@ -506,16 +510,19 @@ getCubeDes(::DD.Dimension) = "Cube axis"
getCubeDes(::YAXArray) = "YAXArray"
getCubeDes(::Type{T}) where {T} = string(T)

# Status label that `DD.show_after` prints for the parent array of a cube.
# Fallback method: anything that is not an `AbstractDiskArray` is reported as
# already being in memory.
function loadingstatus(x)
    return "loaded in memory"
end

# `DiskArrays.AbstractDiskArray` parents are reported as lazily loaded.
function loadingstatus(x::DiskArrays.AbstractDiskArray)
    return "loaded lazily"
end

function DD.show_after(io::IO, mime, c::YAXArray)
blockwidth = get(io, :blockwidth, 0)
DD.print_block_separator(io, loadingstatus(parent(c)), blockwidth, blockwidth)

# ? sizeof : Check if the element type is a bitstype or a union of bitstypes
if (isconcretetype(eltype(c)) && isbitstype(eltype(c))) ||
(eltype(c) isa Union && all(isbitstype, Base.uniontypes(eltype(c))))

DD.print_block_separator(io, "file size", blockwidth, blockwidth)
println(io, "\n file size: ", formatbytes(cubesize(c)))
println(io, "\n data size: ", formatbytes(cubesize(c)))
else # fallback
DD.print_block_separator(io, "memory size", blockwidth, blockwidth)
println(io, "\n summarysize: ", formatbytes(Base.summarysize(parent(c))))
end
DD.print_block_close(io, blockwidth)
Expand Down
12 changes: 11 additions & 1 deletion src/DatasetAPI/Datasets.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Datasets
#import ..Cubes.Axes: axsym, axname, CubeAxis, findAxis, CategoricalAxis, RangeAxis, caxes
import ..Cubes: Cubes, YAXArray, concatenatecubes, CleanMe, subsetcube, copy_diskarray, setchunks, caxes
import ..Cubes: Cubes, YAXArray, concatenatecubes, CleanMe, subsetcube, copy_diskarray, setchunks, caxes, readcubedata, cubesize, formatbytes
using ...YAXArrays: YAXArrays, YAXDefaults, findAxis
using DataStructures: OrderedDict, counter
using Dates: Day, Hour, Minute, Second, Month, Year, Date, DateTime, TimeType, AbstractDateTime
Expand Down Expand Up @@ -178,6 +178,16 @@ function Base.getproperty(x::Dataset, k::Symbol)
x[k]
end
end

# Read every cube of `ds` via `readcubedata` and return a new `Dataset` built
# from the resulting cubes together with the original `axes` and `properties`.
# A warning is emitted (loading still proceeds) when the summed `cubesize` of
# all cubes exceeds `YAXDefaults.max_cache[]`.
function readcubedata(ds::Dataset)
    # Passing `cubesize` as the mapping function avoids materialising an
    # intermediate array, and `init=0` keeps the reduction well-defined when
    # `ds.cubes` is empty (a plain `sum` over an empty collection can throw).
    dssize = sum(cubesize, values(ds.cubes); init=0)
    if dssize > YAXDefaults.max_cache[]
        @warn "Loading data of size $(formatbytes(dssize))"
    end
    # Preserve the cube order of the source dataset while loading each cube.
    inmemcubes = OrderedDict(key => readcubedata(val) for (key, val) in pairs(ds.cubes))
    return Dataset(inmemcubes, ds.axes, ds.properties)
end

# Symbol lookup on a Dataset: cubes take precedence over axes; a key found in
# neither raises an `ArgumentError`.
function Base.getindex(x::Dataset, i::Symbol)
    haskey(x.cubes, i) && return x.cubes[i]
    haskey(x.axes, i) && return x.axes[i]
    throw(ArgumentError("$i not found in Dataset"))
end
Expand Down
5 changes: 5 additions & 0 deletions test/Datasets/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,11 @@ end
@test ds.b.data == y
@test ds.b.chunks == b.chunks

@test ds.b.data isa AbstractDiskArray
inmemds = readcubedata(ds)
@test !isa(inmemds.b.data, AbstractDiskArray)


ds2 = Dataset(c=c)
savedataset(ds2, path=f, backend=:zarr, append=true)
ds = open_dataset(f, driver=:zarr)
Expand Down

0 comments on commit 4635df4

Please sign in to comment.