Skip to content

Commit

Permalink
Format files using DocumentFormat
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] authored Apr 26, 2020
1 parent 459f01e commit d303665
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 94 deletions.
2 changes: 1 addition & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using Documenter, FeatherLib
makedocs(
modules = [FeatherLib],
sitename = "FeatherLib.jl",
analytics="UA-132838790-1",
analytics = "UA-132838790-1",
pages = [
"Introduction" => "index.md"
]
Expand Down
16 changes: 8 additions & 8 deletions src/loadfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,32 @@ getoutputlength(version::Int32, x::Integer) = version < FEATHER_VERSION ? x : pa
function validatefile(filename::AbstractString, data::AbstractVector{UInt8})
if length(data) < MIN_FILE_LENGTH
throw(ArgumentError("'$file' is not in feather format: total length of file: $(length(data))"))
end
end
header = data[1:4]
footer = data[(end-3):end]
footer = data[(end - 3):end]
if header ≠ FEATHER_MAGIC_BYTES || footer ≠ FEATHER_MAGIC_BYTES
throw(ArgumentError(string("'$filename' is not in feather format: header = $header, ",
"footer = $footer.")))
end
end

function loadfile(filename::AbstractString; use_mmap::Bool=true)
function loadfile(filename::AbstractString; use_mmap::Bool = true)
isfile(filename) || throw(ArgumentError("'$filename' is not a valid file."))
data = use_mmap ? Mmap.mmap(filename) : read(filename)
validatefile(filename, data)
data
end

function metalength(data::AbstractVector{UInt8})
read(IOBuffer(data[(length(data)-7):(length(data)-4)]), Int32)
read(IOBuffer(data[(length(data) - 7):(length(data) - 4)]), Int32)
end

function metaposition(data::AbstractVector{UInt8}, metalen::Integer=metalength(data))
length(data) - (metalen+7)
function metaposition(data::AbstractVector{UInt8}, metalen::Integer = metalength(data))
length(data) - (metalen + 7)
end

function rootposition(data::AbstractVector{UInt8}, mpos::Integer=metaposition(data))
read(IOBuffer(data[mpos:(mpos+4)]), Int32)
function rootposition(data::AbstractVector{UInt8}, mpos::Integer = metaposition(data))
read(IOBuffer(data[mpos:(mpos + 4)]), Int32)
end

function getctable(data::AbstractVector{UInt8})
Expand Down
12 changes: 6 additions & 6 deletions src/metadata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ mutable struct CategoryMetadata
ordered::Bool
end

@DEFAULT CategoryMetadata ordered=false
@DEFAULT CategoryMetadata ordered = false

mutable struct TimestampMetadata
unit::TimeUnit
Expand All @@ -43,7 +43,7 @@ mutable struct TimeMetadata
unit::TimeUnit
end

@UNION TypeMetadata (Nothing,CategoryMetadata,TimestampMetadata,DateMetadata,TimeMetadata)
@UNION TypeMetadata (Nothing, CategoryMetadata, TimestampMetadata, DateMetadata, TimeMetadata)

mutable struct Column
name::String
Expand All @@ -53,8 +53,8 @@ mutable struct Column
user_metadata::String
end

function Column(name::String, values::PrimitiveArray, metadata::TypeMetadata=nothing,
user_metadata::String="")
function Column(name::String, values::PrimitiveArray, metadata::TypeMetadata = nothing,
user_metadata::String = "")
Column(name, values, FlatBuffers.typeorder(TypeMetadata, typeof(metadata)),
metadata, user_metadata)
end
Expand Down Expand Up @@ -119,7 +119,7 @@ const JULIA_TIME_DICT = Dict{Metadata.TimeUnit,DataType}(
Metadata.MICROSECOND => Dates.Microsecond,
Metadata.NANOSECOND => Dates.Nanosecond
)
const METADATA_TIME_DICT = Dict{DataType,Metadata.TimeUnit}(v=>k for (k,v) in JULIA_TIME_DICT)
const METADATA_TIME_DICT = Dict{DataType,Metadata.TimeUnit}(v => k for (k, v) in JULIA_TIME_DICT)


isprimitivetype(t::Metadata.DType) = t ∉ NON_PRIMITIVE_TYPES
Expand Down Expand Up @@ -171,4 +171,4 @@ function getmetadata(io::IO, ::Type{T}, A::DictEncoding) where T
Metadata.CategoryMetadata(vals, true)
end

getmetadata(io::IO, ::Type{Union{Missing, T}}, A::DictEncoding) where T = getmetadata(io, T, A)
getmetadata(io::IO, ::Type{Union{Missing,T}}, A::DictEncoding) where T = getmetadata(io, T, A)
12 changes: 6 additions & 6 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ struct ResultSet
metadata::String
end

function featherread(filename::AbstractString; use_mmap=true)
data = loadfile(filename, use_mmap=use_mmap)
function featherread(filename::AbstractString; use_mmap = true)
data = loadfile(filename, use_mmap = use_mmap)
ctable = getctable(data)
ncols = length(ctable.columns)
colnames = [Symbol(col.name) for col in ctable.columns]
Expand All @@ -15,12 +15,12 @@ function featherread(filename::AbstractString; use_mmap=true)
return ResultSet(columns, colnames, ctable.description, ctable.metadata)
end

#=====================================================================================================
#= ====================================================================================================
new column construction stuff
=====================================================================================================#
==================================================================================================== =#
Base.length(p::Metadata.PrimitiveArray) = p.length

startloc(p::Metadata.PrimitiveArray) = p.offset+1
startloc(p::Metadata.PrimitiveArray) = p.offset + 1

Arrow.nullcount(p::Metadata.PrimitiveArray) = p.null_count

Expand All @@ -29,7 +29,7 @@ function bitmasklength(p::Metadata.PrimitiveArray)
end

function offsetslength(p::Metadata.PrimitiveArray)
isprimitivetype(p.dtype) ? 0 : padding((length(p)+1)*sizeof(Int32))
isprimitivetype(p.dtype) ? 0 : padding((length(p) + 1) * sizeof(Int32))
end

valueslength(p::Metadata.PrimitiveArray) = p.total_bytes - offsetslength(p) - bitmasklength(p)
Expand Down
8 changes: 4 additions & 4 deletions src/write.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
function featherwrite(filename::AbstractString, columns, colnames; description::AbstractString="", metadata::AbstractString="")
function featherwrite(filename::AbstractString, columns, colnames; description::AbstractString = "", metadata::AbstractString = "")
ncol = length(columns)
nrows = length(columns[1])
cols = ArrowVector[arrowformat(_first_col_convert_pass(col)) for col in columns]

open(filename, "w+") do io
writepadded(io, FEATHER_MAGIC_BYTES)
colmetadata = Metadata.Column[writecolumn(io, string(colnames[i]), cols[i]) for i in 1:ncol]
Expand Down Expand Up @@ -43,7 +43,7 @@ function writecontents(::Type{Metadata.PrimitiveArray}, io::IO, A::ArrowVector)
a = position(io)
writecontents(io, A)
b = position(io)
Metadata.PrimitiveArray(A, a, b-a)
Metadata.PrimitiveArray(A, a, b - a)
end


Expand All @@ -55,7 +55,7 @@ end

function writemetadata(io::IO, ctable::Metadata.CTable)
meta = FlatBuffers.build!(ctable)
rng = (meta.head+1):length(meta.bytes)
rng = (meta.head + 1):length(meta.bytes)
writepadded(io, view(meta.bytes, rng))
Int32(length(rng))
end
18 changes: 9 additions & 9 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@ temps = []

@testset "FeatherLib" begin

include("test_readwrite.jl")
include("test_arrow.jl")
include("test_readwrite.jl")
include("test_arrow.jl")

GC.gc(); GC.gc()
for t in temps
try
rm(t)
catch
GC.gc()
GC.gc(); GC.gc()
for t in temps
try
rm(t)
catch
GC.gc()
try
rm(t)
catch
end
end
end
end

# issue #34
# data = DataFrame(A=Union{Missing, String}[randstring(10) for i ∈ 1:100], B=rand(100))
Expand Down
94 changes: 46 additions & 48 deletions test/test_arrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ randdate() = Date(rand(0:4000), rand(1:12), rand(1:27))
randtime() = Dates.Time(rand(0:23), rand(0:59), rand(0:59))
randdatetime() = randdate() + randtime()

randstrings() = String[[randstring(rand(0:20)) for i ∈ 1:(NROWS-1)]; "a"]
randstrings() = String[[randstring(rand(0:20)) for i ∈ 1:(NROWS - 1)]; "a"]
function randstrings(::Missing)
Union{String,Missing}[[rand(Bool) ? missing : randstring(rand(0:20)) for i ∈ 1:(NROWS-1)]; "a"]
Union{String,Missing}[[rand(Bool) ? missing : randstring(rand(0:20)) for i ∈ 1:(NROWS - 1)]; "a"]
end

convstring(str::AbstractString) = String(str)
convstring(::Missing) = missing

@testset "ArrowTests" begin

cols = [rand(Int32,NROWS),
rand(Float64,NROWS),
cols = [rand(Int32, NROWS),
rand(Float64, NROWS),
Date[randdate() for i ∈ 1:NROWS],
DateTime[randdatetime() for i ∈ 1:NROWS],
Dates.Time[randtime() for i ∈ 1:NROWS],
Expand All @@ -34,55 +34,53 @@ cols = [rand(Int32,NROWS),
CategoricalArrays.categorical(randstrings()),
CategoricalArrays.categorical(randstrings(missing))]

colnames = [:ints,:floats,:dates,:datetimes,:times,:missingints,:strings,
colnames = [:ints,:floats,:dates,:datetimes,:times,:missingints,:strings,
:missingstrings,:catstrings,:catstringsmissing]

featherwrite(arrow_tempname, cols, colnames)
featherwrite(arrow_tempname, cols, colnames)

ndf = featherread(arrow_tempname)
ndf = featherread(arrow_tempname)

@test ndf.names == colnames
@test ndf.names == colnames

@test typeof(ndf.columns[1]) == Arrow.Primitive{Int32}
@test typeof(ndf.columns[2]) == Arrow.Primitive{Float64}
@test typeof(ndf.columns[3]) == Arrow.Primitive{Arrow.Datestamp}
@test typeof(ndf.columns[4]) == Arrow.Primitive{Arrow.Timestamp{Dates.Millisecond}}
@test typeof(ndf.columns[5]) == Arrow.Primitive{Arrow.TimeOfDay{Dates.Nanosecond,Int64}}
@test typeof(ndf.columns[6]) == Arrow.NullablePrimitive{Int64}
@test typeof(ndf.columns[7]) == Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[8]) == Arrow.NullableList{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[9]) == Arrow.DictEncoding{String,Arrow.Primitive{Int32},
Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}}
@test typeof(ndf.columns[10]) ==
Arrow.DictEncoding{Union{String,Missing},Arrow.NullablePrimitive{Int32},Arrow.List{String,Arrow.DefaultOffset,
Arrow.Primitive{UInt8}}}
@test typeof(ndf.columns[1]) == Arrow.Primitive{Int32}
@test typeof(ndf.columns[2]) == Arrow.Primitive{Float64}
@test typeof(ndf.columns[3]) == Arrow.Primitive{Arrow.Datestamp}
@test typeof(ndf.columns[4]) == Arrow.Primitive{Arrow.Timestamp{Dates.Millisecond}}
@test typeof(ndf.columns[5]) == Arrow.Primitive{Arrow.TimeOfDay{Dates.Nanosecond,Int64}}
@test typeof(ndf.columns[6]) == Arrow.NullablePrimitive{Int64}
@test typeof(ndf.columns[7]) == Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[8]) == Arrow.NullableList{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[9]) == Arrow.DictEncoding{String,Arrow.Primitive{Int32},Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}}
@test typeof(ndf.columns[10]) ==
Arrow.DictEncoding{Union{String,Missing},Arrow.NullablePrimitive{Int32},Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}}

for j ∈ 1:N_IDX_TESTS
i = rand(1:NROWS)
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert(Date, ndf.columns[3][i])
@test cols[4][i] == convert(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String(ndf.columns[9][i])
@test isequal(cols[10][i], convstring(ndf.columns[10][i]))
end
for j ∈ 1:N_IDX_TESTS
a, b = extrema(rand(1:NROWS, 2))
i = a:b
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert.(Date, ndf.columns[3][i])
@test cols[4][i] == convert.(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert.(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String.(ndf.columns[9][i])
@test isequal(cols[10][i], convstring.(ndf.columns[10][i]))
end
for j ∈ 1:N_IDX_TESTS
i = rand(1:NROWS)
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert(Date, ndf.columns[3][i])
@test cols[4][i] == convert(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String(ndf.columns[9][i])
@test isequal(cols[10][i], convstring(ndf.columns[10][i]))
end
for j ∈ 1:N_IDX_TESTS
a, b = extrema(rand(1:NROWS, 2))
i = a:b
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert.(Date, ndf.columns[3][i])
@test cols[4][i] == convert.(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert.(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String.(ndf.columns[9][i])
@test isequal(cols[10][i], convstring.(ndf.columns[10][i]))
end

end
24 changes: 12 additions & 12 deletions test/test_readwrite.jl
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
@testset "ReadWrite" begin

testdir = joinpath(@__DIR__, "data")
files = map(x -> joinpath(testdir, x), readdir(testdir))
files = map(x->joinpath(testdir, x), readdir(testdir))

for f in files
res = featherread(f)
columns, headers = res.columns, res.names
res = featherread(f)
columns, headers = res.columns, res.names

ncols = length(columns)
nrows = length(columns[1])

temp = tempname()
push!(temps, temp)

featherwrite(temp, columns, headers, description=res.description, metadata=res.metadata)
featherwrite(temp, columns, headers, description = res.description, metadata = res.metadata)

res2 = featherread(temp)
columns2, headers2 = res2.columns, res2.names
res2 = featherread(temp)
columns2, headers2 = res2.columns, res2.names

@test length(columns2) == ncols

@test headers==headers2
@test headers == headers2

for (c1,c2) in zip(columns, columns2)
@test length(c1)==nrows
@test length(c2)==nrows
for (c1, c2) in zip(columns, columns2)
@test length(c1) == nrows
@test length(c2) == nrows
for i = 1:nrows
@test isequal(c1[i], c2[i])
end
end

@test res.description == res2.description
@test res.metadata == res2.metadata
@test res.description == res2.description
@test res.metadata == res2.metadata
# for (col1,col2) in zip(source.ctable.columns,sink.ctable.columns)
# @test col1.name == col2.name
# @test col1.metadata_type == col2.metadata_type
Expand Down

0 comments on commit d303665

Please sign in to comment.