diff --git a/.travis.yml b/.travis.yml index 42c1c2a..0679809 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,8 @@ os: - linux - osx julia: - - 0.6 + - 0.7 + - 1.0 - nightly notifications: email: false @@ -15,6 +16,12 @@ matrix: allow_failures: - julia: nightly +branches: + only: + - master + - /release-.*/ + - /v(\d+)\.(\d+)\.(\d+)/ + ## uncomment and modify the following lines to manually install system packages #addons: # apt: # apt-get for linux @@ -28,4 +35,4 @@ matrix: # - julia -e 'Pkg.clone(pwd()); Pkg.build("ParquetFiles"); Pkg.test("ParquetFiles"; coverage=true)' after_success: # push coverage results to Codecov - - julia -e 'cd(Pkg.dir("ParquetFiles")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' + - julia -e 'using Pkg; cd(Pkg.dir("ParquetFiles")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/NEWS.md b/NEWS.md index d1c9c9d..17ce64c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,2 +1,6 @@ +# ParquetFiles.jl v0.1.0 Release Notes +* Drop julia 0.6 support, add julia 0.7/1.0 support +* Add show methods + # ParquetFiles.jl v0.0.1 Release Notes * Initial release diff --git a/README.md b/README.md index 0812465..472826f 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,18 @@ # ParquetFiles [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) -[![Build Status](https://travis-ci.org/davidanthoff/ParquetFiles.jl.svg?branch=master)](https://travis-ci.org/davidanthoff/ParquetFiles.jl) -[![Build status](https://ci.appveyor.com/api/projects/status/svgqskv2wul3egrr/branch/master?svg=true)](https://ci.appveyor.com/project/davidanthoff/parquetfiles-jl/branch/master) +[![Build Status](https://travis-ci.org/queryverse/ParquetFiles.jl.svg?branch=master)](https://travis-ci.org/queryverse/ParquetFiles.jl) +[![Build 
status](https://ci.appveyor.com/api/projects/status/984c6kxfhdhgj77m/branch/master?svg=true)](https://ci.appveyor.com/project/queryverse/parquetfiles-jl/branch/master) [![ParquetFiles](http://pkg.julialang.org/badges/ParquetFiles_0.6.svg)](http://pkg.julialang.org/?pkg=ParquetFiles) -[![codecov.io](http://codecov.io/github/davidanthoff/ParquetFiles.jl/coverage.svg?branch=master)](http://codecov.io/github/davidanthoff/ParquetFiles.jl?branch=master) +[![codecov.io](http://codecov.io/github/queryverse/ParquetFiles.jl/coverage.svg?branch=master)](http://codecov.io/github/queryverse/ParquetFiles.jl?branch=master) ## Overview -This package provides load support for [Parquet](https://parquet.apache.org/) files under the -[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. +This package provides load support for [Parquet](https://parquet.apache.org/) files under the [FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. ## Installation -Use ``Pkg.add("ParquetFiles")`` in Julia to install ParquetFiles and its dependencies. +Use ``] add ParquetFiles`` in Julia to install ParquetFiles and its dependencies. ## Usage @@ -22,18 +21,15 @@ Use ``Pkg.add("ParquetFiles")`` in Julia to install ParquetFiles and its depende To read a Parquet file into a ``DataFrame``, use the following julia code: ````julia -using FileIO, ParquetFiles, DataFrames +using ParquetFiles, DataFrames df = DataFrame(load("data.parquet")) ```` -The call to ``load`` returns a ``struct`` that is an [IterableTable.jl](https://github.com/davidanthoff/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/davidanthoff/IterableTables.jl). 
Here are some examples of materializing a Parquet file into data structures that are not a ``DataFrame``: +The call to ``load`` returns a ``struct`` that is an [IterableTables.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing a Parquet file into data structures that are not a ``DataFrame``: ````julia -using FileIO, ParquetFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly - -# Load into a DataTable -dt = DataTable(load("data.parquet")) +using ParquetFiles, IndexedTables, TimeSeries, Temporal, VegaLite # Load into an IndexedTable it = IndexedTable(load("data.parquet")) @@ -45,7 +41,7 @@ ta = TimeArray(load("data.parquet")) ts = TS(load("data.parquet")) # Plot directly with Gadfly -plot(load("data.parquet"), x=:a, y=:b, Geom.line) +@vlplot(:point, data=load("data.parquet"), x=:a, y=:b) ```` ### Using the pipe syntax @@ -53,9 +49,9 @@ plot(load("data.parquet"), x=:a, y=:b, Geom.line) ``load`` also support the pipe syntax. For example, to load a Parquet file into a ``DataFrame``, one can use the following code: ````julia -using FileIO, ParquetFiles, DataFrame +using ParquetFiles, DataFrames df = load("data.parquet") |> DataFrame ```` -The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/davidanthoff/Query.jl) queries, for example one can easily load a Parquet file, pipe it into a query, then pipe it to the ``save`` function to store the results in a new file. +The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load a Parquet file, pipe it into a query, then pipe it to the ``save`` function to store the results in a new file. 
diff --git a/REQUIRE b/REQUIRE index 14a5857..f304252 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,7 +1,8 @@ -julia 0.6 -Parquet 0.1.0 -IteratorInterfaceExtensions 0.0.2 -TableTraits 0.0.3 -NamedTuples 4.0.0 -FileIO 0.9.0 -IterableTables 0.6.0 +julia 0.7 +Parquet 0.2.0 +IteratorInterfaceExtensions 0.1.1 +TableTraits 0.4.0 +FileIO 1.0.0 +IterableTables 0.9.0 +DataValues 0.4.5 +TableShowUtils 0.2.0 diff --git a/appveyor.yml b/appveyor.yml index 52ce55a..5167de8 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,19 +1,22 @@ environment: matrix: - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" + - julia_version: 0.7 + - julia_version: 1 + - julia_version: nightly + +platform: + - x86 # 32-bit + - x64 # 64-bit matrix: - allow_failures: - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" + allow_failures: + - julia_version: nightly branches: only: - master - /release-.*/ + - /v(\d+)\.(\d+)\.(\d+)/ notifications: - provider: Email @@ -22,24 +25,18 @@ notifications: on_build_status_changed: false install: - - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" -# If there's a newer build queued for the same PR, cancel this one - - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` - https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` - Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` - throw 
"There are newer queued builds for this pull request, failing early." } -# Download most recent Julia Windows binary - - ps: (new-object net.webclient).DownloadFile( - $env:JULIA_URL, - "C:\projects\julia-binary.exe") -# Run installer silently, output to C:\projects\julia - - C:\projects\julia-binary.exe /S /D=C:\projects\julia + - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) build_script: -# Need to convert from shallow to complete for Pkg.clone to work - - IF EXIST .git\shallow (git fetch --unshallow) - - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"ParquetFiles\"); Pkg.build(\"ParquetFiles\")" + - echo "%JL_BUILD_SCRIPT%" + - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" test_script: - - C:\projects\julia\bin\julia -e "Pkg.test(\"ParquetFiles\")" + - echo "%JL_TEST_SCRIPT%" + - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" + +# # Uncomment to support code coverage upload. Should only be enabled for packages +# # which would have coverage gaps without running on Windows +# on_success: +# - echo "%JL_CODECOV_SCRIPT%" +# - C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%" diff --git a/src/ParquetFiles.jl b/src/ParquetFiles.jl index 95314d1..74852d9 100644 --- a/src/ParquetFiles.jl +++ b/src/ParquetFiles.jl @@ -1,8 +1,7 @@ module ParquetFiles -using Parquet, IteratorInterfaceExtensions, TableTraits, NamedTuples, - FileIO -import IterableTables +using Parquet, IteratorInterfaceExtensions, TableTraits, FileIO +import IterableTables, DataValues, TableShowUtils export load @@ -10,6 +9,20 @@ struct ParquetFile filename::String end +function Base.show(io::IO, source::ParquetFile) + TableShowUtils.printtable(io, getiterator(source), "Parquet file") +end + +function Base.show(io::IO, ::MIME"text/html", source::ParquetFile) + TableShowUtils.printHTMLtable(io, getiterator(source)) +end +Base.Multimedia.showable(::MIME"text/html", source::ParquetFile) = true + +function 
Base.show(io::IO, ::MIME"application/vnd.dataresource+json", source::ParquetFile) + TableShowUtils.printdataresource(io, getiterator(source)) +end +Base.Multimedia.showable(::MIME"application/vnd.dataresource+json", source::ParquetFile) = true + struct ParquetNamedTupleIterator{T,T_row} rc::RecCursor nrows::Int @@ -23,21 +36,18 @@ function Base.length(itr::ParquetNamedTupleIterator) return itr.nrows end -function Base.start(itr::ParquetNamedTupleIterator) - return start(itr.rc) -end - -@generated function Base.next(itr::ParquetNamedTupleIterator{T,T_row}, state) where {T,T_row} +@generated function Base.iterate(itr::ParquetNamedTupleIterator{T,T_row}, state...) where {T,T_row} names = fieldnames(T) - x = quote - v, next_state = next(itr.rc, state) - return T($([T.types[i]<:String ? :(String(copy(v.$(names[i])))) : :(v.$(names[i])) for i=1:length(T.types)]...)), next_state + quote + y = iterate(itr.rc, state...) + if y===nothing + return nothing + else + v = y[1] + next_state = y[2] + return T(($([fieldtype(T, i)<:String ? :(String(copy(v.$(names[i])))) : :(v.$(names[i])) for i=1:length(names)]...),)), next_state + end end - return x -end - -function Base.done(itr::ParquetNamedTupleIterator, state) - return done(itr.rc, state) end function fileio_load(f::FileIO.File{FileIO.format"Parquet"}) @@ -59,7 +69,7 @@ function IteratorInterfaceExtensions.getiterator(file::ParquetFile) col_names = fieldnames(T_row) col_types = [i<:Vector{UInt8} ? 
String : i for i in T_row.types] - T = eval(:(@NT($(col_names...)))){col_types...} + T = NamedTuple{(col_names...,), Tuple{col_types...}} rc = RecCursor(p, 1:nrows(p), colnames(p), JuliaBuilder(p, T_row)) diff --git a/test/runtests.jl b/test/runtests.jl index 9598e10..1657df2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,10 +1,12 @@ using ParquetFiles -using NamedTuples -using Base.Test +using Parquet +using IteratorInterfaceExtensions +using TableTraits +using Test @testset "ParquetFiles" begin -parquet_pkg_dir = Pkg.dir("Parquet") +parquet_pkg_dir = joinpath(dirname(pathof(Parquet)), "..") include(joinpath(parquet_pkg_dir, "test", "get_parcompat.jl")) @@ -20,6 +22,6 @@ it = IteratorInterfaceExtensions.getiterator(pqf) ar = collect(it) @test length(ar) == 25 -@test ar[1] == @NT(n_nationkey = 0, n_name = "ALGERIA", n_regionkey = 0, n_comment = " haggle. carefully final deposits detect slyly agai") +@test ar[1] == (n_nationkey = 0, n_name = "ALGERIA", n_regionkey = 0, n_comment = " haggle. carefully final deposits detect slyly agai") end