diff --git a/.gitignore b/.gitignore index 8c960ec..3f02ca7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.jl.cov *.jl.*.cov *.jl.mem +Manifest.toml diff --git a/Project.toml b/Project.toml index 5ff9f3d..7b4fb47 100644 --- a/Project.toml +++ b/Project.toml @@ -1,17 +1,26 @@ name = "EmpiricalCDFs" uuid = "0dcf7749-4f9f-5e13-9c57-506664207bbc" +license = "MIT" version = "0.2.2" -license= "MIT" [deps] -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] +Aqua = ">= 0.8" +JET = ">= 0.0" +Printf = ">= 0.0" +Random = ">= 0.0" +Statistics = "1" +Test = ">= 0.0" julia = "1" [extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "Aqua", "JET"] diff --git a/README.md b/README.md index 491e3e6..642f9e4 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,18 @@ +[![Build Status](https://github.com/jlapeyre/EmpiricalCDFs.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/jlapeyre/EmpiricalCDFs.jl/actions/workflows/CI.yml?query=branch%3Amain) +[![Coverage](https://codecov.io/gh/jlapeyre/EmpiricalCDFs.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/jlapeyre/EmpiricalCDFs.jl) +[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) +[![JET QA](https://img.shields.io/badge/JET.jl-%E2%9C%88%EF%B8%8F-%23aa4444)](https://github.com/aviatesk/JET.jl) + # EmpiricalCDFs.jl *Empirical cumulative distribution functions* -[![](https://img.shields.io/badge/docs-latest-blue.svg)](https://jlapeyre.github.io/EmpiricalCDFs.jl/latest) -Linux, OSX: [![Build Status](https://travis-ci.org/jlapeyre/EmpiricalCDFs.jl.svg?branch=master)](https://travis-ci.org/jlapeyre/EmpiricalCDFs.jl) -  -Windows: [![Build 
Status](https://ci.appveyor.com/api/projects/status/github/jlapeyre/EmpiricalCDFs.jl?branch=master&svg=true)](https://ci.appveyor.com/project/jlapeyre/empiricalcdfs-jl) -      -[![Coverage Status](https://coveralls.io/repos/jlapeyre/EmpiricalCDFs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/jlapeyre/EmpiricalCDFs.jl?branch=master) -[![codecov.io](http://codecov.io/github/jlapeyre/EmpiricalCDFs.jl/coverage.svg?branch=master)](http://codecov.io/github/jlapeyre/EmpiricalCDFs.jl?branch=master) - Provides [empirical cumulative distribution functions (CDFs)](https://en.wikipedia.org/wiki/Empirical_distribution_function) (or "empirical distribution functions" as they are know to probabalists). See the documentation [https://jlapeyre.github.io/EmpiricalCDFs.jl/latest](https://jlapeyre.github.io/EmpiricalCDFs.jl/latest). -~~I'm surprised that this module is not more popular (if stars are a good measure) because it's rather generic, -I use it frequently for new projects, -and the functionality is not available elsewhere.~~ In the meantime it's gained some stars. + *EmpiricalCDFs* implements empirical CDFs; building, evaluating, random sampling, evaluating the inverse, etc. It is useful especially for examining the @@ -26,6 +21,10 @@ For this purpose, you specify a lower cutoff; data points below this value will resulting CDF will still be properly normalized. This ability to process and filter data [online](https://en.wikipedia.org/wiki/Online_algorithm) is absent in `StatsBase.ecdf`. +~~I'm surprised that this module is not more popular (if stars are a good measure) because it's rather generic, +I use it frequently for new projects, +and the functionality is not available elsewhere.~~ In the meantime it's gained some stars. + diff --git a/src/IOcdf.jl b/src/IOcdf.jl index e5c2145..b06051a 100644 --- a/src/IOcdf.jl +++ b/src/IOcdf.jl @@ -89,14 +89,36 @@ Return the version number of the file format. 
""" version(cdff::CDFfile) = cdff.vn -for f in (:sort!, :push!, :append!, :getindex, :length, :size, :firstindex, :lastindex, :rand, :minimum, :maximum) +for f in (:sort!, :push!, :append!, :getindex, :length, :size, :firstindex, :lastindex) @eval Base.$(f)(cdff::CDFfile, args...) = $(f)(cdff.cdf, args...) end -for f in (:mean, :std, :quantile ) +import Random +for f in (:rand,) + @eval Base.$(f)(cdff::CDFfile) = $(f)(cdff.cdf) + @eval Base.$(f)(X::Random.AbstractRNG, cdff::CDFfile) = $(f)(X, cdff.cdf) +end + +for f in (:extrema, :maximum, :minimum) + @eval begin + Base.$(f)(cdf::CDFfile; kws...) = $(f)(cdf.cdf; kws...) + Base.$(f)(func, cdf::CDFfile; kws...) = $(f)(func, cdf.cdf; kws...) + end +end + + +for f in (:std, :quantile ) @eval Statistics.$(f)(cdff::CDFfile, args...) = $(f)(cdff.cdf, args...) end +for f in (:mean,) + @eval begin + Statistics.$(f)(cdf::CDFfile; kws...) = Statistics.$(f)(cdf.cdf; kws...) + Statistics.$(f)(func, cdf::CDFfile; kws...) = Statistics.$(f)(func, cdf.cdf; kws...) + end +end + + for f in (:getcdfindex, :counts) # :mle, :KSstatistic, :mleKS, :scanmle) @eval $(f)(cdff::CDFfile, args...) = $(f)(cdff.cdf, args...) @@ -111,7 +133,7 @@ end #### -function make_CDFfile_version_string(v) +function make_CDFfile_version_string(v::VersionNumber) nchars = 100 s::String = "CDFfile " * string(v) s = s * " "^(nchars-length(s)) diff --git a/src/cdfs.jl b/src/cdfs.jl index 672c69c..afb9269 100644 --- a/src/cdfs.jl +++ b/src/cdfs.jl @@ -20,19 +20,37 @@ return the array holding samples for `cdf`. data(cdf::AbstractEmpiricalCDF) = cdf.xdata # Extend base functions -for f in (:length, :size, :minimum, :maximum, :extrema, :issorted, :iterate, :getindex, :lastindex, :firstindex) +for f in (:length, :size, :issorted, :iterate, :getindex, :lastindex, :firstindex) @eval begin Base.$(f)(cdf::AbstractEmpiricalCDF, args...) = $(f)(cdf.xdata, args...) end end +for f in (:extrema, :maximum, :minimum) + @eval begin + Base.$(f)(cdf::AbstractEmpiricalCDF; kws...) 
= $(f)(cdf.xdata; kws...) + Base.$(f)(func, cdf::AbstractEmpiricalCDF; kws...) = $(f)(func, cdf.xdata; kws...) + end +end + +# Base.extrema(cdf::AbstractEmpiricalCDF, a::AbstractArray) = extrema(cdf.xdata, a) +import Base.Order +Base.issorted(cdf::AbstractEmpiricalCDF, ord::Order.Ordering) = issorted(cdf.xdata, ord) + # Extend Statistics functions -for f in (:mean, :median, :middle, :std, :stdm, :var, :varm, :quantile) +for f in (:median, :middle, :std, :stdm, :var, :varm, :quantile) @eval begin Statistics.$(f)(cdf::AbstractEmpiricalCDF, args...; kws...) = Statistics.$(f)(cdf.xdata, args...; kws...) end end +for f in (:mean,) + @eval begin + Statistics.$(f)(cdf::AbstractEmpiricalCDF; kws...) = Statistics.$(f)(cdf.xdata; kws...) + Statistics.$(f)(func, cdf::AbstractEmpiricalCDF; kws...) = Statistics.$(f)(func, cdf.xdata; kws...) + end +end + # Same as above, but the return value is the cdf @doc """ @@ -226,12 +244,14 @@ function _inverse(cdf::EmpiricalCDFHi, x) cdf.xdata[ind] end +import Random """ rand(cdf::EmpiricalCDF) Pick a random sample from the distribution represented by `cdf`. 
""" Base.rand(cdf::AbstractEmpiricalCDF) = _inverse(cdf,rand()) +Base.rand(X::Random.AbstractRNG, cdf::AbstractEmpiricalCDF) = _inverse(cdf,rand(X)) """ finv(cdf::AbstractEmpiricalCDF) --> Function @@ -253,14 +273,14 @@ julia> maximum(cdf) """ function finv(cdf::EmpiricalCDF) function (c::Real) - (c < 0 || c >= 1) && throw(DomainError()) + (c < 0 || c >= 1) && throw(DomainError(c)) _inverse(cdf,c) end end function finv(cdf::EmpiricalCDFHi) function (c::Real) - (c < cdf.lowreject || c >= 1) && throw(DomainError()) + (c < cdf.lowreject || c >= 1) && throw(DomainError(c)) _inverse(cdf,c) end end diff --git a/test/aqua_test.jl b/test/aqua_test.jl new file mode 100644 index 0000000..e4a483a --- /dev/null +++ b/test/aqua_test.jl @@ -0,0 +1,36 @@ +using Test +using EmpiricalCDFs +using Aqua: Aqua + +const ThePackage = EmpiricalCDFs + +@testset "aqua deps compat" begin + Aqua.test_deps_compat(ThePackage) +end + +@testset "aqua unbound_args" begin + Aqua.test_unbound_args(ThePackage) +end + +@testset "aqua undefined exports" begin + Aqua.test_undefined_exports(ThePackage) +end + +# TODO: Not sure exactly which versions are ok. +if VERSION >= v"1.7" + @testset "aqua test ambiguities" begin + Aqua.test_ambiguities([ThePackage, Core, Base]) + end +end + +@testset "aqua piracies" begin + Aqua.test_piracies(ThePackage) +end + +@testset "aqua project extras" begin + Aqua.test_project_extras(ThePackage) +end + +@testset "aqua stale deps" begin + Aqua.test_stale_deps(ThePackage) +end diff --git a/test/jet_test.jl b/test/jet_test.jl new file mode 100644 index 0000000..447f2f7 --- /dev/null +++ b/test/jet_test.jl @@ -0,0 +1,101 @@ +using Test +using EmpiricalCDFs +using JET + +const package_to_analyze = EmpiricalCDFs + +## Ignore these errors. The first string in the pair is +## the report message. The second is the file it occurs in. +## Not very precise, but ok for now. +const SKIP_MATCHES = [ + # Trying to print a Sym could raise this error. 
+# ("type Nothing has no field den", "parameters.jl"), +] + +## Each entry is a predicate that takes a single report. +## A report is skipped if any predicate returns true. +const SKIP_REP_TESTS = [ +# rep -> rep isa JET.UncaughtExceptionReport, # We intentionally throw MethodError +# rep -> rep isa JET.BuiltinErrorReport, +# rep -> string(rep) == "MethodErrorReport(no matching method found `eps(::Type{Union{}})`: eps(T::Type{Union{}}))", +] + +## +## JET. Static analysis of the package +## + +function analyze_package(package_name=package_to_analyze) + result = report_package( + string(package_name); + report_pass=JET.BasicPass() + # ignored_modules=( + # ) + ) + reports = JET.get_reports(result) + return reports +end + +""" + match_report(package, report::InferenceErrorReport, msg::String, file::String) + +Return `true` if the message is `msg` and the first file in the stack trace is `file`. + +`file` should be given relative to the `src` directory of `package`. +""" +function match_report(package, report, msg::String, file::String) + hasproperty(report, :msg) || return false + report.msg != msg && return false + filepath = joinpath(dirname(pathof(package)), file) + report_filepath = string(report.vst[1].file) + return report_filepath == filepath +end + +function match_reports(package, report, match_data::Vector) + for (msg, file) in match_data + match_report(package, report, msg, file) && return true + end + return false +end + +function do_rep_skip_test(rep, skip_rep_tests=SKIP_REP_TESTS) + for rep_test in skip_rep_tests + rep_test(rep) && return true + end + return false +end + +# Filter out reports that we don't consider failures. +# We could flag some that could be fixed as broken tests. +# This could be more fine grained. 
+ +function filter_reports(reports, package) + somereports = empty(reports) + for rep in reports + do_rep_skip_test(rep) && continue + match_reports(package, rep, SKIP_MATCHES) && continue + push!(somereports, rep) + end + return somereports +end + +# print just some of the report +function print_report(report) + hasproperty(report, :msg) && println(report.msg) + return hasproperty(report, :vst) && println(report.vst) +end + +function run_reports() + reports = analyze_package(package_to_analyze) + somereports = filter_reports(reports, package_to_analyze) + @show somereports + number_of_ignored_jet_reports = length(reports) - length(somereports) + @info string(number_of_ignored_jet_reports, " reports ignored.") + @info string(length(somereports), " reports not ignored.") + for rep in somereports + print_report(rep) + end + @test isempty(somereports) + return (somereports, reports) +end + +run_reports() diff --git a/test/runtests.jl b/test/runtests.jl index a304046..01caacf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,15 @@ using EmpiricalCDFs.IOcdf: readcdf, getcdf using Test using Statistics +include("aqua_test.jl") + +# Run the JET analysis on Julia 1.7 up to (not including) 1.12; v"1.12-" also excludes 1.12 prereleases. +if VERSION >= v"1.7" && VERSION < v"1.12-" + @testset "JET" begin + include("jet_test.jl") + end +end + @testset "construction" begin @test (cdf = EmpiricalCDF(); true) @test (cdf = EmpiricalCDF(); append!(cdf,rand(10^3)) ; true)