diff --git a/.travis.yml b/.travis.yml index 0ff5933..569fd85 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ after_success: jobs: include: - stage: "Documentation" - julia: 1.0 + julia: 1.5 os: linux script: - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' diff --git a/Project.toml b/Project.toml index 4fb1e8f..bb8ef4b 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] -AxisKeys = "0.1.5" +AxisKeys = "0.1" Distances = "0.8, 0.9" IterTools = "1.2, 1.3" Missings = "0.4" @@ -35,8 +35,9 @@ Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["AxisArrays", "AxisKeys", "Combinatorics", "DataFrames", "Dates", "Distances", "RDatasets", "Test"] +test = ["AxisArrays", "AxisKeys", "Combinatorics", "DataFrames", "Dates", "Distances", "Documenter", "RDatasets", "Test"] diff --git a/src/chain.jl b/src/chain.jl index 1a415b7..285808e 100644 --- a/src/chain.jl +++ b/src/chain.jl @@ -26,10 +26,17 @@ Compose new chains with the composition operator # Example ```jldoctest -julia> using Impute: impute, Interpolate, NOCB, LOCF +julia> using Impute: Impute, Interpolate, NOCB, LOCF -julia> imp = Interpolate() ∘ NOCB() ∘ LOCF() -Impute.Chain(Impute.Imputor[Interpolate(2), NOCB(2), LOCF(2)]) +julia> M = [missing 2.0 missing missing 5.0; 1.1 2.2 missing 4.4 missing] +2×5 Array{Union{Missing, Float64},2}: + missing 2.0 missing missing 5.0 + 1.1 2.2 missing 4.4 missing + +julia> Impute.run(M, Interpolate() ∘ NOCB() ∘ LOCF(); dims=:rows) +2×5 Array{Union{Missing, Float64},2}: + 2.0 2.0 3.0 4.0 
5.0 + 1.1 2.2 3.3 4.4 4.4 ``` """ Base.:(∘)(a::Transform, b::Transform) = Chain(Transform[a, b]) diff --git a/src/deprecated.jl b/src/deprecated.jl index 8d013ca..70cf946 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,4 +1,6 @@ # Introduced in 0.6 +# NOTE: Deprecated Imputor docstrings use julia-repl rather than jldoctest since depwarn +# output isn't consistent across installs. Base.@deprecate_binding( AbstractContext, Assertion, @@ -22,7 +24,7 @@ be handled independently. passed the data with missing data removed (e.g, `mean`) # Example -```jldoctest +```julia-repl julia> using Impute: Fill, impute julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] @@ -30,7 +32,7 @@ julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, Fill(); dims=2) +julia> impute(M, Fill(); dims=:rows) 2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 2.66667 2.66667 5.0 1.1 2.2 3.3 3.025 5.5 @@ -106,18 +108,22 @@ end provided. # Example -```jldoctest +```julia-repl julia> using Impute: DropObs, impute -julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] -2×5 Array{Union{Missing, Float64},2}: - 1.0 2.0 missing missing 5.0 - 1.1 2.2 3.3 missing 5.5 - -julia> impute(M, DropObs(); dims=2) -2×3 Array{Union{Missing, Float64},2}: - 1.0 2.0 5.0 - 1.1 2.2 5.5 +julia> M = [1.0 1.1; 2.0 2.2; missing 3.3; missing missing; 5.0 5.5] +5×2 Array{Union{Missing, Float64},2}: + 1.0 1.1 + 2.0 2.2 + missing 3.3 + missing missing + 5.0 5.5 + +julia> impute(M, DropObs()) +3×2 Array{Union{Missing, Float64},2}: + 1.0 1.1 + 2.0 2.2 + 5.0 5.5 ``` """ struct DropObs <: Imputor @@ -170,7 +176,7 @@ end `Tables.table` and removes them from the input data. 
# Examples -```jldoctest +```julia-repl julia> using Impute: DropVars, impute julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] @@ -178,9 +184,10 @@ julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, DropVars(); dims=2) -1×5 Array{Union{Missing, Float64},2}: - 1.1 2.2 3.3 missing 5.5 +julia> impute(M, DropVars()) +2×3 Array{Union{Missing, Float64},2}: + 1.0 2.0 5.0 + 1.1 2.2 5.5 ``` """ struct DropVars <: Imputor diff --git a/src/functional.jl b/src/functional.jl index c98ee10..0ae450a 100644 --- a/src/functional.jl +++ b/src/functional.jl @@ -113,7 +113,7 @@ filter!(f::Function; kwargs...) = data -> apply!(data, Filter(f); kwargs...) See [DropObs](@ref) for details. # Example -``` +```julia-repl julia> using DataFrames; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) @@ -141,17 +141,17 @@ julia> Impute.dropobs(df; dims=2) @doc """ Impute.dropvars(data; dims=1) -[Deprecated] Finds variables with too many missing values in a `AbstractMatrix` or `Tables.table` and +[Deprecated] Finds variables with missing values in an `AbstractMatrix` or `Tables.table` and removes them from the input data. See [DropVars](@ref) for details. # Example -```jldoctest +```julia-repl julia> using DataFrames; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? 
│ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -160,15 +160,7 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, │ 5 │ 5.0 │ 5.5 │ julia> Impute.dropvars(df) -5×1 DataFrames.DataFrame -│ Row │ b │ -│ │ Float64 │ -├─────┼──────────┤ -│ 1 │ 1.1 │ -│ 2 │ 2.2 │ -│ 3 │ 3.3 │ -│ 4 │ missing │ -│ 5 │ 5.5 │ +0×0 DataFrame ``` """ dropvars @@ -183,10 +175,11 @@ containing `missing`s. ```jldoctest julia> using DataFrames; using Impute: Impute + julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -195,24 +188,16 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, │ 5 │ 5.0 │ 5.5 │ julia> Impute.filter(df; dims=:cols) -5×1 DataFrames.DataFrame -│ Row │ b │ -│ │ Float64 │ -├─────┼──────────┤ -│ 1 │ 1.1 │ -│ 2 │ 2.2 │ -│ 3 │ 3.3 │ -│ 4 │ missing │ -│ 5 │ 5.5 │ +0×0 DataFrame julia> Impute.filter(df; dims=:rows) -3×2 DataFrames.DataFrame -│ Row │ a │ b │ -│ │ Float64 │ Float64 │ -├─────┼──────────┼──────────┤ -│ 1 │ 1.0 │ 1.1 │ -│ 2 │ 2.0 │ 2.2 │ -│ 3 │ 5.0 │ 5.5 │ +3×2 DataFrame +│ Row │ a │ b │ +│ │ Float64 │ Float64 │ +├─────┼─────────┼─────────┤ +│ 1 │ 1.0 │ 1.1 │ +│ 2 │ 2.0 │ 2.2 │ +│ 3 │ 5.0 │ 5.5 │ ``` """ filter @@ -227,9 +212,9 @@ See [Interpolate](@ref) for details. julia> using DataFrames; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? 
│ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -238,9 +223,9 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, │ 5 │ 5.0 │ 5.5 │ julia> Impute.interp(df) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -256,13 +241,13 @@ julia> Impute.interp(df) Fills in the missing data with a specific value. See [Fill](@ref) for details. # Example -```jldoctest +```julia-repl julia> using DataFrames; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -271,9 +256,9 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, │ 5 │ 5.0 │ 5.5 │ julia> Impute.fill(df; value=-1.0) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -294,9 +279,9 @@ observation. See [LOCF](@ref) for details. julia> using DataFrames; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -305,9 +290,9 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, │ 5 │ 5.0 │ 5.5 │ julia> Impute.locf(df) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -328,9 +313,9 @@ observation. See [LOCF](@ref) for details. 
julia> using DataFrames; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -339,9 +324,9 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, │ 5 │ 5.0 │ 5.5 │ julia> Impute.nocb(df) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -359,13 +344,13 @@ categorical variables. Furthermore, it completes imputation while preserving the distributional properties of the variables (e.g., mean, standard deviation). # Example -```jldoctest +```julia-repl julia> using DataFrames; using Random; using Impute: Impute julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, 3.3, missing, 5.5]) -5×2 DataFrames.DataFrame +5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? │ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ @@ -376,12 +361,12 @@ julia> df = DataFrame(:a => [1.0, 2.0, missing, missing, 5.0], :b => [1.1, 2.2, julia> Impute.srs(df; rng=MersenneTwister(1234)) 5×2 DataFrame │ Row │ a │ b │ -│ │ Float64 │ Float64 │ +│ │ Float64? │ Float64? 
│ ├─────┼──────────┼──────────┤ │ 1 │ 1.0 │ 1.1 │ │ 2 │ 2.0 │ 2.2 │ │ 3 │ 1.0 │ 3.3 │ -│ 4 │ 5.0 │ 3.3 │ +│ 4 │ 2.0 │ 3.3 │ │ 5 │ 5.0 │ 5.5 │ ``` """ srs diff --git a/src/imputors/interp.jl b/src/imputors/interp.jl index 205c00a..ce53a58 100644 --- a/src/imputors/interp.jl +++ b/src/imputors/interp.jl @@ -18,7 +18,7 @@ julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, Interpolate(); dims=2) +julia> impute(M, Interpolate(); dims=:rows) 2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 3.0 4.0 5.0 1.1 2.2 3.3 4.4 5.5 diff --git a/src/imputors/locf.jl b/src/imputors/locf.jl index 19df9c7..33b9710 100644 --- a/src/imputors/locf.jl +++ b/src/imputors/locf.jl @@ -21,7 +21,7 @@ julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, LOCF(); dims=2) +julia> impute(M, LOCF(); dims=:rows) 2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 2.0 2.0 5.0 1.1 2.2 3.3 3.3 5.5 diff --git a/src/imputors/nocb.jl b/src/imputors/nocb.jl index 33ebad8..ab9b3f9 100644 --- a/src/imputors/nocb.jl +++ b/src/imputors/nocb.jl @@ -22,7 +22,7 @@ julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, NOCB(); dims=2) +julia> impute(M, NOCB(); dims=:rows) 2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 5.0 5.0 5.0 1.1 2.2 3.3 5.5 5.5 diff --git a/src/imputors/replace.jl b/src/imputors/replace.jl index 5bba471..816f24f 100644 --- a/src/imputors/replace.jl +++ b/src/imputors/replace.jl @@ -14,7 +14,7 @@ If the input data is of a different type then the no replacement will be perform julia> using Impute: Replace, impute julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] -2×5 Array{Union{Nothing, Float64},2}: +2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 diff --git a/src/imputors/srs.jl b/src/imputors/srs.jl 
index 08be2bc..6182239 100644 --- a/src/imputors/srs.jl +++ b/src/imputors/srs.jl @@ -3,6 +3,8 @@ struct SRS <: Imputor end +# Docstring below uses julia-repl because the RNG may give different results on different +# versions of Julia. """ SRS(; rng=Random.GLOBAL_RNG) @@ -21,7 +23,7 @@ for both categorical and continuous data. * `rng::AbstractRNG`: A random number generator to use for observation selection # Example -```jldoctest +```julia-repl julia> using Random; using Impute: SRS, impute julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] @@ -29,9 +31,9 @@ julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, SRS(; rng=MersenneTwister(1234)); dims=2) +julia> impute(M, SRS(; rng=MersenneTwister(1234)); dims=:rows) 2×5 Array{Union{Missing, Float64},2}: - 1.0 2.0 1.0 5.0 5.0 + 1.0 2.0 1.0 2.0 5.0 1.1 2.2 3.3 3.3 5.5 ``` """ diff --git a/src/imputors/standardize.jl b/src/imputors/standardize.jl index 7dd3c55..2616c55 100644 --- a/src/imputors/standardize.jl +++ b/src/imputors/standardize.jl @@ -14,15 +14,15 @@ Warning: In-place methods are only applicable for datasets which already `allowm ```jldoctest julia> using Impute: Standardize, impute -julia> M = [1.0 2.0 nothing NaN 5.0; 1.1 2.2 3.3 nothing 5.5] -2×5 Array{Union{Nothing, Float64},2}: - 1.0 2.0 nothing NaN 5.0 - 1.1 2.2 3.3 nothing 5.5 +julia> M = [1.0 2.0 -9999.0 NaN 5.0; 1.1 2.2 3.3 0.0 5.5] +2×5 Array{Float64,2}: + 1.0 2.0 -9999.0 NaN 5.0 + 1.1 2.2 3.3 0.0 5.5 -julia> impute(M, Standardize(; values=(NaN, Nothing)); dims=2) +julia> impute(M, Standardize(; values=(NaN, -9999.0, 0.0))) 2×5 Array{Union{Missing, Float64},2}: - 1.0 2.0 missing missing 5.0 - 1.1 2.2 3.3 missing 5.5 + 1.0 2.0 missing missing 5.0 + 1.1 2.2 3.3 missing 5.5 ``` """ struct Standardize <: Imputor diff --git a/src/imputors/substitute.jl b/src/imputors/substitute.jl index 1b5beef..f2cdcf9 100644 --- a/src/imputors/substitute.jl +++ 
b/src/imputors/substitute.jl @@ -22,14 +22,14 @@ Our default substitution rules defined in `defaultstats` are as follows: # Example ```jldoctest -julia> using Impute: Substitute, impute +julia> using Statistics; using Impute: Substitute, impute julia> M = [1.0 2.0 missing missing 5.0; 1.1 2.2 3.3 missing 5.5] 2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 missing missing 5.0 1.1 2.2 3.3 missing 5.5 -julia> impute(M, Substitute(; statistic=mean); dims=2) +julia> impute(M, Substitute(; statistic=mean ∘ skipmissing); dims=:rows) 2×5 Array{Union{Missing, Float64},2}: 1.0 2.0 2.66667 2.66667 5.0 1.1 2.2 3.3 3.025 5.5 diff --git a/test/runtests.jl b/test/runtests.jl index 474fe48..a332971 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using Combinatorics using DataFrames using Dates using Distances +using Documenter using LinearAlgebra using RDatasets using Random @@ -332,6 +333,7 @@ end end include("testutils.jl") + include("utils.jl") include("deprecated.jl") include("filter.jl") include("imputors/replace.jl") @@ -401,4 +403,8 @@ end @test nrmsd(svd_imputed, data) > nrmsd(mean_imputed, data) * 0.9 end end + + # Start running doctests before we wrap up technical changes and work + # on more documentation + doctest(Impute) end diff --git a/test/utils.jl b/test/utils.jl new file mode 100644 index 0000000..bf873d0 --- /dev/null +++ b/test/utils.jl @@ -0,0 +1,12 @@ +@testset "Utilities" begin + @testset "Impute.dim" begin + X = rand(10, 5) + KA = KeyedArray(X; A=1:10, B=collect("abcde")) + + @test Impute.dim(X, 1) == Impute.dim(X, :rows) == Impute.dim(KA, :A) + @test first(eachslice(X, dims=1)) == first(eachslice(KA, dims=1)) == first(eachslice(KA, dims=:A)) + + @test Impute.dim(X, 2) == Impute.dim(X, :cols) == Impute.dim(KA, :B) + @test first(eachslice(X, dims=2)) == first(eachslice(KA, dims=2)) == first(eachslice(KA, dims=:B)) + end +end