Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more tests #39

Merged
merged 6 commits into from
Aug 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ Tables = "0.2"
julia = "1"

[extras]
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DataFrames", "RDatasets", "Test"]
test = ["AxisArrays", "DataFrames", "Dates", "RDatasets", "Test"]
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,10 @@ julia> Impute.interp(df) |> Impute.locf() |> Impute.nocb()
│ 469 │ -247.6 │ -180.7 │ -70.9 │ 33.7 │ 114.8 │ 222.5 │
```

**Warning**: Your approach should depend on the properties of your data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)).
**Warning:**

- Your approach should depend on the properties of your data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)).
- In-place calls aren't guaranteed to mutate the original data, but they will try to avoid copying if possible.
In the future, it may be possible to detect whether in-place operations are permitted on an array or table using traits:
- https://github.com/JuliaData/Tables.jl/issues/116
- https://github.com/JuliaDiffEq/ArrayInterface.jl/issues/22
8 changes: 7 additions & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,10 @@ Finally, we can chain multiple simple methods together to give a complete datase
Impute.interp(df) |> Impute.locf() |> Impute.nocb()
```

Warning: Your approach should depend on the properties of your data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)).
**Warning:**

- Your approach should depend on the properties of your data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)).
- In-place calls aren't guaranteed to mutate the original data, but they will try to avoid copying if possible.
In the future, it may be possible to detect whether in-place operations are permitted on an array or table using traits:
- https://github.com/JuliaData/Tables.jl/issues/116
- https://github.com/JuliaDiffEq/ArrayInterface.jl/issues/22
70 changes: 25 additions & 45 deletions src/imputors/drop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,24 @@ end
# Keyword constructor: accepts an optional `context` keyword (default `Context()`)
# and forwards it to the positional `DropObs` constructor.
# TODO: Switch to using Base.@kwdef on 1.1
function DropObs(; context=Context())
    return DropObs(context)
end

function impute!(data::AbstractVector, imp::DropObs)
imp.context() do c
filter!(x -> !ismissing!(c, x), data)
end
# Special case impute! for vectors because we know filter! will work
function impute!(data::Vector, imp::DropObs)
nickrobinson251 marked this conversation as resolved.
Show resolved Hide resolved
imp.context(c -> filter!(x -> !ismissing!(c, x), data))
end

# Non-mutating `DropObs` for vectors: inside the imputor's context, `filter` keeps
# only the elements for which `ismissing!(ctx, x)` is false.
function impute(data::AbstractVector, imp::DropObs)
    return imp.context() do ctx
        filter(x -> !ismissing!(ctx, x), data)
    end
end

function impute!(data::AbstractMatrix, imp::DropObs; dims=1)
# Non-mutating `DropObs` for matrices: inside the imputor's context, `filterobs`
# keeps the observations (along `dims`) for which `ismissing!(ctx, obs)` is false.
function impute(data::AbstractMatrix, imp::DropObs; dims=1)
    return imp.context() do ctx
        keep = obs -> !ismissing!(ctx, obs)
        filterobs(keep, data; dims=dims)
    end
end

# Deleting elements from subarrays doesn't work, so the data is first collected into a
# separate array and that array is imputed instead.
function impute!(data::SubArray, imp::DropObs)
    collected = collect(data)
    return impute!(collected, imp::DropObs)
end

function impute!(table, imp::DropObs)
function impute(table, imp::DropObs)
imp.context() do c
@assert istable(table)
rows = Tables.rows(table)
Expand Down Expand Up @@ -96,48 +95,29 @@ end
# Keyword constructor: accepts an optional `context` keyword (default `Context()`)
# and forwards it to the positional `DropVars` constructor.
# TODO: Switch to using Base.@kwdef on 1.1
function DropVars(; context=Context())
    return DropVars(context)
end

function impute!(data::AbstractMatrix, imp::DropVars; dims=1)
return filtervars(data; dims=dims) do var
try
imp.context() do c
for x in var
ismissing!(c, x)
end
end
return true
catch e
if isa(e, ImputeError)
return false
else
rethrow(e)
end
# Non-mutating `DropVars` for matrices: inside the imputor's context, `filtervars`
# keeps the variables (along `dims`) for which `ismissing!(ctx, var)` is false.
function impute(data::AbstractMatrix, imp::DropVars; dims=1)
    return imp.context() do ctx
        keep = var -> !ismissing!(ctx, var)
        filtervars(keep, data; dims=dims)
    end
end

function impute!(table, imp::DropVars)
function impute(table, imp::DropVars)
istable(table) || throw(MethodError(impute!, (table, imp)))
cols = Tables.columns(table)

cnames = Iterators.filter(propertynames(cols)) do cname
try
imp.context() do c
col = getproperty(cols, cname)
for i in eachindex(col)
ismissing!(c, col[i])
end
end
return true
catch e
if isa(e, ImputeError)
return false
else
rethrow(e)
end
imp.context() do c
cnames = Iterators.filter(propertynames(cols)) do cname
!ismissing!(c, getproperty(cols, cname))
end
end

selected = Tables.select(table, cnames...)
table = materializer(table)(selected)
return table
selected = Tables.select(table, cnames...)
table = materializer(table)(selected)
return table
end
end

# Add impute! methods to override the default behaviour in imputors.jl.
# Both methods delegate to the non-mutating `impute` for `DropObs`/`DropVars`.

# Matrices: forward keyword arguments (e.g. `dims`) so that
# `impute!(data, imp; dims=2)` behaves like `impute(data, imp; dims=2)` instead of
# failing on an unsupported keyword.
function impute!(data::AbstractMatrix, imp::Union{DropObs, DropVars}; kwargs...)
    return impute(data, imp; kwargs...)
end

# Fallback for any other data: delegate directly.
impute!(data, imp::Union{DropObs, DropVars}) = impute(data, imp)
2 changes: 1 addition & 1 deletion src/imputors/fill.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function impute!(data::AbstractVector, imp::Fill)
imp.context() do c
fill_val = if isa(imp.value, Function)
# Call `deepcopy` because we can trust that it's available for all types.
imp.value(Impute.drop(deepcopy(data); context=c))
imp.value(Impute.drop(data; context=c))
else
imp.value
end
Expand Down
Loading