@@ -5,6 +5,10 @@ An imputation context records summary information about missing data for an impu
5
5
"""
6
6
abstract type AbstractContext end
7
7
8
"""
    copy(ctx::AbstractContext)

Build a new context of the same concrete type as `ctx` by splatting the values returned
by `fieldvalues(ctx)` into that type's constructor. One generic method therefore covers
every context type instead of a hand-written `copy` per type.
"""
function Base.copy(ctx::T) where {T<:AbstractContext}
    # NOTE(review): `fieldvalues` is defined elsewhere; presumably it yields the raw field
    # values in declaration order, which is what the positional constructor needs.
    return T(fieldvalues(ctx)...)
end
11
+
8
12
"""
9
13
ismissing(ctx::AbstractContext, x) -> Bool
10
14
@@ -18,23 +22,23 @@ exceeds our `ctx.limit` we throw an `ImputeError`
18
22
* `x`: the value to check (may be a single value, an abstract array or a row)
19
23
"""
20
24
function Base.ismissing(ctx::AbstractContext, x)
    # The context's own predicate decides what counts as missing.
    check = ctx.is_missing

    # Rows (NamedTuples) and arrays are missing as soon as any of their elements is;
    # every other value is tested directly.
    was_missing = if x isa NamedTuple
        any(check, Tuple(x))
    elseif x isa AbstractArray
        any(check, x)
    else
        check(x)
    end

    # Record the observation on the context before reporting the result.
    missing_update!(ctx, was_missing)

    return was_missing
end
33
37
34
38
"""
35
39
findfirst(ctx::AbstractContext, data::AbstractVector) -> Int
36
40
37
- Returns the first not missing index in `data`.
41
+ Returns the first non-missing index in `data`.
38
42
39
43
# Arguments
40
44
* `ctx::AbstractContext`: the context to pass into `ismissing`
50
54
"""
51
55
findlast(ctx::AbstractContext, data::AbstractVector) -> Int
52
56
53
- Returns the last not missing index in `data`.
57
+ Returns the last non-missing index in `data`.
54
58
55
59
# Arguments
56
60
* `ctx::AbstractContext`: the context to pass into `ismissing`
66
70
"""
67
71
findnext(ctx::AbstractContext, data::AbstractVector) -> Int
68
72
69
- Returns the next not missing index in `data`.
73
+ Returns the next non-missing index in `data`.
70
74
71
75
# Arguments
72
76
* `ctx::AbstractContext`: the context to pass into `ismissing`
@@ -88,7 +92,7 @@ weighted.
88
92
# Fields
89
93
* `num::Int`: number of observations
90
94
* `count::Int`: number of missing values found
91
- * `limit::Float64`: allowable limit for missing values to impute
95
+ * `limit::Float64`: maximum portion of total values allowed to be imputed (should be between 0.0 and 1.0).
92
96
* `is_missing::Function`: returns a Bool if the value counts as missing
93
97
* `on_complete::Function`: a function to run when imputation is complete
94
98
"""
@@ -105,37 +109,35 @@ function Context(;
105
109
is_missing:: Function = ismissing,
106
110
on_complete:: Function = complete
107
111
)
108
- Context (0 , 0 , limit, is_missing, on_complete)
112
+ return Context (0 , 0 , limit, is_missing, on_complete)
109
113
end
110
114
111
- function (ctx:: Context )(f :: Function )
115
"""
    empty(ctx::Context)

Return a copy of `ctx` whose `num` and `count` counters are reset to zero. `ctx` itself
is not modified by this method; all other fields come from `copy(ctx)`.
"""
function Base.empty(ctx::Context)
    fresh = copy(ctx)

    # Start the new context with no observations and no missing values recorded.
    fresh.count = 0
    fresh.num = 0

    return fresh
end
120
122
121
- Base. copy (x:: Context ) = Context (x. num, x. count, x. limit, x. is_missing, x. on_complete)
122
-
123
- function missing_update! (ctx:: Context , miss)
123
"""
    missing_update!(ctx::Context, was_missing)

Record one more observation on `ctx`: `ctx.num` is always incremented, and `ctx.count`
is incremented as well when `was_missing` is `true`.
"""
function missing_update!(ctx::Context, was_missing)
    # Every call counts as one observation, missing or not.
    ctx.num = ctx.num + 1

    return was_missing ? (ctx.count = ctx.count + 1) : nothing
end
130
130
131
- function complete (ctx:: Context )
131
"""
    complete(ctx::Context, data)

Check the ratio of missing values recorded on `ctx` (`ctx.count / ctx.num`) against
`ctx.limit`. Throws an `ImputeError` when the ratio is greater than the limit; otherwise
returns `data` as passed in.
"""
function complete(ctx::Context, data)
    missing_ratio = ctx.count / ctx.num

    # Guard: refuse to hand back data when too much of it was missing.
    missing_ratio > ctx.limit && throw(ImputeError(
        "More than $(ctx.limit * 100)% of values were missing ($missing_ratio)."
    ))

    return data
end
140
142
141
143
@@ -149,11 +151,11 @@ This context type can be useful if some missing observation are more important t
149
151
# Fields
150
152
* `num::Int`: number of observations
151
153
* `s::Float64`: sum of missing values weights
152
- * `limit::Float64`: allowable limit for missing values to impute
154
+ * `limit::Float64`: maximum portion of total values allowed to be imputed (should be between 0.0 and 1.0).
153
155
* `is_missing::Function`: returns a Bool if the value counts as missing
154
- * `on_complete::Function`: a function to run when imputation is complete
156
+ * `on_complete::Function`: a function to run when imputation is complete
155
157
* `wv::AbstractWeights`: a set of statistical weights to use when evaluating the importance
156
- of each observation
158
+ of each observation. Will be accumulated during imputation.
157
159
"""
158
160
mutable struct WeightedContext <: AbstractContext
159
161
num:: Int
@@ -170,37 +172,42 @@ function WeightedContext(
170
172
is_missing:: Function = ismissing,
171
173
on_complete:: Function = complete
172
174
)
173
- WeightedContext (0 , 0.0 , limit, is_missing, on_complete, wv)
175
+ return WeightedContext (0 , 0.0 , limit, is_missing, on_complete, wv)
174
176
end
175
177
176
- function (ctx:: WeightedContext )(f :: Function )
178
"""
    empty(ctx::WeightedContext)

Return a copy of `ctx` whose observation counter `num` and accumulated missing weight `s`
are reset to zero. `ctx` itself is not modified by this method; all other fields come
from `copy(ctx)`.
"""
function Base.empty(ctx::WeightedContext)
    fresh = copy(ctx)

    # Start the new context with no observations and no missing weight accumulated.
    fresh.s = 0.0
    fresh.num = 0

    return fresh
end
189
185
190
- function missing_update! (ctx:: WeightedContext , miss )
186
"""
    missing_update!(ctx::WeightedContext, was_missing)

Record one more observation on `ctx`: `ctx.num` is always incremented, and when
`was_missing` is `true` the weight at index `ctx.num` of `ctx.wv` is added to `ctx.s`.
"""
function missing_update!(ctx::WeightedContext, was_missing)
    # Advance first so that `ctx.num` indexes the weight of the current observation.
    ctx.num = ctx.num + 1

    return was_missing ? (ctx.s = ctx.s + ctx.wv[ctx.num]) : nothing
end
197
193
198
- function complete (ctx:: WeightedContext )
194
"""
    complete(ctx::WeightedContext, data)

Check the weighted ratio of missing values recorded on `ctx` (`ctx.s / sum(ctx.wv)`)
against `ctx.limit`. Throws an `ImputeError` when the ratio is greater than the limit;
otherwise returns `data` as passed in.
"""
function complete(ctx::WeightedContext, data)
    missing_ratio = ctx.s / sum(ctx.wv)

    # Guard: refuse to hand back data when too much weight was on missing values.
    missing_ratio > ctx.limit && throw(ImputeError(
        "More than $(ctx.limit * 100)% of weighted values were missing ($missing_ratio)."
    ))

    return data
end
205
+
206
# Make every concrete context callable: `ctx(f)` runs `f` on a freshly reset copy of the
# context and then passes that copy, together with the result of `f`, to
# `ctx.on_complete`, whose return value becomes the result of the call.
# The methods are generated per concrete type with `@eval`.
for T in (Context, WeightedContext)
    @eval function (ctx::$T)(f::Function)
        fresh = empty(ctx)
        return ctx.on_complete(fresh, f(fresh))
    end
end
0 commit comments