@@ -115,13 +115,28 @@ def __init__(self, filters, error_rates=[0.02, 0.5], growth_factor=1.1,
115
115
self .version = version
116
116
117
117
def initialize (self , * , include , exclude ):
118
- log .debug ("{} include and {} exclude" .format (
119
- len (include ), len (exclude )))
118
+ """
119
+ Arg "exclude" is potentially larger than main memory, so it should
120
+ be assumed to be passed as a lazy-loading iterator. If it isn't,
121
+ that's fine. The "include" arg must fit in memory and should be
122
+ assumed to be a set.
123
+ """
124
+ try :
125
+ iter (exclude )
126
+ except TypeError as te :
127
+ raise TypeError ("exclude is not iterable" , te )
128
+ try :
129
+ len (include )
130
+ except TypeError as te :
131
+ raise TypeError ("include is not a list" , te )
132
+
133
+ include_len = len (include )
134
+
120
135
depth = 1
121
136
maxSequentialGrowthLayers = 3
122
137
sequentialGrowthLayers = 0
123
138
124
- while len ( include ) > 0 :
139
+ while include_len > 0 :
125
140
starttime = datetime .datetime .utcnow ()
126
141
er = self .error_rates [- 1 ]
127
142
if depth < len (self .error_rates ):
@@ -133,24 +148,23 @@ def initialize(self, *, include, exclude):
133
148
# min_filter_length large. This is important for the deep layers near the end.
134
149
Bloomer .filter_with_characteristics (
135
150
max (
136
- int (len ( include ) * self .growth_factor ),
151
+ int (include_len * self .growth_factor ),
137
152
self .min_filter_length ), er , depth ))
138
153
else :
139
154
# Filter already created for this layer. Check size and resize if needed.
140
155
required_size = Bloomer .calc_size (
141
- self .filters [depth - 1 ].nHashFuncs , len ( include ) , er )
156
+ self .filters [depth - 1 ].nHashFuncs , include_len , er )
142
157
if self .filters [depth - 1 ].size < required_size :
143
158
# Resize filter
144
159
self .filters [depth -
145
160
1 ] = Bloomer .filter_with_characteristics (
146
- int (len ( include ) * self .growth_factor ),
161
+ int (include_len * self .growth_factor ),
147
162
er , depth )
148
163
log .info ("Resized filter at {}-depth layer" .format (depth ))
149
164
filter = self .filters [depth - 1 ]
150
165
log .debug (
151
- "Initializing the {}-depth layer. err={} include={} exclude={} size={} hashes={}"
152
- .format (depth , er , len (include ), len (exclude ), filter .size ,
153
- filter .nHashFuncs ))
166
+ "Initializing the {}-depth layer. err={} include_len={} size={} hashes={}"
167
+ .format (depth , er , include_len , filter .size , filter .nHashFuncs ))
154
168
# loop over the elements that *should* be there. Add them to the filter.
155
169
for elem in include :
156
170
filter .add (elem )
@@ -188,7 +202,8 @@ def initialize(self, *, include, exclude):
188
202
sequentialGrowthLayers = 0
189
203
190
204
include , exclude = false_positives , include
191
- if len (include ) > 0 :
205
+ include_len = len (include )
206
+ if include_len > 0 :
192
207
depth = depth + 1
193
208
# Filter characteristics loaded from meta file may result in unused layers.
194
209
# Remove them.
0 commit comments