@@ -11,7 +11,7 @@ use tantivy::{
11
11
} ;
12
12
use tokio:: {
13
13
fs:: { create_dir_all, remove_dir_all} ,
14
- select,
14
+ select, spawn ,
15
15
task:: spawn_blocking,
16
16
time:: sleep,
17
17
} ;
@@ -26,8 +26,8 @@ use crate::{
26
26
} ;
27
27
28
28
use super :: {
29
- dynamic_field_config, field_parser:: FieldParser , get_index_config, write_unified_index ,
30
- DYNAMIC_FIELD_NAME ,
29
+ dynamic_field_config, field_parser:: FieldParser , get_index_config, sources :: CheckpointCommiter ,
30
+ write_unified_index , DYNAMIC_FIELD_NAME ,
31
31
} ;
32
32
33
33
#[ derive( Debug , PartialEq , Eq ) ]
@@ -37,6 +37,13 @@ pub enum BatchResult {
37
37
Restart ,
38
38
}
39
39
40
+ struct IndexCommiter {
41
+ index_name : String ,
42
+ index_path : String ,
43
+ pool : PgPool ,
44
+ checkpoint_commiter : Option < Box < dyn CheckpointCommiter + Send > > ,
45
+ }
46
+
40
47
pub struct IndexRunner {
41
48
source : Box < dyn Source + Send + Sync > ,
42
49
schema : Schema ,
@@ -87,7 +94,7 @@ impl IndexRunner {
87
94
let index_dir = Path :: new ( & self . args . build_dir ) . join ( & id) ;
88
95
let _ = create_dir_all ( & index_dir) . await ;
89
96
let index = Index :: open_or_create ( MmapDirectory :: open ( & index_dir) ?, self . schema . clone ( ) ) ?;
90
- let mut index_writer: IndexWriter = index. writer ( self . args . memory_budget ) ?;
97
+ let index_writer: IndexWriter = index. writer ( self . args . memory_budget ) ?;
91
98
index_writer. set_merge_policy ( Box :: new ( NoMergePolicy ) ) ;
92
99
93
100
let mut added = 0 ;
@@ -160,6 +167,40 @@ impl IndexRunner {
160
167
}
161
168
162
169
info ! ( "Commiting {added} documents" ) ;
170
+
171
+ let commiter = self . index_commiter ( ) . await ;
172
+ if self . args . stream && result != BatchResult :: Eof {
173
+ // Commit in the background so the next batch in the stream is not held up.
174
+ spawn ( async move {
175
+ if let Err ( e) =
176
+ Self :: commit_index ( commiter, & id, & index, index_writer, & index_dir) . await
177
+ {
178
+ error ! ( "Failed to commit index of id '{}': {e}" , & id) ;
179
+ }
180
+ } ) ;
181
+ } else {
182
+ Self :: commit_index ( commiter, & id, & index, index_writer, & index_dir) . await ?;
183
+ }
184
+
185
+ Ok ( result)
186
+ }
187
+
188
+ async fn index_commiter ( & mut self ) -> IndexCommiter {
189
+ IndexCommiter {
190
+ index_name : self . config . name . clone ( ) ,
191
+ index_path : self . config . path . clone ( ) ,
192
+ pool : self . pool . clone ( ) ,
193
+ checkpoint_commiter : self . source . get_checkpoint_commiter ( ) . await ,
194
+ }
195
+ }
196
+
197
+ async fn commit_index (
198
+ commiter : IndexCommiter ,
199
+ id : & str ,
200
+ index : & Index ,
201
+ mut index_writer : IndexWriter ,
202
+ input_dir : & Path ,
203
+ ) -> Result < ( ) > {
163
204
index_writer. prepare_commit ( ) ?. commit_future ( ) . await ?;
164
205
165
206
let segment_ids = index. searchable_segment_ids ( ) ?;
@@ -171,18 +212,20 @@ impl IndexRunner {
171
212
spawn_blocking ( move || index_writer. wait_merging_threads ( ) ) . await ??;
172
213
173
214
write_unified_index (
174
- & id,
175
- & index,
176
- & index_dir ,
177
- & self . config . name ,
178
- & self . config . path ,
179
- & self . pool ,
215
+ id,
216
+ index,
217
+ input_dir ,
218
+ & commiter . index_name ,
219
+ & commiter . index_path ,
220
+ & commiter . pool ,
180
221
)
181
222
. await ?;
182
223
183
- self . source . on_index_created ( ) . await ?;
224
+ if let Some ( checkpoint_commiter) = commiter. checkpoint_commiter {
225
+ checkpoint_commiter. commit ( ) . await ?;
226
+ }
184
227
185
- Ok ( result )
228
+ Ok ( ( ) )
186
229
}
187
230
}
188
231
0 commit comments