@@ -22,7 +22,7 @@ use quickwit_common::Progress;
2222use quickwit_common:: uri:: Uri ;
2323use quickwit_metastore:: checkpoint:: PartitionId ;
2424use quickwit_proto:: metastore:: SourceType ;
25- use quickwit_proto:: types:: Position ;
25+ use quickwit_proto:: types:: { Offset , Position } ;
2626use quickwit_storage:: StorageResolver ;
2727use tokio:: io:: { AsyncBufReadExt , AsyncRead , AsyncReadExt , BufReader } ;
2828
@@ -146,8 +146,13 @@ impl DocFileReader {
146146pub struct ObjectUriBatchReader {
147147 partition_id : PartitionId ,
148148 reader : DocFileReader ,
149- current_offset : usize ,
150- is_eof : bool ,
149+ current_position : Position ,
150+ }
151+
152+ fn parse_offset ( offset : & Offset ) -> anyhow:: Result < usize > {
153+ offset
154+ . as_usize ( )
155+ . context ( "file offset should be stored as usize" )
151156}
152157
153158impl ObjectUriBatchReader {
@@ -157,26 +162,22 @@ impl ObjectUriBatchReader {
157162 uri : & Uri ,
158163 position : Position ,
159164 ) -> anyhow:: Result < Self > {
160- let current_offset = match position {
161- Position :: Beginning => 0 ,
162- Position :: Offset ( offset) => offset
163- . as_usize ( )
164- . context ( "file offset should be stored as usize" ) ?,
165+ let current_offset = match & position {
165166 Position :: Eof ( _) => {
166167 return Ok ( ObjectUriBatchReader {
167168 partition_id,
168169 reader : DocFileReader :: empty ( ) ,
169- current_offset : 0 ,
170- is_eof : true ,
170+ current_position : position,
171171 } ) ;
172172 }
173+ Position :: Beginning => 0 ,
174+ Position :: Offset ( offset) => parse_offset ( offset) ?,
173175 } ;
174176 let reader = DocFileReader :: from_uri ( storage_resolver, uri, current_offset) . await ?;
175177 Ok ( ObjectUriBatchReader {
176178 partition_id,
177179 reader,
178- current_offset,
179- is_eof : false ,
180+ current_position : position,
180181 } )
181182 }
182183
@@ -186,11 +187,14 @@ impl ObjectUriBatchReader {
186187 source_type : SourceType ,
187188 ) -> anyhow:: Result < BatchBuilder > {
188189 let mut batch_builder = BatchBuilder :: new ( source_type) ;
189- if self . is_eof {
190- return Ok ( batch_builder) ;
191- }
192- let limit_num_bytes = self . current_offset + BATCH_NUM_BYTES_LIMIT as usize ;
193- let mut new_offset = self . current_offset ;
190+ let current_offset = match & self . current_position {
191+ Position :: Eof ( _) => return Ok ( batch_builder) ,
192+ Position :: Beginning => 0 ,
193+ Position :: Offset ( offset) => parse_offset ( offset) ?,
194+ } ;
195+
196+ let limit_num_bytes = current_offset + BATCH_NUM_BYTES_LIMIT as usize ;
197+ let mut new_offset = current_offset;
194198 let mut eof_position: Option < Position > = None ;
195199 while new_offset < limit_num_bytes {
196200 if let Some ( record) = source_progress
@@ -200,28 +204,26 @@ impl ObjectUriBatchReader {
200204 new_offset = record. next_offset as usize ;
201205 batch_builder. add_doc ( record. doc ) ;
202206 if record. is_last {
203- self . is_eof = true ;
204207 eof_position = Some ( Position :: eof ( new_offset) ) ;
205208 break ;
206209 }
207210 } else {
208- self . is_eof = true ;
209211 eof_position = Some ( Position :: eof ( new_offset) ) ;
210212 break ;
211213 }
212214 }
213215 let to_position = eof_position. unwrap_or ( Position :: offset ( new_offset) ) ;
214216 batch_builder. checkpoint_delta . record_partition_delta (
215217 self . partition_id . clone ( ) ,
216- Position :: offset ( self . current_offset ) ,
217- to_position,
218+ self . current_position . clone ( ) ,
219+ to_position. clone ( ) ,
218220 ) ?;
219- self . current_offset = new_offset ;
221+ self . current_position = to_position ;
220222 Ok ( batch_builder)
221223 }
222224
223225 pub fn is_eof ( & self ) -> bool {
224- self . is_eof
226+ self . current_position . is_eof ( )
225227 }
226228}
227229
0 commit comments