@@ -240,6 +240,54 @@ def test_pyarrow_limit(catalog: Catalog) -> None:
     full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow()
     assert len(full_result) == 10
 
+    # test `to_arrow_batch_reader`
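+    # `to_arrow_batch_reader` streams results as a pyarrow.RecordBatchReader;
+    # `read_all()` collects the batches into a single Table, so the same length assertions apply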
+    limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow_batch_reader().read_all()
+    assert len(limited_result) == 1
+
+    empty_result = table_test_limit.scan(selected_fields=("idx",), limit=0).to_arrow_batch_reader().read_all()
+    assert len(empty_result) == 0
+
+    full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow_batch_reader().read_all()
+    assert len(full_result) == 10
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
+def test_pyarrow_limit_with_multiple_files(catalog: Catalog) -> None:
+    table_name = "default.test_pyarrow_limit_with_multiple_files"
+    try:
+        catalog.drop_table(table_name)
+    except NoSuchTableError:
+        pass
+    reference_table = catalog.load_table("default.test_limit")
+    data = reference_table.scan().to_arrow()
+    table_test_limit = catalog.create_table(table_name, schema=reference_table.schema())
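+    # the new table reuses the schema and the 10-row dataset of `default.test_limit`,
+    # so the row counts asserted below are multiples of 10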
+
+    n_files = 2
+    for _ in range(n_files):
+        table_test_limit.append(data)
+    assert len(table_test_limit.inspect.files()) == n_files
+
+    # test with multiple files
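+    # (each append above committed its own data file, so the limit must be
+    # enforced across file boundaries rather than within a single file)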
+    limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow()
+    assert len(limited_result) == 1
+
+    empty_result = table_test_limit.scan(selected_fields=("idx",), limit=0).to_arrow()
+    assert len(empty_result) == 0
+
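+    # a limit above the total row count returns everything: 10 rows per appended file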
+    full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow()
+    assert len(full_result) == 10 * n_files
+
+    # test `to_arrow_batch_reader`
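+    # the streaming reader must apply the same limit semantics as `to_arrow`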
+    limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow_batch_reader().read_all()
+    assert len(limited_result) == 1
+
+    empty_result = table_test_limit.scan(selected_fields=("idx",), limit=0).to_arrow_batch_reader().read_all()
+    assert len(empty_result) == 0
+
+    full_result = table_test_limit.scan(selected_fields=("idx",), limit=999).to_arrow_batch_reader().read_all()
+    assert len(full_result) == 10 * n_files
+
 
 @pytest.mark.integration
 @pytest.mark.filterwarnings("ignore")