@@ -170,7 +170,6 @@ def export_metrics(
170
170
Returns:
171
171
A DataFrame containing the exported metric data.
172
172
"""
173
- import pandas as pd
174
173
175
174
response = self .request(
176
175
" GET" ,
@@ -265,7 +264,6 @@ def export_parameters(
265
264
Returns:
266
265
A DataFrame containing the exported parameter data.
267
266
"""
268
- import pandas as pd
269
267
270
268
response = self .request(
271
269
" GET" ,
@@ -294,36 +292,48 @@ export_runs(
294
292
filter : str | None = None ,
295
293
status: StatusFilter = " completed" ,
296
294
aggregations: list[MetricAggregationType] | None = None ,
297
- ) -> pd.DataFrame
295
+ format : ExportFormat = " parquet" ,
296
+ base_dir: str | None = None ,
297
+ ) -> str
298
298
```
299
299
300
- Exports run data for a specific project .
300
+ Export runs using pagination; the results are always written to disk.
301
301
302
302
** Parameters:**
303
303
304
304
* ** ` project ` **
305
305
(` str ` )
306
- –The project identifier.
306
+ –The project identifier
307
307
* ** ` filter ` **
308
308
(` str | None ` , default:
309
309
` None `
310
310
)
311
- –A filter to apply to the exported data. Defaults to None.
311
+ –A filter to apply to the exported data
312
312
* ** ` status ` **
313
313
(` StatusFilter ` , default:
314
314
` 'completed' `
315
315
)
316
- –The status of runs to include. Defaults to "completed".
316
+ –The status of runs to include
317
317
* ** ` aggregations ` **
318
318
(` list[MetricAggregationType] | None ` , default:
319
319
` None `
320
320
)
321
- –A list of aggregation types to apply. Defaults to None.
321
+ –A list of aggregation types to apply
322
+ * ** ` format ` **
323
+ (` ExportFormat ` , default:
324
+ ` 'parquet' `
325
+ )
326
+ –Output format: one of "parquet", "csv", "json", or "jsonl".
327
+ * ** ` base_dir ` **
328
+ (` str | None ` , default:
329
+ ` None `
330
+ )
331
+ –Base directory for the export; defaults to "./strikes-data" when not provided.
322
332
323
333
** Returns:**
324
334
325
- * ` DataFrame `
326
- –A DataFrame containing the exported run data.
335
+ * ** ` str ` ** ( ` str `
336
+ ) –Path to the export directory
327
337
328
338
<Accordion title = " Source code in dreadnode/api/client.py" icon = " code" >
329
339
``` python
@@ -332,35 +342,61 @@ def export_runs(
332
342
project : str ,
333
343
* ,
334
344
filter : str | None = None ,
335
- # format: ExportFormat = "parquet",
336
345
status : StatusFilter = " completed" ,
337
346
aggregations : list[MetricAggregationType] | None = None ,
338
- ) -> " pd.DataFrame" :
347
+ format : ExportFormat = " parquet" ,
348
+ base_dir : str | None = None ,
349
+ ) -> str :
339
350
"""
340
- Exports run data for a specific project .
351
+ Export runs using pagination - always writes to disk .
341
352
342
353
Args:
343
- project: The project identifier.
344
- filter: A filter to apply to the exported data. Defaults to None.
345
- status: The status of runs to include. Defaults to "completed".
346
- aggregations: A list of aggregation types to apply. Defaults to None.
354
+ project: The project identifier
355
+ filter: A filter to apply to the exported data
356
+ status: The status of runs to include
357
+ aggregations: A list of aggregation types to apply
358
+ format: Output format - "parquet", "csv", "json", "jsonl"
359
+ base_dir: Base directory for export (defaults to "./strikes-data")
347
360
348
361
Returns:
349
- A DataFrame containing the exported run data.
362
+ str: Path to the export directory
350
363
"""
351
- import pandas as pd
352
364
353
- response = self .request(
365
+ logger.info(f " Starting paginated export for project ' { project} ', format=' { format } ' " )
366
+
367
+ page = 1
368
+ first_response = self .request(
354
369
" GET" ,
355
- f " /strikes/projects/ { project!s } /export " ,
370
+ f " /strikes/projects/ { project!s } /export/paginated " ,
356
371
params = {
357
- " format " : " parquet " ,
372
+ " page " : page ,
358
373
" status" : status,
359
374
** ({" filter" : filter } if filter else {}),
360
375
** ({" aggregations" : aggregations} if aggregations else {}),
361
376
},
362
377
)
363
- return pd.read_parquet(io.BytesIO(response.content))
378
+
379
+ if not first_response.content:
380
+ logger.info(" No data found" )
381
+
382
+ first_chunk = pd.read_parquet(io.BytesIO(first_response.content))
383
+
384
+ total_runs = int (first_response.headers.get(" x-total" , " 0" ))
385
+ has_more = first_response.headers.get(" x-has-more" , " false" ) == " true"
386
+
387
+ logger.info(f " Total runs: { total_runs} , Has more: { has_more} " )
388
+
389
+ logger.info(f " Writing { total_runs} runs to disk " )
390
+ return self ._export_to_disk(
391
+ project,
392
+ first_chunk,
393
+ dict (first_response.headers),
394
+ filter ,
395
+ status,
396
+ aggregations,
397
+ format ,
398
+ str (base_dir) if base_dir else None ,
399
+ )
364
400
```
365
401
366
402
0 commit comments