@@ -121,7 +121,6 @@ def __init__(self, bq_service, project_id, swallow_results=True):
121
121
self .cache = {}
122
122
123
123
def _submit_query_job (self , query_data ):
124
-
125
124
""" Submit a query job to BigQuery.
126
125
127
126
This is similar to BigQueryClient.query, but gives the user
@@ -172,7 +171,6 @@ def _submit_query_job(self, query_data):
172
171
return job_id , [self ._transform_row (row , schema ) for row in rows ]
173
172
174
173
def _insert_job (self , body_object ):
175
-
176
174
""" Submit a job to BigQuery
177
175
178
176
Direct proxy to the insert() method of the offical BigQuery
@@ -243,9 +241,7 @@ def get_query_schema(self, job_id):
243
241
A list of dictionaries that represent the schema.
244
242
"""
245
243
246
- job_collection = self .bigquery .jobs ()
247
- query_reply = self ._get_query_results (
248
- job_collection , self .project_id , job_id , offset = 0 , limit = 0 )
244
+ query_reply = self .get_query_results (job_id , offset = 0 , limit = 0 )
249
245
250
246
if not query_reply ['jobComplete' ]:
251
247
logging .warning ('BigQuery job %s not complete' % job_id )
@@ -289,38 +285,72 @@ def check_job(self, job_id):
289
285
included in the query table if it has completed.
290
286
"""
291
287
292
- job_collection = self .bigquery .jobs ()
293
- query_reply = self ._get_query_results (
294
- job_collection , self .project_id , job_id , offset = 0 , limit = 0 )
288
+ query_reply = self .get_query_results (job_id , offset = 0 , limit = 0 )
295
289
296
290
return (query_reply .get ('jobComplete' , False ),
297
291
int (query_reply .get ('totalRows' , 0 )))
298
292
299
def get_query_rows(self, job_id, offset=None, limit=None, timeout=0):
    """Retrieve a list of rows from a query table by job id.

    This method appends results from multiple pages together. If you want
    to manually page through results, use `get_query_results` directly.

    Args:
        job_id: The job id that references a BigQuery query.
        offset: The offset of the rows to pull from BigQuery.
        limit: The maximum number of rows to retrieve from the query table.
        timeout: Timeout in seconds.

    Returns:
        A list of dictionaries that represent table rows.

    Raises:
        UnfinishedQueryException: If the query job has not completed yet.
    """
    # Get the first page of query results.
    query_reply = self.get_query_results(
        job_id, offset=offset, limit=limit, timeout=timeout)

    if not query_reply['jobComplete']:
        logging.warning('BigQuery job %s not complete' % job_id)
        raise UnfinishedQueryException()

    # The schema is identical on every page, so read it once.
    schema = query_reply['schema']['fields']
    rows = query_reply.get('rows', [])
    page_token = query_reply.get('pageToken')
    records = [self._transform_row(row, schema) for row in rows]

    # Append further pages, but stop as soon as `limit` rows have been
    # collected so the caller never receives more rows than requested.
    # (Previously every remaining page was fetched and returned,
    # silently ignoring `limit` across page boundaries.)
    while page_token and (limit is None or len(records) < limit):
        query_reply = self.get_query_results(
            job_id, offset=offset, limit=limit,
            page_token=page_token, timeout=timeout)
        page_token = query_reply.get('pageToken')
        rows = query_reply.get('rows', [])
        records += [self._transform_row(row, schema) for row in rows]

    return records if limit is None else records[:limit]
327
+
328
def check_dataset(self, dataset_id):
    """Check to see if a dataset exists.

    Args:
        dataset_id: The unique id of the dataset in question.

    Returns:
        bool indicating whether the dataset exists.
    """
    # get_dataset returns an empty dict when the dataset is absent,
    # so its truthiness answers the question directly.
    return True if self.get_dataset(dataset_id) else False
322
337
323
- return [self ._transform_row (row , schema ) for row in rows ]
338
def get_dataset(self, dataset_id):
    """Retrieve a dataset if it exists, otherwise return an empty dict.

    Args:
        dataset_id: The unique id of the dataset to fetch.

    Returns:
        A dictionary containing the dataset resource if it exists,
        otherwise an empty dictionary.
    """
    datasets = self.bigquery.datasets()
    try:
        return datasets.get(projectId=self.project_id,
                            datasetId=dataset_id).execute()
    except HttpError:
        # The API reports a missing dataset via an HTTP error; treat
        # that as "does not exist" instead of propagating.
        return {}
324
354
325
355
def check_table (self , dataset , table ):
326
356
"""Check to see if a table exists.
@@ -1039,27 +1069,28 @@ def _in_range(self, start_time, end_time, time):
1039
1069
time <= start_time <= time + ONE_MONTH or \
1040
1070
time <= end_time <= time + ONE_MONTH
1041
1071
1042
def get_query_results(self, job_id, offset=None, limit=None,
                      page_token=None, timeout=0):
    """Execute the query job indicated by the given job id.

    This is a direct mapping to the BigQuery API:
    https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults

    Args:
        job_id: The job id of the query to check.
        offset: The index the result set should start at.
        limit: The maximum number of results to retrieve.
        page_token: Page token, returned by a previous call, to request
            the next page of results.
        timeout: Timeout in seconds.

    Returns:
        The query reply.
    """
    # The API expects the timeout in milliseconds.
    request = self.bigquery.jobs().getQueryResults(
        projectId=self.project_id,
        jobId=job_id,
        startIndex=offset,
        maxResults=limit,
        pageToken=page_token,
        timeoutMs=timeout * 1000)
    return request.execute()
1063
1094
1064
1095
def _transform_row (self , row , schema ):
1065
1096
"""Apply the given schema to the given BigQuery data row.
0 commit comments