Skip to content

Commit a767ef0

Browse files
authored
Merge pull request #153 from egeucak/master
Dry runs return bytes processed, and cache hit now
2 parents d404b6d + 01f38be commit a767ef0

File tree

2 files changed

+34
-12
lines changed

2 files changed

+34
-12
lines changed

bigquery/client.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def _submit_query_job(self, query_data):
243243
-------
244244
tuple
245245
job id and query results if query completed. If dry_run is True,
246-
job id will be None and results will be empty if the query is valid
246+
job id will be None and results will be [cacheHit, totalBytesProcessed] if the query is valid
247247
or a dict containing the response if invalid.
248248
249249
Raises
@@ -269,13 +269,17 @@ def _submit_query_job(self, query_data):
269269
schema = query_reply.get('schema', {'fields': None})['fields']
270270
rows = query_reply.get('rows', [])
271271
job_complete = query_reply.get('jobComplete', False)
272+
cache_hit = query_reply['cacheHit']
273+
total_bytes_processed = query_reply['totalBytesProcessed']
272274

273275
# raise exceptions if it's not an async query
274276
# and job is not completed after timeout
275277
if not job_complete and query_data.get("timeoutMs", False):
276278
logger.error('BigQuery job %s timeout' % job_id)
277279
raise BigQueryTimeoutException()
278-
280+
281+
if query_data.get("dryRun", True):
282+
return job_id, [cache_hit, total_bytes_processed]
279283
return job_id, [self._transform_row(row, schema) for row in rows]
280284

281285
def _get_job_reference(self, job_id):
@@ -345,8 +349,8 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq
345349
How long to wait for the query to complete, in seconds before
346350
the request times out and returns.
347351
dry_run : bool, optional
348-
If True, the query isn't actually run. A valid query will return an
349-
empty response, while an invalid one will return the same error
352+
If True, the query isn't actually run. A valid query will return
353+
the cache-hit flag and total bytes processed, while an invalid one will return the same error
350354
message it would if it wasn't a dry run.
351355
use_legacy_sql : bool, optional. Default True.
352356
If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/)
@@ -359,7 +363,7 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq
359363
-------
360364
tuple
361365
(job id, query results) if the query completed. If dry_run is True,
362-
job id will be None and results will be empty if the query is valid
366+
job id will be None and results will be [cacheHit, totalBytesProcessed] if the query is valid
363367
or a ``dict`` containing the response if invalid.
364368
365369
Raises

bigquery/tests/test_client.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,9 @@ def test_query(self):
297297

298298
mock_query_job.execute.return_value = {
299299
'jobReference': expected_job_ref,
300-
'jobComplete': True
300+
'jobComplete': True,
301+
'cacheHit': False,
302+
'totalBytesProcessed': 0
301303
}
302304

303305
self.mock_job_collection.query.return_value = mock_query_job
@@ -329,6 +331,8 @@ def test_query_max_results_set(self):
329331
mock_query_job.execute.return_value = {
330332
'jobReference': expected_job_ref,
331333
'jobComplete': True,
334+
'cacheHit': False,
335+
'totalBytesProcessed': 0
332336
}
333337

334338
self.mock_job_collection.query.return_value = mock_query_job
@@ -357,6 +361,8 @@ def test_query_timeout_set(self):
357361
mock_query_job.execute.return_value = {
358362
'jobReference': expected_job_ref,
359363
'jobComplete': True,
364+
'cacheHit': False,
365+
'totalBytesProcessed': 0
360366
}
361367

362368
self.mock_job_collection.query.return_value = mock_query_job
@@ -382,6 +388,8 @@ def test_sync_query_timeout(self):
382388
mock_query_job.execute.return_value = {
383389
'jobReference': expected_job_ref,
384390
'jobComplete': False,
391+
'cacheHit': False,
392+
'totalBytesProcessed': 0
385393
}
386394

387395
self.mock_job_collection.query.return_value = mock_query_job
@@ -400,6 +408,8 @@ def test_async_query_timeout(self):
400408
mock_query_job.execute.return_value = {
401409
'jobReference': expected_job_ref,
402410
'jobComplete': False,
411+
'cacheHit': False,
412+
'totalBytesProcessed': 0
403413
}
404414

405415
self.mock_job_collection.query.return_value = mock_query_job
@@ -409,14 +419,18 @@ def test_async_query_timeout(self):
409419
self.assertEquals(results, [])
410420

411421
def test_query_dry_run_valid(self):
412-
"""Ensure that None and an empty list is returned from the query when
422+
"""Ensure that None and [cacheHit, totalBytesProcessed] are returned from the query when
413423
dry_run is True and the query is valid.
414424
"""
415425

416426
mock_query_job = mock.Mock()
417427

418-
mock_query_job.execute.return_value = {'jobReference': {},
419-
'jobComplete': True}
428+
mock_query_job.execute.return_value = {
429+
'jobReference': {},
430+
'jobComplete': True,
431+
'cacheHit': False,
432+
'totalBytesProcessed': 0
433+
}
420434

421435
self.mock_job_collection.query.return_value = mock_query_job
422436

@@ -428,7 +442,7 @@ def test_query_dry_run_valid(self):
428442
'dryRun': True}
429443
)
430444
self.assertIsNone(job_id)
431-
self.assertEqual([], results)
445+
self.assertEqual([False, 0], results)
432446

433447
def test_query_dry_run_invalid(self):
434448
"""Ensure that None and a dict is returned from the query when dry_run
@@ -468,6 +482,8 @@ def test_query_with_results(self):
468482
'schema': {'fields': [{'name': 'foo', 'type': 'INTEGER'}]},
469483
'rows': [{'f': [{'v': 10}]}],
470484
'jobComplete': True,
485+
'cacheHit': False,
486+
'totalBytesProcessed': 0
471487
}
472488

473489
self.mock_job_collection.query.return_value = mock_query_job
@@ -491,7 +507,9 @@ def test_query_with_using_legacy_sql(self):
491507

492508
mock_query_job.execute.return_value = {
493509
'jobReference': expected_job_ref,
494-
'jobComplete': True
510+
'jobComplete': True,
511+
'cacheHit': False,
512+
'totalBytesProcessed': 0
495513
}
496514

497515
self.mock_job_collection.query.return_value = mock_query_job
@@ -873,7 +891,7 @@ def test_json_job_body_constructed_correctly(self):
873891
body = {
874892
"jobReference": {
875893
"projectId": self.project_id,
876-
"jobId": "job"
894+
"jobId": "job",
877895
},
878896
"configuration": {
879897
"load": {

0 commit comments

Comments
 (0)