Skip to content

Commit 3ae5d4c

Browse files
test: improve to_gbq logic unit test coverage (#449)
* 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent bf0e863 commit 3ae5d4c

18 files changed

+542
-98
lines changed

.coveragerc

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ omit =
2222
google/cloud/__init__.py
2323

2424
[report]
25-
fail_under = 89
25+
fail_under = 94
2626
show_missing = True
2727
exclude_lines =
2828
# Re-enable the standard pragma

noxfile.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def cover(session):
259259
test runs (not system test runs), and then erases coverage data.
260260
"""
261261
session.install("coverage", "pytest-cov")
262-
session.run("coverage", "report", "--show-missing", "--fail-under=89")
262+
session.run("coverage", "report", "--show-missing", "--fail-under=94")
263263

264264
session.run("coverage", "erase")
265265

owlbot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
templated_files = common.py_library(
3434
unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
3535
system_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
36-
cov_level=89,
36+
cov_level=94,
3737
unit_test_extras=extras,
3838
system_test_extras=extras,
3939
intersphinx_dependencies={

pandas_gbq/gbq.py

+23-18
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,20 @@ class InvalidSchema(ValueError):
121121
table in BigQuery.
122122
"""
123123

124-
pass
124+
def __init__(
125+
self, message: str, local_schema: Dict[str, Any], remote_schema: Dict[str, Any]
126+
):
127+
super().__init__(message)
128+
self._local_schema = local_schema
129+
self._remote_schema = remote_schema
130+
131+
@property
132+
def local_schema(self) -> Dict[str, Any]:
133+
return self._local_schema
134+
135+
@property
136+
def remote_schema(self) -> Dict[str, Any]:
137+
return self._remote_schema
125138

126139

127140
class NotFoundException(ValueError):
@@ -354,19 +367,12 @@ def sizeof_fmt(num, suffix="B"):
354367
return fmt % (num, "Y", suffix)
355368

356369
def get_client(self):
370+
import google.api_core.client_info
357371
import pandas
358372

359-
try:
360-
# This module was added in google-api-core 1.11.0.
361-
# We don't have a hard requirement on that version, so only
362-
# populate the client_info if available.
363-
import google.api_core.client_info
364-
365-
client_info = google.api_core.client_info.ClientInfo(
366-
user_agent="pandas-{}".format(pandas.__version__)
367-
)
368-
except ImportError:
369-
client_info = None
373+
client_info = google.api_core.client_info.ClientInfo(
374+
user_agent="pandas-{}".format(pandas.__version__)
375+
)
370376

371377
# In addition to new enough version of google-api-core, a new enough
372378
# version of google-cloud-bigquery is required to populate the
@@ -1057,7 +1063,7 @@ def to_gbq(
10571063
DeprecationWarning,
10581064
stacklevel=2,
10591065
)
1060-
elif api_method == "load_csv":
1066+
else:
10611067
warnings.warn(
10621068
"chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq",
10631069
PendingDeprecationWarning,
@@ -1122,12 +1128,14 @@ def to_gbq(
11221128
)
11231129
elif if_exists == "replace":
11241130
connector.delete_and_recreate_table(dataset_id, table_id, table_schema)
1125-
elif if_exists == "append":
1131+
else:
11261132
if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema):
11271133
raise InvalidSchema(
11281134
"Please verify that the structure and "
11291135
"data types in the DataFrame match the "
1130-
"schema of the destination table."
1136+
"schema of the destination table.",
1137+
table_schema,
1138+
original_schema,
11311139
)
11321140

11331141
# Update the local `table_schema` so mode (NULLABLE/REQUIRED)
@@ -1283,9 +1291,6 @@ def delete(self, table_id):
12831291
"""
12841292
from google.api_core.exceptions import NotFound
12851293

1286-
if not self.exists(table_id):
1287-
raise NotFoundException("Table does not exist")
1288-
12891294
table_ref = self._table_ref(table_id)
12901295
try:
12911296
self.client.delete_table(table_ref)

pandas_gbq/load.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@ def load_csv_from_file(
185185
chunksize: Optional[int],
186186
schema: Optional[Dict[str, Any]],
187187
):
188+
"""Manually encode a DataFrame to CSV and use the buffer in a load job.
189+
190+
This method is needed for writing with google-cloud-bigquery versions that
191+
don't implement load_table_from_dataframe with the CSV serialization format.
192+
"""
188193
if schema is None:
189194
schema = pandas_gbq.schema.generate_bq_schema(dataframe)
190195

@@ -203,7 +208,7 @@ def load_chunk(chunk, job_config):
203208
finally:
204209
chunk_buffer.close()
205210

206-
return load_csv(dataframe, chunksize, bq_schema, load_chunk,)
211+
return load_csv(dataframe, chunksize, bq_schema, load_chunk)
207212

208213

209214
def load_chunks(

pandas_gbq/schema.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,19 @@ def to_pandas_gbq(client_schema):
2121
"""Given a sequence of :class:`google.cloud.bigquery.schema.SchemaField`,
2222
return a schema in pandas-gbq API format.
2323
"""
24-
remote_fields = [field_remote.to_api_repr() for field_remote in client_schema]
24+
remote_fields = [
25+
# Filter out default values. google-cloud-bigquery versions before
26+
# 2.31.0 (https://github.com/googleapis/python-bigquery/pull/557)
27+
# include a description key, even if not explicitly set. This has the
28+
# potential to unset the description unintentionally in cases where
29+
# pandas-gbq is updating the schema.
30+
{
31+
key: value
32+
for key, value in field_remote.to_api_repr().items()
33+
if value is not None
34+
}
35+
for field_remote in client_schema
36+
]
2537
for field in remote_fields:
2638
field["type"] = field["type"].upper()
2739
field["mode"] = field["mode"].upper()

setup.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@
2828
"pandas >=0.24.2",
2929
"pyarrow >=3.0.0, <7.0dev",
3030
"pydata-google-auth",
31-
"google-auth",
32-
"google-auth-oauthlib",
31+
"google-api-core >=1.14.0",
32+
"google-auth >=1.4.1",
33+
"google-auth-oauthlib >=0.0.1",
3334
# 2.4.* has a bug where waiting for the query can hang indefinitely.
3435
# https://github.com/pydata/pandas-gbq/issues/343
3536
"google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<4.0.0dev,!=2.4.*",

testing/constraints-3.7.txt

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
# Then this file should have foo==1.14.0
88
db-dtypes==0.3.1
9+
google-api-core==1.14.0
910
google-auth==1.4.1
1011
google-auth-oauthlib==0.0.1
1112
google-cloud-bigquery==1.11.1

tests/system/test_gbq.py

-5
Original file line numberDiff line numberDiff line change
@@ -1522,11 +1522,6 @@ def test_delete_table(gbq_table):
15221522
assert not gbq_table.exists("test_delete_table")
15231523

15241524

1525-
def test_delete_table_not_found(gbq_table):
1526-
with pytest.raises(gbq.NotFoundException):
1527-
gbq_table.delete("test_delete_table_not_found")
1528-
1529-
15301525
def test_create_table_data_dataset_does_not_exist(
15311526
project, credentials, gbq_dataset, random_dataset_id
15321527
):

tests/unit/conftest.py

+30-29
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,36 @@
99
import pytest
1010

1111

12+
def mock_get_credentials(*args, **kwargs):
13+
import google.auth.credentials
14+
15+
mock_credentials = mock.create_autospec(google.auth.credentials.Credentials)
16+
return mock_credentials, "default-project"
17+
18+
19+
@pytest.fixture
20+
def mock_service_account_credentials():
21+
import google.oauth2.service_account
22+
23+
mock_credentials = mock.create_autospec(google.oauth2.service_account.Credentials)
24+
return mock_credentials
25+
26+
27+
@pytest.fixture
28+
def mock_compute_engine_credentials():
29+
import google.auth.compute_engine
30+
31+
mock_credentials = mock.create_autospec(google.auth.compute_engine.Credentials)
32+
return mock_credentials
33+
34+
35+
@pytest.fixture(autouse=True)
36+
def no_auth(monkeypatch):
37+
import pydata_google_auth
38+
39+
monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials)
40+
41+
1242
@pytest.fixture(autouse=True, scope="function")
1343
def reset_context():
1444
import pandas_gbq
@@ -20,41 +50,12 @@ def reset_context():
2050
@pytest.fixture(autouse=True)
2151
def mock_bigquery_client(monkeypatch):
2252
import google.cloud.bigquery
23-
import google.cloud.bigquery.table
2453

2554
mock_client = mock.create_autospec(google.cloud.bigquery.Client)
2655
# Constructor returns the mock itself, so this mock can be treated as the
2756
# constructor or the instance.
2857
mock_client.return_value = mock_client
29-
30-
mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
31-
mock_query.job_id = "some-random-id"
32-
mock_query.state = "DONE"
33-
mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
34-
mock_rows.total_rows = 1
35-
36-
mock_rows.__iter__.return_value = [(1,)]
37-
mock_query.result.return_value = mock_rows
38-
mock_client.list_rows.return_value = mock_rows
39-
mock_client.query.return_value = mock_query
40-
# Mock table creation.
4158
monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client)
4259
mock_client.reset_mock()
4360

44-
# Mock out SELECT 1 query results.
45-
def generate_schema():
46-
query = mock_client.query.call_args[0][0] if mock_client.query.call_args else ""
47-
if query == "SELECT 1 AS int_col":
48-
return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")]
49-
else:
50-
return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")]
51-
52-
type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema)
53-
54-
# Mock out get_table.
55-
def get_table(table_ref_or_id, **kwargs):
56-
return google.cloud.bigquery.Table(table_ref_or_id)
57-
58-
mock_client.get_table.side_effect = get_table
59-
6061
return mock_client

tests/unit/test_auth.py

+11-14
Original file line numberDiff line numberDiff line change
@@ -28,35 +28,32 @@ def test_get_credentials_default_credentials(monkeypatch):
2828
import google.auth
2929
import google.auth.credentials
3030
import google.cloud.bigquery
31+
import pydata_google_auth
3132

32-
def mock_default_credentials(scopes=None, request=None):
33-
return (
34-
mock.create_autospec(google.auth.credentials.Credentials),
35-
"default-project",
36-
)
33+
mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)
34+
35+
def mock_default_credentials(scopes, **kwargs):
36+
return (mock_user_credentials, "test-project")
3737

38-
monkeypatch.setattr(google.auth, "default", mock_default_credentials)
38+
monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials)
3939

4040
credentials, project = auth.get_credentials()
41-
assert project == "default-project"
41+
assert project == "test-project"
4242
assert credentials is not None
4343

4444

4545
def test_get_credentials_load_user_no_default(monkeypatch):
4646
import google.auth
4747
import google.auth.credentials
48+
import pydata_google_auth
4849
import pydata_google_auth.cache
4950

50-
def mock_default_credentials(scopes=None, request=None):
51-
return (None, None)
52-
53-
monkeypatch.setattr(google.auth, "default", mock_default_credentials)
5451
mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)
5552

56-
mock_cache = mock.create_autospec(pydata_google_auth.cache.CredentialsCache)
57-
mock_cache.load.return_value = mock_user_credentials
53+
def mock_default_credentials(scopes, **kwargs):
54+
return (mock_user_credentials, None)
5855

59-
monkeypatch.setattr(auth, "get_credentials_cache", lambda _: mock_cache)
56+
monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials)
6057

6158
credentials, project = auth.get_credentials()
6259
assert project is None

tests/unit/test_context.py

+16
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,25 @@
66

77
from unittest import mock
88

9+
import google.cloud.bigquery
10+
import google.cloud.bigquery.table
911
import pytest
1012

1113

14+
@pytest.fixture(autouse=True)
15+
def default_bigquery_client(mock_bigquery_client):
16+
mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
17+
mock_query.job_id = "some-random-id"
18+
mock_query.state = "DONE"
19+
mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
20+
mock_rows.total_rows = 1
21+
mock_rows.__iter__.return_value = [(1,)]
22+
mock_query.result.return_value = mock_rows
23+
mock_bigquery_client.list_rows.return_value = mock_rows
24+
mock_bigquery_client.query.return_value = mock_query
25+
return mock_bigquery_client
26+
27+
1228
@pytest.fixture(autouse=True)
1329
def mock_get_credentials(monkeypatch):
1430
from pandas_gbq import auth

tests/unit/test_features.py

+19
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
@pytest.fixture(autouse=True)
1111
def fresh_bigquery_version(monkeypatch):
1212
monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)
13+
monkeypatch.setattr(FEATURES, "_pandas_installed_version", None)
1314

1415

1516
@pytest.mark.parametrize(
@@ -28,3 +29,21 @@ def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, exp
2829

2930
monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version)
3031
assert FEATURES.bigquery_has_from_dataframe_with_csv == expected
32+
33+
34+
@pytest.mark.parametrize(
35+
["pandas_version", "expected"],
36+
[
37+
("0.14.7", False),
38+
("0.22.1", False),
39+
("0.23.0", True),
40+
("0.23.1", True),
41+
("1.0.0", True),
42+
("2.1.3", True),
43+
],
44+
)
45+
def test_pandas_has_deprecated_verbose(monkeypatch, pandas_version, expected):
46+
import pandas
47+
48+
monkeypatch.setattr(pandas, "__version__", pandas_version)
49+
assert FEATURES.pandas_has_deprecated_verbose == expected

0 commit comments

Comments
 (0)