
fix: convert to datetime64[us] when TIMESTAMP or DATETIME is empty #858

Open · wants to merge 12 commits into main
10 changes: 5 additions & 5 deletions docs/reading.rst
@@ -56,17 +56,17 @@ Inferring the DataFrame's dtypes
The :func:`~pandas_gbq.read_gbq` method infers the pandas dtype for each
column, based on the BigQuery table schema.

-================== =========================
+================== ============================================
BigQuery Data Type dtype
-================== =========================
+================== ============================================
BOOL               boolean
INT64              Int64
FLOAT64            float64
TIME               dbtime
DATE               dbdate or object
-DATETIME           datetime64[ns] or object
-TIMESTAMP          datetime64[ns, UTC] or object
-================== =========================
+DATETIME           datetime64[ns] (datetime64[us] if pandas version >= 2.1.0) or object
+TIMESTAMP          datetime64[ns, UTC] (datetime64[us, UTC] if pandas version >= 2.1.0) or object
+================== ============================================

If any DATE/DATETIME/TIMESTAMP value is outside of the range of `pandas.Timestamp.min
<https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.min.html>`__
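As a quick illustration of the mapping above, a hedged sketch (not part of this diff: the query and project id are placeholders, and the printed dtype assumes this change is applied on pandas >= 2.1.0): an empty TIMESTAMP result should come back with the timezone-aware dtype listed in the table.

    import pandas_gbq

    # Placeholder query that returns zero rows with a single TIMESTAMP column.
    df = pandas_gbq.read_gbq(
        "SELECT CURRENT_TIMESTAMP() AS ts LIMIT 0",
        project_id="my-project",  # placeholder project id
    )
    # Expected: datetime64[us, UTC] on pandas >= 2.1.0, datetime64[ns, UTC] otherwise.
    print(df["ts"].dtype)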
9 changes: 9 additions & 0 deletions pandas_gbq/gbq.py
@@ -630,6 +630,7 @@ def _finalize_dtypes(
    """
    import db_dtypes
    import pandas.api.types
+    import pandas

    # If you update this mapping, also update the table at
    # `docs/reading.rst`.
@@ -638,6 +639,14 @@
        "DATETIME": "datetime64[ns]",
        "TIMESTAMP": "datetime64[ns]",
    }
+    if tuple(int(part) for part in pandas.__version__.split(".")[:2]) >= (2, 1):
+        # When pandas is 2.1.0 or later, the default timestamp dtype is 'datetime64[us]',
+        # so use 'datetime64[us]' instead of 'datetime64[ns]'.
+        dtype_map = {
+            "DATE": db_dtypes.DateDtype(),
+            "DATETIME": "datetime64[us]",
+            "TIMESTAMP": pandas.DatetimeTZDtype(unit="us", tz="UTC"),
+        }

    for field in schema_fields:
        # This method doesn't modify ARRAY/REPEATED columns.
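For reference, a minimal standalone sketch of the version gate used in the hunk above (illustration only, not part of the change): the same major/minor tuple comparison selects microsecond-precision dtypes on pandas 2.1.0 and later and falls back to nanosecond precision otherwise.

    import pandas

    PANDAS_VERSION = tuple(int(part) for part in pandas.__version__.split(".")[:2])

    if PANDAS_VERSION >= (2, 1):
        # Matches the updated dtype_map: microsecond precision on pandas 2.1.0+.
        timestamp_dtype = pandas.DatetimeTZDtype(unit="us", tz="UTC")
        datetime_dtype = "datetime64[us]"
    else:
        # Nanosecond precision for older pandas, matching the default dtype_map.
        timestamp_dtype = pandas.DatetimeTZDtype(unit="ns", tz="UTC")
        datetime_dtype = "datetime64[ns]"

    print(timestamp_dtype, datetime_dtype)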
2 changes: 2 additions & 0 deletions testing/constraints-3.10.txt
@@ -0,0 +1,2 @@
numpy==1.26.4
pandas==2.0.3
1 change: 1 addition & 0 deletions testing/constraints-3.11.txt
@@ -0,0 +1 @@
pandas==2.1.4
4 changes: 2 additions & 2 deletions testing/constraints-3.9.txt
@@ -1,2 +1,2 @@
-numpy==1.19.4
-pandas==1.1.4
+numpy==1.20.3
+pandas==1.5.3
71 changes: 71 additions & 0 deletions tests/unit/test_gbq.py
@@ -28,6 +28,9 @@
pytestmark = pytest.mark.filterwarnings("ignore:credentials from Google Cloud SDK")


+PANDAS_VERSION = tuple(int(part) for part in pandas.__version__.split(".")[:2])
+
+
def _make_connector(project_id="some-project", **kwargs):
    return gbq.GbqConnector(project_id, **kwargs)

@@ -113,6 +116,74 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected):
    assert result == {"x": expected}


+@pytest.mark.parametrize(
+    ("data", "schema_type", "expected"),
+    [
+        pytest.param(
+            pandas.to_datetime(["2017-01-01T12:00:00Z"]).astype(
+                pandas.DatetimeTZDtype(
+                    # Microseconds aren't supported until newer pandas.
+                    # https://github.com/googleapis/python-bigquery-pandas/issues/852
+                    unit="us" if PANDAS_VERSION >= (2, 1) else "ns",
+                    tz="UTC",
+                ),
+            ),
+            "TIMESTAMP",
+            pandas.DatetimeTZDtype(
+                # Microseconds aren't supported until newer pandas.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/852
+                unit="us" if PANDAS_VERSION >= (2, 1) else "ns",
+                tz="UTC",
+            ),
+        ),
+        (
+            pandas.to_datetime([]).astype(object),
+            "TIMESTAMP",
+            pandas.DatetimeTZDtype(
+                # Microseconds aren't supported until newer pandas.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/852
+                unit="us" if PANDAS_VERSION >= (2, 1) else "ns",
+                tz="UTC",
+            ),
+        ),
+        (
+            pandas.to_datetime(["2017-01-01T12:00:00"]).astype(
+                # Microseconds aren't supported until newer pandas.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/852
+                "datetime64[us]"
+                if PANDAS_VERSION >= (2, 1)
+                else "datetime64[ns]",
+            ),
+            "DATETIME",
+            numpy.dtype(
+                # Microseconds aren't supported until newer pandas.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/852
+                "datetime64[us]"
+                if PANDAS_VERSION >= (2, 1)
+                else "datetime64[ns]",
+            ),
+        ),
+        (
+            pandas.to_datetime([]).astype(object),
+            "DATETIME",
+            numpy.dtype(
+                # Microseconds aren't supported until newer pandas.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/852
+                "datetime64[us]"
+                if PANDAS_VERSION >= (2, 1)
+                else "datetime64[ns]",
+            ),
+        ),
+    ],
+)
+def test__finalize_dtypes(data, schema_type, expected):
+    result = gbq._finalize_dtypes(
+        pandas.DataFrame(dict(x=data)),
+        [dict(name="x", type=schema_type, mode="NULLABLE")],
+    )
+    assert result["x"].dtype == expected


@pytest.mark.parametrize(
["query_or_table", "expected"],
[
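Beyond pytest, the same behavior can be checked interactively; a minimal sketch mirroring the empty-column DATETIME case above (it assumes this change is applied and imports gbq the same way the tests do):

    import pandas
    from pandas_gbq import gbq

    # An empty, all-object column declared as DATETIME in the schema should be
    # finalized to a datetime64 dtype: microsecond precision on pandas >= 2.1.0.
    frame = pandas.DataFrame({"x": pandas.to_datetime([]).astype(object)})
    result = gbq._finalize_dtypes(
        frame,
        [{"name": "x", "type": "DATETIME", "mode": "NULLABLE"}],
    )
    print(result["x"].dtype)  # datetime64[us] on pandas >= 2.1.0, datetime64[ns] otherwise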