|
14 | 14 | StringType,
|
15 | 15 | StructField,
|
16 | 16 | StructType,
|
| 17 | + TimestampType, |
17 | 18 | )
|
18 | 19 | import pytest
|
19 | 20 |
|
@@ -59,6 +60,55 @@ def test_pandas_to_spark_schema_nullable(self):
|
59 | 60 | schema = _pandas_to_spark_schema(df, nullable=False)
|
60 | 61 | assert not schema.fields[0].nullable
|
61 | 62 |
|
def test_pandas_to_spark_schema_datetime_types(self):
    """Check that pandas datetime columns are mapped to Spark TimestampType.

    Builds a four-column frame (nanosecond and millisecond resolution,
    with and without a UTC-based source), converts it with
    ``_pandas_to_spark_schema`` using the default arguments, and asserts
    that each resulting field is a nullable ``TimestampType``.
    """
    timestamps = ["2023-01-01 10:00:00", "2023-01-02 11:00:00"]
    naive_ns = pd.to_datetime(timestamps)
    utc_ns = pd.to_datetime(timestamps, utc=True)

    # NOTE: "datetime_ms_utc" is parsed as UTC but has its timezone dropped
    # by tz_localize(None) before the cast, so its final dtype is the same
    # tz-naive "datetime64[ms]" as "datetime_ms" (asserted at the end).
    df = pd.DataFrame(
        {
            "datetime_ns": naive_ns,
            "datetime_ns_utc": utc_ns,
            "datetime_ms": naive_ns.astype("datetime64[ms]"),
            "datetime_ms_utc": utc_ns.tz_localize(None).astype("datetime64[ms]"),
        }
    )

    # Column name -> dtype string the fixture is expected to carry.
    expected_dtypes = {
        "datetime_ns": "datetime64[ns]",
        "datetime_ns_utc": "datetime64[ns, UTC]",
        "datetime_ms": "datetime64[ms]",
        "datetime_ms_utc": "datetime64[ms]",
    }

    schema = _pandas_to_spark_schema(df)

    # The result must be a struct with one field per input column.
    assert isinstance(schema, StructType)
    assert len(schema.fields) == 4

    # Every datetime column should come back as a nullable TimestampType.
    field_dict = {}
    for schema_field in schema.fields:
        field_dict[schema_field.name] = schema_field

    for field_name in expected_dtypes:
        assert isinstance(field_dict[field_name].dataType, TimestampType), (
            f"Field {field_name} should be TimestampType, "
            f"got {type(field_dict[field_name].dataType)}"
        )
        assert field_dict[field_name].nullable

    # Sanity-check the fixture itself: the pandas dtypes must be the ones
    # this test claims to exercise.
    for column_name, dtype_name in expected_dtypes.items():
        assert str(df[column_name].dtype) == dtype_name
| 111 | + |
62 | 112 |
|
63 | 113 | # Completely isolated test class for QueryAPIDataCloudReader
|
64 | 114 | @pytest.mark.usefixtures("patch_all_requests")
|
|
0 commit comments