Skip to content

Commit 78dfcf1

Browse files
seisman, michaelgrund, yvonnefroehlich
authored
geopandas: Correctly handle columns with integer values bigger than the largest 32-bit integer (#2841)
Co-authored-by: Michael Grund <[email protected]> Co-authored-by: Yvonne Fröhlich <[email protected]>
1 parent 716f622 commit 78dfcf1

File tree

2 files changed

+62
-19
lines changed

2 files changed

+62
-19
lines changed

pygmt/helpers/tempfile.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ def tempfile_from_geojson(geojson):
131131
os.remove(tmpfile.name) # ensure file is deleted first
132132
ogrgmt_kwargs = {"filename": tmpfile.name, "driver": "OGR_GMT", "mode": "w"}
133133
try:
134-
# Map int/int64 to int32 since OGR_GMT only supports 32-bit integer
134+
# OGR_GMT only supports 32-bit integers. We need to map int/int64
135+
# types to int32/float types depending on whether the column has a
136+
# 32-bit integer overflow issue. Related issues:
135137
# https://github.com/geopandas/geopandas/issues/967#issuecomment-842877704
136138
# https://github.com/GenericMappingTools/pygmt/issues/2497
137139
if geojson.index.name is None:
@@ -140,7 +142,8 @@ def tempfile_from_geojson(geojson):
140142
schema = gpd.io.file.infer_schema(geojson)
141143
for col, dtype in schema["properties"].items():
142144
if dtype in ("int", "int64"):
143-
schema["properties"][col] = "int32"
145+
overflow = geojson[col].abs().max() > 2**31 - 1
146+
schema["properties"][col] = "float" if overflow else "int32"
144147
ogrgmt_kwargs["schema"] = schema
145148
# Using geopandas.to_file to directly export to OGR_GMT format
146149
geojson.to_file(**ogrgmt_kwargs)

pygmt/tests/test_geopandas.py

+57-17
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,27 @@ def fixture_gdf():
3636
index=["multipolygon", "polygon", "linestring"],
3737
geometry=[multipolygon, polygon, linestring],
3838
)
39+
return gdf
40+
3941

42+
@pytest.fixture(scope="module", name="gdf_ridge")
43+
def fixture_gdf_ridge():
44+
"""
45+
Read a @RidgeTest.shp shapefile into a geopandas.GeoDataFrame and reproject
46+
the geometry.
47+
"""
48+
# Read shapefile into a geopandas.GeoDataFrame
49+
shapefile = which(
50+
fname="@RidgeTest.shp @RidgeTest.shx @RidgeTest.dbf @RidgeTest.prj",
51+
download="c",
52+
)
53+
gdf = gpd.read_file(shapefile[0])
54+
# Reproject the geometry
55+
gdf["geometry"] = (
56+
gdf.to_crs(crs="EPSG:3857")
57+
.buffer(distance=100000)
58+
.to_crs(crs="OGC:CRS84") # convert to lon/lat to prevent @null in PROJ CRS
59+
)
4060
return gdf
4161

4262

@@ -144,40 +164,60 @@ def test_geopandas_plot3d_non_default_circle():
144164
],
145165
)
146166
@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")
147-
def test_geopandas_plot_int_dtypes(dtype):
167+
def test_geopandas_plot_int_dtypes(gdf_ridge, dtype):
148168
"""
149-
Check that plotting a geopandas GeoDataFrame with integer columns works,
169+
Check that plotting a geopandas.GeoDataFrame with integer columns works,
150170
including int32 and int64 (non-nullable), Int32 and Int64 (nullable).
151171
152172
This is a regression test for
153173
https://github.com/GenericMappingTools/pygmt/issues/2497
154174
"""
155-
# Read shapefile in geopandas.GeoDataFrame
156-
shapefile = which(
157-
fname="@RidgeTest.shp @RidgeTest.shx @RidgeTest.dbf @RidgeTest.prj",
158-
download="c",
159-
)
160-
gdf = gpd.read_file(shapefile[0])
175+
# Convert NPOINTS column to integer type
176+
gdf_ridge["NPOINTS"] = gdf_ridge.NPOINTS.astype(dtype=dtype)
161177

162-
# Reproject geometry and change dtype of NPOINTS column
163-
gdf["geometry"] = (
164-
gdf.to_crs(crs="EPSG:3857")
165-
.buffer(distance=100000)
166-
.to_crs(crs="OGC:CRS84") # convert to lon/lat to prevent @null in PROJ CRS
178+
# Plot figure with three polygons colored based on NPOINTS value
179+
fig = Figure()
180+
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
181+
fig.plot(
182+
data=gdf_ridge,
183+
frame=True,
184+
pen="1p,black",
185+
fill="+z",
186+
cmap=True,
187+
aspatial="Z=NPOINTS",
167188
)
168-
gdf["NPOINTS"] = gdf.NPOINTS.astype(dtype=dtype)
189+
fig.colorbar()
190+
return fig
191+
192+
193+
@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")
194+
def test_geopandas_plot_int64_as_float(gdf_ridge):
195+
"""
196+
Check that big 64-bit integers are correctly mapped to float type in
197+
a geopandas.GeoDataFrame object.
198+
"""
199+
factor = 2**32
200+
# Convert NPOINTS column to int64 type and make big integers
201+
gdf_ridge["NPOINTS"] = gdf_ridge.NPOINTS.astype(dtype="int64")
202+
gdf_ridge["NPOINTS"] *= factor
203+
204+
# Make sure the column's largest absolute value exceeds the maximum signed 32-bit integer
205+
assert gdf_ridge["NPOINTS"].abs().max() > 2**31 - 1
169206

170207
# Plot figure with three polygons colored based on NPOINTS value
171208
fig = Figure()
172-
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
209+
makecpt(
210+
cmap="lisbon", series=[10 * factor, 60 * factor, 10 * factor], continuous=True
211+
)
173212
fig.plot(
174-
data=gdf,
213+
data=gdf_ridge,
175214
frame=True,
176215
pen="1p,black",
177-
close=True,
178216
fill="+z",
179217
cmap=True,
180218
aspatial="Z=NPOINTS",
181219
)
220+
# Generate a CPT for the 10-60 range so that the colorbar matches the reused baseline image
221+
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
182222
fig.colorbar()
183223
return fig

0 commit comments

Comments
 (0)