Skip to content

Commit 78066a4

Browse files
committed
Refactor the data_kind function and improve docstrings
1 parent d7560fa commit 78066a4

File tree

1 file changed

+59
-39
lines changed

1 file changed

+59
-39
lines changed

pygmt/helpers/utils.py

+59-39
Original file line numberDiff line numberDiff line change
@@ -188,30 +188,38 @@ def _check_encoding(
188188

189189

190190
def data_kind(
191-
data: Any = None, required: bool = True
191+
data: Any, required: bool = True
192192
) -> Literal["arg", "file", "geojson", "grid", "image", "matrix", "vectors"]:
193193
"""
194194
Check the kind of data that is provided to a module.
195195
196-
The ``data`` argument can be in any type, but only following types are supported:
196+
Recognized data kinds are:
197197
198-
- a string or a :class:`pathlib.PurePath` object or a sequence of them, representing
199-
a file name or a list of file names
200-
- a 2-D or 3-D :class:`xarray.DataArray` object
201-
- a 2-D matrix
202-
- None, bool, int or float type representing an optional arguments
203-
- a geo-like Python object that implements ``__geo_interface__`` (e.g.,
204-
geopandas.GeoDataFrame or shapely.geometry)
198+
- ``"arg"``: bool, int or float, representing an optional argument, mainly used for
199+
dealing with optional virtual files
200+
- ``"file"``: a string or a :class:`pathlib.PurePath` object or a sequence of them,
201+
representing a file name or a list of file names
202+
- ``"geojson"``: a geo-like Python object that implements ``__geo_interface__``
203+
(e.g., geopandas.GeoDataFrame or shapely.geometry)
204+
- ``"grid"``: a :class:`xarray.DataArray` object whose number of dimensions is not 3
205+
- ``"image"``: a :class:`xarray.DataArray` object with 3 dimensions
206+
- ``"matrix"``: a :class:`pandas.DataFrame` object, a 2-D :class:`numpy.ndarray`
207+
or a sequence of sequences
208+
209+
In addition, the data can be given via a series of vectors (e.g., x/y/z). In this
210+
case, the ``data`` argument is ``None`` and the data kind is determined by the
211+
``required`` argument. The data kind is ``"vectors"`` if ``required`` is ``True``,
212+
otherwise the data kind is ``"arg"``.
213+
214+
The function will fall back to ``"matrix"`` for any unrecognized data.
205215
206216
Parameters
207217
----------
208-
data : str, pathlib.PurePath, None, bool, xarray.DataArray or {table-like}
209-
Pass in either a file name or :class:`pathlib.Path` to an ASCII data
210-
table, an :class:`xarray.DataArray`, a 1-D/2-D
211-
{table-classes} or an option argument.
218+
data
219+
The data that is provided to a module.
212220
required
213-
Set to True when 'data' is required, or False when dealing with
214-
optional virtual files. [Default is True].
221+
Whether the data is required. Set to ``False`` when dealing with optional
222+
virtual files.
215223
216224
Returns
217225
-------
@@ -222,46 +230,58 @@ def data_kind(
222230
--------
223231
>>> import numpy as np
224232
>>> import xarray as xr
233+
>>> import pandas as pd
225234
>>> import pathlib
235+
>>> [data_kind(data=data) for data in (2, 2.0, True, False)]
236+
['arg', 'arg', 'arg', 'arg']
226237
>>> data_kind(data=None)
227238
'vectors'
228-
>>> data_kind(data=np.arange(10).reshape((5, 2)))
229-
'matrix'
239+
>>> data_kind(data=None, required=False)
240+
'arg'
230241
>>> data_kind(data="my-data-file.txt")
231242
'file'
232243
>>> data_kind(data=pathlib.Path("my-data-file.txt"))
233244
'file'
234-
>>> data_kind(data=None, required=False)
235-
'arg'
236-
>>> data_kind(data=2.0, required=False)
237-
'arg'
238-
>>> data_kind(data=True, required=False)
239-
'arg'
245+
>>> data_kind(data=["data1.txt", "data2.txt"])
246+
'file'
240247
>>> data_kind(data=xr.DataArray(np.random.rand(4, 3)))
241248
'grid'
242249
>>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5)))
243250
'image'
251+
>>> data_kind(data=np.arange(10).reshape((5, 2)))
252+
'matrix'
253+
>>> data_kind(data=pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}))
254+
'matrix'
255+
>>> data_kind(data=[1, 2, 3])
256+
'matrix'
244257
"""
245-
kind: Literal["arg", "file", "geojson", "grid", "image", "matrix", "vectors"]
258+
# data is None, so data must be given via a series of vectors (i.e., x/y/z).
259+
# The only exception is when dealing with optional virtual files.
260+
if data is None:
261+
return "vectors" if required else "arg"
262+
263+
# A file or a list of files
246264
if isinstance(data, str | pathlib.PurePath) or (
247265
isinstance(data, list | tuple)
248266
and all(isinstance(_file, str | pathlib.PurePath) for _file in data)
249267
):
250-
# One or more files
251-
kind = "file"
252-
elif isinstance(data, bool | int | float) or (data is None and not required):
253-
kind = "arg"
254-
elif isinstance(data, xr.DataArray):
255-
kind = "image" if len(data.dims) == 3 else "grid"
256-
elif hasattr(data, "__geo_interface__"):
257-
# geo-like Python object that implements ``__geo_interface__``
258-
# (geopandas.GeoDataFrame or shapely.geometry)
259-
kind = "geojson"
260-
elif data is not None:
261-
kind = "matrix"
262-
else:
263-
kind = "vectors"
264-
return kind
268+
return "file"
269+
270+
# An optional argument
271+
if isinstance(data, bool | int | float):
272+
return "arg"
273+
274+
# An xr.DataArray grid or image
275+
if isinstance(data, xr.DataArray):
276+
return "image" if len(data.dims) == 3 else "grid"
277+
278+
# Geo-like Python object that implements ``__geo_interface__`` (e.g.,
279+
# geopandas.GeoDataFrame or shapely.geometry)
280+
if hasattr(data, "__geo_interface__"):
281+
return "geojson"
282+
283+
# Fallback to "matrix" for anything else
284+
return "matrix"
265285

266286

267287
def non_ascii_to_octal(

0 commit comments

Comments
 (0)