Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pyarrow: Check compatibility of pyarrow.array with string type #2933

Merged
merged 30 commits into main from pyarrow/string
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1f32a7c
Check passing pyarrow.array with string type to pygmt.text
weiji14 Dec 30, 2023
4c4e064
Check passing pyarrow.array with string type to virtualfile_from_vectors
weiji14 Dec 30, 2023
07fbca6
Merge branch 'main' into pyarrow/string
weiji14 Oct 11, 2024
d379e46
Enable passing pyarrow.StringArray to clib.Session.put_strings
weiji14 Oct 11, 2024
cfda386
Use "string" instead of pyarrow.string() in case pyarrow not installed
weiji14 Oct 11, 2024
0a6cda5
Try to fix type hints
weiji14 Oct 11, 2024
f59f93c
Add np.ndarray to StringArrayTypes and fix/ignore remaining type errors
weiji14 Oct 11, 2024
757da24
Merge branch 'main' into pyarrow/string
weiji14 Oct 11, 2024
17c1e9c
Move StringArrayTypes to pygmt/_typing.py
weiji14 Oct 11, 2024
0105d64
Add pyarrow to docs CI
weiji14 Oct 11, 2024
3ad0c86
Use np.asarray to convert pa.StringArray instead of .to_pylist()
weiji14 Oct 11, 2024
4bea288
Update note to say that PyArrow string types are now supported
weiji14 Oct 11, 2024
371174a
Add back pytest.mark.benchmark marker
weiji14 Oct 11, 2024
b588730
Add intersphinx link for pyarrow
weiji14 Oct 11, 2024
faf2065
Apply suggestions from code review
weiji14 Nov 6, 2024
b2efbb4
Merge branch 'main' into pyarrow/string
weiji14 Nov 6, 2024
9fd77dc
format
weiji14 Nov 6, 2024
ccf4eff
Merge branch 'main' into pyarrow/string
weiji14 Nov 7, 2024
44d01ed
Merge branch 'main' into pyarrow/string
weiji14 Nov 15, 2024
a927202
Revert "Enable passing pyarrow.StringArray to clib.Session.put_strings"
weiji14 Nov 15, 2024
7b00248
Reduce diff from messy revert handling
weiji14 Nov 15, 2024
7dc353b
Revert support of pyarrow.array inputs to put_strings
weiji14 Nov 15, 2024
ce76152
Remove StringArrayTypes type hint
weiji14 Nov 15, 2024
ef431af
Revert "Remove StringArrayTypes type hint"
weiji14 Nov 15, 2024
acaf350
Improve type-hint of text parameter in pygmt.Figure.text
weiji14 Nov 15, 2024
6ad6eb9
Move pa.array parametrizations to another file
weiji14 Nov 15, 2024
d88accd
Pass a tuple of vectors to virtualfile_from_vectors
weiji14 Nov 15, 2024
265132e
Move skip_if_no and pyarrow import to test_clib_virtualfile_from_vectors
weiji14 Nov 15, 2024
edb3438
The text argument can be None
weiji14 Nov 15, 2024
8172102
Simplify to remove getattr(pa, "array", None) call
weiji14 Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"contextily": ("https://contextily.readthedocs.io/en/stable/", None),
"geopandas": ("https://geopandas.org/en/stable/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"pyarrow": ("https://arrow.apache.org/docs/", None),
"python": ("https://docs.python.org/3/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"rasterio": ("https://rasterio.readthedocs.io/en/stable/", None),
Expand Down
6 changes: 3 additions & 3 deletions doc/ecosystem.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ Python objects. They are based on the C++ implementation of Arrow.
```{note}
If you have [PyArrow][] installed, PyGMT does have some initial support for
`pandas.Series` and `pandas.DataFrame` objects with Apache Arrow-backed arrays.
Specifically, only uint/int/float and date32/date64 are supported for now.
Support for string Array dtypes, Duration types and GeoArrow geometry types is still a work in progress.
For more details, see
Specifically, only uint/int/float, date32/date64 and string types are supported for now.
Support for Duration types and GeoArrow geometry types is still a work in progress. For
more details, see
[issue #2800](https://github.com/GenericMappingTools/pygmt/issues/2800).
```

Expand Down
10 changes: 10 additions & 0 deletions pygmt/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,17 @@
Type aliases for type hints.
"""

import contextlib
import importlib
from collections.abc import Sequence
from typing import Literal

import numpy as np

# Anchor codes
AnchorCode = Literal["TL", "TC", "TR", "ML", "MC", "MR", "BL", "BC", "BR"]

# String array types
StringArrayTypes = Sequence[str] | np.ndarray
with contextlib.suppress(ImportError):
StringArrayTypes |= importlib.import_module(name="pyarrow").StringArray
weiji14 marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 3 additions & 2 deletions pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,12 +280,13 @@ def sequence_to_ctypes_array(

def strings_to_ctypes_array(strings: Sequence[str] | np.ndarray) -> ctp.Array:
"""
Convert a sequence (e.g., a list) of strings into a ctypes array.
Convert a sequence (e.g., a list) of strings or numpy.ndarray of strings into a
ctypes array.

Parameters
----------
strings
A sequence of strings.
A sequence of strings, or a numpy.ndarray of str dtype.

Returns
-------
Expand Down
6 changes: 3 additions & 3 deletions pygmt/src/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Sequence

import numpy as np
from pygmt._typing import AnchorCode
from pygmt._typing import AnchorCode, StringArrayTypes
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
Expand Down Expand Up @@ -48,7 +48,7 @@ def text_( # noqa: PLR0912
x=None,
y=None,
position: AnchorCode | None = None,
text=None,
text: str | StringArrayTypes | None = None,
angle=None,
font=None,
justify: bool | None | AnchorCode | Sequence[AnchorCode] = None,
Expand Down Expand Up @@ -104,7 +104,7 @@ def text_( # noqa: PLR0912

For example, ``position="TL"`` plots the text at the Top Left corner
of the map.
text : str or 1-D array
text
The text string, or an array of strings to plot on the figure.
angle: float, str, bool or list
Set the angle measured in degrees counter-clockwise from
Expand Down
33 changes: 27 additions & 6 deletions pygmt/tests/test_clib_virtualfile_from_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
from pygmt.clib.session import DTYPES_NUMERIC
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import GMTTempFile
from pygmt.helpers.testing import skip_if_no

try:
import pyarrow as pa

pa_array = pa.array
except ImportError:
pa_array = None


@pytest.fixture(scope="module", name="dtypes")
Expand Down Expand Up @@ -53,17 +61,30 @@ def test_virtualfile_from_vectors(dtypes):


@pytest.mark.benchmark
@pytest.mark.parametrize("dtype", [str, object])
def test_virtualfile_from_vectors_one_string_or_object_column(dtype):
"""
Test passing in one column with string or object dtype into virtual file dataset.
@pytest.mark.parametrize(
("array_func", "dtype"),
[
pytest.param(np.array, {"dtype": np.str_}, id="str"),
pytest.param(np.array, {"dtype": np.object_}, id="object"),
pytest.param(
pa_array,
{}, # {"type": pa.string()}
marks=skip_if_no(package="pyarrow"),
id="pyarrow",
),
],
)
def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype):
"""
Test passing in one column with string (numpy/pyarrow) or object (numpy)
dtype into virtual file dataset.
"""
size = 5
x = np.arange(size, dtype=np.int32)
y = np.arange(size, size * 2, 1, dtype=np.int32)
strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype)
strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype)
with clib.Session() as lib:
with lib.virtualfile_from_vectors((x, y, strings)) as vfile:
with lib.virtualfile_from_vectors(vectors=(x, y, strings)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("convert", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
Expand Down
22 changes: 19 additions & 3 deletions pygmt/tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
from pygmt import Figure
from pygmt.exceptions import GMTCLibError, GMTInvalidInput
from pygmt.helpers import GMTTempFile
from pygmt.helpers.testing import skip_if_no

try:
import pyarrow as pa

pa_array = pa.array
except ImportError:
pa_array = None

TEST_DATA_DIR = Path(__file__).parent / "data"
POINTS_DATA = TEST_DATA_DIR / "points.txt"
Expand Down Expand Up @@ -48,8 +56,16 @@ def test_text_single_line_of_text(region, projection):


@pytest.mark.benchmark
@pytest.mark.mpl_image_compare
def test_text_multiple_lines_of_text(region, projection):
@pytest.mark.mpl_image_compare(filename="test_text_multiple_lines_of_text.png")
@pytest.mark.parametrize(
"array_func",
[
list,
pytest.param(np.array, id="numpy"),
pytest.param(pa_array, marks=skip_if_no(package="pyarrow"), id="pyarrow"),
],
)
def test_text_multiple_lines_of_text(region, projection, array_func):
"""
Place multiple lines of text at their respective x, y locations.
"""
Expand All @@ -59,7 +75,7 @@ def test_text_multiple_lines_of_text(region, projection):
projection=projection,
x=[1.2, 1.6],
y=[0.6, 0.3],
text=["This is a line of text", "This is another line of text"],
text=array_func(["This is a line of text", "This is another line of text"]),
)
return fig

Expand Down
Loading