Skip to content

Commit 68bb695

Browse files
seismanmichaelgrundyvonnefroehlich
authored
clib.Session.create_data: Improve docstrings and fix the comments for the dim parameter (#3532)
Co-authored-by: Michael Grund <[email protected]> Co-authored-by: Yvonne Fröhlich <[email protected]>
1 parent ed18111 commit 68bb695

7 files changed

+144
-81
lines changed

pygmt/clib/conversion.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]:
195195

196196

197197
def sequence_to_ctypes_array(
198-
sequence: Sequence | None, ctype, size: int
198+
sequence: Sequence[int | float] | np.ndarray | None, ctype, size: int
199199
) -> ctp.Array | None:
200200
"""
201201
Convert a sequence of numbers into a ctypes array variable.

pygmt/clib/session.py

+129-66
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,19 @@
6868

6969
DIRECTIONS = ["GMT_IN", "GMT_OUT"]
7070

71-
MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"]
71+
MODES = [
72+
"GMT_CONTAINER_AND_DATA", # Create/Read/Write both container and the data array
73+
"GMT_CONTAINER_ONLY", # Create/Read/Write the container but no data array
74+
"GMT_DATA_ONLY", # Create/Read/Write the container's data array only
75+
"GMT_IS_OUTPUT", # For creating a resource as a container for output
76+
]
7277
MODE_MODIFIERS = [
73-
"GMT_GRID_IS_CARTESIAN",
74-
"GMT_GRID_IS_GEO",
75-
"GMT_WITH_STRINGS",
78+
"GMT_GRID_IS_CARTESIAN", # Grid is not geographic but Cartesian
79+
"GMT_GRID_IS_GEO", # Grid is geographic, not Cartesian
80+
"GMT_WITH_STRINGS", # Allocate string array for GMT_DATASET/GMT_VECTOR/GMT_MATRIX
7681
]
7782

78-
REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"]
83+
REGISTRATIONS = ["GMT_GRID_NODE_REG", "GMT_GRID_PIXEL_REG"]
7984

8085
DTYPES = {
8186
np.int8: "GMT_CHAR",
@@ -643,53 +648,109 @@ def call_module(self, module: str, args: str | list[str]):
643648

644649
def create_data(
645650
self,
646-
family,
647-
geometry,
648-
mode,
649-
dim=None,
650-
ranges=None,
651-
inc=None,
652-
registration="GMT_GRID_NODE_REG",
653-
pad=None,
654-
):
651+
family: str,
652+
geometry: str,
653+
mode: str,
654+
dim: Sequence[int] | None = None,
655+
ranges: Sequence[float] | None = None,
656+
inc: Sequence[float] | None = None,
657+
registration: Literal[
658+
"GMT_GRID_NODE_REG", "GMT_GRID_PIXEL_REG"
659+
] = "GMT_GRID_NODE_REG",
660+
pad: int | None = None,
661+
) -> ctp.c_void_p:
655662
"""
656-
Create an empty GMT data container.
663+
Create an empty GMT data container and allocate space to hold data.
664+
665+
Valid data families and geometries are in ``FAMILIES`` and ``GEOMETRIES``.
666+
667+
There are two ways to define the dimensions needed to actually allocate memory:
668+
669+
1. Via ``ranges``, ``inc`` and ``registration``.
670+
2. Via ``dim`` and ``registration``.
671+
672+
``dim`` contains up to 4 values and they have different meanings for
673+
different GMT data families:
674+
675+
For ``GMT_DATASET``:
676+
677+
- 0: number of tables
678+
- 1: number of segments per table
679+
- 2: number of rows per segment
680+
- 3: number of columns per row
681+
682+
For ``GMT_VECTOR``:
683+
684+
- 0: number of columns
685+
- 1: number of rows [optional, can be 0 if unknown]
686+
- 2: data type (e.g., ``GMT_DOUBLE``) [Will be overwritten by ``put_vector``]
687+
688+
For ``GMT_GRID``/``GMT_IMAGE``/``GMT_CUBE``/``GMT_MATRIX``:
689+
690+
- 0: number of columns
691+
- 1: number of rows
692+
- 2: number of bands or layers [Ignored for ``GMT_GRID``]
693+
- 3: data type (e.g., ``GMT_DOUBLE``) [For ``GMT_MATRIX`` only, but will be
694+
overwritten by ``put_matrix``]
695+
696+
In other words, ``inc`` is assumed to be 1.0, and ``ranges`` is
697+
[0, dim[0], 0, dim[1]] for pixel registration or
698+
[0, dim[0]-1.0, 0, dim[1]-1.0] for gridline (node) registration.
699+
700+
701+
When creating a grid/image/cube, you can do it in one or two steps:
702+
703+
1. Call this function with ``mode="GMT_CONTAINER_AND_DATA"``. This creates
704+
a header and allocates a grid or an image.
705+
2. Call this function twice:
706+
707+
1. First with ``mode="GMT_CONTAINER_ONLY"``, to create a header only and
708+
compute the dimensions based on other parameters
709+
2. Second with ``mode="GMT_DATA_ONLY"``, to allocate the grid/image/cube
710+
array based on the dimensions already set. This time, you pass NULL for
711+
``dim``/``ranges``/``inc``/``registration``/``pad`` and let ``data`` be
712+
the void pointer returned in the first step.
713+
714+
**Note**: This is not implemented yet, since this function doesn't have the
715+
``data`` parameter.
657716
658717
Parameters
659718
----------
660-
family : str
661-
A valid GMT data family name (e.g., ``'GMT_IS_DATASET'``). See the
662-
``FAMILIES`` attribute for valid names.
663-
geometry : str
664-
A valid GMT data geometry name (e.g., ``'GMT_IS_POINT'``). See the
665-
``GEOMETRIES`` attribute for valid names.
666-
mode : str
667-
A valid GMT data mode (e.g., ``'GMT_IS_OUTPUT'``). See the
668-
``MODES`` attribute for valid names.
669-
dim : list of 4 integers
670-
The dimensions of the dataset. See the documentation for the GMT C
671-
API function ``GMT_Create_Data`` (``src/gmt_api.c``) for the full
672-
range of options regarding 'dim'. If ``None``, will pass in the
673-
NULL pointer.
674-
ranges : list of 4 floats
675-
The dataset extent. Also a bit of a complicated argument. See the C
676-
function documentation. It's called ``range`` in the C function but
677-
it would conflict with the Python built-in ``range`` function.
678-
inc : list of 2 floats
679-
The increments between points of the dataset. See the C function
680-
documentation.
681-
registration : str
682-
The node registration (what the coordinates mean). Can be
683-
``'GMT_GRID_PIXEL_REG'`` or ``'GMT_GRID_NODE_REG'``. Defaults to
684-
``'GMT_GRID_NODE_REG'``.
685-
pad : int
686-
The grid padding. Defaults to ``GMT_PAD_DEFAULT``.
719+
family
720+
A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See ``FAMILIES``
721+
for valid names.
722+
geometry
723+
A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See
724+
``GEOMETRIES`` for valid names.
725+
mode
726+
A valid GMT data mode. See ``MODES`` for valid names. For
727+
``GMT_IS_DATASET``/``GMT_IS_MATRIX``/``GMT_IS_VECTOR``, adding
728+
``GMT_WITH_STRINGS`` to the ``mode`` will allocate the corresponding arrays
729+
of string pointers.
730+
dim
731+
The dimensions of the dataset, as explained above. If ``None``, will pass in
732+
the NULL pointer.
733+
ranges
734+
The data extent.
735+
inc
736+
The increments between points of the dataset.
737+
registration
738+
The node registration. Can be ``"GMT_GRID_PIXEL_REG"`` or
739+
``"GMT_GRID_NODE_REG"``.
740+
pad
741+
The padding for ``GMT_IS_GRID``/``GMT_IS_IMAGE``/``GMT_IS_CUBE``. If
742+
``None``, defaults to ``"GMT_PAD_DEFAULT"``.
743+
744+
For ``GMT_IS_MATRIX``, it can be:
745+
746+
- 0: default row/col orientation [Default]
747+
- 1: row-major format (C)
748+
- 2: column-major format (FORTRAN)
687749
688750
Returns
689751
-------
690-
data_ptr : int
691-
A ctypes pointer (an integer) to the allocated ``GMT_Dataset``
692-
object.
752+
data_ptr
753+
A ctypes pointer (an integer) to the allocated GMT data container.
693754
"""
694755
c_create_data = self.get_libgmt_func(
695756
"GMT_Create_Data",
@@ -703,8 +764,8 @@ def create_data(
703764
ctp.POINTER(ctp.c_double), # inc
704765
ctp.c_uint, # registration
705766
ctp.c_int, # pad
706-
ctp.c_void_p,
707-
], # data
767+
ctp.c_void_p, # data
768+
],
708769
restype=ctp.c_void_p,
709770
)
710771

@@ -717,31 +778,30 @@ def create_data(
717778
geometry_int = self._parse_constant(geometry, valid=GEOMETRIES)
718779
registration_int = self._parse_constant(registration, valid=REGISTRATIONS)
719780

720-
# Convert dim, ranges, and inc to ctypes arrays if given (will be None
721-
# if not given to represent NULL pointers)
722-
dim = sequence_to_ctypes_array(dim, ctp.c_uint64, 4)
723-
ranges = sequence_to_ctypes_array(ranges, ctp.c_double, 4)
724-
inc = sequence_to_ctypes_array(inc, ctp.c_double, 2)
781+
# Convert dim, ranges, and inc to ctypes arrays if given (will be None if not
782+
# given to represent NULL pointers)
783+
dim_ctp = sequence_to_ctypes_array(dim, ctp.c_uint64, 4)
784+
ranges_ctp = sequence_to_ctypes_array(ranges, ctp.c_double, 4)
785+
inc_ctp = sequence_to_ctypes_array(inc, ctp.c_double, 2)
725786

726-
# Use a NULL pointer (None) for existing data to indicate that the
727-
# container should be created empty. Fill it in later using put_vector
728-
# and put_matrix.
787+
# Use a NULL pointer (None) for existing data to indicate that the container
788+
# should be created empty. Fill it in later using put_vector and put_matrix.
729789
data_ptr = c_create_data(
730790
self.session_pointer,
731791
family_int,
732792
geometry_int,
733793
mode_int,
734-
dim,
735-
ranges,
736-
inc,
794+
dim_ctp,
795+
ranges_ctp,
796+
inc_ctp,
737797
registration_int,
738798
self._parse_pad(family, pad),
739799
None,
740800
)
741801

742802
if data_ptr is None:
743-
raise GMTCLibError("Failed to create an empty GMT data pointer.")
744-
803+
msg = "Failed to create an empty GMT data pointer."
804+
raise GMTCLibError(msg)
745805
return data_ptr
746806

747807
def _parse_pad(self, family, pad):
@@ -1248,7 +1308,7 @@ def open_virtualfile(
12481308
... family=family,
12491309
... geometry=geometry,
12501310
... mode="GMT_CONTAINER_ONLY",
1251-
... dim=[2, 5, 1, 0], # columns, lines, segments, type
1311+
... dim=[2, 5, lib["GMT_INT"], 0], # ncolumns, nrows, dtype, unused
12521312
... )
12531313
... lib.put_vector(dataset, column=0, vector=x)
12541314
... lib.put_vector(dataset, column=1, vector=y)
@@ -1413,7 +1473,10 @@ def virtualfile_from_vectors(
14131473
geometry = "GMT_IS_POINT"
14141474

14151475
dataset = self.create_data(
1416-
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0]
1476+
family,
1477+
geometry,
1478+
mode="GMT_CONTAINER_ONLY",
1479+
dim=[columns, rows, self["GMT_DOUBLE"], 0],
14171480
)
14181481

14191482
# Use put_vector for columns with numerical type data
@@ -1501,12 +1564,13 @@ def virtualfile_from_matrix(self, matrix: np.ndarray) -> Generator[str, None, No
15011564
# around until the virtual file is closed.
15021565
matrix = np.ascontiguousarray(matrix)
15031566
rows, columns = matrix.shape
1567+
layers = 1
15041568

15051569
family = "GMT_IS_DATASET|GMT_VIA_MATRIX"
15061570
geometry = "GMT_IS_POINT"
15071571

15081572
dataset = self.create_data(
1509-
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0]
1573+
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, layers, 0]
15101574
)
15111575

15121576
self.put_matrix(dataset, matrix)
@@ -1592,7 +1656,7 @@ def virtualfile_from_grid(self, grid: xr.DataArray) -> Generator[str, None, None
15921656
mode=f"GMT_CONTAINER_ONLY|{_gtype}",
15931657
ranges=region,
15941658
inc=inc,
1595-
registration=_reg,
1659+
registration=_reg, # type: ignore[arg-type]
15961660
)
15971661
self.put_matrix(gmt_grid, matrix)
15981662
with self.open_virtualfile(
@@ -1677,8 +1741,7 @@ def virtualfile_from_stringio(
16771741
mode="GMT_CONTAINER_ONLY|GMT_WITH_STRINGS",
16781742
dim=[n_tables, n_segments, n_rows, n_columns],
16791743
)
1680-
dataset = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET))
1681-
table = dataset.contents.table[0].contents
1744+
table = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET)).contents.table[0].contents
16821745
for i, segment in enumerate(segments):
16831746
seg = table.segment[i].contents
16841747
if segment["header"]:

pygmt/tests/test_clib_create_data.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@ def test_create_data_dataset():
1818
family="GMT_IS_DATASET|GMT_VIA_VECTOR",
1919
geometry="GMT_IS_POINT",
2020
mode="GMT_CONTAINER_ONLY",
21-
dim=[10, 20, 1, 0], # columns, rows, layers, dtype
21+
dim=[10, 20, 0, 0], # ncolumns, nrows, dtype, unused
2222
)
2323
# Dataset from matrices
2424
data_matrix = lib.create_data(
2525
family="GMT_IS_DATASET|GMT_VIA_MATRIX",
2626
geometry="GMT_IS_POINT",
2727
mode="GMT_CONTAINER_ONLY",
28-
dim=[10, 20, 1, 0],
28+
dim=[10, 20, 1, 0], # ncolumns, nrows, nlayers, dtype
2929
)
3030
assert data_vector != data_matrix
3131

@@ -40,7 +40,7 @@ def test_create_data_grid_dim():
4040
family="GMT_IS_GRID|GMT_VIA_MATRIX",
4141
geometry="GMT_IS_SURFACE",
4242
mode="GMT_CONTAINER_ONLY",
43-
dim=[10, 20, 1, 0],
43+
dim=[10, 20, 1, 0], # ncolumns, nrows, nlayers, dtype
4444
)
4545

4646

@@ -94,5 +94,5 @@ def test_create_data_fails():
9494
family="GMT_IS_DATASET",
9595
geometry="GMT_IS_SURFACE",
9696
mode="GMT_CONTAINER_ONLY",
97-
dim=[11, 10, 2, 0],
97+
dim=[11, 10, 2, 0], # n_tables, n_segments, n_rows, n_columns
9898
)

pygmt/tests/test_clib_put_matrix.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_put_matrix(dtypes):
3232
family="GMT_IS_DATASET|GMT_VIA_MATRIX",
3333
geometry="GMT_IS_POINT",
3434
mode="GMT_CONTAINER_ONLY",
35-
dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype
35+
dim=[shape[1], shape[0], 1, 0], # ncolumns, nrows, nlayers, dtype
3636
)
3737
data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
3838
lib.put_matrix(dataset, matrix=data)

pygmt/tests/test_clib_put_strings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_put_strings():
2020
family="GMT_IS_DATASET|GMT_VIA_VECTOR",
2121
geometry="GMT_IS_POINT",
2222
mode="GMT_CONTAINER_ONLY",
23-
dim=[2, 5, 1, 0], # columns, rows, layers, dtype
23+
dim=[2, 5, 0, 0], # ncolumns, nrows, dtype, unused
2424
)
2525
x = np.array([1, 2, 3, 4, 5], dtype=np.int32)
2626
y = np.array([6, 7, 8, 9, 10], dtype=np.int32)

0 commit comments

Comments
 (0)