diff --git a/bench/ndarray/transpose.ipynb b/bench/ndarray/transpose.ipynb
new file mode 100644
index 00000000..621a97b9
--- /dev/null
+++ b/bench/ndarray/transpose.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "import numpy as np\n",
+    "import blosc2\n",
+    "import time\n",
+    "import plotly.express as px\n",
+    "import pandas as pd"
+   ],
+   "id": "55765646130156ef",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "sizes = [(100, 100), (500, 500), (500, 1000), (1000, 1000), (2000, 2000), (3000, 3000), (4000, 4000), (5000, 5000)]\n",
+    "sizes_mb = [(np.prod(size) * 8) / 2**20 for size in sizes]  # Convert to MB\n",
+    "results = {\"numpy\": [], \"blosc2\": []}"
+   ],
+   "id": "1cfb7daa6eee1401",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "for method in [\"numpy\", \"blosc2\"]:\n",
+    "    for size in sizes:\n",
+    "        arr = np.random.rand(*size)\n",
+    "        arr_b2 = blosc2.asarray(arr)\n",
+    "\n",
+    "        start_time = time.perf_counter()\n",
+    "\n",
+    "        if method == \"numpy\":\n",
+    "            np.transpose(arr).copy()\n",
+    "        elif method == \"blosc2\":\n",
+    "            blosc2.transpose(arr_b2)\n",
+    "\n",
+    "        end_time = time.perf_counter()\n",
+    "        time_b = end_time - start_time\n",
+    "\n",
+    "        print(f\"{method}: shape={size}, Performance = {time_b:.6f} s\")\n",
+    "        results[method].append(time_b)"
+   ],
+   "id": "384d0ad7983a8d26",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "df = pd.DataFrame({\n",
+    "    \"Matrix Size (MB)\": sizes_mb,\n",
+    "    \"NumPy Time (s)\": results[\"numpy\"],\n",
+    "    \"Blosc2 Time (s)\": results[\"blosc2\"]\n",
+    "})\n",
+    "\n",
+    "fig = px.line(df,\n",
+    "              x=\"Matrix Size (MB)\",\n",
+    "              y=[\"NumPy Time (s)\", \"Blosc2 Time (s)\"],\n",
+    "              title=\"Performance of Matrix Transposition (NumPy vs Blosc2)\",\n",
+    "              labels={\"value\": \"Time (s)\", \"variable\": \"Method\"},\n",
+    "              markers=True)\n",
+    "\n",
+    "fig.show()"
+   ],
+   "id": "c71ffb39eb28992c",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "%%time\n",
+    "shapes = [\n",
+    "    (100, 100), (2000, 2000), (3000, 3000), (4000, 4000), (3000, 7000),\n",
+    "    (5000, 5000), (6000, 6000), (7000, 7000), (8000, 8000), (6000, 12000),\n",
+    "    (9000, 9000), (10000, 10000),\n",
+    "    (10500, 10500), (11000, 11000), (11500, 11500), (12000, 12000),\n",
+    "    (12500, 12500), (13000, 13000), (13500, 13500), (14000, 14000),\n",
+    "    (14500, 14500), (15000, 15000), (15500, 15500), (16000, 16000),\n",
+    "    (16500, 16500), (17000, 17000)\n",
+    "]\n",
+    "chunkshapes = [None, (150, 300), (200, 500), (500, 200), (1000, 1000)]\n",
+    "\n",
+    "sizes = []\n",
+    "time_total = []\n",
+    "chunk_labels = []\n",
+    "\n",
+    "for shape in shapes:\n",
+    "    size_mb = (np.prod(shape) * 8) / (2 ** 20)\n",
+    "\n",
+    "    matrix_np = np.linspace(0, 1, np.prod(shape)).reshape(shape)\n",
+    "\n",
+    "    t0 = time.perf_counter()\n",
+    "    result_numpy = np.transpose(matrix_np).copy()\n",
+    "    numpy_time = time.perf_counter() - t0\n",
+    "\n",
+    "    time_total.append(numpy_time)\n",
+    "    sizes.append(size_mb)\n",
+    "    chunk_labels.append(\"NumPy\")\n",
+    "\n",
+    "    print(f\"NumPy:  Shape={shape}, Time = {numpy_time:.6f} s\")\n",
+    "\n",
+    "    for chunk in chunkshapes:\n",
+    "        matrix_blosc2 = blosc2.asarray(matrix_np, chunks=chunk)\n",
+    "\n",
+    "        t0 = time.perf_counter()\n",
+    "        result_blosc2 = blosc2.transpose(matrix_blosc2)\n",
+    "        blosc2_time = time.perf_counter() - t0\n",
+    "\n",
+    "        sizes.append(size_mb)\n",
+    "        time_total.append(blosc2_time)\n",
+    "        chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n",
+    "\n",
+    "        print(f\"Blosc2: Shape={shape}, Chunks = {matrix_blosc2.chunks}, Time = {blosc2_time:.6f} s\")\n",
+    "\n",
+    "df = pd.DataFrame({\n",
+    "    \"Matrix Size (MB)\": sizes,\n",
+    "    \"Time (s)\": time_total,\n",
+    "    \"Chunk Shape\": chunk_labels\n",
+    "})\n",
+    "\n",
+    "fig = px.line(df,\n",
+    "              x=\"Matrix Size (MB)\",\n",
+    "              y=\"Time (s)\",\n",
+    "              color=\"Chunk Shape\",\n",
+    "              title=\"Performance of Matrix Transposition (Blosc2 vs NumPy)\",\n",
+    "              labels={\"value\": \"Time (s)\", \"variable\": \"Metric\"},\n",
+    "              markers=True)\n",
+    "fig.show()"
+   ],
+   "id": "bcdd8aa5f65df561",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "1d2f48f370ba7e7a",
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "python3",
+   "language": "python",
+   "display_name": "Python 3 (ipykernel)"
+  }
+ },
+ "nbformat": 5,
+ "nbformat_minor": 9
+}
diff --git a/doc/reference/linear_algebra.rst b/doc/reference/linear_algebra.rst
index fbbda988..5effc568 100644
--- a/doc/reference/linear_algebra.rst
+++ b/doc/reference/linear_algebra.rst
@@ -12,3 +12,4 @@ The next functions can be used for computing linear algebra operations with :ref
    :nosignatures:
 
     matmul
+    transpose
diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py
index 2c474beb..772eeda5 100644
--- a/src/blosc2/__init__.py
+++ b/src/blosc2/__init__.py
@@ -248,6 +248,7 @@ class Tuner(Enum):
     full,
     save,
     matmul,
+    transpose,
 )
 
 from .c2array import c2context, C2Array, URLPath
diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py
index 2ef73d5a..8b5b5c2a 100644
--- a/src/blosc2/ndarray.py
+++ b/src/blosc2/ndarray.py
@@ -3671,9 +3671,9 @@ def matmul(x1: NDArray, x2: NDArray, **kwargs: Any) -> NDArray:
 
     Parameters
     ----------
-    x1: `NDArray`
+    x1: :ref:`NDArray`
         The first input array.
-    x2: `NDArray`
+    x2: :ref:`NDArray`
         The second input array.
     kwargs: Any, optional
         Keyword arguments that are supported by the :func:`empty` constructor.
@@ -3772,6 +3772,49 @@ def matmul(x1: NDArray, x2: NDArray, **kwargs: Any) -> NDArray:
     return result.squeeze()
 
 
+def transpose(x, **kwargs: Any) -> NDArray:
+    """
+    Returns a Blosc2 NDArray with axes transposed.
+
+    Parameters
+    ----------
+    x: :ref:`NDArray`
+        The input array.
+    kwargs: Any, optional
+        Keyword arguments that are supported by the :func:`empty` constructor.
+
+    Returns
+    -------
+    out: :ref:`NDArray`
+        The Blosc2 NDArray with axes transposed.
+
+    References
+    ----------
+    `numpy.transpose <https://numpy.org/doc/2.2/reference/generated/numpy.transpose.html>`_
+    """
+
+    # If arguments are dimension < 2 they are returned
+    if np.isscalar(x) or x.ndim < 2:
+        return x
+
+    # Validate arguments are dimension 2
+    if x.ndim > 2:
+        raise ValueError("Transposing arrays with dimension greater than 2 is not supported yet.")
+
+    n, m = x.shape
+    p, q = x.chunks
+    result = blosc2.zeros((m, n), dtype=np.result_type(x), **kwargs)
+
+    for row in range(0, n, p):
+        row_end = (row + p) if (row + p) < n else n
+        for col in range(0, m, q):
+            col_end = (col + q) if (col + q) < m else m
+            aux = x[row:row_end, col:col_end]
+            result[col:col_end, row:row_end] = np.transpose(aux).copy()
+
+    return result
+
+
 # Class for dealing with fields in an NDArray
 # This will allow to access fields by name in the dtype of the NDArray
 class NDField(Operand):
diff --git a/tests/ndarray/test_matmul.py b/tests/ndarray/test_matmul.py
index 25f6ba3d..1070ea1c 100644
--- a/tests/ndarray/test_matmul.py
+++ b/tests/ndarray/test_matmul.py
@@ -21,11 +21,50 @@
 )
 @pytest.mark.parametrize(
     "dtype",
-    {np.float32, np.float64, np.complex64, np.complex128},
+    {np.float32, np.float64},
 )
 def test_matmul(ashape, achunks, ablocks, bshape, bchunks, bblocks, dtype):
-    a = blosc2.linspace(0, 10, dtype=dtype, shape=ashape, chunks=achunks, blocks=ablocks)
-    b = blosc2.linspace(0, 10, dtype=dtype, shape=bshape, chunks=bchunks, blocks=bblocks)
+    a = blosc2.linspace(0, 1, dtype=dtype, shape=ashape, chunks=achunks, blocks=ablocks)
+    b = blosc2.linspace(0, 1, dtype=dtype, shape=bshape, chunks=bchunks, blocks=bblocks)
+    c = blosc2.matmul(a, b)
+
+    na = a[:]
+    nb = b[:]
+    nc = np.matmul(na, nb)
+
+    np.testing.assert_allclose(c, nc, rtol=1e-6)
+
+
+@pytest.mark.parametrize(
+    ("ashape", "achunks", "ablocks"),
+    {
+        ((12, 10), (7, 5), (3, 3)),
+        ((10,), (9,), (7,)),
+    },
+)
+@pytest.mark.parametrize(
+    ("bshape", "bchunks", "bblocks"),
+    {
+        ((10,), (4,), (2,)),
+        ((10, 5), (3, 4), (1, 3)),
+        ((10, 12), (2, 4), (1, 2)),
+    },
+)
+@pytest.mark.parametrize(
+    "dtype",
+    {np.complex64, np.complex128},
+)
+def test_complex(ashape, achunks, ablocks, bshape, bchunks, bblocks, dtype):
+    real_part = blosc2.linspace(0, 1, shape=ashape, chunks=achunks, blocks=ablocks, dtype=dtype)
+    imag_part = blosc2.linspace(0, 1, shape=ashape, chunks=achunks, blocks=ablocks, dtype=dtype)
+    complex_matrix_a = real_part + 1j * imag_part
+    a = blosc2.asarray(complex_matrix_a)
+
+    real_part = blosc2.linspace(1, 2, shape=bshape, chunks=bchunks, blocks=bblocks, dtype=dtype)
+    imag_part = blosc2.linspace(1, 2, shape=bshape, chunks=bchunks, blocks=bblocks, dtype=dtype)
+    complex_matrix_b = real_part + 1j * imag_part
+    b = blosc2.asarray(complex_matrix_b)
+
     c = blosc2.matmul(a, b)
 
     na = a[:]
diff --git a/tests/ndarray/test_transpose.py b/tests/ndarray/test_transpose.py
new file mode 100644
index 00000000..04218e5f
--- /dev/null
+++ b/tests/ndarray/test_transpose.py
@@ -0,0 +1,104 @@
+import numpy as np
+import pytest
+
+import blosc2
+
+
+@pytest.fixture(
+    params=[
+        ((3, 3), (2, 2), (1, 1)),
+        ((12, 11), (7, 5), (6, 2)),
+        ((1, 5), (1, 4), (1, 3)),
+        ((51, 603), (22, 99), (13, 29)),
+        ((10,), (5,), None),
+        ((31,), (14,), (9,)),
+    ]
+)
+def shape_chunks_blocks(request):
+    return request.param
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    {np.int32, np.int64, np.float32, np.float64},
+)
+def test_transpose(shape_chunks_blocks, dtype):
+    shape, chunks, blocks = shape_chunks_blocks
+    a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype)
+    at = blosc2.transpose(a)
+
+    na = a[:]
+    nat = np.transpose(na)
+
+    np.testing.assert_allclose(at, nat)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    {np.complex64, np.complex128},
+)
+def test_complex(shape_chunks_blocks, dtype):
+    shape, chunks, blocks = shape_chunks_blocks
+    real_part = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype)
+    imag_part = blosc2.linspace(1, 0, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype)
+    complex_matrix = real_part + 1j * imag_part
+
+    a = blosc2.asarray(complex_matrix)
+    at = blosc2.transpose(a)
+
+    na = a[:]
+    nat = np.transpose(na)
+
+    np.testing.assert_allclose(at, nat)
+
+
+@pytest.mark.parametrize(
+    "scalar",
+    {
+        1,  # int
+        5.1,  # float
+        1 + 2j,  # complex
+        np.int8(2),  # NumPy int8
+        np.int16(3),  # NumPy int16
+        np.int32(4),  # NumPy int32
+        np.int64(5),  # NumPy int64
+        np.float32(5.2),  # NumPy float32
+        np.float64(5.3),  # NumPy float64
+        np.complex64(0 + 3j),  # NumPy complex64
+        np.complex128(2 - 4j),  # NumPy complex128
+    },
+)
+def test_scalars(scalar):
+    at = blosc2.transpose(scalar)
+    nat = np.transpose(scalar)
+
+    np.testing.assert_allclose(at, nat)
+
+
+@pytest.mark.parametrize(
+    "shape",
+    [
+        (3, 3, 3),
+        (12, 10, 10),
+        (10, 10, 10, 11),
+        (5, 4, 3, 2, 1, 1),
+    ],
+)
+def test_dims(shape):
+    a = blosc2.linspace(0, 1, shape=shape)
+
+    with pytest.raises(ValueError):
+        blosc2.transpose(a)
+
+
+def test_disk():
+    a = blosc2.linspace(0, 1, shape=(3, 4), urlpath="a_test.b2nd", mode="w")
+    c = blosc2.transpose(a, urlpath="c_test.b2nd", mode="w")
+
+    na = a[:]
+    nc = np.transpose(na)
+
+    np.testing.assert_allclose(c, nc, rtol=1e-6)
+
+    blosc2.remove_urlpath("a_test.b2nd")
+    blosc2.remove_urlpath("c_test.b2nd")