From e431c2e8a981db7e0029e4e0ef9a5133d82ac5ce Mon Sep 17 00:00:00 2001 From: mariakrzywnicka Date: Thu, 6 Feb 2025 22:30:45 +0100 Subject: [PATCH 1/5] tests numpy --- send-to-server.sh | 11 ++++ setup-daphne-dams-cluster-3.sh | 43 ++++++++++++ setup-daphne-dams-cluster.sh | 50 ++++++++++++++ src/api/python/daphne/utils/consts.py | 2 +- test/api/python/DaphneLibTest.cpp | 21 ++++++ test/api/python/data_transfer_numpy.daphne | 13 ++++ test/api/python/data_transfer_numpy.py | 66 +++++++++++++++++++ test/api/python/data_transfer_numpy_1.py | 2 +- test/api/python/data_transfer_numpy_2.daphne | 1 + ...ata_transfer_numpy_array_float64_1d.daphne | 7 ++ .../data_transfer_numpy_array_float64_1d.py | 14 ++++ ...umpy_array_float64_1d_shared_memory.daphne | 7 ++ ...er_numpy_array_float64_1d_shared_memory.py | 14 ++++ ...ata_transfer_numpy_array_float64_2d.daphne | 9 +++ .../data_transfer_numpy_array_float64_2d.py | 16 +++++ ...umpy_array_float64_2d_shared_memory.daphne | 9 +++ ...er_numpy_array_float64_2d_shared_memory.py | 16 +++++ ...ata_transfer_numpy_array_inf_values.daphne | 3 + .../data_transfer_numpy_array_inf_values.py | 10 +++ ...umpy_array_inf_values_shared_memory.daphne | 3 + ...er_numpy_array_inf_values_shared_memory.py | 10 +++ .../data_transfer_numpy_array_int64.daphne | 3 + .../python/data_transfer_numpy_array_int64.py | 10 +++ .../data_transfer_numpy_array_large.daphne | 3 + .../python/data_transfer_numpy_array_large.py | 10 +++ ...fer_numpy_array_large_shared_memory.daphne | 3 + ...ransfer_numpy_array_large_shared_memory.py | 10 +++ ...a_transfer_numpy_array_large_sparse.daphne | 3 + .../data_transfer_numpy_array_large_sparse.py | 10 +++ ...py_array_large_sparse_shared_memory.daphne | 3 + ..._numpy_array_large_sparse_shared_memory.py | 10 +++ ...ata_transfer_numpy_array_nan_values.daphne | 3 + .../data_transfer_numpy_array_nan_values.py | 10 +++ ...umpy_array_nan_values_shared_memory.daphne | 3 + ...er_numpy_array_nan_values_shared_memory.py | 10 +++ 
...ransfer_numpy_array_negative_values.daphne | 3 + ...ta_transfer_numpy_array_negative_values.py | 10 +++ ...array_negative_values_shared_memory.daphne | 3 + ...mpy_array_negative_values_shared_memory.py | 10 +++ ...a_transfer_numpy_array_small_values.daphne | 3 + .../data_transfer_numpy_array_small_values.py | 10 +++ ...py_array_small_values_shared_memory.daphne | 3 + ..._numpy_array_small_values_shared_memory.py | 10 +++ test/api/python/data_transfer_pandas.py | 57 ++++++++++++++++ ...a_ttransfer_numpy_array_negative_values.py | 10 +++ .../python/function_numpy_column_stack.daphne | 8 +++ .../api/python/function_numpy_column_stack.py | 22 +++++++ .../python/function_numpy_concatenate.daphne | 6 ++ test/api/python/function_numpy_concatenate.py | 18 +++++ test/api/python/function_numpy_empty.daphne | 3 + test/api/python/function_numpy_empty.py | 9 +++ test/api/python/function_numpy_fill.daphne | 7 ++ test/api/python/function_numpy_fill.py | 12 ++++ 53 files changed, 620 insertions(+), 2 deletions(-) create mode 100755 send-to-server.sh create mode 100644 setup-daphne-dams-cluster-3.sh create mode 100644 setup-daphne-dams-cluster.sh create mode 100644 test/api/python/data_transfer_numpy.daphne create mode 100644 test/api/python/data_transfer_numpy.py create mode 100644 test/api/python/data_transfer_numpy_array_float64_1d.daphne create mode 100644 test/api/python/data_transfer_numpy_array_float64_1d.py create mode 100644 test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.py create mode 100644 test/api/python/data_transfer_numpy_array_float64_2d.daphne create mode 100644 test/api/python/data_transfer_numpy_array_float64_2d.py create mode 100644 test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.py create mode 100644 
test/api/python/data_transfer_numpy_array_inf_values.daphne create mode 100644 test/api/python/data_transfer_numpy_array_inf_values.py create mode 100644 test/api/python/data_transfer_numpy_array_inf_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_inf_values_shared_memory.py create mode 100644 test/api/python/data_transfer_numpy_array_int64.daphne create mode 100644 test/api/python/data_transfer_numpy_array_int64.py create mode 100644 test/api/python/data_transfer_numpy_array_large.daphne create mode 100644 test/api/python/data_transfer_numpy_array_large.py create mode 100644 test/api/python/data_transfer_numpy_array_large_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_large_shared_memory.py create mode 100644 test/api/python/data_transfer_numpy_array_large_sparse.daphne create mode 100644 test/api/python/data_transfer_numpy_array_large_sparse.py create mode 100644 test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.py create mode 100644 test/api/python/data_transfer_numpy_array_nan_values.daphne create mode 100644 test/api/python/data_transfer_numpy_array_nan_values.py create mode 100644 test/api/python/data_transfer_numpy_array_nan_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_nan_values_shared_memory.py create mode 100644 test/api/python/data_transfer_numpy_array_negative_values.daphne create mode 100644 test/api/python/data_transfer_numpy_array_negative_values.py create mode 100644 test/api/python/data_transfer_numpy_array_negative_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_negative_values_shared_memory.py create mode 100644 test/api/python/data_transfer_numpy_array_small_values.daphne create mode 100644 test/api/python/data_transfer_numpy_array_small_values.py create mode 100644 
test/api/python/data_transfer_numpy_array_small_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_numpy_array_small_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas.py create mode 100644 test/api/python/data_ttransfer_numpy_array_negative_values.py create mode 100644 test/api/python/function_numpy_column_stack.daphne create mode 100644 test/api/python/function_numpy_column_stack.py create mode 100644 test/api/python/function_numpy_concatenate.daphne create mode 100644 test/api/python/function_numpy_concatenate.py create mode 100644 test/api/python/function_numpy_empty.daphne create mode 100644 test/api/python/function_numpy_empty.py create mode 100644 test/api/python/function_numpy_fill.daphne create mode 100644 test/api/python/function_numpy_fill.py diff --git a/send-to-server.sh b/send-to-server.sh new file mode 100755 index 000000000..e86af9c48 --- /dev/null +++ b/send-to-server.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Your user name on the server. +user=krzywnicka +# The IP address of the server. +ip=130.149.237.23 # so013 +# The path on the server (make sure that it exists by first creating it on the server, if necessary). +path="/home/$user/daphne-$user" + + rsync -av --exclude=/.git/ --exclude=/send-to-server.sh --rsh="ssh -J $user@130.149.237.11" . "$user@$ip:$path" + #rsync -av --exclude='.git/' --exclude='send-to-server.sh' --include='*/' --include='*.py' --rsh="ssh -J $user@130.149.237.11" . "$user@$ip:$path" diff --git a/setup-daphne-dams-cluster-3.sh b/setup-daphne-dams-cluster-3.sh new file mode 100644 index 000000000..5bfddc2fc --- /dev/null +++ b/setup-daphne-dams-cluster-3.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# ***************************************************************************** +# This script builds DAPHNE including all dependencies. +# +# It is intended to be used on a scale-out node of the DAMS Lab cluster. 
+# It contains a few work-arounds that are currently needed in this environment. +# ***************************************************************************** + +# Stop if any command fails. +set -e + +# Create a Python virtual environment to make numpy and pandas available. +python3 -m venv daphne-venv +source daphne-venv/bin/activate +pip install numpy pandas + +# Use gcc-9/g++-9 for building the dependencies (work-around because we don't have gfortran-11 yet). +export CC=/usr/bin/gcc-9 +export CXX=/usr/bin/g++-9 + +# Build the dependencies and DAPHNE (just some random target that does not require C++-20, s.t. we can build it with g++-9). +./build.sh --target DaphneDSLParser + +# Remove the DAPHNE build artifacts. +./build.sh --clean -y + +# Use gcc-11/g++-11 for building DAPHNE. +export CC=/usr/bin/gcc-11 +export CXX=/usr/bin/g++-11 + +# Build DAPHNE (including all test cases). +./build.sh --target run_tests + +# Run the test cases. +./test.sh -d yes + +set +e + +# Each time you log in to your node: + source daphne-venv/bin/activate + export CC=/usr/bin/gcc-11 + export CXX=/usr/bin/g++-11 diff --git a/setup-daphne-dams-cluster.sh b/setup-daphne-dams-cluster.sh new file mode 100644 index 000000000..3f8bf0f05 --- /dev/null +++ b/setup-daphne-dams-cluster.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# ***************************************************************************** +# This script builds DAPHNE including all dependencies. +# +# It is intended to be used on a scale-out node of the DAMS Lab cluster. +# It contains a few work-arounds that are currently needed in this environment. +# ***************************************************************************** + +# Stop if any command fails. +set -e + +# Create a Python virtual environment to make numpy and pandas available. +python3 -m venv daphne-venv +source daphne-venv/bin/activate +pip install numpy pandas + +# Use gcc-9/g++-9 for building the dependencies (work-around because we don't have gfortran-11 yet). 
+export CC=/usr/bin/gcc-9 +export CXX=/usr/bin/g++-9 + +# Build the dependencies and DAPHNE (just some random target that does not require C++-20, s.t. we can build it with g++-9). +./build.sh --target DaphneDSLParser + +# Remove the DAPHNE build artifacts. +#./build.sh --clean -y + +# Use gcc-11/g++-11 for building DAPHNE. +export CC=/usr/bin/gcc-11 +export CXX=/usr/bin/g++-11 + +# Build DAPHNE (including all test cases). +./build.sh --target run_tests +#./build.sh -nd + +# Run the test cases. +./test.sh -d yes + +set +e + +# Each time you log in to your node: +# cd path/to/your/daphne + source daphne-venv/bin/activate + export CC=/usr/bin/gcc-11 + export CXX=/usr/bin/g++-11 + export PYTHONPATH=$(pwd)/src/api/python:$PYTHONPATH + export LD_LIBRARY_PATH=$(pwd)/lib:$LD_LIBRARY_PATH + export PATH=$(pwd)/bin:$PATH + export DAPHNELIB_DIR_PATH=$(pwd)/lib + diff --git a/src/api/python/daphne/utils/consts.py b/src/api/python/daphne/utils/consts.py index c1a7a7c9f..ac22fddea 100644 --- a/src/api/python/daphne/utils/consts.py +++ b/src/api/python/daphne/utils/consts.py @@ -36,7 +36,7 @@ VALID_ARITHMETIC_TYPES = Union['DAGNode', int, float] VALID_COMPUTED_TYPES = Union['Matrix', 'Frame', 'Scalar'] -TMP_PATH = os.path.join("/tmp/", "DaphneLib") +TMP_PATH = os.environ.get("DAPHNELIB_TMP_PATH", os.path.join("/tmp/", "DaphneLib")) os.makedirs(TMP_PATH, exist_ok=True) _PROTOTYPE_PATH_ENV_VAR_NAME = "DAPHNELIB_DIR_PATH" diff --git a/test/api/python/DaphneLibTest.cpp b/test/api/python/DaphneLibTest.cpp index 95179f171..560eaa1b6 100644 --- a/test/api/python/DaphneLibTest.cpp +++ b/test/api/python/DaphneLibTest.cpp @@ -66,6 +66,27 @@ const std::string dirPath = "test/api/python/"; compareDaphneLibToStr(str, prefix + ".py"); \ } +MAKE_TEST_CASE("function_numpy_fill") +MAKE_TEST_CASE("function_numpy_column_stack") +MAKE_TEST_CASE("function_numpy_concatenate") +MAKE_TEST_CASE("function_numpy_empty") +MAKE_TEST_CASE("data_transfer_numpy_array_float64_1d") 
+MAKE_TEST_CASE("data_transfer_numpy_array_float64_1d_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_float64_2d") +MAKE_TEST_CASE("data_transfer_numpy_array_float64_2d_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_nan_values") +MAKE_TEST_CASE("data_transfer_numpy_array_nan_values_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_inf_values") +MAKE_TEST_CASE("data_transfer_numpy_array_inf_values_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_negative_values") +MAKE_TEST_CASE("data_transfer_numpy_array_negative_values_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_small_values") +MAKE_TEST_CASE("data_transfer_numpy_array_small_values_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_large") +MAKE_TEST_CASE("data_transfer_numpy_array_large_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_large_sparse") +MAKE_TEST_CASE("data_transfer_numpy_array_large_sparse_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_int64") MAKE_TEST_CASE("data_transfer_numpy_1") MAKE_TEST_CASE("data_transfer_numpy_2") MAKE_TEST_CASE("data_transfer_numpy_3") diff --git a/test/api/python/data_transfer_numpy.daphne b/test/api/python/data_transfer_numpy.daphne new file mode 100644 index 000000000..9dac21dc2 --- /dev/null +++ b/test/api/python/data_transfer_numpy.daphne @@ -0,0 +1,13 @@ +X1 = [1.0, 2.0, 3.0](3, 1); +X2 = [1.0, 2.0, 3.0, 4.0](2, 2); +X3 = [1, 2, 3](3, 1); +X4 = [1, 2, 3, 4](2, 2); +X5 = [1.0, 2.0, 3.0](0, 1); +X6 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0](2, 2, 2); + +print(X1); +print(X2); +print(X3); +print(X4); +print(X5); +print(X6); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy.py b/test/api/python/data_transfer_numpy.py new file mode 100644 index 000000000..512911219 --- /dev/null +++ b/test/api/python/data_transfer_numpy.py @@ -0,0 +1,66 @@ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +dctx = DaphneContext() + +test_cases = [ + 
# 1D arrays + (np.array([1.0, 2.0, 3.0], dtype=np.float64).reshape(-1, 1), "float64_1d"), + (np.array([4.0, 5.0, 6.0, 7.0], dtype=np.float64).reshape(-1, 1), "float64_1d_longer"), + (np.array([8.0, 9.0, 10.0, 11.0, 12.0], dtype=np.float64).reshape(-1, 1), "float64_1d_even_longer"), + + # 2D arrays + (np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64), "float64_2d"), + (np.array([[5.0, 6.0, 7.0], [8.0, 9.0, 10.0]], dtype=np.float64), "float64_2d_wider"), + (np.array([[11.0, 12.0], [13.0, 14.0], [15.0, 16.0]], dtype=np.float64), "float64_2d_taller"), + (np.array([[17.0, 18.0, 19.0], [20.0, 21.0, 22.0], [23.0, 24.0, 25.0]], dtype=np.float64), "float64_2d_square"), + + # Edge cases + (np.array([np.nan, np.nan, np.nan], dtype=np.float64).reshape(-1, 1), "float64_nan"), + (np.array([np.inf, -np.inf, np.inf], dtype=np.float64).reshape(-1, 1), "float64_inf"), + (np.array([1, 2.0, 3], dtype=np.float64).reshape(-1, 1), "float64_mixed"), + (np.array([-1.0, -2.0, -3.0], dtype=np.float64).reshape(-1, 1), "float64_negative"), + (np.array([1e-10, 2e-10, 3e-10], dtype=np.float64).reshape(-1, 1), "float64_small"), + (np.array([1e10, 2e10, 3e10], dtype=np.float64).reshape(-1, 1), "float64_large"), + + # Higher-dimensional arrays + (np.array([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]], dtype=np.float64), "float64_3d"), + (np.array([[[9.0, 10.0], [11.0, 12.0]], [[13.0, 14.0], [15.0, 16.0]], [[17.0, 18.0], [19.0, 20.0]]], dtype=np.float64), "float64_3d_larger"), + (np.array([[[21.0, 22.0, 23.0], [24.0, 25.0, 26.0]], [[27.0, 28.0, 29.0], [30.0, 31.0, 32.0]]], dtype=np.float64), "float64_3d_wider"), + (np.array([[[33.0, 34.0], [35.0, 36.0], [37.0, 38.0]], [[39.0, 40.0], [41.0, 42.0], [43.0, 44.0]]], dtype=np.float64), "float64_3d_taller"), + (np.random.rand(2, 3, 4, 5), "float64_4d"), + (np.random.rand(2, 2, 3, 4, 5), "float64_5d"), + + # Very large arrays + (np.random.rand(1000000).reshape(-1, 1), "float64_1d_large"), + (np.random.rand(1000, 1000), "float64_2d_large"), + + # 
Sparse arrays + (np.zeros((1000, 1000)), "sparse_np_array"), + (np.random.choice([0, 1.0], size=(1000, 1000), p=[0.99, 0.01]), "sparse_np_array_1_percent"), # 1% non-zero + (np.random.choice([0, 1.0], size=(1000, 1000), p=[0.95, 0.05]), "sparse_np_array_5_percent"), # 5% non-zero + (np.random.choice([0, 1.0], size=(1000, 1000), p=[0.9, 0.1]), "sparse_np_array_10_percent"), # 10% non-zero + + # Categorical data (using float representation) + (np.array([0, 1, 2, 0, 1, 2], dtype=np.float64).reshape(-1, 1), "categorical_1d"), + (np.array([0, 1, 2, 3, 4, 5], dtype=np.int32).reshape(-1, 1), "categorical_1d_more_categories"), + (np.array([0, 1, 0, 1, 0, 1], dtype=np.int32).reshape(-1, 1), "categorical_1d_fewer_categories"), + (np.array([0, 0, 0, 0, 0, 0], dtype=np.int32).reshape(-1, 1), "categorical_1d_single_category"), + (np.array([0, 1, 2, 1, 0, 2, 1, 0, 2], dtype=np.int32).reshape(-1, 1), "categorical_1d_repeated_categories"), + + # Different data types + (np.array([1, 2, 3], dtype=np.int64).reshape(-1, 1), "int64_1d"), + (np.array([1, 2, 3], dtype=np.uint8).reshape(-1, 1), "uint8_1d"), + + # Non-standard shapes + (np.random.rand(1, 1000), "float64_1x1000"), + (np.random.rand(1000, 1), "float64_1000x1"), +] + +for X, name in test_cases: + try: + + dctx.from_numpy(X, shared_memory=False).print().compute() + + except Exception as e: + print(f"Error for {name}: {e}") \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_1.py b/test/api/python/data_transfer_numpy_1.py index 4a946a2c2..ab611497c 100644 --- a/test/api/python/data_transfer_numpy_1.py +++ b/test/api/python/data_transfer_numpy_1.py @@ -31,4 +31,4 @@ dctx = DaphneContext() -(dctx.from_numpy(m1, shared_memory=False)).print().compute() \ No newline at end of file +(dctx.from_numpy(m1, shared_memory=False)).print().compute() diff --git a/test/api/python/data_transfer_numpy_2.daphne b/test/api/python/data_transfer_numpy_2.daphne index 8d01017a4..d00b54d8f 100644 --- 
a/test/api/python/data_transfer_numpy_2.daphne +++ b/test/api/python/data_transfer_numpy_2.daphne @@ -15,4 +15,5 @@ */ m1 = reshape(as.f64([1, 2, 3, 4, 5, 6]), 2, 3); + print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_1d.daphne b/test/api/python/data_transfer_numpy_array_float64_1d.daphne new file mode 100644 index 000000000..171a17aad --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_1d.daphne @@ -0,0 +1,7 @@ +m1 = reshape(as.f64([1.0, 2.0, 3.0]), 3, 1); +m2 = reshape(as.f64([4.0, 5.0, 6.0, 7.0]), 4, 1); +m3 = reshape(as.f64([8.0, 9.0, 10.0, 11.0, 12.0]), 5, 1); + +print(m1); +print(m2); +print(m3); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_1d.py b/test/api/python/data_transfer_numpy_array_float64_1d.py new file mode 100644 index 000000000..b6a3a799a --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_1d.py @@ -0,0 +1,14 @@ +# Data transfer from numpy to DAPHNE and back, via files. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([1.0, 2.0, 3.0], dtype=np.float64).reshape(-1, 1) +m2 = np.array([4.0, 5.0, 6.0, 7.0], dtype=np.float64).reshape(-1, 1) +m3 = np.array([8.0, 9.0, 10.0, 11.0, 12.0], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) +(dctx.from_numpy(m2, shared_memory=False).print().compute()) +(dctx.from_numpy(m3, shared_memory=False).print().compute()) \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.daphne new file mode 100644 index 000000000..171a17aad --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.daphne @@ -0,0 +1,7 @@ +m1 = reshape(as.f64([1.0, 2.0, 3.0]), 3, 1); +m2 = reshape(as.f64([4.0, 5.0, 6.0, 7.0]), 4, 1); +m3 = reshape(as.f64([8.0, 9.0, 10.0, 11.0, 12.0]), 5, 1); + +print(m1); +print(m2); +print(m3); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.py b/test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.py new file mode 100644 index 000000000..3d744eca1 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_1d_shared_memory.py @@ -0,0 +1,14 @@ +# Data transfer from numpy to DAPHNE and back, via shared memory. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([1.0, 2.0, 3.0], dtype=np.float64).reshape(-1, 1) +m2 = np.array([4.0, 5.0, 6.0, 7.0], dtype=np.float64).reshape(-1, 1) +m3 = np.array([8.0, 9.0, 10.0, 11.0, 12.0], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=True).print().compute()) +(dctx.from_numpy(m2, shared_memory=True).print().compute()) +(dctx.from_numpy(m3, shared_memory=True).print().compute()) \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_2d.daphne b/test/api/python/data_transfer_numpy_array_float64_2d.daphne new file mode 100644 index 000000000..7d1f39f16 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_2d.daphne @@ -0,0 +1,9 @@ +m1 = reshape(as.f64([1.0, 2.0, 3.0, 4.0]), 2, 2); +m2 = reshape(as.f64([5.0, 6.0, 7.0, 8.0, 9.0, 10.0]), 2, 3); +m3 = reshape(as.f64([11.0, 12.0, 13.0, 14.0, 15.0, 16.0]), 3, 2); +m4 = reshape(as.f64([17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0]), 3, 3); + +print(m1); +print(m2); +print(m3); +print(m4); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_2d.py b/test/api/python/data_transfer_numpy_array_float64_2d.py new file mode 100644 index 000000000..674c956e4 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_2d.py @@ -0,0 +1,16 @@ +# Data transfer from numpy to DAPHNE and back, via files. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64) +m2 = np.array([[5.0, 6.0, 7.0], [8.0, 9.0, 10.0]], dtype=np.float64) +m3 = np.array([[11.0, 12.0], [13.0, 14.0], [15.0, 16.0]], dtype=np.float64) +m4 = np.array([[17.0, 18.0, 19.0], [20.0, 21.0, 22.0], [23.0, 24.0, 25.0]], dtype=np.float64) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) +(dctx.from_numpy(m2, shared_memory=False).print().compute()) +(dctx.from_numpy(m3, shared_memory=False).print().compute()) +(dctx.from_numpy(m4, shared_memory=False).print().compute()) \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.daphne new file mode 100644 index 000000000..7d1f39f16 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.daphne @@ -0,0 +1,9 @@ +m1 = reshape(as.f64([1.0, 2.0, 3.0, 4.0]), 2, 2); +m2 = reshape(as.f64([5.0, 6.0, 7.0, 8.0, 9.0, 10.0]), 2, 3); +m3 = reshape(as.f64([11.0, 12.0, 13.0, 14.0, 15.0, 16.0]), 3, 2); +m4 = reshape(as.f64([17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0]), 3, 3); + +print(m1); +print(m2); +print(m3); +print(m4); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.py b/test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.py new file mode 100644 index 000000000..45a2bdaa3 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_float64_2d_shared_memory.py @@ -0,0 +1,16 @@ +# Data transfer from numpy to DAPHNE and back, via shared memory. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64) +m2 = np.array([[5.0, 6.0, 7.0], [8.0, 9.0, 10.0]], dtype=np.float64) +m3 = np.array([[11.0, 12.0], [13.0, 14.0], [15.0, 16.0]], dtype=np.float64) +m4 = np.array([[17.0, 18.0, 19.0], [20.0, 21.0, 22.0], [23.0, 24.0, 25.0]], dtype=np.float64) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=True).print().compute()) +(dctx.from_numpy(m2, shared_memory=True).print().compute()) +(dctx.from_numpy(m3, shared_memory=True).print().compute()) +(dctx.from_numpy(m4, shared_memory=True).print().compute()) \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_inf_values.daphne b/test/api/python/data_transfer_numpy_array_inf_values.daphne new file mode 100644 index 000000000..eb7b771d6 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_inf_values.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([inf, -inf, inf]), 3, 1); + +print(m1); diff --git a/test/api/python/data_transfer_numpy_array_inf_values.py b/test/api/python/data_transfer_numpy_array_inf_values.py new file mode 100644 index 000000000..5fa9830f7 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_inf_values.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([np.inf, -np.inf, np.inf], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_inf_values_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_inf_values_shared_memory.daphne new file mode 100644 index 000000000..eb7b771d6 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_inf_values_shared_memory.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([inf, -inf, inf]), 3, 1); + +print(m1); diff --git a/test/api/python/data_transfer_numpy_array_inf_values_shared_memory.py b/test/api/python/data_transfer_numpy_array_inf_values_shared_memory.py new file mode 100644 index 000000000..dd14abdff --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_inf_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via shared memory. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([np.inf, -np.inf, np.inf], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=True).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_int64.daphne b/test/api/python/data_transfer_numpy_array_int64.daphne new file mode 100644 index 000000000..d4ba9c1a3 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_int64.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.si64([1, 2, 3]), 3, 1); + +print(m1); diff --git a/test/api/python/data_transfer_numpy_array_int64.py b/test/api/python/data_transfer_numpy_array_int64.py new file mode 100644 index 000000000..c6c9ffa6a --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_int64.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([1, 2, 3], dtype=np.int64).reshape(-1, 1) + +dctx = DaphneContext() + +dctx.from_numpy(m1, shared_memory=False).print().compute() \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large.daphne b/test/api/python/data_transfer_numpy_array_large.daphne new file mode 100644 index 000000000..070021eea --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large.daphne @@ -0,0 +1,3 @@ +Y = fill(1.0, 1000, 1000); + +Y; \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large.py b/test/api/python/data_transfer_numpy_array_large.py new file mode 100644 index 000000000..0fdb0b8e2 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.ones((1000, 1000), dtype=np.float64) + +dctx = DaphneContext() + +dctx.from_numpy(m1, shared_memory=False).compute() \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_large_shared_memory.daphne new file mode 100644 index 000000000..b564e5b5a --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large_shared_memory.daphne @@ -0,0 +1,3 @@ +Y = fill(1, 1000, 1000); + +Y; \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large_shared_memory.py b/test/api/python/data_transfer_numpy_array_large_shared_memory.py new file mode 100644 index 000000000..03d410f08 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via shared memory. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.ones((1000, 1000), dtype=np.float64) + +dctx = DaphneContext() + +dctx.from_numpy(m1, shared_memory=True).compute() \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large_sparse.daphne b/test/api/python/data_transfer_numpy_array_large_sparse.daphne new file mode 100644 index 000000000..04ce2be1b --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large_sparse.daphne @@ -0,0 +1,3 @@ +Y = fill(0, 1000, 1000); + +Y; \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large_sparse.py b/test/api/python/data_transfer_numpy_array_large_sparse.py new file mode 100644 index 000000000..9c0e4dbab --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large_sparse.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.zeros((1000, 1000), dtype=np.float64) + +dctx = DaphneContext() + +dctx.from_numpy(m1, shared_memory=False).compute() \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.daphne new file mode 100644 index 000000000..04ce2be1b --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.daphne @@ -0,0 +1,3 @@ +Y = fill(0, 1000, 1000); + +Y; \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.py b/test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.py new file mode 100644 index 000000000..5c947ce7c --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_large_sparse_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via shared memory. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.zeros((1000, 1000), dtype=np.float64) + +dctx = DaphneContext() + +dctx.from_numpy(m1, shared_memory=True).compute() \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_nan_values.daphne b/test/api/python/data_transfer_numpy_array_nan_values.daphne new file mode 100644 index 000000000..d101400be --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_nan_values.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([nan, nan, nan]), 3, 1); + +print(m1); diff --git a/test/api/python/data_transfer_numpy_array_nan_values.py b/test/api/python/data_transfer_numpy_array_nan_values.py new file mode 100644 index 000000000..47bdc90e0 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_nan_values.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([np.nan, np.nan, np.nan], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_nan_values_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_nan_values_shared_memory.daphne new file mode 100644 index 000000000..d101400be --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_nan_values_shared_memory.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([nan, nan, nan]), 3, 1); + +print(m1); diff --git a/test/api/python/data_transfer_numpy_array_nan_values_shared_memory.py b/test/api/python/data_transfer_numpy_array_nan_values_shared_memory.py new file mode 100644 index 000000000..dd75d94be --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_nan_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via shared memory. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([np.nan, np.nan, np.nan], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=True).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_negative_values.daphne b/test/api/python/data_transfer_numpy_array_negative_values.daphne new file mode 100644 index 000000000..4f1a5ba03 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_negative_values.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([-1.0, -2.0, -3.0]), 3, 1); + +print(m1); diff --git a/test/api/python/data_transfer_numpy_array_negative_values.py b/test/api/python/data_transfer_numpy_array_negative_values.py new file mode 100644 index 000000000..0aba829bf --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_negative_values.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([-1.0, -2.0, -3.0], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_negative_values_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_negative_values_shared_memory.daphne new file mode 100644 index 000000000..5ff0b22a8 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_negative_values_shared_memory.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([-1.0, -2.0, -3.0]), 3, 1); + +print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_negative_values_shared_memory.py b/test/api/python/data_transfer_numpy_array_negative_values_shared_memory.py new file mode 100644 index 000000000..81d3113a7 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_negative_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via shared 
memory. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([-1.0, -2.0, -3.0], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=True).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_small_values.daphne b/test/api/python/data_transfer_numpy_array_small_values.daphne new file mode 100644 index 000000000..86aae56e6 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_small_values.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([1e-10, 2e-10, 3e-10]), 3, 1); + +print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_small_values.py b/test/api/python/data_transfer_numpy_array_small_values.py new file mode 100644 index 000000000..c0476ab30 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_small_values.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([1e-10, 2e-10, 3e-10], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_small_values_shared_memory.daphne b/test/api/python/data_transfer_numpy_array_small_values_shared_memory.daphne new file mode 100644 index 000000000..86aae56e6 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_small_values_shared_memory.daphne @@ -0,0 +1,3 @@ +m1 = reshape(as.f64([1e-10, 2e-10, 3e-10]), 3, 1); + +print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_small_values_shared_memory.py b/test/api/python/data_transfer_numpy_array_small_values_shared_memory.py new file mode 100644 index 000000000..2460fb8ab --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_small_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via shared 
memory. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([1e-10, 2e-10, 3e-10], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=True).print().compute()) diff --git a/test/api/python/data_transfer_pandas.py b/test/api/python/data_transfer_pandas.py new file mode 100644 index 000000000..ab2f13be6 --- /dev/null +++ b/test/api/python/data_transfer_pandas.py @@ -0,0 +1,57 @@ +import pandas as pd +import numpy as np +from daphne.context.daphne_context import DaphneContext + +dctx = DaphneContext() + +# Test cases for Pandas Series +series_test_cases = [ + # Simple Series + (pd.Series([1.0, 2.0, 3.0]), "float64_series"), + (pd.Series([1, 2, 3], dtype=np.int32), "int32_series"), + + # Series with different shapes + (pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]), "float64_series_longer"), + + # Edge cases + (pd.Series([], dtype=np.float64), "float64_empty_series"), + + # Series with categorical data + (pd.Series(pd.Categorical(["a", "b", "c"])), "categorical_series"), +] + +# Test cases for Pandas DataFrames +dataframe_test_cases = [ + # Simple DataFrames + (pd.DataFrame({"A": [1.0, 2.0, 3.0], "B": [4.0, 5.0, 6.0]}), "float64_dataframe"), + (pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype=np.int32), "int32_dataframe"), + + # DataFrames with different shapes + (pd.DataFrame({"A": [1.0, 2.0], "B": [3.0, 4.0], "C": [5.0, 6.0]}), "float64_dataframe_wider"), + (pd.DataFrame({"A": [1.0, 2.0, 3.0, 4.0], "B": [5.0, 6.0, 7.0, 8.0]}), "float64_dataframe_taller"), + + # Edge cases + (pd.DataFrame({"A": [], "B": []}, dtype=np.float64), "float64_empty_dataframe"), +] + +# Testing Pandas Series +for series, name in series_test_cases: + try: + series_daphne = dctx.from_pandas(series, shared_memory=True) + + series_daphne.print().compute(type="shared memory") + + except Exception as e: + print(f"Error for {name}: {e}") + +# Testing Pandas DataFrames +for df, name in dataframe_test_cases: + try: + # 
Transfer Pandas DataFrame to DAPHNE + df_daphne = dctx.from_pandas(df, shared_memory=True) + + # Print the Daphne frame + df_daphne.print().compute(type="shared memory") + + except Exception as e: + print(f"Error for {name}: {e}") \ No newline at end of file diff --git a/test/api/python/data_ttransfer_numpy_array_negative_values.py b/test/api/python/data_ttransfer_numpy_array_negative_values.py new file mode 100644 index 000000000..0aba829bf --- /dev/null +++ b/test/api/python/data_ttransfer_numpy_array_negative_values.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([-1.0, -2.0, -3.0], dtype=np.float64).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) diff --git a/test/api/python/function_numpy_column_stack.daphne b/test/api/python/function_numpy_column_stack.daphne new file mode 100644 index 000000000..f1b38e276 --- /dev/null +++ b/test/api/python/function_numpy_column_stack.daphne @@ -0,0 +1,8 @@ +X1 = [1.0, 2.0, 3.0](3, 1); +X2 = [4.0, 5.0, 6.0](3, 1); +X3 = [7.0, 8.0, 9.0](3, 1); + +Y = cbind(X1, X2); +Y = cbind(Y, X3); + +print(Y); \ No newline at end of file diff --git a/test/api/python/function_numpy_column_stack.py b/test/api/python/function_numpy_column_stack.py new file mode 100644 index 000000000..6feca220c --- /dev/null +++ b/test/api/python/function_numpy_column_stack.py @@ -0,0 +1,22 @@ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +X1 = np.array([1, 2, 3], dtype=np.float64) +X2 = np.array([4, 5, 6], dtype=np.float64) +X3 = np.array([7, 8, 9], dtype=np.float64) + +dctx = DaphneContext() + +# Convert Numpy arrays to Daphne matrices +X1_daphne = dctx.from_numpy(X1.reshape(-1, 1), shared_memory=False) +X2_daphne = dctx.from_numpy(X2.reshape(-1, 1), shared_memory=False) +X3_daphne = dctx.from_numpy(X3.reshape(-1, 1), shared_memory=False) + 
+X1c = X1_daphne.reshape(X1_daphne.ncell(), 1) +X2c = X2_daphne.reshape(X2_daphne.ncell(), 1) +X3c = X3_daphne.reshape(X3_daphne.ncell(), 1) + +Y = X1c.cbind(X2c).cbind(X3c) + +Y.print().compute() + diff --git a/test/api/python/function_numpy_concatenate.daphne b/test/api/python/function_numpy_concatenate.daphne new file mode 100644 index 000000000..aee5d5540 --- /dev/null +++ b/test/api/python/function_numpy_concatenate.daphne @@ -0,0 +1,6 @@ +X1 = [1.0, 2.0, 3.0](3, 1); +X2 = [4.0, 5.0, 6.0](3, 1); + +Y = rbind(X1, X2); + +print(Y); \ No newline at end of file diff --git a/test/api/python/function_numpy_concatenate.py b/test/api/python/function_numpy_concatenate.py new file mode 100644 index 000000000..1c489ea38 --- /dev/null +++ b/test/api/python/function_numpy_concatenate.py @@ -0,0 +1,18 @@ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +X1 = np.array([1, 2, 3], dtype=np.float64) +X2 = np.array([4, 5, 6], dtype=np.float64) + +dctx = DaphneContext() + +X1_daphne = dctx.from_numpy(X1.reshape(-1, 1), shared_memory=False) +X2_daphne = dctx.from_numpy(X2.reshape(-1, 1), shared_memory=False) + +X1c = X1_daphne.reshape(X1_daphne.ncell(), 1) +X2c = X2_daphne.reshape(X2_daphne.ncell(), 1) + +Y = X1c.rbind(X2c) + +Y.print().compute() + diff --git a/test/api/python/function_numpy_empty.daphne b/test/api/python/function_numpy_empty.daphne new file mode 100644 index 000000000..16752809f --- /dev/null +++ b/test/api/python/function_numpy_empty.daphne @@ -0,0 +1,3 @@ +Y = fill(0, 3, 2); + +print(Y); \ No newline at end of file diff --git a/test/api/python/function_numpy_empty.py b/test/api/python/function_numpy_empty.py new file mode 100644 index 000000000..78e950290 --- /dev/null +++ b/test/api/python/function_numpy_empty.py @@ -0,0 +1,9 @@ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +dctx = DaphneContext() + +Y_daphne = dctx.fill(0, 3, 2) + +Y_daphne.print().compute() + diff --git 
a/test/api/python/function_numpy_fill.daphne b/test/api/python/function_numpy_fill.daphne new file mode 100644 index 000000000..20e1aa25d --- /dev/null +++ b/test/api/python/function_numpy_fill.daphne @@ -0,0 +1,7 @@ +m = 3; +n = 2; +v = 5.0; + +Y = fill(v, m, n); + +print(Y); \ No newline at end of file diff --git a/test/api/python/function_numpy_fill.py b/test/api/python/function_numpy_fill.py new file mode 100644 index 000000000..cbf259bf1 --- /dev/null +++ b/test/api/python/function_numpy_fill.py @@ -0,0 +1,12 @@ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m, n = 3, 2 +v = 5.0 + +dctx = DaphneContext() + +Y_daphne = dctx.fill(v, m, n) + +Y_daphne.print().compute() + From e627b5e3996df717fde2f2ab3c854b7915a93531 Mon Sep 17 00:00:00 2001 From: mariakrzywnicka Date: Mon, 17 Feb 2025 13:15:18 +0100 Subject: [PATCH 2/5] test pandas --- test/api/python/DaphneLibTest.cpp | 24 ++++- test/api/python/data_transfer_pandas.py | 96 ++++++++++++------- ...ransfer_pandas_df_mixed_data_types.daphne} | 0 ...ta_transfer_pandas_df_mixed_data_types.py} | 0 ... 
=> data_transfer_pandas_df_sparse.daphne} | 0 .../python/data_transfer_pandas_df_sparse.py | 31 ++++++ ...sfer_pandas_df_sparse_shared_memory.daphne | 19 ++++ ...ransfer_pandas_df_sparse_shared_memory.py} | 0 .../data_transfer_pandas_series_float.daphne | 5 + .../data_transfer_pandas_series_float.py | 12 +++ ...r_pandas_series_float_shared_memory.daphne | 5 + ...nsfer_pandas_series_float_shared_memory.py | 12 +++ ...a_transfer_pandas_series_inf_values.daphne | 3 + .../data_transfer_pandas_series_inf_values.py | 10 ++ ...das_series_inf_values_shared_memory.daphne | 3 + ..._pandas_series_inf_values_shared_memory.py | 10 ++ .../data_transfer_pandas_series_int.daphne | 5 + .../python/data_transfer_pandas_series_int.py | 12 +++ ...fer_pandas_series_int_shared_memory.daphne | 5 + ...ransfer_pandas_series_int_shared_memory.py | 12 +++ .../data_transfer_pandas_series_large.daphne | 4 + .../data_transfer_pandas_series_large.py | 10 ++ ...r_pandas_series_large_shared_memory.daphne | 4 + ...nsfer_pandas_series_large_shared_memory.py | 10 ++ ...transfer_pandas_series_large_values.daphne | 3 + ...ata_transfer_pandas_series_large_values.py | 9 ++ ...s_series_large_values_shared_memory.daphne | 3 + ...andas_series_large_values_shared_memory.py | 9 ++ ...as_series_nagative_values_shared_memory.py | 10 ++ ...a_transfer_pandas_series_nan_values.daphne | 3 + .../data_transfer_pandas_series_nan_values.py | 10 ++ ...das_series_nan_values_shared_memory.daphne | 3 + ..._pandas_series_nan_values_shared_memory.py | 10 ++ ...nsfer_pandas_series_negative_values.daphne | 3 + ..._transfer_pandas_series_negative_values.py | 9 ++ ...eries_negative_values_shared_memory.daphne | 3 + ...as_series_negative_values_shared_memory.py | 9 ++ ...transfer_pandas_series_small_values.daphne | 3 + ...ata_transfer_pandas_series_small_values.py | 9 ++ ...s_series_small_values_shared_memory.daphne | 3 + ...andas_series_small_values_shared_memory.py | 9 ++ .../data_transfer_pandas_series_sparse.daphne | 4 + 
.../data_transfer_pandas_series_sparse.py | 10 ++ ..._pandas_series_sparse_shared_memory.daphne | 4 + ...sfer_pandas_series_sparse_shared_memory.py | 10 ++ 45 files changed, 388 insertions(+), 40 deletions(-) rename test/api/python/{data_transfer_pandas_1.daphne => data_transfer_pandas_df_mixed_data_types.daphne} (100%) rename test/api/python/{data_transfer_pandas_1.py => data_transfer_pandas_df_mixed_data_types.py} (100%) rename test/api/python/{data_transfer_pandas_4_sparse_dataframe.daphne => data_transfer_pandas_df_sparse.daphne} (100%) create mode 100644 test/api/python/data_transfer_pandas_df_sparse.py create mode 100644 test/api/python/data_transfer_pandas_df_sparse_shared_memory.daphne rename test/api/python/{data_transfer_pandas_4_sparse_dataframe.py => data_transfer_pandas_df_sparse_shared_memory.py} (100%) create mode 100644 test/api/python/data_transfer_pandas_series_float.daphne create mode 100644 test/api/python/data_transfer_pandas_series_float.py create mode 100644 test/api/python/data_transfer_pandas_series_float_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_float_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_inf_values.daphne create mode 100644 test/api/python/data_transfer_pandas_series_inf_values.py create mode 100644 test/api/python/data_transfer_pandas_series_inf_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_inf_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_int.daphne create mode 100644 test/api/python/data_transfer_pandas_series_int.py create mode 100644 test/api/python/data_transfer_pandas_series_int_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_int_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_large.daphne create mode 100644 test/api/python/data_transfer_pandas_series_large.py create mode 100644 
test/api/python/data_transfer_pandas_series_large_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_large_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_large_values.daphne create mode 100644 test/api/python/data_transfer_pandas_series_large_values.py create mode 100644 test/api/python/data_transfer_pandas_series_large_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_large_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_nagative_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_nan_values.daphne create mode 100644 test/api/python/data_transfer_pandas_series_nan_values.py create mode 100644 test/api/python/data_transfer_pandas_series_nan_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_nan_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_negative_values.daphne create mode 100644 test/api/python/data_transfer_pandas_series_negative_values.py create mode 100644 test/api/python/data_transfer_pandas_series_negative_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_negative_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_small_values.daphne create mode 100644 test/api/python/data_transfer_pandas_series_small_values.py create mode 100644 test/api/python/data_transfer_pandas_series_small_values_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_small_values_shared_memory.py create mode 100644 test/api/python/data_transfer_pandas_series_sparse.daphne create mode 100644 test/api/python/data_transfer_pandas_series_sparse.py create mode 100644 test/api/python/data_transfer_pandas_series_sparse_shared_memory.daphne create mode 100644 test/api/python/data_transfer_pandas_series_sparse_shared_memory.py diff 
--git a/test/api/python/DaphneLibTest.cpp b/test/api/python/DaphneLibTest.cpp index 560eaa1b6..471056646 100644 --- a/test/api/python/DaphneLibTest.cpp +++ b/test/api/python/DaphneLibTest.cpp @@ -86,14 +86,32 @@ MAKE_TEST_CASE("data_transfer_numpy_array_large") MAKE_TEST_CASE("data_transfer_numpy_array_large_shared_memory") MAKE_TEST_CASE("data_transfer_numpy_array_large_sparse") MAKE_TEST_CASE("data_transfer_numpy_array_large_sparse_shared_memory") -MAKE_TEST_CASE("data_transfer_numpy_array_int64") +//MAKE_TEST_CASE("data_transfer_pandas_series_float") +MAKE_TEST_CASE("data_transfer_pandas_series_float_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_int") +MAKE_TEST_CASE("data_transfer_pandas_series_int_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_nan_values") +MAKE_TEST_CASE("data_transfer_pandas_series_nan_values_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_inf_values") +MAKE_TEST_CASE("data_transfer_pandas_series_inf_values_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_negative_values") +MAKE_TEST_CASE("data_transfer_pandas_series_negative_values_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_large_values") +MAKE_TEST_CASE("data_transfer_pandas_series_large_values_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_small_values") +MAKE_TEST_CASE("data_transfer_pandas_series_small_values_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_large") +MAKE_TEST_CASE("data_transfer_pandas_series_large_shared_memory") +//MAKE_TEST_CASE("data_transfer_pandas_series_sparse") +MAKE_TEST_CASE("data_transfer_pandas_series_sparse_shared_memory") +MAKE_TEST_CASE("data_transfer_pandas_df_mixed_data_types") +MAKE_TEST_CASE("data_transfer_pandas_df_sparse_shared_memory") +MAKE_TEST_CASE("data_transfer_pandas_df_sparse") MAKE_TEST_CASE("data_transfer_numpy_1") MAKE_TEST_CASE("data_transfer_numpy_2") MAKE_TEST_CASE("data_transfer_numpy_3") 
-MAKE_TEST_CASE("data_transfer_pandas_1") MAKE_TEST_CASE("data_transfer_pandas_2") MAKE_TEST_CASE("data_transfer_pandas_3_series") -MAKE_TEST_CASE("data_transfer_pandas_4_sparse_dataframe") MAKE_TEST_CASE("data_transfer_pandas_5_categorical_dataframe") MAKE_TEST_CASE_ENVVAR("data_transfer_pytorch_1", "DAPHNE_DEP_AVAIL_PYTORCH") MAKE_TEST_CASE_ENVVAR("data_transfer_tensorflow_1", "DAPHNE_DEP_AVAIL_TENSFORFLOW") diff --git a/test/api/python/data_transfer_pandas.py b/test/api/python/data_transfer_pandas.py index ab2f13be6..7a63e9296 100644 --- a/test/api/python/data_transfer_pandas.py +++ b/test/api/python/data_transfer_pandas.py @@ -1,57 +1,79 @@ -import pandas as pd import numpy as np +import pandas as pd from daphne.context.daphne_context import DaphneContext dctx = DaphneContext() -# Test cases for Pandas Series +# ==== TEST CASES FOR pd.Series ==== series_test_cases = [ - # Simple Series - (pd.Series([1.0, 2.0, 3.0]), "float64_series"), - (pd.Series([1, 2, 3], dtype=np.int32), "int32_series"), - - # Series with different shapes - (pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]), "float64_series_longer"), + # 1D Series + (pd.Series([1.0, 2.0, 3.0]), "float64_1d"), + (pd.Series([4.0, 5.0, 6.0, 7.0]), "float64_1d_longer"), # Edge cases - (pd.Series([], dtype=np.float64), "float64_empty_series"), + (pd.Series([np.nan, np.nan, np.nan]), "float64_nan"), + (pd.Series([np.inf, -np.inf, np.inf]), "float64_inf"), + (pd.Series([-1.0, -2.0, -3.0]), "float64_negative"), + + # Small and Large Numbers + (pd.Series([1e-10, 2e-10, 3e-10]), "float64_small"), + (pd.Series([1e10, 2e10, 3e10]), "float64_large"), + + # Large Series + (pd.Series(np.random.rand(1000000)), "float64_1d_large"), + + # Sparse Series + (pd.Series(pd.arrays.SparseArray(np.zeros(1000))), "sparse_pd_series"), + (pd.Series(pd.arrays.SparseArray(np.random.choice([0, 1.0], size=1000, p=[0.99, 0.01]))), "sparse_pd_series_1_percent"), + + # Categorical Data + (pd.Series(pd.Categorical([0, 1, 2, 0, 1, 2])), "categorical_1d"), + 
(pd.Series(pd.Categorical([0, 1, 2, 3, 4, 5])), "categorical_1d_more_categories"), - # Series with categorical data - (pd.Series(pd.Categorical(["a", "b", "c"])), "categorical_series"), + # Integer Data + (pd.Series([1, 2, 3], dtype=np.int64), "int64_1d"), + (pd.Series([1, 2, 3], dtype=np.uint8), "uint8_1d"), ] -# Test cases for Pandas DataFrames -dataframe_test_cases = [ - # Simple DataFrames - (pd.DataFrame({"A": [1.0, 2.0, 3.0], "B": [4.0, 5.0, 6.0]}), "float64_dataframe"), - (pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype=np.int32), "int32_dataframe"), +# ==== TEST CASES FOR pd.DataFrame ==== +df_test_cases = [ + # Basic numerical DataFrames + (pd.DataFrame(np.array([[1.0, 2.0], [3.0, 4.0]]), columns=["A", "B"]), "float64_2d"), + (pd.DataFrame(np.array([[5, 6, 7], [8, 9, 10]]), columns=["X", "Y", "Z"]), "int64_2d"), - # DataFrames with different shapes - (pd.DataFrame({"A": [1.0, 2.0], "B": [3.0, 4.0], "C": [5.0, 6.0]}), "float64_dataframe_wider"), - (pd.DataFrame({"A": [1.0, 2.0, 3.0, 4.0], "B": [5.0, 6.0, 7.0, 8.0]}), "float64_dataframe_taller"), + # Mixed data types + (pd.DataFrame({"A": [1.0, 2.0, 3.0], "B": ["x", "y", "z"]}), "mixed_numeric_string"), + (pd.DataFrame({"A": [1, 2, 3], "B": [1.1, 2.2, 3.3], "C": ["a", "b", "c"]}), "mixed_int_float_string"), - # Edge cases - (pd.DataFrame({"A": [], "B": []}, dtype=np.float64), "float64_empty_dataframe"), + # Sparse DataFrames + (pd.DataFrame(pd.arrays.SparseArray(np.zeros((1000,))), columns=["SparseCol"]), "sparse_pd_dataframe"), + (pd.DataFrame(pd.arrays.SparseArray(np.random.choice([0, 1.0], size=1000, p=[0.99, 0.01]))), "sparse_pd_dataframe_1_percent"), + + # Time Series DataFrames + (pd.DataFrame({"Date": pd.date_range("2022-01-01", periods=10), "Value": np.random.rand(10)}), "time_series_df"), + (pd.DataFrame({"Date": pd.date_range("2022-01-01", periods=10, freq="D"), "Category": list("AAABBBCCDD"), "Value": np.random.rand(10)}), "time_series_categorical_df"), + + # Large DataFrames + 
(pd.DataFrame(np.random.rand(1000, 1000)), "float64_2d_large"), + + # Multi-Index DataFrame + (pd.DataFrame( + np.random.rand(6, 3), + index=pd.MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 1), ("B", 2), ("C", 1), ("C", 2)], names=["Group", "Subgroup"]), + columns=["X", "Y", "Z"] + ), "multi_index_df"), ] -# Testing Pandas Series +# Run tests for Series for series, name in series_test_cases: try: - series_daphne = dctx.from_pandas(series, shared_memory=True) - - series_daphne.print().compute(type="shared memory") - + dctx.from_pandas(series, shared_memory=False).print().compute() except Exception as e: - print(f"Error for {name}: {e}") + print(f"Error for {name} (Series): {e}") -# Testing Pandas DataFrames -for df, name in dataframe_test_cases: - try: - # Transfer Pandas DataFrame to DAPHNE - df_daphne = dctx.from_pandas(df, shared_memory=True) - - # Print the Daphne frame - df_daphne.print().compute(type="shared memory") - +# Run tests for DataFrames +for df, name in df_test_cases: + try: + dctx.from_pandas(df, shared_memory=False).print().compute() except Exception as e: - print(f"Error for {name}: {e}") \ No newline at end of file + print(f"Error for {name} (DataFrame): {e}") diff --git a/test/api/python/data_transfer_pandas_1.daphne b/test/api/python/data_transfer_pandas_df_mixed_data_types.daphne similarity index 100% rename from test/api/python/data_transfer_pandas_1.daphne rename to test/api/python/data_transfer_pandas_df_mixed_data_types.daphne diff --git a/test/api/python/data_transfer_pandas_1.py b/test/api/python/data_transfer_pandas_df_mixed_data_types.py similarity index 100% rename from test/api/python/data_transfer_pandas_1.py rename to test/api/python/data_transfer_pandas_df_mixed_data_types.py diff --git a/test/api/python/data_transfer_pandas_4_sparse_dataframe.daphne b/test/api/python/data_transfer_pandas_df_sparse.daphne similarity index 100% rename from test/api/python/data_transfer_pandas_4_sparse_dataframe.daphne rename to 
test/api/python/data_transfer_pandas_df_sparse.daphne diff --git a/test/api/python/data_transfer_pandas_df_sparse.py b/test/api/python/data_transfer_pandas_df_sparse.py new file mode 100644 index 000000000..5c3af310e --- /dev/null +++ b/test/api/python/data_transfer_pandas_df_sparse.py @@ -0,0 +1,31 @@ +#!/usr/bin/python + +# Copyright 2023 The DAPHNE Consortium +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Data transfer from pandas to DAPHNE and back, via files. +# pd.DataFrame with sparse data + +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +sdf = pd.DataFrame({ + "A": pd.arrays.SparseArray([1, 0, 0]), + "B": pd.arrays.SparseArray([0, 2, 0]), + "C": pd.arrays.SparseArray([0, 0, 3]) +}) + +dctx = DaphneContext() + +dctx.from_pandas(sdf, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_df_sparse_shared_memory.daphne b/test/api/python/data_transfer_pandas_df_sparse_shared_memory.daphne new file mode 100644 index 000000000..3a53366c5 --- /dev/null +++ b/test/api/python/data_transfer_pandas_df_sparse_shared_memory.daphne @@ -0,0 +1,19 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +sdf = createFrame([1, 0, 0], [0, 2, 0], [0, 0, 3], "A", "B", "C"); + +print(sdf); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_4_sparse_dataframe.py b/test/api/python/data_transfer_pandas_df_sparse_shared_memory.py similarity index 100% rename from test/api/python/data_transfer_pandas_4_sparse_dataframe.py rename to test/api/python/data_transfer_pandas_df_sparse_shared_memory.py diff --git a/test/api/python/data_transfer_pandas_series_float.daphne b/test/api/python/data_transfer_pandas_series_float.daphne new file mode 100644 index 000000000..f8d0925c0 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_float.daphne @@ -0,0 +1,5 @@ +ser1 = createFrame([10.0, 12.0, 14.0], "0"); +ser2 = createFrame([16.0, 18.0, 20.0, 22.0], "0"); + +print(ser1); +print(ser2); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_float.py b/test/api/python/data_transfer_pandas_series_float.py new file mode 100644 index 000000000..086f516fc --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_float.py @@ -0,0 +1,12 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+ +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([10.0, 12.0, 14.0]) +ser2 = pd.Series([16.0, 18.0, 20.0, 22.0]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") +dctx.from_pandas(ser2, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_float_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_float_shared_memory.daphne new file mode 100644 index 000000000..f8d0925c0 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_float_shared_memory.daphne @@ -0,0 +1,5 @@ +ser1 = createFrame([10.0, 12.0, 14.0], "0"); +ser2 = createFrame([16.0, 18.0, 20.0, 22.0], "0"); + +print(ser1); +print(ser2); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_float_shared_memory.py b/test/api/python/data_transfer_pandas_series_float_shared_memory.py new file mode 100644 index 000000000..0c2bd01dd --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_float_shared_memory.py @@ -0,0 +1,12 @@ +# Data transfer from pandas to DAPHNE and back, via shared memory. 
+ +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([10.0, 12.0, 14.0]) +ser2 = pd.Series([16.0, 18.0, 20.0, 22.0]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") +dctx.from_pandas(ser2, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_inf_values.daphne b/test/api/python/data_transfer_pandas_series_inf_values.daphne new file mode 100644 index 000000000..a7b2cdb68 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_inf_values.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([inf, -inf, inf], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_inf_values.py b/test/api/python/data_transfer_pandas_series_inf_values.py new file mode 100644 index 000000000..00abbc401 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_inf_values.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+import numpy as np +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([np.inf, -np.inf, np.inf]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_inf_values_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_inf_values_shared_memory.daphne new file mode 100644 index 000000000..a7b2cdb68 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_inf_values_shared_memory.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([inf, -inf, inf], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_inf_values_shared_memory.py b/test/api/python/data_transfer_pandas_series_inf_values_shared_memory.py new file mode 100644 index 000000000..102e46460 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_inf_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via shared memory. 
+import numpy as np +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([np.inf, -np.inf, np.inf]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_int.daphne b/test/api/python/data_transfer_pandas_series_int.daphne new file mode 100644 index 000000000..f7f4ce7ea --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_int.daphne @@ -0,0 +1,5 @@ +ser1 = createFrame([10, 12, 14], "0"); +ser2 = createFrame([16, 18, 20, 22], "0"); + +print(ser1); +print(ser2); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_int.py b/test/api/python/data_transfer_pandas_series_int.py new file mode 100644 index 000000000..164db266f --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_int.py @@ -0,0 +1,12 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+ +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([10, 12, 14]) +ser2 = pd.Series([16, 18, 20, 22]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") +dctx.from_pandas(ser2, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_int_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_int_shared_memory.daphne new file mode 100644 index 000000000..f7f4ce7ea --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_int_shared_memory.daphne @@ -0,0 +1,5 @@ +ser1 = createFrame([10, 12, 14], "0"); +ser2 = createFrame([16, 18, 20, 22], "0"); + +print(ser1); +print(ser2); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_int_shared_memory.py b/test/api/python/data_transfer_pandas_series_int_shared_memory.py new file mode 100644 index 000000000..b8a6bdca4 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_int_shared_memory.py @@ -0,0 +1,12 @@ +# Data transfer from pandas to DAPHNE and back, via shared memory. 
+ +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([10, 12, 14]) +ser2 = pd.Series([16, 18, 20, 22]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") +dctx.from_pandas(ser2, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_large.daphne b/test/api/python/data_transfer_pandas_series_large.daphne new file mode 100644 index 000000000..485ad4ab6 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large.daphne @@ -0,0 +1,4 @@ +m = fill(1.0, 1000000, 1); +ser1 = createFrame(m, "0"); + +print(ser1); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_large.py b/test/api/python/data_transfer_pandas_series_large.py new file mode 100644 index 000000000..695e6efb9 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+import pandas as pd +import numpy as np +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series(np.ones(1000000)) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_large_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_large_shared_memory.daphne new file mode 100644 index 000000000..be1fb76f4 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large_shared_memory.daphne @@ -0,0 +1,4 @@ +m = fill(1.0, 100000, 1); +ser1 = createFrame(m, "0"); + +print(ser1); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_large_shared_memory.py b/test/api/python/data_transfer_pandas_series_large_shared_memory.py new file mode 100644 index 000000000..2c3efccd9 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via shared memory. 
+import pandas as pd +import numpy as np +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series(np.ones(100000)) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_large_values.daphne b/test/api/python/data_transfer_pandas_series_large_values.daphne new file mode 100644 index 000000000..773ab76d0 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large_values.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([1e10, 2e10, 3e10], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_large_values.py b/test/api/python/data_transfer_pandas_series_large_values.py new file mode 100644 index 000000000..831060f35 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large_values.py @@ -0,0 +1,9 @@ +# Data transfer from pandas to DAPHNE and back, via files. +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([1e10, 2e10, 3e10]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_large_values_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_large_values_shared_memory.daphne new file mode 100644 index 000000000..773ab76d0 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large_values_shared_memory.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([1e10, 2e10, 3e10], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_large_values_shared_memory.py b/test/api/python/data_transfer_pandas_series_large_values_shared_memory.py new file mode 100644 index 000000000..b36960821 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_large_values_shared_memory.py @@ -0,0 +1,9 @@ +# Data transfer from pandas to DAPHNE and back, via shared memory. 
+import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([1e10, 2e10, 3e10]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_nagative_values_shared_memory.py b/test/api/python/data_transfer_pandas_series_nagative_values_shared_memory.py new file mode 100644 index 000000000..42e6fe48e --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_nagative_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, shared memory. +import numpy as np +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([-1.0, -2.0, -3.0]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_nan_values.daphne b/test/api/python/data_transfer_pandas_series_nan_values.daphne new file mode 100644 index 000000000..da5d05583 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_nan_values.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([nan, nan, nan], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_nan_values.py b/test/api/python/data_transfer_pandas_series_nan_values.py new file mode 100644 index 000000000..c58e42381 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_nan_values.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+import numpy as np +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([np.nan, np.nan, np.nan]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_nan_values_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_nan_values_shared_memory.daphne new file mode 100644 index 000000000..da5d05583 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_nan_values_shared_memory.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([nan, nan, nan], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_nan_values_shared_memory.py b/test/api/python/data_transfer_pandas_series_nan_values_shared_memory.py new file mode 100644 index 000000000..85575c712 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_nan_values_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, shared memory. 
+import numpy as np +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([np.nan, np.nan, np.nan]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_negative_values.daphne b/test/api/python/data_transfer_pandas_series_negative_values.daphne new file mode 100644 index 000000000..e1124c86f --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_negative_values.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([-1.0, -2.0, -3.0], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_negative_values.py b/test/api/python/data_transfer_pandas_series_negative_values.py new file mode 100644 index 000000000..960b72534 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_negative_values.py @@ -0,0 +1,9 @@ +# Data transfer from pandas to DAPHNE and back, via files. +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([-1.0, -2.0, -3.0]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_negative_values_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_negative_values_shared_memory.daphne new file mode 100644 index 000000000..e1124c86f --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_negative_values_shared_memory.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([-1.0, -2.0, -3.0], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_negative_values_shared_memory.py b/test/api/python/data_transfer_pandas_series_negative_values_shared_memory.py new file mode 100644 index 000000000..28bd8f265 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_negative_values_shared_memory.py @@ -0,0 +1,9 @@ +# Data transfer from 
pandas to DAPHNE and back, via shared memory. +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([-1.0, -2.0, -3.0]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_small_values.daphne b/test/api/python/data_transfer_pandas_series_small_values.daphne new file mode 100644 index 000000000..fb3bd5a8e --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_small_values.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([1e-10, 2e-10, 3e-10], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_small_values.py b/test/api/python/data_transfer_pandas_series_small_values.py new file mode 100644 index 000000000..0688048fa --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_small_values.py @@ -0,0 +1,9 @@ +# Data transfer from pandas to DAPHNE and back, via files. +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([1e-10, 2e-10, 3e-10]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_small_values_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_small_values_shared_memory.daphne new file mode 100644 index 000000000..fb3bd5a8e --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_small_values_shared_memory.daphne @@ -0,0 +1,3 @@ +ser1 = createFrame([1e-10, 2e-10, 3e-10], "0"); + +print(ser1); diff --git a/test/api/python/data_transfer_pandas_series_small_values_shared_memory.py b/test/api/python/data_transfer_pandas_series_small_values_shared_memory.py new file mode 100644 index 000000000..7744cd28f --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_small_values_shared_memory.py @@ -0,0 +1,9 @@ +# Data transfer from pandas to
DAPHNE and back, via shared memory. +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series([1e-10, 2e-10, 3e-10]) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_sparse.daphne b/test/api/python/data_transfer_pandas_series_sparse.daphne new file mode 100644 index 000000000..97ff4c063 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_sparse.daphne @@ -0,0 +1,4 @@ +m = fill(0.0, 1000, 1); +ser1 = createFrame(m, "0"); + +print(ser1); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_sparse.py b/test/api/python/data_transfer_pandas_series_sparse.py new file mode 100644 index 000000000..b10ea1d27 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_sparse.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via files. +import pandas as pd +import numpy as np +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series(np.zeros(1000)) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_sparse_shared_memory.daphne b/test/api/python/data_transfer_pandas_series_sparse_shared_memory.daphne new file mode 100644 index 000000000..97ff4c063 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_sparse_shared_memory.daphne @@ -0,0 +1,4 @@ +m = fill(0.0, 1000, 1); +ser1 = createFrame(m, "0"); + +print(ser1); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_sparse_shared_memory.py b/test/api/python/data_transfer_pandas_series_sparse_shared_memory.py new file mode 100644 index 000000000..dc85c817f --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_sparse_shared_memory.py @@ -0,0 +1,10 @@ +# Data transfer from pandas
to DAPHNE and back, via shared memory. +import pandas as pd +import numpy as np +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series(np.zeros(1000)) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=True).print().compute(type="shared memory") \ No newline at end of file From ecf7eecbd2355259cd0d52aeb83c298a3a5636ef Mon Sep 17 00:00:00 2001 From: mariakrzywnicka Date: Sun, 2 Mar 2025 23:49:48 +0100 Subject: [PATCH 3/5] string data transfer via files for np.arrays and pd.series --- .../python/daphne/context/daphne_context.py | 124 +++++++++++++++++- .../python/daphne/operator/nodes/matrix.py | 87 +++++++----- .../python/daphne/operator/operation_node.py | 41 ++++-- .../python/daphne/script_building/script.py | 19 +-- src/api/python/daphne/utils/consts.py | 2 +- src/runtime/local/kernels/ReceiveFromNumpy.h | 8 ++ src/runtime/local/kernels/kernels.json | 3 +- test/api/python/DaphneLibTest.cpp | 3 + test/api/python/data_transfer_numpy.py | 59 ++++++--- ...data_transfer_numpy_array_1d_string.daphne | 3 + .../data_transfer_numpy_array_1d_string.py | 10 ++ ...data_transfer_numpy_array_2d_string.daphne | 3 + .../data_transfer_numpy_array_2d_string.py | 10 ++ .../data_transfer_pandas_df_string.daphne | 3 + .../python/data_transfer_pandas_df_string.py | 11 ++ .../data_transfer_pandas_series_string.daphne | 3 + .../data_transfer_pandas_series_string.py | 10 ++ 17 files changed, 330 insertions(+), 69 deletions(-) create mode 100644 test/api/python/data_transfer_numpy_array_1d_string.daphne create mode 100644 test/api/python/data_transfer_numpy_array_1d_string.py create mode 100644 test/api/python/data_transfer_numpy_array_2d_string.daphne create mode 100644 test/api/python/data_transfer_numpy_array_2d_string.py create mode 100644 test/api/python/data_transfer_pandas_df_string.daphne create mode 100644 test/api/python/data_transfer_pandas_df_string.py create mode 100644 test/api/python/data_transfer_pandas_series_string.daphne create mode 
100644 test/api/python/data_transfer_pandas_series_string.py diff --git a/src/api/python/daphne/context/daphne_context.py b/src/api/python/daphne/context/daphne_context.py index 4ec5924d9..df0b0c083 100644 --- a/src/api/python/daphne/context/daphne_context.py +++ b/src/api/python/daphne/context/daphne_context.py @@ -36,6 +36,9 @@ import numpy as np import pandas as pd +import os +import json + try: import torch as torch except ImportError as e: @@ -70,7 +73,7 @@ def readFrame(self, file: str) -> Frame: unnamed_params = ['\"'+file+'\"'] return Frame(self, 'readFrame', unnamed_params) - def from_numpy(self, mat: np.array, shared_memory=True, verbose=False, return_shape=False): + def from_numpy_numerical(self, mat: np.array, shared_memory=True, verbose=False, return_shape=False): """Generates a `DAGNode` representing a matrix with data given by a numpy `array`. :param mat: The numpy array. :param shared_memory: Whether to use shared memory data transfer (True) or not (False). @@ -140,8 +143,93 @@ def from_numpy(self, mat: np.array, shared_memory=True, verbose=False, return_sh print(f"from_numpy(): total Python-side execution time: {(time.time() - start_time):.10f} seconds") return (res, original_shape) if return_shape else res + + def from_numpy(self, mat, shared_memory=True, verbose=False, return_shape=False): + """Generates a `DAGNode` representing a matrix with data given by a numpy `array`. + :param mat: The numpy array. + :param shared_memory: Whether to use shared memory data transfer (True) or not (False). + :param verbose: Whether to print timing information (True) or not (False). + :param return_shape: Whether to return the original shape of the input array. + :return: The data from numpy as a Matrix. 
+ """ - def from_pandas(self, df: pd.DataFrame, shared_memory=True, verbose=False, keepIndex=False) -> Frame: + if isinstance(mat, (pd.Series, pd.DataFrame)): + mat = mat.to_numpy() + + original_shape = mat.shape + if mat.ndim == 1: + mat = mat.reshape(-1, 1) + elif mat.ndim >= 2: + if mat.ndim > 2: + mat = mat.reshape((original_shape[0], -1)) + rows, cols = mat.shape + + if mat.dtype.kind in {'U', 'S', 'O'}: + + original_shape = mat.shape + + if verbose: + start_time = time.time() + + if mat.ndim == 1: + rows = mat.shape[0] + cols = 1 + elif mat.ndim >= 2: + if mat.ndim > 2: + mat = mat.reshape((original_shape[0], -1)) + rows, cols = mat.shape + + file_name = os.path.join(TMP_PATH, "numpy_data") + csv_file_path = file_name + ".csv" + meta_file_path = csv_file_path + ".meta" + + if shared_memory: + shared_memory = False + + string_data = mat.astype(str).tolist() + + try: + np.savetxt(csv_file_path, mat, delimiter=",", fmt='%s') + except IOError as e: + print(f"Error writing to file {csv_file_path}: {e}") + return None + + try: + with open(meta_file_path, "w") as f: + meta_content = { + "numRows": mat.shape[0], + "numCols": mat.shape[1], + "valueType": "str", + } + json.dump(meta_content, f, indent=2) + except IOError as e: + print(f"Error writing to file {meta_file_path}: {e}") + return None + + if not os.access(meta_file_path, os.R_OK): + print(f"Metadata file is not readable: {meta_file_path}") + return None + + data_path_param = f"\"{csv_file_path}\"" + unnamed_params = [data_path_param] + named_params = [] + + try: + res = Matrix(self, 'readMatrix', unnamed_params, named_params, local_data=mat) + except Exception as e: + print(f"Error creating Matrix object: {e}") + return None + else: + return self.from_numpy_numerical(mat, shared_memory, verbose, return_shape) + + try: + return (res, original_shape) if return_shape else res + except Exception as e: + print(f"Error in return statement: {e}") + return None + + + def from_pandas_numerical(self, df: pd.DataFrame, 
shared_memory=True, verbose=False, keepIndex=False) -> Frame: """Generates a `DAGNode` representing a frame with data given by a pandas `DataFrame`. :param df: The pandas DataFrame. :param shared_memory: Whether to use shared memory data transfer (True) or not (False). @@ -149,7 +237,7 @@ def from_pandas(self, df: pd.DataFrame, shared_memory=True, verbose=False, keepI :param keepIndex: Whether the frame should keep its index from pandas within DAPHNE :return: A Frame """ - + if verbose: start_time = time.time() @@ -253,9 +341,35 @@ def from_pandas(self, df: pd.DataFrame, shared_memory=True, verbose=False, keepI # Return the Frame. return Frame(self, 'readFrame', unnamed_params, named_params, local_data=df, column_names=df.columns) + def from_pandas(self, df: pd.DataFrame, shared_memory=True, verbose=False, keepIndex=False) -> Frame: + """Generates a `DAGNode` representing a frame with data given by a pandas `DataFrame`. + :param df: The pandas DataFrame. + :param shared_memory: Whether to use shared memory data transfer (True) or not (False). + :param verbose: Whether the execution time and further information should be output to the console. + :param keepIndex: Whether the frame should keep its index from pandas within DAPHNE + :return: A Frame + """ + if verbose: + start_time = time.time() + + # Handle pandas Series separately + if isinstance(df, pd.Series): + if df.dtype.kind in {'O', 'U', 'S'}: + return self.from_numpy(df, shared_memory=shared_memory, verbose=verbose, return_shape=False) + + # Check if any column in DataFrame contains string data + if isinstance(df, pd.DataFrame): + for col in df.columns: + if df[col].dtype.kind in {'O', 'U', 'S'}: + return self.from_numpy(df, shared_memory=shared_memory, verbose=verbose, return_shape=False) + + # Existing logic for handling non-string data + return self.from_pandas_numerical(df, shared_memory=shared_memory, verbose=verbose, keepIndex=keepIndex) + + # This feature is only available if TensorFlow is available. 
if isinstance(tf, ImportError): - def from_tensorflow(self, tensor , shared_memory=True, verbose=False, return_shape=False): + def from_tensorflow(self, tensor, shared_memory=True, verbose=False, return_shape=False): raise tf else: def from_tensorflow(self, tensor: tf.Tensor, shared_memory=True, verbose=False, return_shape=False): @@ -349,7 +463,7 @@ def from_pytorch(self, tensor: torch.Tensor, shared_memory=True, verbose=False, # Return the matrix, and the original shape if return_shape is set to True. return (matrix, original_shape) if return_shape else matrix - + def fill(self, arg, rows:int, cols:int) -> Matrix: named_input_nodes = {'arg':arg, 'rows':rows, 'cols':cols} return Matrix(self, 'fill', [], named_input_nodes=named_input_nodes) diff --git a/src/api/python/daphne/operator/nodes/matrix.py b/src/api/python/daphne/operator/nodes/matrix.py index 1f9b79f00..c9dd5dfda 100644 --- a/src/api/python/daphne/operator/nodes/matrix.py +++ b/src/api/python/daphne/operator/nodes/matrix.py @@ -48,56 +48,83 @@ def __init__(self, daphne_context: 'DaphneContext', operation:str, unnamed_input local_data: np.array = None, brackets:bool = False, left_brackets: bool = False, copy: bool = False, consumer_list: List['OperationNode'] = None)->'Matrix': self.__copy = copy - is_python_local_data = False if local_data is not None: - self._np_array = local_data is_python_local_data = True else: self._np_array = None + is_python_local_data = False + super().__init__(daphne_context, operation, unnamed_input_nodes, named_input_nodes, OutputType.MATRIX,is_python_local_data, brackets, left_brackets, consumer_list) - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], - named_input_vars: Dict[str, str]) -> str: + # Debug statements to verify input parameters and file paths + # print(f"Unnamed input nodes: {unnamed_input_nodes}") + # print(f"Named input nodes: {named_input_nodes}") + # print(f"Local data: {local_data}") + + def code_line(self, var_name: str, 
unnamed_input_vars: Sequence[str], named_input_vars: Dict[str, str]) -> str: if self.__copy: return f'{var_name}={unnamed_input_vars[0]};' - code_line = super().code_line(var_name, unnamed_input_vars, named_input_vars).format(file_name=var_name, TMP_PATH = TMP_PATH) - + code_line = super().code_line(var_name, unnamed_input_vars, named_input_vars).format(file_name=var_name, TMP_PATH=TMP_PATH) if self._is_numpy() and self.operation == "readMatrix": - with open(TMP_PATH+"/"+var_name+".csv", "wb") as f: - np.savetxt(f, self._np_array, delimiter=",") - with open(TMP_PATH+"/"+var_name+".csv.meta", "w") as f: - json.dump( - { - "numRows": np.shape(self._np_array)[0], - "numCols": np.shape(self._np_array)[1], - "valueType": self.getDType(self._np_array.dtype), - }, - f, indent=2 - ) + if self._np_array.dtype.kind in {'U', 'S', 'O'}: + json_file_path = f"{TMP_PATH}/{var_name}.json" + json_meta_file_path = f"{TMP_PATH}/{var_name}.json.meta" + with open(json_file_path, "w", encoding='utf-8') as f: + json.dump(self._np_array.tolist(), f, ensure_ascii=False) + with open(json_meta_file_path, "w") as f: + json.dump( + { + "numRows": np.shape(self._np_array)[0], + "numCols": np.shape(self._np_array)[1], + "valueType": self.getDType(self._np_array.dtype), + }, + f, indent=2 + ) + else: + csv_file_path = f"{TMP_PATH}/{var_name}.csv" + csv_meta_file_path = f"{TMP_PATH}/{var_name}.csv.meta" + with open(csv_file_path, "wb") as f: + np.savetxt(f, self._np_array, delimiter=",") + with open(csv_meta_file_path, "w") as f: + json.dump( + { + "numRows": np.shape(self._np_array)[0], + "numCols": np.shape(self._np_array)[1], + "valueType": self.getDType(self._np_array.dtype), + }, + f, indent=2 + ) return code_line + def _is_numpy(self): + return isinstance(self._np_array, np.ndarray) + def getDType(self, d_type): - if d_type == np.dtype('f4'): + if d_type == np.dtype('float32'): return "f32" - elif d_type == np.dtype('f8'): + elif d_type == np.dtype('float64'): return "f64" - elif d_type == 
np.dtype('si2'): - return "si8" - elif d_type == np.dtype('si4'): + elif d_type == np.dtype('int16'): + return "si16" + elif d_type == np.dtype('int32'): return "si32" - elif d_type == np.dtype('si8'): + elif d_type == np.dtype('int64'): return "si64" - elif d_type == np.dtype('ui2'): - return "ui8" - elif d_type == np.dtype('ui4'): - return "ui8" - elif d_type == np.dtype('ui8'): + elif d_type == np.dtype('uint8'): return "ui8" + elif d_type == np.dtype('uint16'): + return "ui16" + elif d_type == np.dtype('uint32'): + return "ui32" + elif d_type == np.dtype('uint64'): + return "ui64" + elif d_type == np.dtype("S") or d_type == np.dtype('U'): + return "str" else: - print("Error") - + return "object" + def _is_numpy(self) -> bool: return self._np_array is not None diff --git a/src/api/python/daphne/operator/operation_node.py b/src/api/python/daphne/operator/operation_node.py index 5b5e0a854..bca07da8c 100644 --- a/src/api/python/daphne/operator/operation_node.py +++ b/src/api/python/daphne/operator/operation_node.py @@ -91,7 +91,7 @@ def update_node_in_input_list(self, new_node, current_node): current_index = self._unnamed_input_nodes.index(current_node) self._unnamed_input_nodes[current_index] = new_node - def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyTorch=False, shape=None, useIndexColumn=False): + def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyTorch=False, shape=None, useIndexColumn=False) -> Union[np.array, pd.DataFrame, 'tf.Tensor', 'torch.Tensor', float]: """ Compute function for processing the Daphne Object or operation node and returning the results. The function builds a DaphneDSL script from the node and its context, executes it, and processes the results @@ -110,8 +110,8 @@ def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyT - A scalar value for scalar outputs. - TensorFlow or PyTorch tensors if `asTensorFlow` or `asPyTorch` is set to True respectively. 
""" - if self._result_var is None: + if verbose: start_time = time.time() @@ -129,13 +129,12 @@ def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyT if verbose: print(f"compute(): Python-side execution time of the execute() function: {(time.time() - exec_start_time):.10f} seconds") - if self._output_type == OutputType.FRAME and type=="shared memory": + if self._output_type == OutputType.FRAME and type == "shared memory": if verbose: dt_start_time = time.time() daphneLibResult = DaphneLib.getResult() - # Read the frame's address into a numpy array. if daphneLibResult.columns is not None: # Read the column labels and dtypes from the Frame's labels and dtypes directly. labels = [ctypes.cast(daphneLibResult.labels[i], ctypes.c_char_p).value.decode() for i in range(daphneLibResult.cols)] @@ -179,22 +178,31 @@ def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyT if verbose: print(f"compute(): time for Python side data transfer (Frame, shared memory): {(time.time() - dt_start_time):.10f} seconds") - elif self._output_type == OutputType.FRAME and type=="files": + elif self._output_type == OutputType.FRAME and type == "files": df = pd.read_csv(result) with open(result + ".meta", "r") as f: fmd = json.load(f) df.columns = [x["label"] for x in fmd["schema"]] result = df self.clear_tmp() - elif self._output_type == OutputType.MATRIX and type=="shared memory": + elif self._output_type == OutputType.MATRIX and type == "shared memory": daphneLibResult = DaphneLib.getResult() result = np.ctypeslib.as_array( ctypes.cast(daphneLibResult.address, ctypes.POINTER(self.getType(daphneLibResult.vtc))), shape=[daphneLibResult.rows, daphneLibResult.cols] ) self.clear_tmp() - elif self._output_type == OutputType.MATRIX and type=="files": - arr = np.genfromtxt(result, delimiter=',') + elif self._output_type == OutputType.MATRIX and type == "files": + # Ensure string data is handled correctly + arr = np.genfromtxt(result, delimiter=',', 
dtype=None, encoding='utf-8') + meta_file_name = result + ".meta" + if os.path.exists(meta_file_name): + with open(meta_file_name, "r") as meta_file: + meta_data = json.load(meta_file) + if meta_data.get("valueType") == "string": + arr = arr.astype(str) + else: + print(f"Metadata file not found: {meta_file_name}") self.clear_tmp() return arr elif self._output_type == OutputType.SCALAR: @@ -249,6 +257,23 @@ def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyT return return result + # Handle the 'print' operation + if self.operation == 'print': + if self._result_var is not None: + if isinstance(self._result_var, np.ndarray): + print("Matrix content:") + print(self._result_var) + elif isinstance(self._result_var, pd.DataFrame): + print("DataFrame content:") + print(self._result_var) + else: + print("Result content:") + print(self._result_var) + else: + print("No result to print.") + return None + + def clear_tmp(self): for f in os.listdir(TMP_PATH): os.remove(os.path.join(TMP_PATH, f)) diff --git a/src/api/python/daphne/script_building/script.py b/src/api/python/daphne/script_building/script.py index f1d5d01b0..d1e608ba2 100644 --- a/src/api/python/daphne/script_building/script.py +++ b/src/api/python/daphne/script_building/script.py @@ -87,24 +87,25 @@ def add_code(self, code:str)->None: def clear(self, dag_root:DAGNode): self._dfs_clear_dag_nodes(dag_root) self._variable_counter = 0 - + def execute(self): temp_out_path = os.path.join(TMP_PATH, "tmpdaphne.daphne") - temp_out_file = open(temp_out_path, "w") - temp_out_file.writelines(self.daphnedsl_script) - temp_out_file.close() + with open(temp_out_path, "w") as temp_out_file: + temp_out_file.writelines(self.daphnedsl_script) + + # Check if the file exists + if not os.path.exists(temp_out_path): + print(f"Error: File {temp_out_path} does not exist.") + return - #os.environ['OPENBLAS_NUM_THREADS'] = '1' res = DaphneLib.daphne(ctypes.c_char_p(str.encode(PROTOTYPE_PATH)), 
ctypes.c_char_p(str.encode(temp_out_path))) if res != 0: # Error message with DSL code line. error_message = DaphneLib.getResult().error_message.decode("utf-8") - # Remove DSL code line from error message. - # index_code_line = error_message.find("Source file ->") - 29 - # error_message = error_message[:index_code_line] + print(f"Error message from DaphneLib.daphne: {error_message}") raise RuntimeError(f"Error in DaphneDSL script: {error_message}") - #os.environ['OPENBLAS_NUM_THREADS'] = '32' + def _dfs_dag_nodes(self, dag_node: VALID_INPUT_TYPES)->str: """Uses Depth-First-Search to create code from DAG diff --git a/src/api/python/daphne/utils/consts.py b/src/api/python/daphne/utils/consts.py index ac22fddea..4ad3518a4 100644 --- a/src/api/python/daphne/utils/consts.py +++ b/src/api/python/daphne/utils/consts.py @@ -30,7 +30,7 @@ from daphne.operator.nodes.frame import Frame from daphne.operator.nodes.scalar import Scalar -VALID_INPUT_TYPES = Union['DAGNode', str, int, float, bool] +VALID_INPUT_TYPES = Union['DAGNode', str, int, float, bool,object] # These are the operator symbols used in DaphneDSL (not in Python). 
BINARY_OPERATIONS = ['+', '-', '/', '*', '^', '%', '<', '<=', '>', '>=', '==', '!=', '@', '&&', '||'] VALID_ARITHMETIC_TYPES = Union['DAGNode', int, float] diff --git a/src/runtime/local/kernels/ReceiveFromNumpy.h b/src/runtime/local/kernels/ReceiveFromNumpy.h index 2e90f98f5..4d947d45e 100644 --- a/src/runtime/local/kernels/ReceiveFromNumpy.h +++ b/src/runtime/local/kernels/ReceiveFromNumpy.h @@ -62,4 +62,12 @@ template struct ReceiveFromNumpy> { } }; +template <> struct ReceiveFromNumpy> { + static void apply(DenseMatrix *&res, uint32_t upper, uint32_t lower, int64_t rows, int64_t cols, DCTX(ctx)) { + //res = DataObjectFactory::create>( + // rows, cols, std::shared_ptr((VT *)(((uint64_t)upper << 32) | lower), NoOpDeleter())); + std::cerr << "ReceiveFromNumpy> not implemented yet" << std::endl; + } +}; + #endif // SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMNUMPY_H diff --git a/src/runtime/local/kernels/kernels.json b/src/runtime/local/kernels/kernels.json index 19597655e..d40339b41 100644 --- a/src/runtime/local/kernels/kernels.json +++ b/src/runtime/local/kernels/kernels.json @@ -4207,7 +4207,8 @@ [["DenseMatrix", "int8_t"]], [["DenseMatrix", "uint64_t"]], [["DenseMatrix", "uint32_t"]], - [["DenseMatrix", "uint8_t"]] + [["DenseMatrix", "uint8_t"]], + [["DenseMatrix", "std::string"]] ] }, { diff --git a/test/api/python/DaphneLibTest.cpp b/test/api/python/DaphneLibTest.cpp index 471056646..1f9019af1 100644 --- a/test/api/python/DaphneLibTest.cpp +++ b/test/api/python/DaphneLibTest.cpp @@ -86,6 +86,8 @@ MAKE_TEST_CASE("data_transfer_numpy_array_large") MAKE_TEST_CASE("data_transfer_numpy_array_large_shared_memory") MAKE_TEST_CASE("data_transfer_numpy_array_large_sparse") MAKE_TEST_CASE("data_transfer_numpy_array_large_sparse_shared_memory") +MAKE_TEST_CASE("data_transfer_numpy_array_1d_string") +MAKE_TEST_CASE("data_transfer_numpy_array_2d_string") //MAKE_TEST_CASE("data_transfer_pandas_series_float") MAKE_TEST_CASE("data_transfer_pandas_series_float_shared_memory") 
//MAKE_TEST_CASE("data_transfer_pandas_series_int") @@ -104,6 +106,7 @@ MAKE_TEST_CASE("data_transfer_pandas_series_small_values_shared_memory") MAKE_TEST_CASE("data_transfer_pandas_series_large_shared_memory") //MAKE_TEST_CASE("data_transfer_pandas_series_sparse") MAKE_TEST_CASE("data_transfer_pandas_series_sparse_shared_memory") +MAKE_TEST_CASE("data_transfer_pandas_series_string") MAKE_TEST_CASE("data_transfer_pandas_df_mixed_data_types") MAKE_TEST_CASE("data_transfer_pandas_df_sparse_shared_memory") MAKE_TEST_CASE("data_transfer_pandas_df_sparse") diff --git a/test/api/python/data_transfer_numpy.py b/test/api/python/data_transfer_numpy.py index 512911219..a36a56da9 100644 --- a/test/api/python/data_transfer_numpy.py +++ b/test/api/python/data_transfer_numpy.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from daphne.context.daphne_context import DaphneContext dctx = DaphneContext() @@ -31,16 +32,6 @@ (np.random.rand(2, 3, 4, 5), "float64_4d"), (np.random.rand(2, 2, 3, 4, 5), "float64_5d"), - # Very large arrays - (np.random.rand(1000000).reshape(-1, 1), "float64_1d_large"), - (np.random.rand(1000, 1000), "float64_2d_large"), - - # Sparse arrays - (np.zeros((1000, 1000)), "sparse_np_array"), - (np.random.choice([0, 1.0], size=(1000, 1000), p=[0.99, 0.01]), "sparse_np_array_1_percent"), # 1% non-zero - (np.random.choice([0, 1.0], size=(1000, 1000), p=[0.95, 0.05]), "sparse_np_array_5_percent"), # 5% non-zero - (np.random.choice([0, 1.0], size=(1000, 1000), p=[0.9, 0.1]), "sparse_np_array_10_percent"), # 10% non-zero - # Categorical data (using float representation) (np.array([0, 1, 2, 0, 1, 2], dtype=np.float64).reshape(-1, 1), "categorical_1d"), (np.array([0, 1, 2, 3, 4, 5], dtype=np.int32).reshape(-1, 1), "categorical_1d_more_categories"), @@ -51,16 +42,54 @@ # Different data types (np.array([1, 2, 3], dtype=np.int64).reshape(-1, 1), "int64_1d"), (np.array([1, 2, 3], dtype=np.uint8).reshape(-1, 1), "uint8_1d"), + (np.array(["apple", "banana", 
"cherry"], dtype=object).reshape(-1, 1), "string_fruits") +] + +test_cases_string = [ + # 1D arrays + (np.array(["apple", "banana", "cherry"], dtype=object).reshape(-1, 1), "string_fruits"), + (np.array(["apple", "banana", "cherry", "date"], dtype=object).reshape(-1, 1), "string_fruits_longer"), + (np.array(["apple", "banana", "cherry", "date", "elderberry"], dtype=object).reshape(-1, 1), "string_fruits_even_longer"), - # Non-standard shapes - (np.random.rand(1, 1000), "float64_1x1000"), - (np.random.rand(1000, 1), "float64_1000x1"), + # 2D arrays + (np.array([["apple", "banana"], ["cherry", "date"]], dtype=object), "string_fruits_2d"), + (np.array([["apple", "banana", "cherry"], ["date", "elderberry", "fig"]], dtype=object), "string_fruits_2d_wider"), + (np.array([["apple", "banana"], ["cherry", "date"], ["elderberry", "fig"]], dtype=object), "string_fruits_2d_taller"), + (np.array([["apple", "banana", "cherry"], ["date", "elderberry", "fig"], ["grape", "honeydew", "imbe"]], dtype=object), "string_fruits_2d_square"), + + # Edge cases + (np.array(["apple", "banana", np.nan], dtype=object).reshape(-1, 1), "string_nan_mixed"), + (np.array(["apple", "banana", 1.0], dtype=object).reshape(-1, 1), "string_nan_mixed"), + + # Higher-dimensional arrays + (np.array([[["apple", "banana"], ["cherry", "date"]], [["elderberry", "fig"], ["grape", "honeydew"]]], dtype=object), "string_fruits_3d"), +] + +test_cases_string_pandas = [ + # Pandas Series + (pd.Series(["apple", "banana", "cherry"], dtype=str), "string_series"), + (pd.Series(["dog", "elephant", "fox", "giraffe"], dtype=str), "string_series_longer"), + + # Pandas DataFrames + (pd.DataFrame({"col1": ["red", "green", "blue"], "col2": ["circle", "square", "triangle"]}), "string_df"), + (pd.DataFrame({"col1": ["cat", "dog"], "col2": ["fish", "bird"], "col3": ["hamster", "rabbit"]}), "string_df_wider"), + (pd.DataFrame({"col1": ["one", "two", "three"], "col2": ["four", "five", "six"], "col3": ["seven", "eight", "nine"], 
"col4": ["ten", "eleven", "twelve"]}), "string_df_taller"), ] for X, name in test_cases: try: + result = dctx.from_numpy(X, shared_memory=True).print().compute() + except Exception as e: + print(f"Error for f{name}: {e}") - dctx.from_numpy(X, shared_memory=False).print().compute() +for X, name in test_cases_string: + try: + result = dctx.from_numpy(X, shared_memory=True).print().compute() + except Exception as e: + print(f"Error for f{name}: {e}") +for X, name in test_cases_string_pandas: + try: + result = dctx.from_pandas(X, shared_memory=True).print().compute() except Exception as e: - print(f"Error for {name}: {e}") \ No newline at end of file + print(f"Error for f{name}: {e}") \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_1d_string.daphne b/test/api/python/data_transfer_numpy_array_1d_string.daphne new file mode 100644 index 000000000..0b23d2658 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_1d_string.daphne @@ -0,0 +1,3 @@ +m1 = reshape(["apple","banana","cherry"], 3, 1); + +print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_1d_string.py b/test/api/python/data_transfer_numpy_array_1d_string.py new file mode 100644 index 000000000..f65ae171d --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_1d_string.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. 
+ +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array(["apple", "banana", "cherry"], dtype=str).reshape(-1, 1) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) diff --git a/test/api/python/data_transfer_numpy_array_2d_string.daphne b/test/api/python/data_transfer_numpy_array_2d_string.daphne new file mode 100644 index 000000000..1a312e698 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_2d_string.daphne @@ -0,0 +1,3 @@ +m1 = reshape(["apple","banana","cherry","fig"] , 2, 2); + +print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy_array_2d_string.py b/test/api/python/data_transfer_numpy_array_2d_string.py new file mode 100644 index 000000000..d12cc9019 --- /dev/null +++ b/test/api/python/data_transfer_numpy_array_2d_string.py @@ -0,0 +1,10 @@ +# Data transfer from numpy to DAPHNE and back, via files. + +import numpy as np +from daphne.context.daphne_context import DaphneContext + +m1 = np.array([["apple", "banana"], ["cherry", "fig"]], dtype=str) + +dctx = DaphneContext() + +(dctx.from_numpy(m1, shared_memory=False).print().compute()) \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_df_string.daphne b/test/api/python/data_transfer_pandas_df_string.daphne new file mode 100644 index 000000000..a42cb6b95 --- /dev/null +++ b/test/api/python/data_transfer_pandas_df_string.daphne @@ -0,0 +1,3 @@ +m1 = reshape(["red", "green", "blue","circle", "square", "triangle"], 3, 2); + +print(m1); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_df_string.py b/test/api/python/data_transfer_pandas_df_string.py new file mode 100644 index 000000000..812b55dce --- /dev/null +++ b/test/api/python/data_transfer_pandas_df_string.py @@ -0,0 +1,11 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+# pd.DataFrame + +import pandas as pd +from daphne.context.daphne_context import DaphneContext + +df = pd.DataFrame({"col1": ["red", "green", "blue"], "col2": ["circle", "square", "triangle"]}) + +dctx = DaphneContext() + +dctx.from_pandas(df, shared_memory=False).print().compute(type="files") \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_string.daphne b/test/api/python/data_transfer_pandas_series_string.daphne new file mode 100644 index 000000000..803b7a27f --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_string.daphne @@ -0,0 +1,3 @@ +ser1 = reshape(["apple", "banana", "cherry"], 3,1); + +print(ser1); \ No newline at end of file diff --git a/test/api/python/data_transfer_pandas_series_string.py b/test/api/python/data_transfer_pandas_series_string.py new file mode 100644 index 000000000..e9aea2637 --- /dev/null +++ b/test/api/python/data_transfer_pandas_series_string.py @@ -0,0 +1,10 @@ +# Data transfer from pandas to DAPHNE and back, via files. 
+import pandas as pd +import numpy as np +from daphne.context.daphne_context import DaphneContext + +ser1 = pd.Series(["apple", "banana", "cherry"], dtype=str) + +dctx = DaphneContext() + +dctx.from_pandas(ser1, shared_memory=False).print().compute(type="files") \ No newline at end of file From b3fcf4c0f5044665ea828e32b675fdff24f1c087 Mon Sep 17 00:00:00 2001 From: mariakrzywnicka Date: Mon, 17 Mar 2025 00:27:58 +0100 Subject: [PATCH 4/5] string data transfer --- .../python/daphne/context/daphne_context.py | 188 +++++++++++++++++- .../python/daphne/operator/nodes/matrix.py | 2 + src/api/python/daphne/utils/consts.py | 1 + src/parser/daphnedsl/DaphneDSLBuiltins.cpp | 2 + src/runtime/local/kernels/ReceiveFromNumpy.h | 14 +- src/util/DeduceType.h | 3 + test/api/python/data_transfer_numpy.py | 75 ++----- 7 files changed, 217 insertions(+), 68 deletions(-) diff --git a/src/api/python/daphne/context/daphne_context.py b/src/api/python/daphne/context/daphne_context.py index df0b0c083..d995bf9a6 100644 --- a/src/api/python/daphne/context/daphne_context.py +++ b/src/api/python/daphne/context/daphne_context.py @@ -32,12 +32,13 @@ from daphne.operator.nodes.do_while_loop import DoWhileLoop from daphne.operator.nodes.multi_return import MultiReturn from daphne.operator.operation_node import OperationNode -from daphne.utils.consts import VALID_INPUT_TYPES, VALID_COMPUTED_TYPES, TMP_PATH, F64, F32, SI64, SI32, SI8, UI64, UI32, UI8 +from daphne.utils.consts import VALID_INPUT_TYPES, VALID_COMPUTED_TYPES, TMP_PATH, F64, F32, SI64, SI32, SI8, UI64, UI32, UI8, STR import numpy as np import pandas as pd import os import json +import mmap try: import torch as torch @@ -227,6 +228,191 @@ def from_numpy(self, mat, shared_memory=True, verbose=False, return_shape=False) except Exception as e: print(f"Error in return statement: {e}") return None + + def from_numpy2(self, mat, shared_memory=True, verbose=False, return_shape=False): + """Generates a `DAGNode` representing a matrix with data 
given by a numpy `array` or pandas `Series`/`DataFrame`. + :param mat: The numpy array or pandas Series/DataFrame. + :param shared_memory: Whether to use shared memory data transfer (True) or not (False). + :param verbose: Whether to print timing information (True) or not (False). + :param return_shape: Whether to return the original shape of the input array. + :return: The data from numpy as a Matrix. + """ + print(f"from_numpy2()") + + if isinstance(mat, (pd.Series, pd.DataFrame)): + print("Series or DataFrame detected, converting to numpy array") + mat = mat.to_numpy() + print(f"from_numpy(): mat={mat}") + + original_shape = mat.shape + print(f"Original shape: {original_shape}") + + if mat.ndim == 1: + mat = mat.reshape(-1, 1) + elif mat.ndim >= 2: + if mat.ndim > 2: + mat = mat.reshape((original_shape[0], -1)) + rows, cols = mat.shape + print(f"Reshaped matrix: {mat.shape}") + + print(f"from_numpy(): dtype={mat.dtype}") + + # Assign value type code (vtc) based on dtype + try: + vtc = mat.dtype + print(f"Value type code: {vtc}") + except ValueError as e: + print(f"Unsupported numpy dtype: {mat.dtype}") + return None + + if shared_memory: + if mat.dtype.kind in {'O', 'U', 'S'}: + print("Data transfer via shared memory for string data.") + # Serialize the string data + try: + serialized_data = '\0'.join(map(str, mat.flatten())).encode('utf-8') + print(f"Serialized data: {serialized_data}") + except Exception as e: + print(f"Error serializing data: {e}") + return None + data_size = len(serialized_data) + print(f"Data size: {data_size}") + + # Allocate shared memory + shm = mmap.mmap(-1, data_size, access=mmap.ACCESS_WRITE) + shm.write(serialized_data) + shm.seek(0) + print(f"Shared memory allocated and data written") + + # Get the address of the shared memory + address = shm.find(serialized_data) + upper = (address & 0xFFFFFFFF00000000) >> 32 + lower = (address & 0xFFFFFFFF) + + # Create metadata + meta_content = { + "numRows": mat.shape[0], + "numCols": 
mat.shape[1], + "valueType": "str", + "shm_size": data_size + } + print(f"Metadata: {meta_content}") + + # Store metadata if needed + self.store_metadata("string_data", meta_content) # added for testing + + # Create a Matrix object with shared memory metadata + vtc = STR # added for testing + unnamed_params = [upper, lower, rows, cols, vtc] + named_params = [] + res = Matrix(self, 'receiveFromNumpy', unnamed_params, named_params, local_data=mat) + print(f"Matrix object created with shared memory metadata") + else: + # Handle numerical data + address = mat.ctypes.data_as(np.ctypeslib.ndpointer(dtype=mat.dtype, ndim=1, flags='C_CONTIGUOUS')).value + upper = (address & 0xFFFFFFFF00000000) >> 32 + lower = (address & 0xFFFFFFFF) + + # Change the data type, if int16 or uint16 is handed over. + if mat.dtype == np.int16: + mat = mat.astype(np.int32, copy=False) + elif mat.dtype == np.uint16: + mat = mat.astype(np.uint32, copy=False) + + d_type = mat.dtype + if d_type == np.double or d_type == np.float64: + vtc = "F64" + elif d_type == np.float32: + vtc = "F32" + elif d_type == np.int8: + vtc = "SI8" + elif d_type == np.int32: + vtc = "SI32" + elif d_type == np.int64: + vtc = "SI64" + elif d_type == np.uint8: + vtc = "UI8" + elif d_type == np.uint32: + vtc = "UI32" + elif d_type == np.uint64: + vtc = "UI64" + else: + print("unsupported numpy dtype") + return None + + res = Matrix(self, 'receiveFromNumpy', [upper, lower, rows, cols, vtc], local_data=mat) + else: + # Fallback to file-based transfer if shared memory is not used + file_name = os.path.join(TMP_PATH, "numpy_data") + csv_file_path = file_name + ".csv" + meta_file_path = file_name + ".meta" + + print(f"CSV file path: {csv_file_path}") + print(f"Metadata file path: {meta_file_path}") + + string_data = mat.astype(str).tolist() + print(f"String data: {string_data}") + + # Write the string data to a temporary CSV file + try: + np.savetxt(csv_file_path, mat, delimiter=",", fmt='%s') + print(f"CSV file created at: 
{csv_file_path}") + except IOError as e: + print(f"Error writing to file {csv_file_path}: {e}") + return None + + # Verify if the file was created + if not os.path.exists(csv_file_path): + print(f"Error: CSV file {csv_file_path} does not exist.") + return None + + # Write metadata to a temporary JSON file + try: + with open(meta_file_path, "w") as f: + meta_content = { + "numRows": mat.shape[0], + "numCols": mat.shape[1], + "valueType": vtc + } + json.dump(meta_content, f, indent=2) + print(f"Metadata file created at: {meta_file_path}") + print(f"Metadata file content: {json.dumps(meta_content, indent=2)}") + except IOError as e: + print(f"Error writing to file {meta_file_path}: {e}") + return None + + # Verify if the metadata file was created + if not os.path.exists(meta_file_path): + print(f"Error: Metadata file {meta_file_path} does not exist.") + return None + + # Data transfer via a file + data_path_param = f"\"{csv_file_path}\"" + unnamed_params = [data_path_param] + named_params = [] + + print(f"from_numpy(): dtype={mat.dtype}") + + print("Creating Matrix object for readMatrix") + try: + res = Matrix(self, 'readMatrix', unnamed_params, named_params, local_data=mat) + print("Matrix object created successfully") + except Exception as e: + print(f"Error creating Matrix object: {e}") + return None + print(f"from_numpy(): Matrix object created: {res}") + return res + + def store_metadata(self, var_name: str, meta_content: dict): + """Store metadata for later use.""" + metadata_path = os.path.join(TMP_PATH, f"{var_name}_metadata.json") + try: + with open(metadata_path, "w") as f: + json.dump(meta_content, f, indent=2) + print(f"Metadata stored at: {metadata_path}") + except IOError as e: + print(f"Error storing metadata: {e}") + def from_pandas_numerical(self, df: pd.DataFrame, shared_memory=True, verbose=False, keepIndex=False) -> Frame: diff --git a/src/api/python/daphne/operator/nodes/matrix.py b/src/api/python/daphne/operator/nodes/matrix.py index 
c9dd5dfda..9dc752f16 100644 --- a/src/api/python/daphne/operator/nodes/matrix.py +++ b/src/api/python/daphne/operator/nodes/matrix.py @@ -48,6 +48,7 @@ def __init__(self, daphne_context: 'DaphneContext', operation:str, unnamed_input local_data: np.array = None, brackets:bool = False, left_brackets: bool = False, copy: bool = False, consumer_list: List['OperationNode'] = None)->'Matrix': self.__copy = copy + if local_data is not None: self._np_array = local_data is_python_local_data = True @@ -96,6 +97,7 @@ def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_inpu }, f, indent=2 ) + print(f"Code line: {code_line}") return code_line def _is_numpy(self): diff --git a/src/api/python/daphne/utils/consts.py b/src/api/python/daphne/utils/consts.py index 4ad3518a4..7c47518de 100644 --- a/src/api/python/daphne/utils/consts.py +++ b/src/api/python/daphne/utils/consts.py @@ -55,3 +55,4 @@ UI64 = 5 F32 = 6 F64 = 7 +STR = 8 diff --git a/src/parser/daphnedsl/DaphneDSLBuiltins.cpp b/src/parser/daphnedsl/DaphneDSLBuiltins.cpp index 10208d788..3e8433e9e 100644 --- a/src/parser/daphnedsl/DaphneDSLBuiltins.cpp +++ b/src/parser/daphnedsl/DaphneDSLBuiltins.cpp @@ -1181,6 +1181,8 @@ antlrcpp::Any DaphneDSLBuiltins::build(mlir::Location loc, const std::string &fu vt = builder.getIntegerType(32, false); else if (valueTypeCode == (int64_t)ValueTypeCode::UI64) vt = builder.getIntegerType(64, false); + else if (valueTypeCode == (int64_t)ValueTypeCode::STR) // added for testing + vt = mlir::daphne::StringType::get(builder.getContext()); else throw ErrorHandler::compilerError(loc, "DSLBuiltins", "invalid value type code"); diff --git a/src/runtime/local/kernels/ReceiveFromNumpy.h b/src/runtime/local/kernels/ReceiveFromNumpy.h index 4d947d45e..e511ce4c5 100644 --- a/src/runtime/local/kernels/ReceiveFromNumpy.h +++ b/src/runtime/local/kernels/ReceiveFromNumpy.h @@ -62,12 +62,12 @@ template struct ReceiveFromNumpy> { } }; -template <> struct ReceiveFromNumpy> { - static 
void apply(DenseMatrix *&res, uint32_t upper, uint32_t lower, int64_t rows, int64_t cols, DCTX(ctx)) { - //res = DataObjectFactory::create>( - // rows, cols, std::shared_ptr((VT *)(((uint64_t)upper << 32) | lower), NoOpDeleter())); - std::cerr << "ReceiveFromNumpy> not implemented yet" << std::endl; - } -}; +//template <> struct ReceiveFromNumpy> { + // static void apply(DenseMatrix *&res, uint32_t upper, uint32_t lower, int64_t rows, int64_t cols, DCTX(ctx)) { + // //res = DataObjectFactory::create>( + // // rows, cols, std::shared_ptr((VT *)(((uint64_t)upper << 32) | lower), NoOpDeleter())); + // std::cerr << "ReceiveFromNumpy> not implemented yet" << std::endl; + // } +//}; #endif // SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMNUMPY_H diff --git a/src/util/DeduceType.h b/src/util/DeduceType.h index eadd7fd21..f716f3879 100644 --- a/src/util/DeduceType.h +++ b/src/util/DeduceType.h @@ -101,6 +101,9 @@ template typename TExec, typename... TLi case ValueTypeCode::F64: DeduceValueType_Helper::apply(std::forward(args)...); return; + case ValueTypeCode::STR: // added for testing + DeduceValueType_Helper::apply(std::forward(args)...); + return; default: throw std::runtime_error("DeduceValueType_Helper::apply: unknown value type code"); } diff --git a/test/api/python/data_transfer_numpy.py b/test/api/python/data_transfer_numpy.py index a36a56da9..49907ea64 100644 --- a/test/api/python/data_transfer_numpy.py +++ b/test/api/python/data_transfer_numpy.py @@ -5,91 +5,46 @@ dctx = DaphneContext() test_cases = [ - # 1D arrays (np.array([1.0, 2.0, 3.0], dtype=np.float64).reshape(-1, 1), "float64_1d"), (np.array([4.0, 5.0, 6.0, 7.0], dtype=np.float64).reshape(-1, 1), "float64_1d_longer"), - (np.array([8.0, 9.0, 10.0, 11.0, 12.0], dtype=np.float64).reshape(-1, 1), "float64_1d_even_longer"), - - # 2D arrays - (np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64), "float64_2d"), - (np.array([[5.0, 6.0, 7.0], [8.0, 9.0, 10.0]], dtype=np.float64), "float64_2d_wider"), - (np.array([[11.0, 
12.0], [13.0, 14.0], [15.0, 16.0]], dtype=np.float64), "float64_2d_taller"), - (np.array([[17.0, 18.0, 19.0], [20.0, 21.0, 22.0], [23.0, 24.0, 25.0]], dtype=np.float64), "float64_2d_square"), - - # Edge cases - (np.array([np.nan, np.nan, np.nan], dtype=np.float64).reshape(-1, 1), "float64_nan"), - (np.array([np.inf, -np.inf, np.inf], dtype=np.float64).reshape(-1, 1), "float64_inf"), - (np.array([1, 2.0, 3], dtype=np.float64).reshape(-1, 1), "float64_mixed"), - (np.array([-1.0, -2.0, -3.0], dtype=np.float64).reshape(-1, 1), "float64_negative"), - (np.array([1e-10, 2e-10, 3e-10], dtype=np.float64).reshape(-1, 1), "float64_small"), - (np.array([1e10, 2e10, 3e10], dtype=np.float64).reshape(-1, 1), "float64_large"), - - # Higher-dimensional arrays - (np.array([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]], dtype=np.float64), "float64_3d"), - (np.array([[[9.0, 10.0], [11.0, 12.0]], [[13.0, 14.0], [15.0, 16.0]], [[17.0, 18.0], [19.0, 20.0]]], dtype=np.float64), "float64_3d_larger"), - (np.array([[[21.0, 22.0, 23.0], [24.0, 25.0, 26.0]], [[27.0, 28.0, 29.0], [30.0, 31.0, 32.0]]], dtype=np.float64), "float64_3d_wider"), - (np.array([[[33.0, 34.0], [35.0, 36.0], [37.0, 38.0]], [[39.0, 40.0], [41.0, 42.0], [43.0, 44.0]]], dtype=np.float64), "float64_3d_taller"), - (np.random.rand(2, 3, 4, 5), "float64_4d"), - (np.random.rand(2, 2, 3, 4, 5), "float64_5d"), - - # Categorical data (using float representation) - (np.array([0, 1, 2, 0, 1, 2], dtype=np.float64).reshape(-1, 1), "categorical_1d"), - (np.array([0, 1, 2, 3, 4, 5], dtype=np.int32).reshape(-1, 1), "categorical_1d_more_categories"), - (np.array([0, 1, 0, 1, 0, 1], dtype=np.int32).reshape(-1, 1), "categorical_1d_fewer_categories"), - (np.array([0, 0, 0, 0, 0, 0], dtype=np.int32).reshape(-1, 1), "categorical_1d_single_category"), - (np.array([0, 1, 2, 1, 0, 2, 1, 0, 2], dtype=np.int32).reshape(-1, 1), "categorical_1d_repeated_categories"), - - # Different data types - (np.array([1, 2, 3], dtype=np.int64).reshape(-1, 
1), "int64_1d"), - (np.array([1, 2, 3], dtype=np.uint8).reshape(-1, 1), "uint8_1d"), - (np.array(["apple", "banana", "cherry"], dtype=object).reshape(-1, 1), "string_fruits") + #(np.array(["apple", "banana", "cherry"], dtype=str).reshape(-1, 1), "string_fruits"), ] test_cases_string = [ # 1D arrays - (np.array(["apple", "banana", "cherry"], dtype=object).reshape(-1, 1), "string_fruits"), - (np.array(["apple", "banana", "cherry", "date"], dtype=object).reshape(-1, 1), "string_fruits_longer"), - (np.array(["apple", "banana", "cherry", "date", "elderberry"], dtype=object).reshape(-1, 1), "string_fruits_even_longer"), - - # 2D arrays - (np.array([["apple", "banana"], ["cherry", "date"]], dtype=object), "string_fruits_2d"), - (np.array([["apple", "banana", "cherry"], ["date", "elderberry", "fig"]], dtype=object), "string_fruits_2d_wider"), - (np.array([["apple", "banana"], ["cherry", "date"], ["elderberry", "fig"]], dtype=object), "string_fruits_2d_taller"), - (np.array([["apple", "banana", "cherry"], ["date", "elderberry", "fig"], ["grape", "honeydew", "imbe"]], dtype=object), "string_fruits_2d_square"), - - # Edge cases - (np.array(["apple", "banana", np.nan], dtype=object).reshape(-1, 1), "string_nan_mixed"), - (np.array(["apple", "banana", 1.0], dtype=object).reshape(-1, 1), "string_nan_mixed"), - - # Higher-dimensional arrays - (np.array([[["apple", "banana"], ["cherry", "date"]], [["elderberry", "fig"], ["grape", "honeydew"]]], dtype=object), "string_fruits_3d"), + (np.array(["apple", "banana", "cherry"], dtype=str).reshape(-1, 1), "string_fruits"), + #(np.array(["apple", "banana", "cherry", "date"], dtype=object).reshape(-1, 1), "string_fruits_longer"), + #(np.array(["apple", "banana", "cherry", "date", "elderberry"], dtype=object).reshape(-1, 1), "string_fruits_even_longer"), ] test_cases_string_pandas = [ # Pandas Series (pd.Series(["apple", "banana", "cherry"], dtype=str), "string_series"), - (pd.Series(["dog", "elephant", "fox", "giraffe"], dtype=str), 
"string_series_longer"), + #(pd.Series(["dog", "elephant", "fox", "giraffe"], dtype=str), "string_series_longer"), # Pandas DataFrames - (pd.DataFrame({"col1": ["red", "green", "blue"], "col2": ["circle", "square", "triangle"]}), "string_df"), - (pd.DataFrame({"col1": ["cat", "dog"], "col2": ["fish", "bird"], "col3": ["hamster", "rabbit"]}), "string_df_wider"), - (pd.DataFrame({"col1": ["one", "two", "three"], "col2": ["four", "five", "six"], "col3": ["seven", "eight", "nine"], "col4": ["ten", "eleven", "twelve"]}), "string_df_taller"), + #(pd.DataFrame({"col1": ["red", "green", "blue"], "col2": ["circle", "square", "triangle"]}), "string_df"), + #(pd.DataFrame({"col1": ["cat", "dog"], "col2": ["fish", "bird"], "col3": ["hamster", "rabbit"]}), "string_df_wider"), + #(pd.DataFrame({"col1": ["one", "two", "three"], "col2": ["four", "five", "six"], "col3": ["seven", "eight", "nine"], "col4": ["ten", "eleven", "twelve"]}), "string_df_taller"), ] for X, name in test_cases: try: result = dctx.from_numpy(X, shared_memory=True).print().compute() + print(f"Result for {name}: {result}") except Exception as e: - print(f"Error for f{name}: {e}") + print(f"Error for {name}: {e}") for X, name in test_cases_string: try: - result = dctx.from_numpy(X, shared_memory=True).print().compute() + result = dctx.from_numpy2(X, shared_memory=True).print().compute() + print(f"Result for {name}: {result}") except Exception as e: - print(f"Error for f{name}: {e}") + print(f"Error for {name}: {e}") for X, name in test_cases_string_pandas: try: result = dctx.from_pandas(X, shared_memory=True).print().compute() + print(f"Result for {name}: {result}") except Exception as e: - print(f"Error for f{name}: {e}") \ No newline at end of file + print(f"Error for {name}: {e}") \ No newline at end of file From 6f1302f0da6a4af7d052d63816473a0f9a0a1b15 Mon Sep 17 00:00:00 2001 From: mariakrzywnicka Date: Sat, 29 Mar 2025 14:33:14 +0100 Subject: [PATCH 5/5] update --- 
.../python/daphne/context/daphne_context.py | 129 +++++++++++++----- .../python/daphne/script_building/script.py | 5 +- src/parser/daphnedsl/DaphneDSLBuiltins.cpp | 13 ++ src/runtime/local/kernels/ReceiveFromNumpy.h | 55 ++++++-- test/api/python/data_transfer_numpy.py | 3 - 5 files changed, 157 insertions(+), 48 deletions(-) diff --git a/src/api/python/daphne/context/daphne_context.py b/src/api/python/daphne/context/daphne_context.py index d995bf9a6..f4d4a69b1 100644 --- a/src/api/python/daphne/context/daphne_context.py +++ b/src/api/python/daphne/context/daphne_context.py @@ -39,6 +39,9 @@ import os import json import mmap +import struct +import ctypes + try: import torch as torch @@ -230,21 +233,25 @@ def from_numpy(self, mat, shared_memory=True, verbose=False, return_shape=False) return None def from_numpy2(self, mat, shared_memory=True, verbose=False, return_shape=False): - """Generates a `DAGNode` representing a matrix with data given by a numpy `array` or pandas `Series`/`DataFrame`. + """ + Generates a `DAGNode` representing a matrix with data given by a numpy `array` or pandas `Series`/`DataFrame`. + :param mat: The numpy array or pandas Series/DataFrame. :param shared_memory: Whether to use shared memory data transfer (True) or not (False). :param verbose: Whether to print timing information (True) or not (False). :param return_shape: Whether to return the original shape of the input array. :return: The data from numpy as a Matrix. 
""" +<<<<<<< HEAD print(f"from_numpy2()") +======= +>>>>>>> 1237b6e5 (update) if isinstance(mat, (pd.Series, pd.DataFrame)): - print("Series or DataFrame detected, converting to numpy array") mat = mat.to_numpy() - print(f"from_numpy(): mat={mat}") original_shape = mat.shape +<<<<<<< HEAD print(f"Original shape: {original_shape}") if mat.ndim == 1: @@ -256,16 +263,41 @@ def from_numpy2(self, mat, shared_memory=True, verbose=False, return_shape=False print(f"Reshaped matrix: {mat.shape}") print(f"from_numpy(): dtype={mat.dtype}") +======= + + # Ensure the matrix is 2D + if mat.ndim == 1: + mat = mat.reshape(-1, 1) + elif mat.ndim > 2: + mat = mat.reshape((original_shape[0], -1)) + rows, cols = mat.shape +>>>>>>> 1237b6e5 (update) # Assign value type code (vtc) based on dtype - try: - vtc = mat.dtype - print(f"Value type code: {vtc}") - except ValueError as e: - print(f"Unsupported numpy dtype: {mat.dtype}") - return None + d_type = mat.dtype + if d_type == np.float64: + vtc = "F64" + elif d_type == np.float32: + vtc = "F32" + elif d_type == np.int8: + vtc = "SI8" + elif d_type == np.int32: + vtc = "SI32" + elif d_type == np.int64: + vtc = "SI64" + elif d_type == np.uint8: + vtc = "UI8" + elif d_type == np.uint32: + vtc = "UI32" + elif d_type == np.uint64: + vtc = "UI64" + elif d_type.kind in {'O', 'U', 'S'}: # Handle string data + vtc = "STR" + else: + raise ValueError(f"Unsupported numpy dtype: {d_type}") if shared_memory: +<<<<<<< HEAD if mat.dtype.kind in {'O', 'U', 'S'}: print("Data transfer via shared memory for string data.") # Serialize the string data @@ -336,13 +368,46 @@ def from_numpy2(self, mat, shared_memory=True, verbose=False, return_shape=False vtc = "UI32" elif d_type == np.uint64: vtc = "UI64" - else: - print("unsupported numpy dtype") - return None + + if vtc == "STR": # Handle string data + # Convert Unicode strings to bytes + if mat.dtype.kind == 'U': + mat = np.char.encode(mat, 'utf-8') + else: + mat = mat.astype('S') + + # Serialize strings 
with null delimiters + serialized = b'\0'.join( + x.item() if isinstance(x, np.bytes_) else str(x).encode() + for x in mat.flatten() + ) + b'\0' + + # Create shared memory + shm = mmap.mmap(-1, len(serialized)) + shm.write(serialized) + + # Get address of shared memory + buf = memoryview(shm) + address = ctypes.addressof(ctypes.c_char.from_buffer(buf)) + upper = (address >> 32) & 0xFFFFFFFF + lower = address & 0xFFFFFFFF + + # Create Matrix with handle to keep memory alive + res = Matrix(self, 'receiveFromNumpy', [upper, lower, rows, cols, 8]) + else: # Handle numerical data + # Ensure data is C-contiguous + mat = np.ascontiguousarray(mat) + + # Get address of the data + address = mat.ctypes.data + upper = (address >> 32) & 0xFFFFFFFF + lower = address & 0xFFFFFFFF + + # Create Matrix res = Matrix(self, 'receiveFromNumpy', [upper, lower, rows, cols, vtc], local_data=mat) else: - # Fallback to file-based transfer if shared memory is not used + # Fallback to file-based transfer file_name = os.path.join(TMP_PATH, "numpy_data") csv_file_path = file_name + ".csv" meta_file_path = file_name + ".meta" @@ -354,62 +419,54 @@ def from_numpy2(self, mat, shared_memory=True, verbose=False, return_shape=False print(f"String data: {string_data}") # Write the string data to a temporary CSV file + try: np.savetxt(csv_file_path, mat, delimiter=",", fmt='%s') - print(f"CSV file created at: {csv_file_path}") except IOError as e: print(f"Error writing to file {csv_file_path}: {e}") return None - # Verify if the file was created - if not os.path.exists(csv_file_path): - print(f"Error: CSV file {csv_file_path} does not exist.") - return None - - # Write metadata to a temporary JSON file + # Write metadata to JSON try: with open(meta_file_path, "w") as f: meta_content = { - "numRows": mat.shape[0], - "numCols": mat.shape[1], + "numRows": rows, + "numCols": cols, "valueType": vtc } json.dump(meta_content, f, indent=2) - print(f"Metadata file created at: {meta_file_path}") - 
print(f"Metadata file content: {json.dumps(meta_content, indent=2)}") + except IOError as e: print(f"Error writing to file {meta_file_path}: {e}") return None - # Verify if the metadata file was created - if not os.path.exists(meta_file_path): - print(f"Error: Metadata file {meta_file_path} does not exist.") + # Verify files exist + if not os.path.exists(csv_file_path) or not os.path.exists(meta_file_path): + print(f"Error: Required files for data transfer do not exist.") return None - # Data transfer via a file + # Data transfer via file data_path_param = f"\"{csv_file_path}\"" unnamed_params = [data_path_param] named_params = [] - print(f"from_numpy(): dtype={mat.dtype}") - - print("Creating Matrix object for readMatrix") try: res = Matrix(self, 'readMatrix', unnamed_params, named_params, local_data=mat) - print("Matrix object created successfully") except Exception as e: print(f"Error creating Matrix object: {e}") return None - print(f"from_numpy(): Matrix object created: {res}") - return res - + + if return_shape: + return res, original_shape + return res + def store_metadata(self, var_name: str, meta_content: dict): """Store metadata for later use.""" metadata_path = os.path.join(TMP_PATH, f"{var_name}_metadata.json") try: with open(metadata_path, "w") as f: json.dump(meta_content, f, indent=2) - print(f"Metadata stored at: {metadata_path}") + #print(f"Metadata stored at: {metadata_path}") except IOError as e: print(f"Error storing metadata: {e}") diff --git a/src/api/python/daphne/script_building/script.py b/src/api/python/daphne/script_building/script.py index d1e608ba2..45cbbb1e8 100644 --- a/src/api/python/daphne/script_building/script.py +++ b/src/api/python/daphne/script_building/script.py @@ -89,9 +89,11 @@ def clear(self, dag_root:DAGNode): self._variable_counter = 0 def execute(self): + print("Executing DaphneDSL script") temp_out_path = os.path.join(TMP_PATH, "tmpdaphne.daphne") with open(temp_out_path, "w") as temp_out_file: 
temp_out_file.writelines(self.daphnedsl_script) + print("line: ", self.daphnedsl_script) # Check if the file exists if not os.path.exists(temp_out_path): @@ -99,6 +101,7 @@ def execute(self): return res = DaphneLib.daphne(ctypes.c_char_p(str.encode(PROTOTYPE_PATH)), ctypes.c_char_p(str.encode(temp_out_path))) + print(f"Result from DaphneLib.daphne: {res}") if res != 0: # Error message with DSL code line. error_message = DaphneLib.getResult().error_message.decode("utf-8") @@ -144,7 +147,7 @@ def _dfs_dag_nodes(self, dag_node: VALID_INPUT_TYPES)->str: self.add_input_from_python(dag_node.daphnedsl_name, dag_node) code_line = dag_node.code_line( - dag_node.daphnedsl_name, unnamed_input_vars, named_input_vars) + dag_node.daphnedsl_name, unnamed_input_vars, named_input_vars) self.add_code(code_line) return dag_node.daphnedsl_name diff --git a/src/parser/daphnedsl/DaphneDSLBuiltins.cpp b/src/parser/daphnedsl/DaphneDSLBuiltins.cpp index 3e8433e9e..e540274fe 100644 --- a/src/parser/daphnedsl/DaphneDSLBuiltins.cpp +++ b/src/parser/daphnedsl/DaphneDSLBuiltins.cpp @@ -1158,6 +1158,15 @@ antlrcpp::Any DaphneDSLBuiltins::build(mlir::Location loc, const std::string &fu mlir::Value rows = args[2]; mlir::Value cols = args[3]; mlir::Value valueType = args[4]; + mlir::Value itemsize = args[5]; // added for testing + + //std::cerr << "Arguments received: " << std::endl; + //std::cerr << " Upper: " << upper << std::endl; + //std::cerr << " Lower: " << lower << std::endl; + //std::cerr << " Rows: " << rows << std::endl; + //std::cerr << " Cols: " << cols << std::endl; + //std::cerr << " ValueType: " << valueType << std::endl; + int64_t valueTypeCode = CompilerUtils::constantOrThrow( valueType, "the value type code in ReceiveFromNumpyOp must be a constant"); @@ -1186,6 +1195,10 @@ antlrcpp::Any DaphneDSLBuiltins::build(mlir::Location loc, const std::string &fu else throw ErrorHandler::compilerError(loc, "DSLBuiltins", "invalid value type code"); + //std::cerr << "Creating 
ReceiveFromNumpyOp with the following parameters:" << std::endl; + //std::cerr << " Matrix Type: " << vt << std::endl; + //std::cerr << " Rows: " << rows << ", Cols: " << cols << std::endl; + return static_cast( builder.create(loc, utils.matrixOf(vt), upper, lower, rows, cols)); } diff --git a/src/runtime/local/kernels/ReceiveFromNumpy.h b/src/runtime/local/kernels/ReceiveFromNumpy.h index e511ce4c5..c7cbade67 100644 --- a/src/runtime/local/kernels/ReceiveFromNumpy.h +++ b/src/runtime/local/kernels/ReceiveFromNumpy.h @@ -57,17 +57,56 @@ template struct NoOpDeleter { template struct ReceiveFromNumpy> { static void apply(DenseMatrix *&res, uint32_t upper, uint32_t lower, int64_t rows, int64_t cols, DCTX(ctx)) { + std::cerr << "ReceiveFromNumpy: Extracting data from shared memory" << std::endl; res = DataObjectFactory::create>( rows, cols, std::shared_ptr((VT *)(((uint64_t)upper << 32) | lower), NoOpDeleter())); } }; -//template <> struct ReceiveFromNumpy> { - // static void apply(DenseMatrix *&res, uint32_t upper, uint32_t lower, int64_t rows, int64_t cols, DCTX(ctx)) { - // //res = DataObjectFactory::create>( - // // rows, cols, std::shared_ptr((VT *)(((uint64_t)upper << 32) | lower), NoOpDeleter())); - // std::cerr << "ReceiveFromNumpy> not implemented yet" << std::endl; - // } -//}; +template <> +struct ReceiveFromNumpy> { + static void apply(DenseMatrix *&res, + uint32_t upper, + uint32_t lower, + int64_t rows, + int64_t cols, + DCTX(ctx)) { + try { + // Calculate shared memory address + char* shared_mem = reinterpret_cast( + (static_cast(upper) << 32) | lower; -#endif // SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMNUMPY_H + // Create result matrix + res = DataObjectFactory::create>(rows, cols, false); + + // Parse NULL-delimited strings + for (int64_t r = 0; r < rows; ++r) { + for (int64_t c = 0; c < cols; ++c) { + const char* str_start = shared_mem; + + // Find string length (until NULL terminator) + size_t len = 0; + while (shared_mem[len] != '\0' && len < 256) { // 
256 = max expected string length + len++; + } + + // Construct string properly + res->set(r, c, std::string(str_start, len)); + + // Move to next string (skip NULL terminator) + shared_mem += len + 1; + } + } + } + catch (const std::exception& e) { + if (res) { + DataObjectFactory::destroy(res); + res = nullptr; + } + std::cerr << "String transfer error: " << e.what() << std::endl; + throw; + } + } +}; + +#endif // SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMNUMPY_H \ No newline at end of file diff --git a/test/api/python/data_transfer_numpy.py b/test/api/python/data_transfer_numpy.py index 49907ea64..7f0da8fc9 100644 --- a/test/api/python/data_transfer_numpy.py +++ b/test/api/python/data_transfer_numpy.py @@ -31,20 +31,17 @@ for X, name in test_cases: try: result = dctx.from_numpy(X, shared_memory=True).print().compute() - print(f"Result for {name}: {result}") except Exception as e: print(f"Error for {name}: {e}") for X, name in test_cases_string: try: result = dctx.from_numpy2(X, shared_memory=True).print().compute() - print(f"Result for {name}: {result}") except Exception as e: print(f"Error for {name}: {e}") for X, name in test_cases_string_pandas: try: result = dctx.from_pandas(X, shared_memory=True).print().compute() - print(f"Result for {name}: {result}") except Exception as e: print(f"Error for {name}: {e}") \ No newline at end of file