diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 95e38b2..ca362c1 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -52,6 +52,14 @@ jobs:
         Sys.setenv("NOT_CRAN" = "true")
         install.packages("arrow")
       shell: Rscript {0}
+    - name: Install R dependency "polars"
+      run: |
+        if (Sys.info()["sysname"] == "Darwin") {
+          install.packages("polars", repos = "https://rpolars.r-universe.dev")
+        } else {
+          install.packages("polars", repos = "https://rpolars.r-universe.dev/bin/linux/jammy/4.3")
+        }
+      shell: Rscript {0}
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -63,7 +71,7 @@ jobs:
         python -m build
     - name: Install package
       run: |
-        pip install $(ls -t1 dist/*.whl | tail -1)
+        pip install "$(ls -t1 dist/*.whl | tail -1)[all]"
     - name: Test with pytest
       run: |
         export LD_LIBRARY_PATH=$(python -m rpy2.situation LD_LIBRARY_PATH):${LD_LIBRARY_PATH}
diff --git a/doc/index.rst b/doc/index.rst
index cef1cdd..56366bd 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -49,7 +49,14 @@ availble to R was measured to be 200 times faster with the use of Arrow
 .. code-block:: r
 
    install.packages("arrow")
-   
+
+
+Polars
+======
+
+If using :mod:`polars`, the conversion rules for it are described in
+section :ref:`polars`.
+
 Indices and tables
 ==================
 
diff --git a/pyproject.toml b/pyproject.toml
index 5c8de64..d3f9175 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,13 +9,12 @@ build-backend = "setuptools.build_meta"
 name = "rpy2-arrow"
 description = "Bridge Arrow between Python and R when using rpy2"
 readme = "README.md"
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 license = { text = "MIT" }
 authors = [{ name = "Laurent Gautier", email = "lgautier@gmail.com" }]
 classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
@@ -26,14 +25,15 @@ classifiers = [
 ]
 dependencies = [
     "pyarrow",
-    "rpy2 >= 3.4"
+    "rpy2 >= 3.5.15"
 ]
 dynamic = ["version"]
 
 [project.optional-dependencies]
+polars = ["polars"]
 test = ["pytest", "pandas"]
 R6 = ["rpy2-R6"]
-all = ["pytest", "pandas", "rpy2-R6"]
+all = ["pytest", "pandas", "rpy2-R6", "polars"]
 
 [project.urls]
 Homepage = "https://rpy2.github.io/rpy2-arrow/"
diff --git a/rpy2_arrow/__init__.py b/rpy2_arrow/__init__.py
index 9123cf0..b794fd4 100644
--- a/rpy2_arrow/__init__.py
+++ b/rpy2_arrow/__init__.py
@@ -1 +1 @@
-__version__ = '0.0.8'
+__version__ = '0.1.0'
diff --git a/rpy2_arrow/polars.py b/rpy2_arrow/polars.py
new file mode 100644
index 0000000..3d59745
--- /dev/null
+++ b/rpy2_arrow/polars.py
@@ -0,0 +1,109 @@
+import polars
+import pyarrow
+import rpy2.rinterface
+import rpy2.robjects
+import rpy2.robjects.conversion as conversion
+import rpy2.robjects.packages
+import rpy2_arrow.arrow as rpy2arrow
+import types
+import typing
+
+
+# Module-level caches for the R packages "polars" and "arrow". They stay
+# None until the matching ensure_r_*() function imports the package.
+rpack_polars: typing.Optional[types.ModuleType] = None
+rpack_arrow: typing.Optional[types.ModuleType] = None
+
+
+def ensure_r_polars():
+    """Import the R package "polars" on first use and return it."""
+    global rpack_polars
+    if rpack_polars is None:
+        rpack_polars = rpy2.robjects.packages.importr('polars',
+                                                      on_conflict='warn')
+    return rpack_polars
+
+
+def ensure_r_arrow():
+    """Import the R package "arrow" on first use and return it."""
+    global rpack_arrow
+    if rpack_arrow is None:
+        rpack_arrow = rpy2.robjects.packages.importr('arrow',
+                                                     on_conflict='warn')
+    return rpack_arrow
+
+
+def pypolars_to_rarrow(dataf: polars.DataFrame) -> rpy2.robjects.Environment:
+    """Convert a Python polars DataFrame to an R arrow table."""
+    py_arrow_table = dataf.to_arrow()
+    return rpy2arrow.pyarrow_table_to_r_table(py_arrow_table)
+
+
+def rarrow_to_pypolars(dataf: rpy2.robjects.Environment) -> polars.DataFrame:
+    """Convert an R arrow table to a Python polars DataFrame."""
+    py_arrow_table = rpy2arrow.rarrow_to_py_table(dataf)
+    return polars.from_arrow(py_arrow_table)
+
+
+def pypolars_to_rpolars(dataf: polars.DataFrame) -> rpy2.robjects.Environment:
+    """Convert a Python polars DataFrame to an R polars DataFrame.
+
+    The data go through an R arrow table, which is handed to the
+    R function ``pl$from_arrow``.
+    """
+    # NOTE(review): the return annotation says Environment, while
+    # rpolar_to_pypolars() takes R polars data frames as SexpExtPtr -
+    # confirm which type this call actually returns.
+    r_arrow_table = pypolars_to_rarrow(dataf)
+    rpack_polars = ensure_r_polars()
+    # TODO: There appears to be odd short-circuiting that requires toggling
+    # additional conversion off.
+    with rpy2.robjects.default_converter.context():
+        return rpack_polars.pl['from_arrow'](r_arrow_table)
+
+
+# TODO: rpy2.rinterface.SexpExtPtr should have an robjects-level wrapper?
+def rpolar_to_pypolars(dataf: rpy2.rinterface.SexpExtPtr) -> polars.DataFrame:
+    """Convert an R polars DataFrame to a Python polars DataFrame.
+
+    The data go through an R arrow table (R function ``as_arrow_table``).
+    """
+    rpack_arrow = ensure_r_arrow()
+    # The return value is not needed here; presumably the R package polars
+    # must be loaded for as_arrow_table() to handle `dataf` - TODO confirm.
+    ensure_r_polars()
+    r_arrow_table = rpack_arrow.as_arrow_table(dataf)
+    return rarrow_to_pypolars(r_arrow_table)
+
+
+# Converter for polars objects, using rpy2's default converter as template.
+converter: conversion.Converter = conversion.Converter(
+    'default polars conversion',
+    template=rpy2.robjects.default_converter
+)
+
+# Python -> R: polars data frames are passed to pypolars_to_rpolars().
+converter.py2rpy.register(polars.dataframe.frame.DataFrame, pypolars_to_rpolars)
+
+# R -> Python: per-name rules for R environments and external pointers;
+# the keys 'Table' and 'DataFrame' below are presumably R class names.
+converter._rpy2py_nc_map.update(
+    {
+        rpy2.rinterface.SexpEnvironment:
+        conversion.NameClassMap(rpy2.robjects.Environment),
+        rpy2.rinterface.SexpExtPtr:
+        conversion.NameClassMap(rpy2.robjects.ExternalPointer)
+    }
+)
+
+converter._rpy2py_nc_map[rpy2.rinterface.SexpEnvironment].update(
+    {
+        'Table': rarrow_to_pypolars,
+    }
+)
+
+converter._rpy2py_nc_map[rpy2.rinterface.SexpExtPtr].update(
+    {
+        'DataFrame': rpolar_to_pypolars,
+    }
+)
diff --git a/rpy2_arrow/tests_polars.py b/rpy2_arrow/tests_polars.py
new file mode 100644
index 0000000..8136c3d
--- /dev/null
+++ b/rpy2_arrow/tests_polars.py
@@ -0,0 +1,59 @@
+try:
+    import polars
+    import rpy2_arrow.polars as rpy2polars
+    HAS_POLARS = True
+except ImportError:
+    HAS_POLARS = False
+
+import pytest
+import pyarrow
+import rpy2.rinterface
+import rpy2.robjects
+import rpy2_arrow.arrow as rpy2arrow
+
+
+@pytest.mark.skipif(not HAS_POLARS,
+                    reason='The Python package "polars" is required.')
+class TestPolars:
+    """Tests for the conversions in rpy2_arrow.polars."""
+
+    def test_ensure_r_polars(self):
+        rpack_polars = rpy2polars.ensure_r_polars()
+        assert rpack_polars.__rname__ == 'polars'
+
+    def test_ensure_r_arrow(self):
+        rpack_arrow = rpy2polars.ensure_r_arrow()
+        assert rpack_arrow.__rname__ == 'arrow'
+
+    def test_pypolars_to_rarrow(self):
+        podataf = polars.DataFrame({'a': [1, 2], 'b': [3, 4]})
+        rartable = rpy2polars.pypolars_to_rarrow(podataf)
+        assert 'Table' in tuple(rartable.rclass)
+
+    def test_rarrow_to_pypolars(self):
+        artable = pyarrow.Table.from_pylist([{'a': 1, 'b': 3},
+                                             {'a': 2, 'b': 4}])
+        rartable = rpy2arrow.pyarrow_table_to_r_table(artable)
+        podataf = rpy2polars.rarrow_to_pypolars(rartable)
+        assert isinstance(podataf, polars.DataFrame)
+        assert podataf.shape == (2, 2)
+
+    def test_rpolar_to_pypolars(self):
+        rpack_polars = rpy2polars.ensure_r_polars()
+        rpodataf = rpack_polars.pl['DataFrame'](a=1, b=2)
+        podataf = rpy2polars.rpolar_to_pypolars(rpodataf)
+        assert isinstance(podataf, polars.DataFrame)
+        assert podataf.shape == (1, 2)
+
+    def test_converter_py2rpy(self):
+        podataf = polars.DataFrame({'a': [1, 2], 'b': [3, 4]})
+        with rpy2polars.converter.context():
+            rpy2.robjects.globalenv['podataf'] = podataf
+        assert tuple(rpy2.robjects.globalenv['podataf'].rclass) == ('DataFrame',)
+
+    def test_converter_rpy2py(self):
+        rpy2.robjects.r('require(polars); podataf <- pl$DataFrame(a = 1, b = 2)')
+        podataf_ri = rpy2.rinterface.globalenv['podataf']
+        with rpy2polars.converter.context() as ctx:
+            podataf = ctx.rpy2py(podataf_ri)
+        assert isinstance(podataf, polars.dataframe.frame.DataFrame)