Skip to content

Commit

Permalink
Conversion rules for polars.
Browse files Browse the repository at this point in the history
  • Loading branch information
lgautier committed Dec 28, 2023
1 parent a5762cf commit 0cf0075
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 7 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ jobs:
Sys.setenv("NOT_CRAN" = "true")
install.packages("arrow")
shell: Rscript {0}
- name: Install R dependency "polars"
run: |
if (Sys.info()["sysname"] == "Darwin") {
install.packages("polars", repos = "https://rpolars.r-universe.dev")
} else {
install.packages("polars", repos = "https://rpolars.r-universe.dev/bin/linux/jammy/4.3")
}
shell: Rscript {0}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand All @@ -63,7 +71,7 @@ jobs:
python -m build
- name: Install package
run: |
pip install $(ls -t1 dist/*.whl | tail -1)
pip install $(ls -t1 dist/*.whl | tail -1)[all]
- name: Test with pytest
run: |
export LD_LIBRARY_PATH=$(python -m rpy2.situation LD_LIBRARY_PATH):${LD_LIBRARY_PATH}
Expand Down
9 changes: 8 additions & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,14 @@ available to R was measured to be 200 times faster with the use of Arrow
.. code-block:: r
install.packages("arrow")
Polars
======

If you are using :mod:`polars`, see section :ref:`polars` for how to use
the conversion rules provided for it.


Indices and tables
==================
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ build-backend = "setuptools.build_meta"
name = "rpy2-arrow"
description = "Bridge Arrow between Python and R when using rpy2"
readme = "README.md"
requires-python = ">=3.7"
requires-python = ">=3.8"
license = { text = "MIT" }
authors = [{ name = "Laurent Gautier", email = "[email protected]" }]
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
Expand All @@ -26,14 +25,15 @@ classifiers = [
]
dependencies = [
"pyarrow",
"rpy2 >= 3.4"
"rpy2 >= 3.5.15"
]
dynamic = ["version"]

[project.optional-dependencies]
polars = ["polars"]
test = ["pytest", "pandas"]
R6 = ["rpy2-R6"]
all = ["pytest", "pandas", "rpy2-R6"]
all = ["pytest", "pandas", "rpy2-R6", "polars"]

[project.urls]
Homepage = "https://rpy2.github.io/rpy2-arrow/"
Expand Down
2 changes: 1 addition & 1 deletion rpy2_arrow/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.8'
__version__ = '0.1.0'
87 changes: 87 additions & 0 deletions rpy2_arrow/polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import polars
import pyarrow
import rpy2.rinterface
import rpy2.robjects
import rpy2.robjects.conversion as conversion
import rpy2_arrow.arrow as rpy2arrow
import types
import typing


# Module-level caches for the R packages "polars" and "arrow".
# Both start as None; ensure_r_polars() and ensure_r_arrow() fill them
# in the first time they are called, so the R packages are imported
# lazily rather than when this module is loaded.
rpack_polars: typing.Optional[types.ModuleType] = None
rpack_arrow: typing.Optional[types.ModuleType] = None


def ensure_r_polars():
    """Return the R package "polars", importing it on first use.

    The package object is obtained with ``importr('polars',
    on_conflict='warn')`` and cached in the module-level global
    ``rpack_polars``; later calls return the cached object.
    """
    global rpack_polars
    if rpack_polars is not None:
        return rpack_polars
    rpack_polars = rpy2.robjects.packages.importr(
        'polars', on_conflict='warn'
    )
    return rpack_polars


def ensure_r_arrow():
    """Return the R package "arrow", importing it on first use.

    The package object is obtained with ``importr('arrow',
    on_conflict='warn')`` and cached in the module-level global
    ``rpack_arrow``; later calls return the cached object.
    """
    global rpack_arrow
    if rpack_arrow is not None:
        return rpack_arrow
    rpack_arrow = rpy2.robjects.packages.importr(
        'arrow', on_conflict='warn'
    )
    return rpack_arrow


def pypolars_to_rarrow(dataf: polars.DataFrame) -> rpy2.robjects.Environment:
    """Convert a Python polars DataFrame to an R Arrow table.

    The data frame is exported with ``dataf.to_arrow()`` and the result
    is passed to ``rpy2_arrow.arrow.pyarrow_table_to_r_table``.
    """
    py_arrow_table = dataf.to_arrow()
    r_arrow_table = rpy2arrow.pyarrow_table_to_r_table(py_arrow_table)
    return r_arrow_table


def rarrow_to_pypolars(dataf: rpy2.robjects.Environment) -> polars.DataFrame:
    """Convert an R Arrow table to a Python polars DataFrame.

    The R object is turned into a Python Arrow table with
    ``rpy2_arrow.arrow.rarrow_to_py_table`` and the result is passed to
    ``polars.from_arrow``.
    """
    py_arrow_table = rpy2arrow.rarrow_to_py_table(dataf)
    py_polars_frame = polars.from_arrow(py_arrow_table)
    return py_polars_frame


def pypolars_to_rpolars(dataf: polars.DataFrame) -> rpy2.robjects.Environment:
    """Convert a Python polars DataFrame to an R polars DataFrame.

    The conversion goes through Arrow: the data frame is first turned
    into an R Arrow table with :func:`pypolars_to_rarrow`, and R polars'
    ``pl$from_arrow`` is then called on that table.

    NOTE(review): the return annotation says ``Environment``, while
    :func:`rpolar_to_pypolars` and the converter registration treat R
    polars data frames as external pointers -- confirm the actual type.
    """
    arrow_in_r = pypolars_to_rarrow(dataf)
    r_polars = ensure_r_polars()
    # TODO: There appears to be an odd short-circuiting that requires
    # toggling additional conversion off, hence the lookup and the call
    # are both done under rpy2's default converter.
    with rpy2.robjects.default_converter.context():
        from_arrow = r_polars.pl['from_arrow']
        return from_arrow(arrow_in_r)


# TODO: rpy2.rinterface.SexpExtPtr should have an robjects-level wrapper?
def rpolar_to_pypolars(dataf: rpy2.rinterface.SexpExtPtr) -> polars.DataFrame:
    """Convert an R polars DataFrame to a Python polars DataFrame.

    The conversion goes through Arrow: R arrow's ``as_arrow_table`` is
    called on the R object, and the resulting R Arrow table is converted
    with :func:`rarrow_to_pypolars`.
    """
    r_arrow = ensure_r_arrow()
    # Called for its side effect only (the return value is unused);
    # presumably R polars must be loaded for `as_arrow_table` to handle
    # its data frames -- TODO confirm.
    ensure_r_polars()
    arrow_in_r = r_arrow.as_arrow_table(dataf)
    return rarrow_to_pypolars(arrow_in_r)


# Converter for polars data frames, built with rpy2's default converter
# as its template.
converter: conversion.Converter = conversion.Converter(
    'default polars conversion',
    template=rpy2.robjects.default_converter
)

# Python -> R: Python polars data frames are handled by
# pypolars_to_rpolars().
converter.py2rpy.register(polars.dataframe.frame.DataFrame, pypolars_to_rpolars)

# R -> Python: install fresh name/class maps for R environments and R
# external pointers. The class passed to NameClassMap is presumably the
# fallback used when no name matches -- TODO confirm against rpy2.
# NOTE(review): `_rpy2py_nc_map` is a private attribute of the converter.
converter._rpy2py_nc_map.update(
    {
        rpy2.rinterface.SexpEnvironment:
        conversion.NameClassMap(rpy2.robjects.Environment),
        rpy2.rinterface.SexpExtPtr:
        conversion.NameClassMap(rpy2.robjects.ExternalPointer)
    }
)

# Environments named 'Table' (R Arrow tables, judging by the target
# function) are converted with rarrow_to_pypolars().
converter._rpy2py_nc_map[rpy2.rinterface.SexpEnvironment].update(
    {
        'Table': rarrow_to_pypolars,
    }
)

# External pointers named 'DataFrame' (R polars data frames, judging by
# the target function) are converted with rpolar_to_pypolars().
converter._rpy2py_nc_map[rpy2.rinterface.SexpExtPtr].update(
    {
        'DataFrame': rpolar_to_pypolars,
    }
)
56 changes: 56 additions & 0 deletions rpy2_arrow/tests_polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# The Python package polars (and therefore the module under test) is
# optional: record whether both could be imported so that the tests can
# be skipped when they are missing.
HAS_POLARS = True
try:
    import polars
    import rpy2_arrow.polars as rpy2polars
except ImportError:
    HAS_POLARS = False

import pytest
import pyarrow
import rpy2.rinterface
import rpy2.robjects
import rpy2_arrow.arrow as rpy2arrow


@pytest.mark.skipif(not HAS_POLARS,
                    reason='The Python package "polars" is required.')
class TestPolars:
    """Tests for the polars conversion helpers in rpy2_arrow.polars."""

    def test_ensure_r_polars(self):
        """The R package handle returned is named 'polars'."""
        assert rpy2polars.ensure_r_polars().__name__ == 'polars'

    def test_ensure_r_arrow(self):
        """The R package handle returned has the R name 'arrow'."""
        assert rpy2polars.ensure_r_arrow().__rname__ == 'arrow'

    def test_pypolars_to_rarrow(self):
        """Smoke test: the conversion runs without raising."""
        py_frame = polars.DataFrame({'a': [1, 2], 'b': [3, 4]})
        rpy2polars.pypolars_to_rarrow(py_frame)

    def test_rarrow_to_pypolars(self):
        """Smoke test: the conversion runs without raising."""
        rows = [{'a': 1, 'b': 3}, {'a': 2, 'b': 4}]
        py_table = pyarrow.Table.from_pylist(rows)
        r_table = rpy2arrow.pyarrow_table_to_r_table(py_table)
        rpy2polars.rarrow_to_pypolars(r_table)

    def test_rpolar_to_pypolars(self):
        """Smoke test: the conversion runs without raising."""
        r_polars = rpy2polars.ensure_r_polars()
        r_frame = r_polars.pl['DataFrame'](a=1, b=2)
        rpy2polars.rpolar_to_pypolars(r_frame)

    def test_converter_py2rpy(self):
        """Assigning a polars DataFrame into R yields an R 'DataFrame'."""
        py_frame = polars.DataFrame({'a': [1, 2], 'b': [3, 4]})
        with rpy2polars.converter.context():
            rpy2.robjects.globalenv['podataf'] = py_frame
        # NOTE(review): the object is read back outside the converter
        # context on the assumption that, inside it, the R object would
        # be converted back to a Python polars DataFrame (which has no
        # `rclass`) -- confirm against the upstream file.
        r_classes = tuple(rpy2.robjects.globalenv['podataf'].rclass)
        assert r_classes == ('DataFrame',)

    def test_converter_rpy2py(self):
        """An R polars DataFrame converts to a Python polars DataFrame."""
        rpy2.robjects.r('require(polars); podataf <- pl$DataFrame(a = 1, b = 2)')
        r_frame = rpy2.rinterface.globalenv['podataf']
        with rpy2polars.converter.context() as ctx:
            py_frame = ctx.rpy2py(r_frame)
            assert isinstance(py_frame, polars.dataframe.frame.DataFrame)

0 comments on commit 0cf0075

Please sign in to comment.