Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Build the Sphinx documentation and publish the rendered HTML to GitHub Pages.
name: Render docs

on:
  push:
    branches: ["main"]
  # Also allow the workflow to be started manually.
  workflow_dispatch:

# Token permissions granted to the jobs in this workflow.
permissions:
  contents: read
  pages: write
  id-token: write

# All runs share the "pages" group; a run that is already in progress is
# not cancelled when a newer one is queued.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v5
      - name: Setup Pages
        uses: actions/configure-pages@v5
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version is always read as a string, never a number.
          python-version: "3"
          cache: pip
          # Key the pip cache on every requirements file installed below,
          # so a change to either one produces a fresh cache.
          cache-dependency-path: |
            requirements.txt
            docs/requirements.txt
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r requirements.txt
          python -m pip install .
          python -m pip install -r docs/requirements.txt
      - name: Render the documentation
        # Folded scalar: the lines below are joined into one command line,
        # so they must all stay at the same indentation level.
        run: >
          sphinx-build
          -M html ./docs/source ./docs/build
          --jobs=auto
          -T
          --keep-going
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          # Output directory of the "-M html" build above.
          path: docs/build/html/

  deploy:
    environment:
      name: github-pages
      # Published URL, taken from the output of the "deployment" step below.
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
20 changes: 20 additions & 0 deletions docs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Sphinx documentation Makefile: a thin wrapper that forwards every target
# to sphinx-build's "make mode" (-M).
#

# These may be overridden on the command line; SPHINXOPTS and SPHINXBUILD
# are additionally honoured when set in the environment.
SPHINXOPTS  ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR    = source
BUILDDIR     = build

# Arguments common to every sphinx-build invocation below. Recursively
# expanded on purpose, so $(O) -- a shortcut for $(SPHINXOPTS) -- is read
# at the time a recipe runs.
sphinx_args = "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Keep this the first target so that a bare "make" behaves like "make help".
help:
	@$(SPHINXBUILD) -M help $(sphinx_args)

# "Makefile" is declared phony so the catch-all rule below is never applied
# to this file itself.
.PHONY: help Makefile

# Catch-all: any target not defined above is passed straight to Sphinx as
# the builder name for "make mode".
%: Makefile
	@$(SPHINXBUILD) -M $@ $(sphinx_args)
35 changes: 35 additions & 0 deletions docs/make.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
@ECHO OFF

REM Work from the directory that contains this script so the relative
REM SOURCEDIR and BUILDDIR paths below resolve correctly.
pushd %~dp0

REM Command file for Sphinx documentation

REM SPHINXBUILD may be preset in the environment; otherwise use the default.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

REM Probe for the executable, discarding its output. An errorlevel of 9009
REM or above is treated as "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	REM Undo the pushd above so the caller's working directory is restored
	REM on this early-exit path as well.
	popd
	exit /b 1
)

REM With no argument, show the Sphinx help instead of building.
if "%1" == "" goto help

REM Forward the requested target to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Sphinx
5 changes: 5 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
API
===

.. automodule:: dataplan
:members: DataPlan
38 changes: 38 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'DataPlan'
# Module-level name read by Sphinx; it shadows the Python builtin of the
# same name within this file only.
copyright = '2025, Sidney Mau'
author = 'Sidney Mau'
# No release string is set yet.
# release = '0.1'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# autodoc is needed for the ``automodule`` directive used in api.rst.
extensions = [
'sphinx.ext.autodoc',
# 'sphinx.ext.autosummary',
]

# NOTE(review): confirm that a ``_templates`` directory exists next to this
# file; it is not visible in this change.
templates_path = ['_templates']
exclude_patterns = []



# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'alabaster'
# NOTE(review): confirm that a ``_static`` directory exists next to this
# file; it is not visible in this change.
html_static_path = ['_static']
# Options handed to the theme selected above.
# NOTE(review): the effect of ``"github_type": None`` is not evident from
# this file -- confirm against the alabaster documentation.
html_theme_options = {
"description": "DataPlan helps build and run ExecPlans",
"github_user": "sidneymau",
"github_repo": "dataplan",
"github_type": None,
"fixed_sidebar": True,
}
24 changes: 24 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
.. dataplan documentation master file, created by
sphinx-quickstart on Mon Oct 13 14:01:26 2025.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.

DataPlan Documentation
======================

DataPlan provides a user-friendly API to pyarrow's Acero_ module for performing streamed queries over data.

.. _Acero: https://arrow.apache.org/docs/python/api/acero.html

.. toctree::
:maxdepth: 2
:caption: Contents:

Contents
--------

.. toctree::

installation
usage
api
8 changes: 8 additions & 0 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Installation
============

This package can currently be installed via pip from git:

.. code-block:: console

$ pip install git+https://github.com/sidneymau/dataplan.git
8 changes: 8 additions & 0 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Usage
=====

.. code-block:: pycon

>>> import dataplan as dp
>>> dataplan = dp.DataPlan.from_dataset(...)
>>> res = dataplan.to_table()
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
numpy
pyarrow
38 changes: 31 additions & 7 deletions src/dataplan/dataplan.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class DataPlan:
def __init__(self, execplan, source=None):
"""
Construct a DataPlan from an exec plan

:param execplan: ExecPlan to process.
:param source: Sources of data to be consumed by execplan.
:return: The DataPlan.
"""
self._execplan = execplan
if isinstance(source, list):
Expand Down Expand Up @@ -59,10 +63,21 @@ def execplan(self):
"""
return self._execplan

@property
def source(self):
"""
The source over which the plan will operate.
"""
return self._source

@classmethod
def from_dict(cls, mapping, **kwargs):
"""
Construct a DataPlan from a dictionary.

:param mapping: Dictionary from which to construct Table source
:param kwargs: Optional keyword arguments to ``pa.Table.from_pydict``
:return: The DataPlan.
"""
table = pa.Table.from_pydict(mapping, **kwargs)
_plan = declare_table(table)
Expand All @@ -72,6 +87,10 @@ def from_dict(cls, mapping, **kwargs):
def from_list(cls, mapping, **kwargs):
"""
Construct a DataPlan from a list.

:param mapping: List of dictionaries from which to construct Table source
:param kwargs: Optional keyword arguments to ``pa.Table.from_pylist``
:return: The DataPlan.
"""
table = pa.Table.from_pylist(mapping, **kwargs)
_plan = declare_table(table)
Expand All @@ -81,6 +100,10 @@ def from_list(cls, mapping, **kwargs):
def from_dataframe(cls, df, **kwargs):
"""
Construct a DataPlan from a pandas DataFrame.

:param df: pandas dataframe from which to construct table source
:param kwargs: optional keyword arguments to ``pa.Table.from_pandas``
:return: the DataPlan.
"""
table = pa.Table.from_pandas(df, **kwargs)
_plan = declare_table(table)
Expand All @@ -90,6 +113,9 @@ def from_dataframe(cls, df, **kwargs):
def from_table(cls, table):
"""
Construct a DataPlan from a pyarrow Table.

:param table: pyarrow Table to be declared as source
:return: the DataPlan.
"""
_plan = declare_table(table)
return cls(_plan, source=table)
Expand All @@ -98,17 +124,15 @@ def from_table(cls, table):
def from_dataset(cls, dataset, columns=None, filter=None):
"""
Construct a DataPlan from a pyarrow Dataset.

:param dataset: pyarrow Dataset to be declared as source
:param columns: Optional projection to apply
:param filter: Optional predicate to apply
:return: the DataPlan.
"""
_plan = declare_dataset(dataset, columns=columns, filter=filter)
return cls(_plan, source=dataset)

@property
def source(self):
"""
The source over which the plan will operate.
"""
return self._source

def project(self, *args, **kwargs):
"""
Apply a projection (e.g., select columns, rename columns, construct new
Expand Down