diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..56b9b57 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,59 @@ +name: Render docs + +on: + push: + branches: ["main"] + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: 3 + cache: pip + cache-dependency-path: docs/requirements.txt + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + python -m pip install . + python -m pip install -r docs/requirements.txt + - name: Render the documentation + run: > + sphinx-build + -M html ./docs/source ./docs/build + --jobs=auto + -T + --keep-going + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/build/html/ + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..2806c16 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +Sphinx diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..824fb97 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,5 @@ +API +=== + +.. automodule:: dataplan + :members: DataPlan diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..c9f9fb5 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,38 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'DataPlan' +copyright = '2025, Sidney Mau' +author = 'Sidney Mau' +# release = '0.1' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.autodoc', + # 'sphinx.ext.autosummary', +] + +templates_path = ['_templates'] +exclude_patterns = [] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'alabaster' +html_static_path = ['_static'] +html_theme_options = { + "description": "DataPlan helps build and run ExecPlans", + "github_user": "sidneymau", + "github_repo": "dataplan", + "github_type": None, + "fixed_sidebar": True, +} diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..a17bef3 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,24 @@ +.. dataplan documentation master file, created by + sphinx-quickstart on Mon Oct 13 14:01:26 2025. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +DataPlan Documentation +====================== + +DataPlan provides a user-friendly API to pyarrow's Acero_ module for performing streamed queries over data. + +.. _Acero: https://arrow.apache.org/docs/python/api/acero.html + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + +Contents +-------- + +.. 
toctree:: + + installation + usage + api diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000..81fe53c --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,8 @@ +Installation +============ + +This package can currently be installed via pip from git: + +.. code-block:: console + + $ pip install git+https://github.com/sidneymau/dataplan.git diff --git a/docs/source/usage.rst b/docs/source/usage.rst new file mode 100644 index 0000000..24e1f3c --- /dev/null +++ b/docs/source/usage.rst @@ -0,0 +1,8 @@ +Usage +===== + +.. code-block:: console + + >>> import dataplan as dp + >>> dataplan = dp.DataPlan.from_dataset(...) + >>> res = dataplan.to_table() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..32df5a7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +numpy +pyarrow diff --git a/src/dataplan/dataplan.py b/src/dataplan/dataplan.py index 4e328ef..ea8bdd1 100644 --- a/src/dataplan/dataplan.py +++ b/src/dataplan/dataplan.py @@ -28,6 +28,10 @@ class DataPlan: def __init__(self, execplan, source=None): """ Construct a DataPlan from an exec plan + + :param execplan: ExecPlan to process. + :param source: Sources of data to be consumed by execplan. + :return: The DataPlan. """ self._execplan = execplan if isinstance(source, list): @@ -59,10 +63,21 @@ def execplan(self): """ return self._execplan + @property + def source(self): + """ + The source over which the plan will operate. + """ + return self._source + @classmethod def from_dict(cls, mapping, **kwargs): """ Construct a DataPlan from a dictionary. + + :param mapping: Dictionary from which to construct Table source + :param kwargs: Optional keyword arguments to ``pa.Table.from_pydict`` + :return: The DataPlan. """ table = pa.Table.from_pydict(mapping, **kwargs) _plan = declare_table(table) @@ -72,6 +87,10 @@ def from_dict(cls, mapping, **kwargs): def from_list(cls, mapping, **kwargs): """ Construct a DataPlan from a list. 
+ + :param mapping: List of dictionaries from which to construct Table source + :param kwargs: Optional keyword arguments to ``pa.Table.from_pylist`` + :return: The DataPlan. """ table = pa.Table.from_pylist(mapping, **kwargs) _plan = declare_table(table) @@ -81,6 +100,10 @@ def from_list(cls, mapping, **kwargs): def from_dataframe(cls, df, **kwargs): """ Construct a DataPlan from a pandas DataFrame. + + :param df: pandas DataFrame from which to construct Table source + :param kwargs: Optional keyword arguments to ``pa.Table.from_pandas`` + :return: The DataPlan. """ table = pa.Table.from_pandas(df, **kwargs) _plan = declare_table(table) @@ -90,6 +113,9 @@ def from_dataframe(cls, df, **kwargs): def from_table(cls, table): """ Construct a DataPlan from a pyarrow Table. + + :param table: pyarrow Table to be declared as source + :return: The DataPlan. """ _plan = declare_table(table) return cls(_plan, source=table) @@ -98,17 +124,15 @@ def from_table(cls, table): def from_dataset(cls, dataset, columns=None, filter=None): """ Construct a DataPlan from a pyarrow Dataset. + + :param dataset: pyarrow Dataset to be declared as source + :param columns: Optional projection to apply + :param filter: Optional predicate to apply + :return: The DataPlan. """ _plan = declare_dataset(dataset, columns=columns, filter=filter) return cls(_plan, source=dataset) - @property - def source(self): - """ - The source over which the plan will operate. - """ - return self._source - def project(self, *args, **kwargs): """ Apply a projection (e.g., select columns, rename columns, construct new