diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..a00c325 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +ignore = E203, E266, E501, W503, F403, F401 +max-line-length = 87 +max-complexity = 18 +select = B,C,E,F,W,T4,B9 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..18112e6 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,56 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: jpetrucciani/black-check@master + + pytest: + strategy: + matrix: + os: [macos-latest, windows-latest, ubuntu-latest] + python-version: ['3.8', '3.9', '3.10'] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: 1.1.10 + + - name: Set Poetry config + run: | + poetry config virtualenvs.create true + poetry config virtualenvs.in-project false + poetry config virtualenvs.path ~/.virtualenvs + + - name: Cache Poetry virtualenv + uses: actions/cache@v1 + id: cache + with: + path: ~/.virtualenvs + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + poetry-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install Dependencies + run: poetry install + if: steps.cache.outputs.cache-hit != 'true' + + - name: Test with pytest + run: poetry run pytest diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..901ae63 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,75 @@ +name: website + +# Build the documentation whenever there are new commits on main +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +# Security: restrict permissions for CI jobs. +permissions: + contents: read + +jobs: + # Build the documentation and upload the static HTML files as an artifact. + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' # assumption: this job defines no matrix, so a single supported version is pinned + + - name: Install Poetry + uses: abatilo/actions-poetry@v2.0.0 + with: + poetry-version: 1.1.10 + + - name: Set Poetry config + run: | + poetry config virtualenvs.create true + poetry config virtualenvs.in-project false + poetry config virtualenvs.path ~/.virtualenvs + + - name: Cache Poetry virtualenv + uses: actions/cache@v1 + id: cache + with: + path: ~/.virtualenvs + key: poetry-docs-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} + restore-keys: | + poetry-docs-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install Dependencies + run: poetry install + if: steps.cache.outputs.cache-hit != 'true' + + - name: Build documentation + run: poetry run pdoc --docformat google src/doubt -o docs + + - name: Compress documentation + run: tar --directory docs/ -hcf artifact.tar . + + - name: Upload documentation + uses: actions/upload-artifact@v3 + with: + name: github-pages + path: ./artifact.tar + + # Deploy the artifact to GitHub pages. + # This is a separate job so that only actions/deploy-pages has the necessary permissions.
+ deploy: + needs: build + runs-on: ubuntu-latest + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - id: deployment + uses: actions/deploy-pages@v1 diff --git a/.gitignore b/.gitignore index a33a78c..8df7382 100644 --- a/.gitignore +++ b/.gitignore @@ -1,36 +1,99 @@ -# Virtual environment -**/.venv - -# Environment variables -**/.env - -# VIm swap files -**/*.swp - -# Vim plugins config -**/.vim - -# Python compiled files -**/*.pyc - -# Caches and logs -**/.pytest_cache -**/__pycache__ -**/.mypy_cache -**.dataset_cache -**/checkpoints -**/lightning_logs -**/.ipynb_checkpoints - -# Build artifacts -**/build -**/dist -**/*egg-info - -# Jupyter notebooks for testing -**/*.ipynb -**/ipynb_checkpoints - -# Ignore rst files except index.rst -!**/index.rst -**/*.rst +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +.venv +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# DotEnv configuration +.env + +# Database +*.db +*.rdb + +# Pycharm +.idea + +# VS Code +.vscode/ + +# Spyder +.spyproject/ + +# Jupyter NB Checkpoints +.ipynb_checkpoints/ + +# Mac OS-specific storage files +.DS_Store + +# vim +*.swp +*.swo + +# Mypy cache +.mypy_cache/ + +# pytest cache +.pytest_cache/ + +# Checkpoints +checkpoint-* + +# Documentation +docs/doubt/ +docs/index.html +docs/doubt.html +docs/search.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9b5cd3c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: +- repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: + - id: black +- repo: https://github.com/timothycrosley/isort + rev: 5.7.0 + hooks: + - id: isort +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 +- repo: https://github.com/kynan/nbstripout + rev: 0.5.0 + hooks: + - id: nbstripout +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.782 + hooks: + - id: mypy + args: [--ignore-missing-imports] diff --git a/bump_version.py b/bump_version.py deleted file mode 100644 index 5effe2e..0000000 --- a/bump_version.py +++ /dev/null @@ -1,127 +0,0 @@ -'''Script that fetches and bumps versions''' - -from pathlib import Path -import re -from typing import Union, Tuple -import subprocess -import datetime as dt - - -def get_current_version(return_tuple: bool = False) \ - -> Union[str, Tuple[int, int, int]]: - '''Fetch the current version without import __init__.py. - - Args: - return_tuple (bool, optional): - Whether to return a tuple of three numbers, corresponding to the - major, minor and patch version. Defaults to False. 
- - Returns: - str or tuple of three ints: The current version - ''' - init_file = Path('doubt') / '__init__.py' - init = init_file.read_text() - version_regex = r"(?<=__version__ = ')[0-9]+\.[0-9]+\.[0-9]+(?=')" - version = re.search(version_regex, init)[0] - if return_tuple: - major, minor, patch = [int(v) for v in version.split('.')] - return (major, minor, patch) - else: - return version - - -def set_new_version(major: int, minor: int, patch: int): - '''Sets a new version. - - Args: - major (int): - The major version. This only changes when the code stops being - backwards compatible. - minor (int): - The minor version. This changes when a backwards compatible change - happened. - patch (init): - The patch version. This changes when the only new changes are bug - fixes. - ''' - version = f'{major}.{minor}.{patch}' - - # Get current changelog and ensure that it has an [Unreleased] entry - changelog_path = Path('CHANGELOG.md') - changelog = changelog_path.read_text() - if '[Unreleased]' not in changelog: - raise RuntimeError('No [Unreleased] entry in CHANGELOG.md.') - - # Add version to CHANGELOG - today = dt.date.today().strftime('%Y-%m-%d') - new_changelog = re.sub(r'\[Unreleased\].*', f'[v{version}] - {today}', - changelog) - changelog_path.write_text(new_changelog) - - # Get current __init__.py content - init_file = Path('doubt') / '__init__.py' - init = init_file.read_text() - - # Replace __version__ in __init__.py with the new one - version_regex = r"(?<=__version__ = ')[0-9]+\.[0-9]+\.[0-9]+(?=')" - new_init = re.sub(version_regex, version, init) - with init_file.open('w') as f: - f.write(new_init) - - # Get current Sphinx conf.py content - sphinx_conf_file = Path('docs') / 'source' / 'conf.py' - sphinx_conf = sphinx_conf_file.read_text() - - # Replace `release` in conf.py with the new one - version_regex = r"(?<=release = ')v[0-9]+\.[0-9]+\.[0-9]+(?=')" - new_sphinx_conf = re.sub(version_regex, version, sphinx_conf) - with sphinx_conf_file.open('w') as f: - f.write(new_sphinx_conf) - - # Add to version control - subprocess.run(['git', 'add', 'doubt/__init__.py']) - subprocess.run(['git', 'add', 'CHANGELOG.md']) - subprocess.run(['git', 'add', 'docs/source/conf.py']) - subprocess.run(['git', 'commit', '-m', f'feat: v{version}']) - subprocess.run(['git', 'tag', f'v{version}']) - - -def bump_major(): - '''Add one to the major version''' - major, minor, patch = get_current_version(return_tuple=True) - set_new_version(major + 1, 0, 0) - - -def bump_minor(): - '''Add one to the minor version''' - major, minor, patch = get_current_version(return_tuple=True) - set_new_version(major, minor + 1, 0) - - -def bump_patch(): - '''Add one to the patch version''' - major, minor, patch = get_current_version(return_tuple=True) - set_new_version(major, minor, patch + 1) - - -if __name__ == '__main__': - from argparse import ArgumentParser - - parser = ArgumentParser() - parser.add_argument('--major', const=True, nargs='?', default=False, - help='Bump the major version by one.') - parser.add_argument('--minor', const=True, nargs='?', default=False, - help='Bump the minor version by one.') - parser.add_argument('--patch', const=True, nargs='?', default=False, - help='Bump the patch version by one.') - args = parser.parse_args() - - if args.major + args.minor + args.patch != 1: - raise RuntimeError('Exactly one of --major, --minor and --patch must ' - 'be selected.') - elif args.major: - bump_major() - elif args.minor: - bump_minor() - elif args.patch: - bump_patch() diff --git a/docs/.gitkeep 
b/docs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 6247f7e..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index d0227cb..0000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,60 +0,0 @@ -import sphinx_rtd_theme - -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -sys.path.insert(0, os.path.abspath('../..')) - - -# -- Project information ----------------------------------------------------- - -project = 'doubt' -copyright = '2021, Dan Saattrup Nielsen' -author = 'Dan Saattrup Nielsen' - -# The full version, including alpha/beta/rc tags -release = '4.0.0' - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', # Documentation from docstrings - 'sphinx.ext.napoleon' # Using Google-style docstrings -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] diff --git a/docs/source/doubt.datasets.rst b/docs/source/doubt.datasets.rst deleted file mode 100644 index 5dcb11f..0000000 --- a/docs/source/doubt.datasets.rst +++ /dev/null @@ -1,205 +0,0 @@ -doubt.datasets package -====================== - -Submodules ----------- - -doubt.datasets.airfoil module ------------------------------ - -.. automodule:: doubt.datasets.airfoil - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.bike\_sharing\_daily module ------------------------------------------- - -.. automodule:: doubt.datasets.bike_sharing_daily - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.bike\_sharing\_hourly module -------------------------------------------- - -.. automodule:: doubt.datasets.bike_sharing_hourly - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.blog module --------------------------- - -.. automodule:: doubt.datasets.blog - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.concrete module ------------------------------- - -.. automodule:: doubt.datasets.concrete - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.cpu module -------------------------- - -.. automodule:: doubt.datasets.cpu - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.facebook\_comments module ----------------------------------------- - -.. automodule:: doubt.datasets.facebook_comments - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.facebook\_metrics module ---------------------------------------- - -.. automodule:: doubt.datasets.facebook_metrics - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.fish\_bioconcentration module --------------------------------------------- - -.. automodule:: doubt.datasets.fish_bioconcentration - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.fish\_toxicity module ------------------------------------- - -.. automodule:: doubt.datasets.fish_toxicity - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.forest\_fire module ----------------------------------- - -.. automodule:: doubt.datasets.forest_fire - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.gas\_turbine module ----------------------------------- - -.. automodule:: doubt.datasets.gas_turbine - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.nanotube module ------------------------------- - -.. automodule:: doubt.datasets.nanotube - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.new\_taipei\_housing module ------------------------------------------- - -.. automodule:: doubt.datasets.new_taipei_housing - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.parkinsons module --------------------------------- - -.. automodule:: doubt.datasets.parkinsons - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.power\_plant module ----------------------------------- - -.. 
automodule:: doubt.datasets.power_plant - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.protein module ------------------------------ - -.. automodule:: doubt.datasets.protein - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.servo module ---------------------------- - -.. automodule:: doubt.datasets.servo - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.solar\_flare module ----------------------------------- - -.. automodule:: doubt.datasets.solar_flare - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.space\_shuttle module ------------------------------------- - -.. automodule:: doubt.datasets.space_shuttle - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.stocks module ----------------------------- - -.. automodule:: doubt.datasets.stocks - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.superconductivity module ---------------------------------------- - -.. automodule:: doubt.datasets.superconductivity - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.tehran\_housing module -------------------------------------- - -.. automodule:: doubt.datasets.tehran_housing - :members: - :undoc-members: - :show-inheritance: - -doubt.datasets.yacht module ---------------------------- - -.. automodule:: doubt.datasets.yacht - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: doubt.datasets - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/doubt.models.boot.rst b/docs/source/doubt.models.boot.rst deleted file mode 100644 index 36d91ef..0000000 --- a/docs/source/doubt.models.boot.rst +++ /dev/null @@ -1,21 +0,0 @@ -doubt.models.boot package -========================= - -Submodules ----------- - -doubt.models.boot.boot module ------------------------------ - -.. automodule:: doubt.models.boot.boot - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: doubt.models.boot - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/doubt.models.glm.rst b/docs/source/doubt.models.glm.rst deleted file mode 100644 index f6dc32e..0000000 --- a/docs/source/doubt.models.glm.rst +++ /dev/null @@ -1,29 +0,0 @@ -doubt.models.glm package -======================== - -Submodules ----------- - -doubt.models.glm.quantile\_loss module --------------------------------------- - -.. automodule:: doubt.models.glm.quantile_loss - :members: - :undoc-members: - :show-inheritance: - -doubt.models.glm.quantile\_regressor module -------------------------------------------- - -.. automodule:: doubt.models.glm.quantile_regressor - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: doubt.models.glm - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/doubt.models.rst b/docs/source/doubt.models.rst deleted file mode 100644 index 932b2a5..0000000 --- a/docs/source/doubt.models.rst +++ /dev/null @@ -1,20 +0,0 @@ -doubt.models package -==================== - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - doubt.models.boot - doubt.models.glm - doubt.models.tree - -Module contents ---------------- - -.. 
automodule:: doubt.models - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/doubt.models.tree.rst b/docs/source/doubt.models.tree.rst deleted file mode 100644 index 8ac0858..0000000 --- a/docs/source/doubt.models.tree.rst +++ /dev/null @@ -1,37 +0,0 @@ -doubt.models.tree package -========================= - -Submodules ----------- - -doubt.models.tree.forest module -------------------------------- - -.. automodule:: doubt.models.tree.forest - :members: - :undoc-members: - :show-inheritance: - -doubt.models.tree.tree module ------------------------------ - -.. automodule:: doubt.models.tree.tree - :members: - :undoc-members: - :show-inheritance: - -doubt.models.tree.utils module ------------------------------- - -.. automodule:: doubt.models.tree.utils - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: doubt.models.tree - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/doubt.rst b/docs/source/doubt.rst deleted file mode 100644 index c3d3a39..0000000 --- a/docs/source/doubt.rst +++ /dev/null @@ -1,19 +0,0 @@ -doubt package -============= - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - doubt.datasets - doubt.models - -Module contents ---------------- - -.. automodule:: doubt - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index a123c20..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,21 +0,0 @@ -.. doubt documentation master file, created by - sphinx-quickstart on Sun Apr 11 22:33:31 2021. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to doubt's documentation! -================================= - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - modules - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index aa8d9aa..0000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -doubt -===== - -.. 
toctree:: - :maxdepth: 4 - - doubt diff --git a/doubt/__init__.py b/doubt/__init__.py deleted file mode 100644 index 6ca4bed..0000000 --- a/doubt/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -__version__ = '4.1.0' # noqa - -from .models import QuantileRegressionTree # noqa -from .models import QuantileRegressionForest # noqa -from .models import QuantileRegressor # noqa -from .models import Boot # noqa diff --git a/doubt/datasets/__init__.py b/doubt/datasets/__init__.py deleted file mode 100644 index dc85b8f..0000000 --- a/doubt/datasets/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from .airfoil import Airfoil # noqa -from .bike_sharing_daily import BikeSharingDaily # noqa -from .bike_sharing_hourly import BikeSharingHourly # noqa -from .blog import Blog # noqa -from .concrete import Concrete # noqa -from .cpu import CPU # noqa -from .facebook_comments import FacebookComments # noqa -from .facebook_metrics import FacebookMetrics # noqa -from .fish_bioconcentration import FishBioconcentration # noqa -from .fish_toxicity import FishToxicity # noqa -from .forest_fire import ForestFire # noqa -from .gas_turbine import GasTurbine # noqa -from .nanotube import Nanotube # noqa -from .new_taipei_housing import NewTaipeiHousing # noqa -from .parkinsons import Parkinsons # noqa -from .power_plant import PowerPlant # noqa -from .protein import Protein # noqa -from .servo import Servo # noqa -from .solar_flare import SolarFlare # noqa -from .space_shuttle import SpaceShuttle # noqa -from .stocks import Stocks # noqa -from .superconductivity import Superconductivity # noqa -from .tehran_housing import TehranHousing # noqa -from .yacht import Yacht # noqa diff --git a/doubt/models/_model.py b/doubt/models/_model.py deleted file mode 100644 index 07685f0..0000000 --- a/doubt/models/_model.py +++ /dev/null @@ -1,27 +0,0 @@ -''' Base class for estimators ''' - -import abc -from typing import Sequence, Union - -FloatMatrix = Sequence[Sequence[float]] -FloatNDArray = Union[Sequence[float], FloatMatrix] - - -class BaseModel(abc.ABC): - @abc.abstractmethod - def __init__(self, *args, **kwargs): - pass - - @abc.abstractmethod - def predict(self, X: FloatMatrix, **kwargs) -> FloatNDArray: - return - - @abc.abstractmethod - def fit(self, X: FloatMatrix, y: FloatNDArray, **kwargs): - return - - def plot_pred_interval(self, X: FloatMatrix, y: FloatNDArray, **kwargs): - return - - def __call__(self, X: FloatMatrix, **kwargs) -> FloatNDArray: - return self.predict(X) diff --git a/doubt/models/boot/__init__.py b/doubt/models/boot/__init__.py deleted file mode 100644 index 7e0dfb1..0000000 --- a/doubt/models/boot/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .boot import Boot # noqa diff --git a/doubt/models/tree/__init__.py b/doubt/models/tree/__init__.py deleted file mode 100644 index 0956748..0000000 --- a/doubt/models/tree/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .tree import QuantileRegressionTree # noqa -from .forest import QuantileRegressionForest # noqa diff --git a/makefile b/makefile index eede873..862a94e 100644 --- a/makefile +++ b/makefile @@ -1,48 +1,105 @@ -include .env -export $(shell sed 's/=.*//' .env) - -documentation: - sphinx-apidoc -o docs/source --force doubt && \ - make -C docs html - -release-major: - pytest -n 4 && \ - make documentation && \ - python bump_version.py --major && \ - git pull origin master && \ - git push && \ - git checkout master && \ - git merge dev && \ - git push && \ - git push --tags && \ - git checkout dev && \ - python setup.py sdist bdist_wheel && \ - twine upload 
dist/* - -release-minor: - pytest -n 4 && \ - make documentation && \ - python bump_version.py --minor && \ - git pull origin master && \ - git push && \ - git checkout master && \ - git merge dev && \ - git push && \ - git push --tags && \ - git checkout dev && \ - python setup.py sdist bdist_wheel && \ - twine upload dist/* - -release-patch: - pytest -n 4 && \ - make documentation && \ - python bump_version.py --patch && \ - git pull origin master && \ - git push && \ - git checkout master && \ - git merge dev && \ - git push && \ - git push --tags && \ - git checkout dev && \ - python setup.py sdist bdist_wheel && \ - twine upload dist/* +# This ensures that we can call `make <target>` even if `<target>` exists as a +# file or directory. +.PHONY: notebook docs + +# Exports all variables defined in the makefile available to scripts +.EXPORT_ALL_VARIABLES: + +install-poetry: + @echo "Installing poetry..." + @curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3 - + +uninstall-poetry: + @echo "Uninstalling poetry..." + @curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3 - --uninstall + +install: + @echo "Installing..." + @if [ "$(shell which poetry)" = "" ]; then \ + make install-poetry; \ + fi + @if [ "$(shell which gpg)" = "" ]; then \ + echo "GPG not installed, so an error will occur. Install GPG on MacOS with "\ + "`brew install gnupg` or on Ubuntu with `apt install gnupg` and run "\ + "`make install` again."; \ + fi + @poetry env use python3 + @poetry run python3 -m src.scripts.fix_dot_env_file + @git init + @. .env; \ + git config --local user.name "$${GIT_NAME}"; \ + git config --local user.email "$${GIT_EMAIL}" + @. .env; \ + if [ "$${GPG_KEY_ID}" = "" ]; then \ + echo "No GPG key ID specified. Skipping GPG signing."; \ + git config --local commit.gpgsign false; \ + else \ + echo "Signing with GPG key ID $${GPG_KEY_ID}..."; \ + git config --local commit.gpgsign true; \ + git config --local user.signingkey "$${GPG_KEY_ID}"; \ + fi + @poetry install + @poetry run pre-commit install + +remove-env: + @poetry env remove python3 + @echo "Removed virtual environment." + +docs: + @poetry run pdoc --docformat google -o docs src/doubt + @echo "Saved documentation." + +view-docs: + @echo "Viewing API documentation..." + @open docs/doubt.html + +clean: + @find . -type f -name "*.py[co]" -delete + @find . -type d -name "__pycache__" -delete + @rm -rf .pytest_cache + @echo "Cleaned repository." + +test: + @pytest --cov=src/doubt -n 8 tests/ + @readme-cov + +tree: + @tree -a \ + -I .git \ + -I .mypy_cache . \ + -I .env \ + -I .venv \ + -I poetry.lock \ + -I .ipynb_checkpoints \ + -I dist \ + -I .gitkeep \ + -I docs \ + -I .pytest_cache + +bump-major: + @poetry run python -m src.scripts.versioning --major + @echo "Bumped major version." + +bump-minor: + @poetry run python -m src.scripts.versioning --minor + @echo "Bumped minor version." + +bump-patch: + @poetry run python -m src.scripts.versioning --patch + @echo "Bumped patch version." + +publish: + @. .env; \ + printf "Preparing to publish to PyPI. Have you remembered to bump the package version with `make bump-X`, where `X` is `major`, `minor` or `patch`?
[y/n] : "; \ + read -r answer; \ + if [ "$${answer}" = "y" ]; then \ + if [ "$${PYPI_API_TOKEN}" = "" ]; then \ + echo "No PyPI API token specified in the `.env` file, so cannot publish."; \ + else \ + echo "Publishing to PyPI..."; \ + poetry publish --build --username "__token__" --password "$${PYPI_API_TOKEN}"; \ + echo "Published!"; \ + fi \ + else \ + echo "Publishing aborted."; \ + fi diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..10b738f --- /dev/null +++ b/poetry.lock @@ -0,0 +1,1297 @@ +[[package]] +name = "appnope" +version = "0.1.3" +description = "Disable App Nap on macOS >= 10.9" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "asttokens" +version = "2.0.5" +description = "Annotate AST trees with source code positions" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + +[package.extras] +test = ["astroid", "pytest"] + +[[package]] +name = "astunparse" +version = "1.6.3" +description = "An AST unparser for Python" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.6.1,<2.0" + +[[package]] +name = "atomicwrites" +version = "1.4.1" +description = "Atomic file writes." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "attrs" +version = "21.4.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] + +[[package]] +name = "backcall" +version = "0.2.0" +description = "Specifications for callback functions passed in to an API" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "black" +version = "22.6.0" +description = "The uncompromising code formatter." +category = "dev" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +click = ">=8.0.0" +ipython = {version = ">=7.8.0", optional = true, markers = "extra == \"jupyter\""} +mypy-extensions = ">=0.4.3" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tokenize-rt = {version = ">=3.2.0", optional = true, markers = "extra == \"jupyter\""} +tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "certifi" +version = "2022.6.15" +description = "Python package for providing Mozilla's CA Bundle." +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "cfgv" +version = "3.3.1" +description = "Validate configuration and produce human readable error messages." 
+category = "dev" +optional = false +python-versions = ">=3.6.1" + +[[package]] +name = "charset-normalizer" +version = "2.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "dev" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.5" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "coverage" +version = "5.5" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.dependencies] +toml = {version = "*", optional = true, markers = "extra == \"toml\""} + +[package.extras] +toml = ["toml"] + +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "distlib" +version = "0.3.5" +description = "Distribution utilities" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "execnet" +version = "1.9.0" +description = "execnet: rapid multi-Python deployment" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.extras] +testing = ["pre-commit"] + +[[package]] +name = "executing" +version = "0.8.3" +description = "Get the currently executing AST node of a frame, and other information" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "filelock" +version = "3.7.1" +description = "A platform independent file lock." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"] +testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"] + +[[package]] +name = "identify" +version = "2.5.1" +description = "File identification library for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = "idna" +version = "3.3" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "ipython" +version = "8.4.0" +description = "IPython: Productive Interactive Computing" +category = "dev" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +appnope = {version = "*", markers = "sys_platform == \"darwin\""} +backcall = "*" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +decorator = "*" +jedi = ">=0.16" +matplotlib-inline = "*" +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +pickleshare = "*" +prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" +pygments = ">=2.4.0" +stack-data = "*" +traitlets = ">=5" + +[package.extras] +all = ["black", "Sphinx (>=1.3)", "ipykernel", "nbconvert", "nbformat", "ipywidgets", "notebook", "ipyparallel", "qtconsole", "pytest (<7.1)", "pytest-asyncio", "testpath", "curio", "matplotlib (!=3.2.0)", "numpy (>=1.19)", "pandas", "trio"] +black = ["black"] +doc = ["Sphinx (>=1.3)"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["ipywidgets", "notebook"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["pytest (<7.1)", "pytest-asyncio", "testpath"] +test_extra = ["pytest (<7.1)", "pytest-asyncio", "testpath", "curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.19)", "pandas", "trio"] + +[[package]] +name = "isort" +version = "5.10.1" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] +plugins = ["setuptools"] + +[[package]] +name = "jedi" +version = "0.18.1" +description = "An autocompletion tool for Python that can be used for text editors." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +parso = ">=0.8.0,<0.9.0" + +[package.extras] +qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] +testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] + +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "joblib" +version = "1.1.0" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "lxml" +version = "4.9.1" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["beautifulsoup4"] +source = ["Cython (>=0.29.7)"] + +[[package]] +name = "markupsafe" +version = "2.1.1" +description = "Safely add untrusted strings to HTML/XML markup." +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "matplotlib-inline" +version = "0.1.3" +description = "Inline Matplotlib backend for Jupyter" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +traitlets = "*" + +[[package]] +name = "mypy-extensions" +version = "0.4.3" +description = "Experimental type system extensions for programs checked with the mypy typechecker." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "nodeenv" +version = "1.7.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" + +[[package]] +name = "numpy" +version = "1.23.1" +description = "NumPy is the fundamental package for array computing with Python." +category = "main" +optional = false +python-versions = ">=3.8" + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pandas" +version = "1.4.3" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +numpy = [ + {version = ">=1.18.5", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, + {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, +] +python-dateutil = ">=2.8.1" +pytz = ">=2020.1" + +[package.extras] +test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] + +[[package]] +name = "parso" +version = "0.8.3" +description = "A Python Parser" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] +testing = ["docopt", "pytest (<6.0.0)"] + +[[package]] +name = "pathspec" +version = "0.9.0" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[[package]] +name = "pdoc" +version = "7.4.0" +description = "API Documentation for Python Projects" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +astunparse = {version = "*", markers = "python_version < \"3.9\""} +Jinja2 = ">=2.11.0" +MarkupSafe = "*" +pygments = "*" + +[package.extras] +dev = ["flake8", "hypothesis", "mypy", "pytest", "pytest-cov", "pytest-timeout", "tox"] + +[[package]] +name = "pexpect" +version = "4.8.0" +description = "Pexpect allows easy control of interactive console applications." 
+category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +ptyprocess = ">=0.5" + +[[package]] +name = "pickleshare" +version = "0.7.5" +description = "Tiny 'shelve'-like database with concurrency support" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "platformdirs" +version = "2.5.2" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] +test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pre-commit" +version = "2.20.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +toml = "*" +virtualenv = ">=20.0.8" + +[[package]] +name = "prompt-toolkit" +version = "3.0.30" +description = "Library for building powerful interactive command lines in Python" +category = "dev" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "ptyprocess" +version = "0.7.0" +description = "Run a subprocess in a pseudo terminal" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "pure-eval" +version = "0.2.2" +description = "Safely evaluate AST nodes without side effects" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +tests = ["pytest"] + +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pygments" +version = "2.11.0" +description = "Pygments is a syntax highlighting package written in Python." +category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "dev" +optional = false +python-versions = ">=3.6.8" + +[package.extras] +diagrams = ["railroad-diagrams", "jinja2"] + +[[package]] +name = "pytest" +version = "6.2.5" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +py = ">=1.8.2" +toml = "*" + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "3.0.0" +description = "Pytest plugin for measuring coverage." 
+category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "six", "pytest-xdist", "virtualenv"] + +[[package]] +name = "pytest-forked" +version = "1.4.0" +description = "run tests in isolated forked subprocesses" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +py = "*" +pytest = ">=3.10" + +[[package]] +name = "pytest-xdist" +version = "2.5.0" +description = "pytest xdist plugin for distributed testing and loop-on-failing modes" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +execnet = ">=1.1" +pytest = ">=6.2.0" +pytest-forked = "*" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2022.1" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "readme-coverage-badger" +version = "0.1.2" +description = "automatically generates your project's coverage badge using the shields.io service, and then updates your README" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +colorama = "*" +coverage = "<6" + +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." 
+category = "dev" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "scikit-learn" +version = "1.1.1" +description = "A set of python modules for machine learning and data mining" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +joblib = ">=1.0.0" +numpy = ">=1.17.3" +scipy = ">=1.3.2" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.2)", "pandas (>=1.0.5)", "memory-profiler (>=0.57.0)"] +docs = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)", "memory-profiler (>=0.57.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "numpydoc (>=1.2.0)", "Pillow (>=7.1.2)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)"] +tests = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "flake8 (>=3.8.2)", "black (>=22.3.0)", "mypy (>=0.770)", "pyamg (>=4.0.0)", "numpydoc (>=1.2.0)"] + +[[package]] +name = "scipy" +version = "1.8.1" +description = "SciPy: Scientific Library for Python" +category = "main" +optional = false +python-versions = ">=3.8,<3.11" + +[package.dependencies] +numpy = ">=1.17.3,<1.25.0" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "stack-data" +version = "0.3.0" +description = "Extract data from python stack frames and tracebacks for informative displays" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +asttokens = "*" +executing = "*" +pure-eval = "*" + +[package.extras] +tests = ["pytest", "typeguard", "pygments", "littleutils", "cython"] + +[[package]] +name = "threadpoolctl" +version = "3.1.0" +description = "threadpoolctl" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "tokenize-rt" +version = "4.2.1" +description = "A wrapper around the stdlib `tokenize` which roundtrips." 
+category = "dev" +optional = false +python-versions = ">=3.6.1" + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "tqdm" +version = "4.64.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "traitlets" +version = "5.3.0" +description = "" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pre-commit", "pytest"] + +[[package]] +name = "typing-extensions" +version = "4.3.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "urllib3" +version = "1.26.10" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "virtualenv" +version = "20.15.1" +description = "Virtual Python Environment builder" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.dependencies] +distlib = ">=0.3.1,<1" +filelock = ">=3.2,<4" +platformdirs = ">=2,<3" +six = ">=1.9.0,<2" + +[package.extras] +docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"] +testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"] + +[[package]] +name = "wcwidth" +version = "0.2.5" +description = "Measures the displayed width of unicode strings in a terminal" +category = "dev" +optional = false +python-versions = "*" + +[metadata] +lock-version = "1.1" +python-versions = ">=3.8,<3.11" +content-hash = "443fa6e83c008da07d53960e118fecd10fdc96d49acc084bbea8d0ed27ac9065" + +[metadata.files] +appnope = [ + {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"}, + {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, +] +asttokens = [ + {file = "asttokens-2.0.5-py2.py3-none-any.whl", hash = "sha256:0844691e88552595a6f4a4281a9f7f79b8dd45ca4ccea82e5e05b4bbdb76705c"}, + {file = "asttokens-2.0.5.tar.gz", hash = "sha256:9a54c114f02c7a9480d56550932546a3f1fe71d8a02f1bc7ccd0ee3ee35cf4d5"}, +] +astunparse = [] +atomicwrites = [] +attrs = [ + {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, + 
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, +] +backcall = [ + {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, + {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, +] +black = [ + {file = "black-22.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69"}, + {file = "black-22.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807"}, + {file = "black-22.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e"}, + {file = "black-22.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def"}, + {file = "black-22.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"}, + {file = "black-22.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d"}, + {file = "black-22.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256"}, + {file = "black-22.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78"}, + {file = "black-22.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849"}, + {file = "black-22.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c"}, + {file = "black-22.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e"}, + {file = "black-22.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6"}, + {file = "black-22.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad"}, + {file = "black-22.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2"}, + {file = "black-22.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee"}, + {file = "black-22.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b"}, + {file = "black-22.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4"}, + {file = "black-22.6.0-py3-none-any.whl", hash = 
"sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, + {file = "black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, +] +certifi = [ + {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, + {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, +] +cfgv = [ + {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, + {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, +] +charset-normalizer = [ + {file = "charset-normalizer-2.1.0.tar.gz", hash = "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"}, + {file = "charset_normalizer-2.1.0-py3-none-any.whl", hash = "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5"}, +] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, +] +coverage = [] +decorator = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] +distlib = [] +execnet = [] +executing = [ + {file = "executing-0.8.3-py2.py3-none-any.whl", hash = "sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9"}, + {file = "executing-0.8.3.tar.gz", hash = "sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501"}, +] +filelock = [ + {file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"}, + {file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"}, +] +identify = [ + {file = "identify-2.5.1-py2.py3-none-any.whl", hash = "sha256:0dca2ea3e4381c435ef9c33ba100a78a9b40c0bab11189c7cf121f75815efeaa"}, + {file = "identify-2.5.1.tar.gz", hash = "sha256:3d11b16f3fe19f52039fb7e39c9c884b21cb1b586988114fbe42671f03de3e82"}, +] +idna = [ + {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, + {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] +ipython = [ + {file = "ipython-8.4.0-py3-none-any.whl", hash = "sha256:7ca74052a38fa25fe9bedf52da0be7d3fdd2fb027c3b778ea78dfe8c212937d1"}, + {file = "ipython-8.4.0.tar.gz", hash = "sha256:f2db3a10254241d9b447232cec8b424847f338d9d36f9a577a6192c332a46abd"}, +] +isort = [ + {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, 
+ {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, +] +jedi = [ + {file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"}, + {file = "jedi-0.18.1.tar.gz", hash = "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab"}, +] +jinja2 = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] +joblib = [ + {file = "joblib-1.1.0-py2.py3-none-any.whl", hash = "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6"}, + {file = "joblib-1.1.0.tar.gz", hash = "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"}, +] +lxml = [ + {file = "lxml-4.9.1-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc"}, + {file = "lxml-4.9.1-cp27-cp27m-win32.whl", hash = "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3"}, + {file = "lxml-4.9.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627"}, + {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84"}, + {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837"}, + {file = "lxml-4.9.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:49a866923e69bc7da45a0565636243707c22752fc38f6b9d5c8428a86121022c"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d"}, + {file = "lxml-4.9.1-cp310-cp310-win32.whl", hash = "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7"}, + {file = "lxml-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3"}, + {file = "lxml-4.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318"}, + {file = "lxml-4.9.1-cp35-cp35m-win32.whl", hash = "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7"}, + {file = "lxml-4.9.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4"}, + {file = "lxml-4.9.1-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391"}, + {file = "lxml-4.9.1-cp36-cp36m-win32.whl", hash = "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e"}, + {file = "lxml-4.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7"}, + {file = "lxml-4.9.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785"}, + {file = 
"lxml-4.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785"}, + {file = "lxml-4.9.1-cp37-cp37m-win32.whl", hash = "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a"}, + {file = "lxml-4.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e"}, + {file = "lxml-4.9.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387"}, + {file = "lxml-4.9.1-cp38-cp38-win32.whl", hash = "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94"}, + {file = "lxml-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345"}, + {file = "lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd"}, + {file = "lxml-4.9.1-cp39-cp39-win32.whl", hash = "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb"}, + {file = "lxml-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d"}, + {file = 
"lxml-4.9.1-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9"}, + {file = "lxml-4.9.1.tar.gz", hash = "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f"}, +] +markupsafe = [ + {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, + {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, + {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, + {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, + {file = 
"MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, + {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, + {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, +] +matplotlib-inline = [ + {file = "matplotlib-inline-0.1.3.tar.gz", hash = "sha256:a04bfba22e0d1395479f866853ec1ee28eea1485c1d69a6faf00dc3e24ff34ee"}, + {file = "matplotlib_inline-0.1.3-py3-none-any.whl", hash = "sha256:aed605ba3b72462d64d475a21a9296f400a19c4f74a31b59103d2a99ffd5aa5c"}, +] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] +nodeenv = [ + {file = "nodeenv-1.7.0-py2.py3-none-any.whl", hash = "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e"}, + {file = "nodeenv-1.7.0.tar.gz", hash = "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b"}, +] +numpy = [] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] +pandas = [ + {file = "pandas-1.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d51674ed8e2551ef7773820ef5dab9322be0828629f2cbf8d1fc31a0c4fed640"}, + {file = "pandas-1.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:16ad23db55efcc93fa878f7837267973b61ea85d244fc5ff0ccbcfa5638706c5"}, + {file = "pandas-1.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:958a0588149190c22cdebbc0797e01972950c927a11a900fe6c2296f207b1d6f"}, + {file = "pandas-1.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e48fbb64165cda451c06a0f9e4c7a16b534fcabd32546d531b3c240ce2844112"}, + {file = "pandas-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f803320c9da732cc79210d7e8cc5c8019aad512589c910c66529eb1b1818230"}, + {file = "pandas-1.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:2893e923472a5e090c2d5e8db83e8f907364ec048572084c7d10ef93546be6d1"}, + {file = "pandas-1.4.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:24ea75f47bbd5574675dae21d51779a4948715416413b30614c1e8b480909f81"}, + {file = "pandas-1.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ebc990bd34f4ac3c73a2724c2dcc9ee7bf1ce6cf08e87bb25c6ad33507e318"}, + {file = "pandas-1.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d6c0106415ff1a10c326c49bc5dd9ea8b9897a6ca0c8688eb9c30ddec49535ef"}, + {file = "pandas-1.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78b00429161ccb0da252229bcda8010b445c4bf924e721265bec5a6e96a92e92"}, + {file = "pandas-1.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dfbf16b1ea4f4d0ee11084d9c026340514d1d30270eaa82a9f1297b6c8ecbf0"}, + {file = "pandas-1.4.3-cp38-cp38-win32.whl", hash = 
"sha256:48350592665ea3cbcd07efc8c12ff12d89be09cd47231c7925e3b8afada9d50d"}, + {file = "pandas-1.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:605d572126eb4ab2eadf5c59d5d69f0608df2bf7bcad5c5880a47a20a0699e3e"}, + {file = "pandas-1.4.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a3924692160e3d847e18702bb048dc38e0e13411d2b503fecb1adf0fcf950ba4"}, + {file = "pandas-1.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07238a58d7cbc8a004855ade7b75bbd22c0db4b0ffccc721556bab8a095515f6"}, + {file = "pandas-1.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:755679c49460bd0d2f837ab99f0a26948e68fa0718b7e42afbabd074d945bf84"}, + {file = "pandas-1.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41fc406e374590a3d492325b889a2686b31e7a7780bec83db2512988550dadbf"}, + {file = "pandas-1.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d9382f72a4f0e93909feece6fef5500e838ce1c355a581b3d8f259839f2ea76"}, + {file = "pandas-1.4.3-cp39-cp39-win32.whl", hash = "sha256:0daf876dba6c622154b2e6741f29e87161f844e64f84801554f879d27ba63c0d"}, + {file = "pandas-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:721a3dd2f06ef942f83a819c0f3f6a648b2830b191a72bbe9451bcd49c3bd42e"}, + {file = "pandas-1.4.3.tar.gz", hash = "sha256:2ff7788468e75917574f080cd4681b27e1a7bf36461fe968b49a87b5a54d007c"}, +] +parso = [ + {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, + {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, +] +pathspec = [ + {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, + {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, +] +pdoc = [] +pexpect = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] +pickleshare = [ + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] +platformdirs = [ + {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, + {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +pre-commit = [] +prompt-toolkit = [ + {file = "prompt_toolkit-3.0.30-py3-none-any.whl", hash = "sha256:d8916d3f62a7b67ab353a952ce4ced6a1d2587dfe9ef8ebc30dd7c386751f289"}, + {file = "prompt_toolkit-3.0.30.tar.gz", hash = "sha256:859b283c50bde45f5f97829f77a4674d1c1fcd88539364f1b28a37805cfd89c0"}, +] +ptyprocess = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] +pure-eval = [ + 
{file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, + {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, +] +py = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] +pygments = [] +pyparsing = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pytest = [ + {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, + {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, +] +pytest-cov = [] +pytest-forked = [] +pytest-xdist = [] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +pytz = [ + {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"}, + {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"}, +] +pyyaml = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = 
"PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] +readme-coverage-badger = [] +requests = [ + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, +] +scikit-learn = [ + {file = "scikit-learn-1.1.1.tar.gz", hash = 
"sha256:3e77b71e8e644f86c8b5be7f1c285ef597de4c384961389ee3e9ca36c445b256"}, + {file = "scikit_learn-1.1.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:102f51797cd8944bf44a038d106848ddf2804f2c1edf7aea45fba81a4fdc4d80"}, + {file = "scikit_learn-1.1.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:723cdb278b1fa57a55f68945bc4e501a2f12abe82f76e8d21e1806cbdbef6fc5"}, + {file = "scikit_learn-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33cf061ed0b79d647a3e4c3f6c52c412172836718a7cd4d11c1318d083300133"}, + {file = "scikit_learn-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47464c110eaa9ed9d1fe108cb403510878c3d3a40f110618d2a19b2190a3e35c"}, + {file = "scikit_learn-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:542ccd2592fe7ad31f5c85fed3a3deb3e252383960a85e4b49a629353fffaba4"}, + {file = "scikit_learn-1.1.1-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:3be10d8d325821ca366d4fe7083d87c40768f842f54371a9c908d97c45da16fc"}, + {file = "scikit_learn-1.1.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b2db720e13e697d912a87c1a51194e6fb085dc6d8323caa5ca51369ca6948f78"}, + {file = "scikit_learn-1.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e851f8874398dcd50d1e174e810e9331563d189356e945b3271c0e19ee6f4d6f"}, + {file = "scikit_learn-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b928869072366dc138762fe0929e7dc88413f8a469aebc6a64adc10a9226180c"}, + {file = "scikit_learn-1.1.1-cp38-cp38-win32.whl", hash = "sha256:e9d228ced1214d67904f26fb820c8abbea12b2889cd4aa8cda20a4ca0ed781c1"}, + {file = "scikit_learn-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:f2d5b5d6e87d482e17696a7bfa03fe9515fdfe27e462a4ad37f3d7774a5e2fd6"}, + {file = "scikit_learn-1.1.1-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:0403ad13f283e27d43b0ad875f187ec7f5d964903d92d1ed06c51439560ecea0"}, + {file = "scikit_learn-1.1.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8fe80df08f5b9cee5dd008eccc672e543976198d790c07e5337f7dfb67eaac05"}, + {file = "scikit_learn-1.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ff56d07b9507fbe07ca0f4e5c8f3e171f74a429f998da03e308166251316b34"}, + {file = "scikit_learn-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2dad2bfc502344b869d4a3f4aa7271b2a5f4fe41f7328f404844c51612e2c58"}, + {file = "scikit_learn-1.1.1-cp39-cp39-win32.whl", hash = "sha256:22145b60fef02e597a8e7f061ebc7c51739215f11ce7fcd2ca9af22c31aa9f86"}, + {file = "scikit_learn-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:45c0f6ae523353f1d99b85469d746f9c497410adff5ba8b24423705b6956a86e"}, +] +scipy = [ + {file = "scipy-1.8.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:65b77f20202599c51eb2771d11a6b899b97989159b7975e9b5259594f1d35ef4"}, + {file = "scipy-1.8.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e013aed00ed776d790be4cb32826adb72799c61e318676172495383ba4570aa4"}, + {file = "scipy-1.8.1-cp310-cp310-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:02b567e722d62bddd4ac253dafb01ce7ed8742cf8031aea030a41414b86c1125"}, + {file = "scipy-1.8.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1da52b45ce1a24a4a22db6c157c38b39885a990a566748fc904ec9f03ed8c6ba"}, + {file = "scipy-1.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0aa8220b89b2e3748a2836fbfa116194378910f1a6e78e4675a095bcd2c762d"}, + {file = "scipy-1.8.1-cp310-cp310-win_amd64.whl", 
hash = "sha256:4e53a55f6a4f22de01ffe1d2f016e30adedb67a699a310cdcac312806807ca81"}, + {file = "scipy-1.8.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:28d2cab0c6ac5aa131cc5071a3a1d8e1366dad82288d9ec2ca44df78fb50e649"}, + {file = "scipy-1.8.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:6311e3ae9cc75f77c33076cb2794fb0606f14c8f1b1c9ff8ce6005ba2c283621"}, + {file = "scipy-1.8.1-cp38-cp38-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:3b69b90c9419884efeffaac2c38376d6ef566e6e730a231e15722b0ab58f0328"}, + {file = "scipy-1.8.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6cc6b33139eb63f30725d5f7fa175763dc2df6a8f38ddf8df971f7c345b652dc"}, + {file = "scipy-1.8.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c4e3ae8a716c8b3151e16c05edb1daf4cb4d866caa385e861556aff41300c14"}, + {file = "scipy-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23b22fbeef3807966ea42d8163322366dd89da9bebdc075da7034cee3a1441ca"}, + {file = "scipy-1.8.1-cp38-cp38-win32.whl", hash = "sha256:4b93ec6f4c3c4d041b26b5f179a6aab8f5045423117ae7a45ba9710301d7e462"}, + {file = "scipy-1.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:70ebc84134cf0c504ce6a5f12d6db92cb2a8a53a49437a6bb4edca0bc101f11c"}, + {file = "scipy-1.8.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f3e7a8867f307e3359cc0ed2c63b61a1e33a19080f92fe377bc7d49f646f2ec1"}, + {file = "scipy-1.8.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:2ef0fbc8bcf102c1998c1f16f15befe7cffba90895d6e84861cd6c6a33fb54f6"}, + {file = "scipy-1.8.1-cp39-cp39-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:83606129247e7610b58d0e1e93d2c5133959e9cf93555d3c27e536892f1ba1f2"}, + {file = "scipy-1.8.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:93d07494a8900d55492401917a119948ed330b8c3f1d700e0b904a578f10ead4"}, + {file = "scipy-1.8.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3b3c8924252caaffc54d4a99f1360aeec001e61267595561089f8b5900821bb"}, + {file = "scipy-1.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70de2f11bf64ca9921fda018864c78af7147025e467ce9f4a11bc877266900a6"}, + {file = "scipy-1.8.1-cp39-cp39-win32.whl", hash = "sha256:1166514aa3bbf04cb5941027c6e294a000bba0cf00f5cdac6c77f2dad479b434"}, + {file = "scipy-1.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:9dd4012ac599a1e7eb63c114d1eee1bcfc6dc75a29b589ff0ad0bb3d9412034f"}, + {file = "scipy-1.8.1.tar.gz", hash = "sha256:9e3fb1b0e896f14a85aa9a28d5f755daaeeb54c897b746df7a55ccb02b340f33"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +stack-data = [ + {file = "stack_data-0.3.0-py3-none-any.whl", hash = "sha256:aa1d52d14d09c7a9a12bb740e6bdfffe0f5e8f4f9218d85e7c73a8c37f7ae38d"}, + {file = "stack_data-0.3.0.tar.gz", hash = "sha256:77bec1402dcd0987e9022326473fdbcc767304892a533ed8c29888dacb7dddbc"}, +] +threadpoolctl = [ + {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, + {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, +] +tokenize-rt = [ + {file = "tokenize_rt-4.2.1-py2.py3-none-any.whl", hash = "sha256:08a27fa032a81cf45e8858d0ac706004fcd523e8463415ddf1442be38e204ea8"}, + 
{file = "tokenize_rt-4.2.1.tar.gz", hash = "sha256:0d4f69026fed520f8a1e0103aa36c406ef4661417f20ca643f913e33531b3b94"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +tqdm = [ + {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"}, + {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"}, +] +traitlets = [ + {file = "traitlets-5.3.0-py3-none-any.whl", hash = "sha256:65fa18961659635933100db8ca120ef6220555286949774b9cfc106f941d1c7a"}, + {file = "traitlets-5.3.0.tar.gz", hash = "sha256:0bb9f1f9f017aa8ec187d8b1b2a7a6626a2a1d877116baba52a129bfa124f8e2"}, +] +typing-extensions = [ + {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, + {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, +] +urllib3 = [] +virtualenv = [ + {file = "virtualenv-20.15.1-py2.py3-none-any.whl", hash = "sha256:b30aefac647e86af6d82bfc944c556f8f1a9c90427b2fb4e3bfbf338cb82becf"}, + {file = "virtualenv-20.15.1.tar.gz", hash = "sha256:288171134a2ff3bfb1a2f54f119e77cd1b81c29fc1265a2356f3e8d14c7d58c4"}, +] +wcwidth = [ + {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, + {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, +] diff --git a/poetry.toml b/poetry.toml new file mode 100644 index 0000000..53b35d3 --- /dev/null +++ b/poetry.toml @@ -0,0 +1,3 @@ +[virtualenvs] +create = true +in-project = true diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5ef184b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "doubt" +version = "4.1.0" +description = "Bringing back uncertainty to machine learning." 
+authors = ["Dan Saattrup Nielsen "] +readme = "README.md" +license = "MIT" +repository = "https://github.com/saattrupdan/doubt" + +[tool.poetry.dependencies] +python = ">=3.8,<3.11" +tqdm = "^4.62.0" +numpy = "^1.23.0" +pandas = "^1.4.0" +joblib = "^1.1.0" +scikit-learn = "^1.1.1" + +[tool.poetry.dev-dependencies] +pdoc = "^7.1.1" +pytest = "^6.2.5" +pre-commit = "^2.17.0" +black = {extras = ["jupyter"], version = "^22.3.0"} +requests = "^2.28.0" +lxml = "^4.9.0" +isort = "^5.10.1" +pytest-xdist = "^2.5.0" +pytest-cov = "^3.0.0" +readme-coverage-badger = ">=0.1.2,<1.0.0" +Pygments = "2.11" + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = [ + '--verbose', + '--durations=10', + '--color=yes', + '-s', + '-vv', + '--doctest-modules' +] +xfail_strict = true +filterwarnings = ["error"] +log_cli_level = "info" +testpaths = ["tests"] + +[tool.black] +line-length = 88 +include = '\.pyi?$' +exclude = ''' +/( + \.git +| \.hg +| \.mypy_cache +| \.tox +| \.venv +| _build +| buck-out +| build +)/ +''' + +[tool.isort] +profile = "black" diff --git a/src/doubt/__init__.py b/src/doubt/__init__.py new file mode 100644 index 0000000..6b07599 --- /dev/null +++ b/src/doubt/__init__.py @@ -0,0 +1,13 @@ +""" +.. include:: ../../README.md +""" + +import pkg_resources + +from .models import Boot # noqa +from .models import QuantileRegressionForest # noqa +from .models import QuantileRegressionTree # noqa +from .models import QuantileRegressor # noqa + +# Fetches the version of the package as defined in pyproject.toml +__version__ = pkg_resources.get_distribution("doubt").version diff --git a/src/doubt/datasets/__init__.py b/src/doubt/datasets/__init__.py new file mode 100644 index 0000000..2dda6e8 --- /dev/null +++ b/src/doubt/datasets/__init__.py @@ -0,0 +1,24 @@ +from .airfoil import Airfoil # noqa +from .bike_sharing_daily import BikeSharingDaily # noqa +from .bike_sharing_hourly import BikeSharingHourly # noqa +from .blog import Blog # noqa +from .concrete import Concrete # noqa +from .cpu import CPU # noqa +from .facebook_comments import FacebookComments # noqa +from .facebook_metrics import FacebookMetrics # noqa +from .fish_bioconcentration import FishBioconcentration # noqa +from .fish_toxicity import FishToxicity # noqa +from .forest_fire import ForestFire # noqa +from .gas_turbine import GasTurbine # noqa +from .nanotube import Nanotube # noqa +from .new_taipei_housing import NewTaipeiHousing # noqa +from .parkinsons import Parkinsons # noqa +from .power_plant import PowerPlant # noqa +from .protein import Protein # noqa +from .servo import Servo # noqa +from .solar_flare import SolarFlare # noqa +from .space_shuttle import SpaceShuttle # noqa +from .stocks import Stocks # noqa +from .superconductivity import Superconductivity # noqa +from .tehran_housing import TehranHousing # noqa +from .yacht import Yacht # noqa diff --git a/doubt/datasets/_dataset.py b/src/doubt/datasets/_dataset.py similarity index 81% rename from doubt/datasets/_dataset.py rename to src/doubt/datasets/_dataset.py index 63d3806..127de67 100644 --- a/doubt/datasets/_dataset.py +++ b/src/doubt/datasets/_dataset.py @@ -1,16 +1,15 @@ -'''Base class for data sets''' +"""Base class for data sets""" -import warnings -import requests -import abc import re -from typing import Optional, Iterable, Tuple +import warnings +from abc import ABC, abstractmethod +from typing import Iterable, Optional, Tuple, Union import numpy as np import pandas as pd +import requests - -BASE_DATASET_DESCRIPTION = ''' +BASE_DATASET_DESCRIPTION = """ 
Parameters: cache (str or None, optional): The name of the cache. It will be saved to `cache` in the @@ -24,42 +23,42 @@ Dimensions of the data set columns (list of strings): List of column names in the data set -''' +""" -class BaseDataset(abc.ABC): +class BaseDataset(ABC): _url: str _features: Iterable _targets: Iterable - def __init__(self, cache: Optional[str] = '.dataset_cache'): + def __init__(self, cache: Optional[str] = ".dataset_cache"): self.cache = cache self._data = self.get_data() self.shape = self._data.shape self.columns = self._data.columns - @abc.abstractmethod + @abstractmethod def _prep_data(self, data: bytes) -> pd.DataFrame: return def get_data(self) -> pd.DataFrame: - ''' Download and prepare the dataset. + """Download and prepare the dataset. Returns: Pandas DataFrame: The dataset. - ''' + """ # Get name of dataset, being the class name converted to snake case - name = re.sub(r'([A-Z])', r'_\1', type(self).__name__) - name = name.lower().strip('_') + name = re.sub(r"([A-Z])", r"_\1", type(self).__name__) + name = name.lower().strip("_") try: if self.cache is not None: data = pd.read_hdf(self.cache, name) except (FileNotFoundError, KeyError): with warnings.catch_warnings(): - warnings.simplefilter('ignore') + warnings.simplefilter("ignore") response = requests.get(self._url, verify=False) data = self._prep_data(response.content) if self.cache is not None: @@ -91,11 +90,13 @@ def __repr__(self) -> str: def _repr_html_(self): return self._data._repr_html_() - def split(self, - test_size: Optional[float] = None, - random_seed: Optional[float] = None - ) -> Tuple[np.ndarray, np.ndarray]: - '''Split dataset into features and targets and optionally train/test. + def split( + self, test_size: Optional[float] = None, random_seed: Optional[float] = None + ) -> Union[ + Tuple[np.ndarray, np.ndarray], + Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], + ]: + """Split dataset into features and targets and optionally train/test. Args: test_size (float or None): @@ -110,7 +111,7 @@ def split(self, If `test_size` is not `None` then a tuple of numpy arrays (X_train, y_train, X_test, y_test) is returned, and otherwise the tuple (X, y) of numpy arrays is returned. - ''' + """ # Initialise random number generator rng = np.random.default_rng(random_seed) diff --git a/doubt/datasets/airfoil.py b/src/doubt/datasets/airfoil.py similarity index 86% rename from doubt/datasets/airfoil.py rename to src/doubt/datasets/airfoil.py index 6f57d16..2c5a63e 100644 --- a/doubt/datasets/airfoil.py +++ b/src/doubt/datasets/airfoil.py @@ -1,18 +1,19 @@ -'''Airfoil data set. +"""Airfoil data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Airfoil(BaseDataset): - __doc__ = f''' + __doc__ = f""" The NASA data set comprises different size NACA 0012 airfoils at various wind tunnel speeds and angles of attack. The span of the airfoil and the observer position were the same in all of the experiments. 
@@ -63,26 +64,28 @@ class Airfoil(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00291/airfoil_self_noise.dat') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00291/airfoil_self_noise.dat" + ) _features = range(5) _targets = [5] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) # Read the file-like object into a data frame - df = pd.read_csv(csv_file, sep='\t', header=None) + df = pd.read_csv(csv_file, sep="\t", header=None) return df diff --git a/doubt/datasets/bike_sharing_daily.py b/src/doubt/datasets/bike_sharing_daily.py similarity index 91% rename from doubt/datasets/bike_sharing_daily.py rename to src/doubt/datasets/bike_sharing_daily.py index dbfe1fb..7192f3c 100644 --- a/doubt/datasets/bike_sharing_daily.py +++ b/src/doubt/datasets/bike_sharing_daily.py @@ -1,19 +1,20 @@ -'''Daily bike sharing data set. +"""Daily bike sharing data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class BikeSharingDaily(BaseDataset): - __doc__ = f''' + __doc__ = f""" Bike sharing systems are new generation of traditional bike rentals where whole process from membership, rental and return back has become automatic. Through these systems, user is able to easily rent a bike from a @@ -103,29 +104,31 @@ class BikeSharingDaily(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00275/Bike-Sharing-Dataset.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00275/Bike-Sharing-Dataset.zip" + ) _features = range(12) _targets = [12, 13, 14] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out day.csv as a string - with zipfile.ZipFile(buffer, 'r') as zip_file: - csv = zip_file.read('day.csv').decode('utf-8') + with zipfile.ZipFile(buffer, "r") as zip_file: + csv = zip_file.read("day.csv").decode("utf-8") # Convert the string into a file-like object csv_file = io.StringIO(csv) diff --git a/doubt/datasets/bike_sharing_hourly.py b/src/doubt/datasets/bike_sharing_hourly.py similarity index 91% rename from doubt/datasets/bike_sharing_hourly.py rename to src/doubt/datasets/bike_sharing_hourly.py index 65507fd..60b4f9a 100644 --- a/doubt/datasets/bike_sharing_hourly.py +++ b/src/doubt/datasets/bike_sharing_hourly.py @@ -1,19 +1,20 @@ -'''Hourly bike sharing data set. +"""Hourly bike sharing data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. 
-''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class BikeSharingHourly(BaseDataset): - __doc__ = f''' + __doc__ = f""" Bike sharing systems are new generation of traditional bike rentals where whole process from membership, rental and return back has become automatic. Through these systems, user is able to easily rent a bike from a @@ -106,29 +107,31 @@ class BikeSharingHourly(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00275/Bike-Sharing-Dataset.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00275/Bike-Sharing-Dataset.zip" + ) _features = range(13) _targets = [13, 14, 15] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out hour.csv as a string - with zipfile.ZipFile(buffer, 'r') as zip_file: - csv = zip_file.read('hour.csv').decode('utf-8') + with zipfile.ZipFile(buffer, "r") as zip_file: + csv = zip_file.read("hour.csv").decode("utf-8") # Convert the string into a file-like object csv_file = io.StringIO(csv) diff --git a/doubt/datasets/blog.py b/src/doubt/datasets/blog.py similarity index 92% rename from doubt/datasets/blog.py rename to src/doubt/datasets/blog.py index fdd0770..037de44 100644 --- a/doubt/datasets/blog.py +++ b/src/doubt/datasets/blog.py @@ -1,19 +1,20 @@ -'''Blog post data set. +"""Blog post data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Blog(BaseDataset): - __doc__ = f''' + __doc__ = f""" This data originates from blog posts. The raw HTML-documents of the blog posts were crawled and processed. The prediction task associated with the data is the prediction @@ -117,29 +118,31 @@ class Blog(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00304/BlogFeedback.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00304/BlogFeedback.zip" + ) _features = range(279) _targets = [279] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out blogData_train.csv as a string - with zipfile.ZipFile(buffer, 'r') as zip_file: - csv = zip_file.read('blogData_train.csv').decode('utf-8') + with zipfile.ZipFile(buffer, "r") as zip_file: + csv = zip_file.read("blogData_train.csv").decode("utf-8") # Convert the string into a file-like object csv_file = io.StringIO(csv) diff --git a/doubt/datasets/concrete.py b/src/doubt/datasets/concrete.py similarity index 89% rename from doubt/datasets/concrete.py rename to src/doubt/datasets/concrete.py index 8310f3e..fd379a6 100644 --- a/doubt/datasets/concrete.py +++ b/src/doubt/datasets/concrete.py @@ -1,18 +1,19 @@ -'''Concrete data set. +"""Concrete data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Concrete(BaseDataset): - __doc__ = f''' + __doc__ = f""" Concrete is the most important material in civil engineering. The concrete compressive strength is a highly nonlinear function of age and ingredients. @@ -69,23 +70,25 @@ class Concrete(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'concrete/compressive/Concrete_Data.xls') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "concrete/compressive/Concrete_Data.xls" + ) _features = range(8) _targets = [8] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object xls_file = io.BytesIO(data) diff --git a/doubt/datasets/cpu.py b/src/doubt/datasets/cpu.py similarity index 82% rename from doubt/datasets/cpu.py rename to src/doubt/datasets/cpu.py index ab1c4fe..cbf0c04 100644 --- a/doubt/datasets/cpu.py +++ b/src/doubt/datasets/cpu.py @@ -1,18 +1,19 @@ -'''CPU data set. +"""CPU data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class CPU(BaseDataset): - __doc__ = f''' + __doc__ = f""" Relative CPU Performance Data, described in terms of its cycle time, memory size, etc. @@ -68,30 +69,41 @@ class CPU(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'cpu-performance/machine.data') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "cpu-performance/machine.data" + ) _features = range(8) _targets = [8] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) # Name the columns - cols = ['vendor_name', 'model_name', 'myct', 'mmin', 'mmax', - 'cach', 'chmin', 'chmax', 'prp'] + cols = [ + "vendor_name", + "model_name", + "myct", + "mmin", + "mmax", + "cach", + "chmin", + "chmax", + "prp", + ] # Load the file-like object into a data frame df = pd.read_csv(csv_file, header=None, usecols=range(9), names=cols) diff --git a/doubt/datasets/facebook_comments.py b/src/doubt/datasets/facebook_comments.py similarity index 82% rename from doubt/datasets/facebook_comments.py rename to src/doubt/datasets/facebook_comments.py index 9339eeb..7efcc6b 100644 --- a/doubt/datasets/facebook_comments.py +++ b/src/doubt/datasets/facebook_comments.py @@ -1,19 +1,20 @@ -'''Facebook comments data set. +"""Facebook comments data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class FacebookComments(BaseDataset): - __doc__ = f''' + __doc__ = f""" Instances in this dataset contain features extracted from Facebook posts. The task associated with the data is to predict how many comments the post will receive. @@ -104,43 +105,55 @@ class FacebookComments(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00363/Dataset.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" "00363/Dataset.zip" + ) _features = range(54) _targets = [53] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out the csv file - with zipfile.ZipFile(buffer, 'r') as zip_file: - csv = zip_file.read('Dataset/Training/Features_Variant_5.csv') + with zipfile.ZipFile(buffer, "r") as zip_file: + csv = zip_file.read("Dataset/Training/Features_Variant_5.csv") # Convert the string into a file-like object csv_file = io.BytesIO(csv) # Name the columns - cols = ['page_popularity', 'page_checkins', 'page_talking_about', - 'page_category'] + \ - [f'agg{n}' for n in range(25)] + \ - ['cc1', 'cc2', 'cc3', 'cc4', 'cc5', 'base_time', 'post_length', - 'post_share_count', 'post_promotion_status', 'h_local'] + \ - [f'day_published{n}' for n in range(7)] + \ - [f'day{n}' for n in range(7)] + \ - ['ncomments'] + cols = ( + ["page_popularity", "page_checkins", "page_talking_about", "page_category"] + + [f"agg{n}" for n in range(25)] + + [ + "cc1", + "cc2", + "cc3", + "cc4", + "cc5", + "base_time", + "post_length", + "post_share_count", + "post_promotion_status", + "h_local", + ] + + [f"day_published{n}" for n in range(7)] + + [f"day{n}" for n in range(7)] + + ["ncomments"] + ) # Read the file-like object into a dataframe df = pd.read_csv(csv_file, header=None, names=cols) diff --git a/doubt/datasets/facebook_metrics.py b/src/doubt/datasets/facebook_metrics.py similarity index 76% rename from doubt/datasets/facebook_metrics.py rename to src/doubt/datasets/facebook_metrics.py index 2dad084..56e0f26 100644 --- a/doubt/datasets/facebook_metrics.py +++ b/src/doubt/datasets/facebook_metrics.py @@ -1,19 +1,20 @@ -'''Facebook metrics data set. +"""Facebook metrics data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class FacebookMetrics(BaseDataset): - __doc__ = f''' + __doc__ = f""" The data is related to posts' published during the year of 2014 on the Facebook's page of a renowned cosmetics brand. @@ -89,44 +90,60 @@ class FacebookMetrics(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00368/Facebook_metrics.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00368/Facebook_metrics.zip" + ) _features = range(7) _targets = range(7, 18) def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out the csv file - with zipfile.ZipFile(buffer, 'r') as zip_file: - csv = zip_file.read('dataset_Facebook.csv') + with zipfile.ZipFile(buffer, "r") as zip_file: + csv = zip_file.read("dataset_Facebook.csv") # Convert the bytes into a file-like object csv_file = io.BytesIO(csv) # Read the file-like object into a dataframe - cols = ['page_likes', 'post_type', 'post_category', 'post_month', - 'post_weekday', 'post_hour', 'paid', 'total_reach', - 'total_impressions', 'engaged_users', 'post_consumers', - 'post_consumptions', 'post_impressions', 'post_reach', - 'post_engagements', 'comments', 'shares', 'total_interactions'] - df = pd.read_csv(csv_file, sep=';', names=cols, header=0, - index_col=False) + cols = [ + "page_likes", + "post_type", + "post_category", + "post_month", + "post_weekday", + "post_hour", + "paid", + "total_reach", + "total_impressions", + "engaged_users", + "post_consumers", + "post_consumptions", + "post_impressions", + "post_reach", + "post_engagements", + "comments", + "shares", + "total_interactions", + ] + df = pd.read_csv(csv_file, sep=";", names=cols, header=0, index_col=False) # Numericalise post type post_types = list(df.post_type.unique()) - df['post_type'] = df.post_type.map(lambda txt: post_types.index(txt)) + df["post_type"] = df.post_type.map(lambda txt: post_types.index(txt)) return df diff --git a/doubt/datasets/fish_bioconcentration.py b/src/doubt/datasets/fish_bioconcentration.py similarity index 79% rename from doubt/datasets/fish_bioconcentration.py rename to src/doubt/datasets/fish_bioconcentration.py index 9b2dac5..9593319 100644 --- a/doubt/datasets/fish_bioconcentration.py +++ b/src/doubt/datasets/fish_bioconcentration.py @@ -1,19 +1,20 @@ -'''Fish bioconcentration data set. +"""Fish bioconcentration data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class FishBioconcentration(BaseDataset): - __doc__ = f''' + __doc__ = f""" This dataset contains manually-curated experimental bioconcentration factor (BCF) for 1058 molecules (continuous values). Each row contains a molecule, identified by a CAS number, a name (if available), and a SMILES @@ -105,75 +106,82 @@ class FishBioconcentration(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00511/QSAR_fish_BCF.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00511/QSAR_fish_BCF.zip" + ) _features = range(128) _targets = [128] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out the csv file - with zipfile.ZipFile(buffer, 'r') as zip_file: - csv = zip_file.read('QSAR_BCF_Kow.csv') + with zipfile.ZipFile(buffer, "r") as zip_file: + csv = zip_file.read("QSAR_BCF_Kow.csv") # Convert the string into a file-like object csv_file = io.BytesIO(csv) # Read the file-like object into a dataframe - cols = ['cas', 'name', 'smiles', 'logkow', 'kow_exp', 'logbcf'] + cols = ["cas", "name", "smiles", "logkow", "kow_exp", "logbcf"] df = pd.read_csv( csv_file, names=cols, header=0, - usecols=[col for col in cols if col not in ['cas', 'name']] + usecols=[col for col in cols if col not in ["cas", "name"]], ) # Drop NaNs df = df.dropna() # Encode KOW types - kow_types = ['pred', 'exp'] - df['kow_exp'] = df.kow_exp.map(lambda txt: kow_types.index(txt)) + kow_types = ["pred", "exp"] + df["kow_exp"] = df.kow_exp.map(lambda txt: kow_types.index(txt)) # Get maximum SMILE string length and pull out all the SMILE string # symbols, along with a '-' symbol for padding max_smile = max(len(smile_string) for smile_string in df.smiles) - smile_symbols = ['x'] + sorted({symbol for smile_string in df.smiles - for symbol in set(smile_string)}) + smile_symbols = ["x"] + sorted( + {symbol for smile_string in df.smiles for symbol in set(smile_string)} + ) # Pad SMILE strings - df['smiles'] = [smile_string + 'x' * (max_smile - len(smile_string)) - for smile_string in df.smiles] + df["smiles"] = [ + smile_string + "x" * (max_smile - len(smile_string)) + for smile_string in df.smiles + ] # Encode SMILE strings for idx in range(max_smile): + def fix_smiles(txt: str): return smile_symbols.index(txt[idx]) - df[f'smiles_{idx}'] = df.smiles.map(fix_smiles) + + df[f"smiles_{idx}"] = df.smiles.map(fix_smiles) # Drop original SMILE feature - df = df.drop(columns='smiles') + df = df.drop(columns="smiles") # Put the target variable at the end - cols = ['logkow', 'kow_exp'] - cols += [f'smiles_{idx}' for idx in range(max_smile)] - cols += ['logbcf'] + cols = ["logkow", "kow_exp"] + cols += [f"smiles_{idx}" for idx in range(max_smile)] + cols += ["logbcf"] df = df[cols] # Ensure that the `logkow` column is numeric - df['logkow'] = pd.to_numeric(df.logkow) + df["logkow"] = pd.to_numeric(df.logkow) return df diff --git a/doubt/datasets/fish_toxicity.py b/src/doubt/datasets/fish_toxicity.py similarity index 84% rename from doubt/datasets/fish_toxicity.py rename to src/doubt/datasets/fish_toxicity.py index e6c5923..d4ec9e9 100644 --- a/doubt/datasets/fish_toxicity.py +++ b/src/doubt/datasets/fish_toxicity.py @@ -1,18 +1,19 @@ -'''Fish toxicity data set. +"""Fish toxicity data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class FishToxicity(BaseDataset): - __doc__ = f''' + __doc__ = f""" This dataset was used to develop quantitative regression QSAR models to predict acute aquatic toxicity towards the fish Pimephales promelas (fathead minnow) on a set of 908 chemicals. 
LC50 data, which is the @@ -68,29 +69,30 @@ class FishToxicity(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00504/qsar_fish_toxicity.csv') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00504/qsar_fish_toxicity.csv" + ) _features = range(6) _targets = [6] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) # Read the file-like object into a dataframe - cols = ['CIC0', 'SM1_Dz(Z)', 'GATS1i', 'NdsCH', - 'NdssC', 'MLOGP', 'LC50'] - df = pd.read_csv(csv_file, sep=';', header=None, names=cols) + cols = ["CIC0", "SM1_Dz(Z)", "GATS1i", "NdsCH", "NdssC", "MLOGP", "LC50"] + df = pd.read_csv(csv_file, sep=";", header=None, names=cols) return df diff --git a/doubt/datasets/forest_fire.py b/src/doubt/datasets/forest_fire.py similarity index 81% rename from doubt/datasets/forest_fire.py rename to src/doubt/datasets/forest_fire.py index 93ffc29..8629a31 100644 --- a/doubt/datasets/forest_fire.py +++ b/src/doubt/datasets/forest_fire.py @@ -1,18 +1,19 @@ -'''Forest fire data set. +"""Forest fire data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class ForestFire(BaseDataset): - __doc__ = f''' + __doc__ = f""" This is a difficult regression task, where the aim is to predict the burned area of forest fires, in the northeast region of Portugal, by using meteorological and other data. @@ -83,23 +84,25 @@ class ForestFire(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'forest-fires/forestfires.csv') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "forest-fires/forestfires.csv" + ) _features = range(12) _targets = [12] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) @@ -107,12 +110,24 @@ def _prep_data(self, data: bytes) -> pd.DataFrame: df = pd.read_csv(csv_file) # Encode month - months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', - 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] - df['month'] = df.month.map(lambda string: months.index(string)) + months = [ + "jan", + "feb", + "mar", + "apr", + "may", + "jun", + "jul", + "aug", + "sep", + "oct", + "nov", + "dec", + ] + df["month"] = df.month.map(lambda string: months.index(string)) # Encode day - weekdays = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - df['day'] = df.day.map(lambda string: weekdays.index(string)) + weekdays = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] + df["day"] = df.day.map(lambda string: weekdays.index(string)) return df diff --git a/doubt/datasets/gas_turbine.py b/src/doubt/datasets/gas_turbine.py similarity index 83% rename from doubt/datasets/gas_turbine.py rename to src/doubt/datasets/gas_turbine.py index b46ce9e..4aa0811 100644 --- a/doubt/datasets/gas_turbine.py +++ b/src/doubt/datasets/gas_turbine.py @@ -1,19 +1,20 @@ -'''Gas turbine data set. +"""Gas turbine data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class GasTurbine(BaseDataset): - __doc__ = f''' + __doc__ = f""" Data have been generated from a sophisticated simulator of a Gas Turbines (GT), mounted on a Frigate characterized by a COmbined Diesel eLectric And Gas (CODLAG) propulsion plant type. @@ -125,48 +126,63 @@ class GasTurbine(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00316/UCI%20CBM%20Dataset.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00316/UCI%20CBM%20Dataset.zip" + ) _features = range(16) _targets = [16, 17] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out the txt file - with zipfile.ZipFile(buffer, 'r') as zip_file: - txt = zip_file.read('UCI CBM Dataset/data.txt') + with zipfile.ZipFile(buffer, "r") as zip_file: + txt_bytes = zip_file.read("UCI CBM Dataset/data.txt") # Decode text and replace initial space on each line - txt = txt[3:].decode('utf-8').replace('\n ', '\n') + txt = txt_bytes[3:].decode("utf-8").replace("\n ", "\n") # Convert the remaining triple spaces into commas, to make loading # it as a csv file easier - txt = txt.replace(' ', ',') + txt = txt.replace(" ", ",") # Convert the string into a file-like object csv_file = io.StringIO(txt) # Read the file-like object into a dataframe - cols = ['lever_position', 'ship_speed', 'shaft_torque', - 'turbine_revolution_rate', 'generator_revolution_rate', - 'starboard_propeller_torque', 'port_propeller_torque', - 'turbine_exit_temp', 'inlet_temp', 'outlet_temp', - 'turbine_exit_pres', 'inlet_pres', 'outlet_pres', - 'exhaust_pres', 'turbine_injection_control', 'fuel_flow', - 'compressor_decay', 'turbine_decay'] + cols = [ + "lever_position", + "ship_speed", + "shaft_torque", + "turbine_revolution_rate", + "generator_revolution_rate", + "starboard_propeller_torque", + "port_propeller_torque", + "turbine_exit_temp", + "inlet_temp", + "outlet_temp", + "turbine_exit_pres", + "inlet_pres", + "outlet_pres", + "exhaust_pres", + "turbine_injection_control", + "fuel_flow", + "compressor_decay", + "turbine_decay", + ] df = pd.read_csv(csv_file, header=None, names=cols) return df diff --git a/doubt/datasets/nanotube.py b/src/doubt/datasets/nanotube.py similarity index 92% rename from doubt/datasets/nanotube.py rename to src/doubt/datasets/nanotube.py index 537cd48..718bfba 100644 --- a/doubt/datasets/nanotube.py +++ b/src/doubt/datasets/nanotube.py @@ -1,18 +1,19 @@ -'''Nanotube data set. +"""Nanotube data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Nanotube(BaseDataset): - __doc__ = f''' + __doc__ = f""" CASTEP can simulate a wide range of properties of materials proprieties using density functional theory (DFT). DFT is the most successful method calculates atomic coordinates faster than other mathematical approaches, @@ -90,26 +91,28 @@ class Nanotube(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00448/carbon_nanotubes.csv') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00448/carbon_nanotubes.csv" + ) _features = range(5) _targets = [5, 6, 7] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) # Read the file-like object into a dataframe - df = pd.read_csv(csv_file, sep=';', decimal=',') + df = pd.read_csv(csv_file, sep=";", decimal=",") return df diff --git a/doubt/datasets/new_taipei_housing.py b/src/doubt/datasets/new_taipei_housing.py similarity index 81% rename from doubt/datasets/new_taipei_housing.py rename to src/doubt/datasets/new_taipei_housing.py index 27351c2..e1a35bf 100644 --- a/doubt/datasets/new_taipei_housing.py +++ b/src/doubt/datasets/new_taipei_housing.py @@ -1,18 +1,19 @@ -'''New Taipei Housing data set. +"""New Taipei Housing data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class NewTaipeiHousing(BaseDataset): - __doc__ = f''' + __doc__ = f""" The "real estate valuation" is a regression problem. The market historical data set of real estate valuation are collected from Sindian Dist., New Taipei City, Taiwan. @@ -66,29 +67,39 @@ class NewTaipeiHousing(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00477/Real%20estate%20valuation%20data%20set.xlsx') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00477/Real%20estate%20valuation%20data%20set.xlsx" + ) _features = range(6) _targets = [6] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object xlsx_file = io.BytesIO(data) # Load in the dataframe - cols = ['idx', 'transaction_date', 'house_age', 'mrt_distance', - 'n_stores', 'lat', 'lng', 'house_price'] + cols = [ + "idx", + "transaction_date", + "house_age", + "mrt_distance", + "n_stores", + "lat", + "lng", + "house_price", + ] df = pd.read_excel(xlsx_file, header=0, names=cols) # Remove the index diff --git a/doubt/datasets/parkinsons.py b/src/doubt/datasets/parkinsons.py similarity index 90% rename from doubt/datasets/parkinsons.py rename to src/doubt/datasets/parkinsons.py index fc5dae3..a81b592 100644 --- a/doubt/datasets/parkinsons.py +++ b/src/doubt/datasets/parkinsons.py @@ -1,18 +1,19 @@ -'''Parkinsons data set. +"""Parkinsons data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. 
-''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Parkinsons(BaseDataset): - __doc__ = f''' + __doc__ = f""" This dataset is composed of a range of biomedical voice measurements from 42 people with early-stage Parkinson's disease recruited to a six-month trial of a telemonitoring device for remote symptom progression @@ -105,23 +106,25 @@ class Parkinsons(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'parkinsons/telemonitoring/parkinsons_updrs.data') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "parkinsons/telemonitoring/parkinsons_updrs.data" + ) _features = range(20) _targets = [20, 21] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) @@ -129,7 +132,7 @@ def _prep_data(self, data: bytes) -> pd.DataFrame: df = pd.read_csv(csv_file, header=0) # Put target columns at the end - cols = [col for col in df.columns if col[-5:] != 'UPDRS'] - df = df[cols + ['motor_UPDRS', 'total_UPDRS']] + cols = [col for col in df.columns if col[-5:] != "UPDRS"] + df = df[cols + ["motor_UPDRS", "total_UPDRS"]] return df diff --git a/doubt/datasets/power_plant.py b/src/doubt/datasets/power_plant.py similarity index 90% rename from doubt/datasets/power_plant.py rename to src/doubt/datasets/power_plant.py index 5d9e7cd..ca19c50 100644 --- a/doubt/datasets/power_plant.py +++ b/src/doubt/datasets/power_plant.py @@ -1,19 +1,20 @@ -'''Power plant data set. +"""Power plant data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class PowerPlant(BaseDataset): - __doc__ = f''' + __doc__ = f""" The dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the power plant was set to work with full load. Features consist of hourly average ambient variables @@ -80,30 +81,29 @@ class PowerPlant(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00294/CCPP.zip') + _url = "https://archive.ics.uci.edu/ml/machine-learning-databases/" "00294/CCPP.zip" _features = range(4) _targets = [4] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out the xlsx file - with zipfile.ZipFile(buffer, 'r') as zip_file: - xlsx = zip_file.read('CCPP/Folds5x2_pp.xlsx') + with zipfile.ZipFile(buffer, "r") as zip_file: + xlsx = zip_file.read("CCPP/Folds5x2_pp.xlsx") # Convert the xlsx bytes into a file-like object xlsx_file = io.BytesIO(xlsx) diff --git a/doubt/datasets/protein.py b/src/doubt/datasets/protein.py similarity index 88% rename from doubt/datasets/protein.py rename to src/doubt/datasets/protein.py index c81565b..d7be651 100644 --- a/doubt/datasets/protein.py +++ b/src/doubt/datasets/protein.py @@ -1,18 +1,19 @@ -'''Protein data set. +"""Protein data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Protein(BaseDataset): - __doc__ = f''' + __doc__ = f""" This is a data set of Physicochemical Properties of Protein Tertiary Structure. The data set is taken from CASP 5-9. There are 45730 decoys and size varying from 0 to 21 armstrong. @@ -71,23 +72,22 @@ class Protein(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00265/CASP.csv') + _url = "https://archive.ics.uci.edu/ml/machine-learning-databases/" "00265/CASP.csv" _features = range(9) _targets = [9] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) @@ -95,6 +95,6 @@ def _prep_data(self, data: bytes) -> pd.DataFrame: df = pd.read_csv(csv_file) # Put the target column at the end - df = df[[f'F{i}' for i in range(1, 10)] + ['RMSD']] + df = df[[f"F{i}" for i in range(1, 10)] + ["RMSD"]] return df diff --git a/doubt/datasets/servo.py b/src/doubt/datasets/servo.py similarity index 85% rename from doubt/datasets/servo.py rename to src/doubt/datasets/servo.py index b81dc41..550b69d 100644 --- a/doubt/datasets/servo.py +++ b/src/doubt/datasets/servo.py @@ -1,18 +1,19 @@ -'''Servo data set. +"""Servo data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Servo(BaseDataset): - __doc__ = f''' + __doc__ = f""" Data was from a simulation of a servo system. Ross Quinlan: @@ -78,33 +79,34 @@ class (float): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'servo/servo.data') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" "servo/servo.data" + ) _features = range(4) _targets = [4] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) # Load in the dataframe - cols = ['motor', 'screw', 'pgain', 'vgain', 'class'] + cols = ["motor", "screw", "pgain", "vgain", "class"] df = pd.read_csv(csv_file, names=cols) # Encode motor and screw - codes = ['A', 'B', 'C', 'D', 'E'] - df['motor'] = df.motor.map(lambda x: codes.index(x)) - df['screw'] = df.screw.map(lambda x: codes.index(x)) + codes = ["A", "B", "C", "D", "E"] + df["motor"] = df.motor.map(lambda x: codes.index(x)) + df["screw"] = df.screw.map(lambda x: codes.index(x)) return df diff --git a/doubt/datasets/solar_flare.py b/src/doubt/datasets/solar_flare.py similarity index 78% rename from doubt/datasets/solar_flare.py rename to src/doubt/datasets/solar_flare.py index bc6e113..615bbb1 100644 --- a/doubt/datasets/solar_flare.py +++ b/src/doubt/datasets/solar_flare.py @@ -1,18 +1,19 @@ -'''Solar flare data set. +"""Solar flare data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class SolarFlare(BaseDataset): - __doc__ = f''' + __doc__ = f""" Each class attribute counts the number of solar flares of a certain class that occur in a 24 hour period. @@ -92,42 +93,56 @@ class (int): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'solar-flare/flare.data2') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "solar-flare/flare.data2" + ) _features = range(10) _targets = range(10, 13) def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object csv_file = io.BytesIO(data) # Load in dataframe - cols = ['class', 'spot_size', 'spot_distr', 'activity', 'evolution', - 'flare_activity', 'is_complex', 'became_complex', 'large', - 'large_spot', 'C-class', 'M-class', 'X-class'] - df = pd.read_csv(csv_file, sep=' ', skiprows=[0], names=cols) + cols = [ + "class", + "spot_size", + "spot_distr", + "activity", + "evolution", + "flare_activity", + "is_complex", + "became_complex", + "large", + "large_spot", + "C-class", + "M-class", + "X-class", + ] + df = pd.read_csv(csv_file, sep=" ", skiprows=[0], names=cols) # Encode class - encodings = ['A', 'B', 'C', 'D', 'E', 'F', 'H'] - df['class'] = df['class'].map(lambda x: encodings.index(x)) + encodings = ["A", "B", "C", "D", "E", "F", "H"] + df["class"] = df["class"].map(lambda x: encodings.index(x)) # Encode spot size - encodings = ['X', 'R', 'S', 'A', 'H', 'K'] - df['spot_size'] = df.spot_size.map(lambda x: encodings.index(x)) + encodings = ["X", "R", "S", "A", "H", "K"] + df["spot_size"] = df.spot_size.map(lambda x: encodings.index(x)) # Encode spot distribution - encodings = ['X', 'O', 'I', 'C'] - df['spot_distr'] = df.spot_distr.map(lambda x: encodings.index(x)) + encodings = ["X", "O", "I", "C"] + df["spot_distr"] = df.spot_distr.map(lambda x: encodings.index(x)) return df diff --git a/doubt/datasets/space_shuttle.py b/src/doubt/datasets/space_shuttle.py similarity index 87% rename from doubt/datasets/space_shuttle.py rename to src/doubt/datasets/space_shuttle.py index 46e6b16..e1cd9fd 100644 --- a/doubt/datasets/space_shuttle.py +++ b/src/doubt/datasets/space_shuttle.py @@ -1,19 +1,20 @@ -'''Space shuttle data set. +"""Space shuttle data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION - -import pandas as pd import io import re +import pandas as pd + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset + class SpaceShuttle(BaseDataset): - __doc__ = f''' + __doc__ = f""" The motivation for collecting this database was the explosion of the USA Space Shuttle Challenger on 28 January, 1986. An investigation ensued into the reliability of the shuttle's propulsion system. The explosion was @@ -88,34 +89,36 @@ class SpaceShuttle(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - 'space-shuttle/o-ring-erosion-only.data') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "space-shuttle/o-ring-erosion-only.data" + ) _features = range(4) _targets = [4] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Collapse whitespace - processed_data = re.sub(r' +', ' ', data.decode('utf-8')) + processed_data = re.sub(r" +", " ", data.decode("utf-8")) # Convert the bytes into a file-like object csv_file = io.StringIO(processed_data) # Load in dataframe - cols = ['n_risky_rings', 'n_distressed_rings', 'temp', 'pres', 'idx'] - df = pd.read_csv(csv_file, sep=' ', names=cols) + cols = ["n_risky_rings", "n_distressed_rings", "temp", "pres", "idx"] + df = pd.read_csv(csv_file, sep=" ", names=cols) # Reorder columns - df = df[['idx', 'temp', 'pres', 'n_risky_rings', 'n_distressed_rings']] + df = df[["idx", "temp", "pres", "n_risky_rings", "n_distressed_rings"]] return df diff --git a/doubt/datasets/stocks.py b/src/doubt/datasets/stocks.py similarity index 80% rename from doubt/datasets/stocks.py rename to src/doubt/datasets/stocks.py index e9bc18b..29c219e 100644 --- a/doubt/datasets/stocks.py +++ b/src/doubt/datasets/stocks.py @@ -1,18 +1,19 @@ -'''Stocks data set. +"""Stocks data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Stocks(BaseDataset): - __doc__ = f''' + __doc__ = f""" There are three disadvantages of weighted scoring stock selection models. First, they cannot identify the relations between weights of stock-picking concepts and performances of portfolios. Second, they cannot systematically @@ -108,35 +109,54 @@ class Stocks(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00390/stock%20portfolio%20performance%20data%20set.xlsx') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00390/stock%20portfolio%20performance%20data%20set.xlsx" + ) _features = range(12) _targets = range(12, 18) def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object xlsx_file = io.BytesIO(data) # Load in the dataframes - cols = ['id', 'bp', 'roe', 'sp', 'return_rate', 'market_value', - 'small_risk', 'orig_annual_return', 'orig_excess_return', - 'orig_risk', 'orig_total_risk', 'orig_abs_win_rate', - 'orig_rel_win_rate', 'annual_return', 'excess_return', - 'risk', 'total_risk', 'abs_win_rate', 'rel_win_rate'] - sheets = ['1st period', '2nd period', '3rd period', '4th period'] - dfs = pd.read_excel(xlsx_file, sheet_name=sheets, names=cols, - skiprows=[0, 1], header=None) + cols = [ + "id", + "bp", + "roe", + "sp", + "return_rate", + "market_value", + "small_risk", + "orig_annual_return", + "orig_excess_return", + "orig_risk", + "orig_total_risk", + "orig_abs_win_rate", + "orig_rel_win_rate", + "annual_return", + "excess_return", + "risk", + "total_risk", + "abs_win_rate", + "rel_win_rate", + ] + sheets = ["1st period", "2nd period", "3rd period", "4th period"] + dfs = pd.read_excel( + xlsx_file, sheet_name=sheets, names=cols, skiprows=[0, 1], header=None + ) # Concatenate the dataframes df = pd.concat([dfs[sheet] for sheet in sheets], ignore_index=True) diff --git a/doubt/datasets/superconductivity.py b/src/doubt/datasets/superconductivity.py similarity index 92% rename from doubt/datasets/superconductivity.py rename to src/doubt/datasets/superconductivity.py index e7ab62c..7225fa8 100644 --- a/doubt/datasets/superconductivity.py +++ b/src/doubt/datasets/superconductivity.py @@ -1,19 +1,20 @@ -'''Superconductivity data set. +"""Superconductivity data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io +import zipfile import pandas as pd -import zipfile -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Superconductivity(BaseDataset): - __doc__ = f''' + __doc__ = f""" This dataset contains data on 21,263 superconductors and their relevant features. The goal here is to predict the critical temperature based on the features extracted. @@ -134,29 +135,31 @@ class Superconductivity(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00464/superconduct.zip') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00464/superconduct.zip" + ) _features = range(81) _targets = [81] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object buffer = io.BytesIO(data) # Unzip the file and pull out the text - with zipfile.ZipFile(buffer, 'r') as zip_file: - txt = zip_file.read('train.csv') + with zipfile.ZipFile(buffer, "r") as zip_file: + txt = zip_file.read("train.csv") # Convert text to csv file csv_file = io.BytesIO(txt) diff --git a/doubt/datasets/tehran_housing.py b/src/doubt/datasets/tehran_housing.py similarity index 82% rename from doubt/datasets/tehran_housing.py rename to src/doubt/datasets/tehran_housing.py index 7638323..23e8183 100644 --- a/doubt/datasets/tehran_housing.py +++ b/src/doubt/datasets/tehran_housing.py @@ -1,18 +1,19 @@ -'''Tehran housing data set. +"""Tehran housing data set. 
This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class TehranHousing(BaseDataset): - __doc__ = f''' + __doc__ = f""" Data set includes construction cost, sale prices, project variables, and economic variables corresponding to real estate single-family residential apartments in Tehran, Iran. @@ -73,31 +74,34 @@ class TehranHousing(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00437/Residential-Building-Data-Set.xlsx') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00437/Residential-Building-Data-Set.xlsx" + ) _features = range(107) _targets = [107, 108] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object xlsx_file = io.BytesIO(data) # Load it into dataframe - cols = ['start_year', 'start_quarter', 'completion_year', - 'completion_quarter'] + \ - [f'V-{i}' for i in range(1, 9)] + \ - [f'V-{i}-{j}' for j in range(1, 6) for i in range(11, 30)] + \ - ['construction_cost', 'sale_price'] + cols = ( + ["start_year", "start_quarter", "completion_year", "completion_quarter"] + + [f"V-{i}" for i in range(1, 9)] + + [f"V-{i}-{j}" for j in range(1, 6) for i in range(11, 30)] + + ["construction_cost", "sale_price"] + ) df = pd.read_excel(xlsx_file, skiprows=[0, 1], names=cols) return df diff --git a/doubt/datasets/yacht.py b/src/doubt/datasets/yacht.py similarity index 83% rename from doubt/datasets/yacht.py rename to src/doubt/datasets/yacht.py index eb6cf7f..3d2472d 100644 --- a/doubt/datasets/yacht.py +++ b/src/doubt/datasets/yacht.py @@ -1,18 +1,19 @@ -'''Yacht data set. +"""Yacht data set. This data set is from the UCI data set archive, with the description being the original description verbatim. Some feature names may have been altered, based on the description. -''' +""" -from ._dataset import BaseDataset, BASE_DATASET_DESCRIPTION +import io import pandas as pd -import io + +from ._dataset import BASE_DATASET_DESCRIPTION, BaseDataset class Yacht(BaseDataset): - __doc__ = f''' + __doc__ = f""" Prediction of residuary resistance of sailing yachts at the initial design stage is of a great value for evaluating the ship's performance and for estimating the required propulsive power. Essential inputs include the @@ -72,28 +73,37 @@ class Yacht(BaseDataset): >>> df = dataset.to_pandas() >>> type(df) - ''' + """ - _url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/' - '00243/yacht_hydrodynamics.data') + _url = ( + "https://archive.ics.uci.edu/ml/machine-learning-databases/" + "00243/yacht_hydrodynamics.data" + ) _features = range(6) _targets = [6] def _prep_data(self, data: bytes) -> pd.DataFrame: - ''' Prepare the data set. + """Prepare the data set. 
Args: data (bytes): The raw data Returns: Pandas dataframe: The prepared data - ''' + """ # Convert the bytes into a file-like object txt_file = io.BytesIO(data) # Load it into dataframe - cols = ['pos', 'prismatic', 'displacement', 'beam_draught', - 'length_beam', 'froude_no', 'resistance'] - df = pd.read_csv(txt_file, header=None, sep=' ', names=cols) + cols = [ + "pos", + "prismatic", + "displacement", + "beam_draught", + "length_beam", + "froude_no", + "resistance", + ] + df = pd.read_csv(txt_file, header=None, sep=" ", names=cols) return df diff --git a/doubt/models/__init__.py b/src/doubt/models/__init__.py similarity index 100% rename from doubt/models/__init__.py rename to src/doubt/models/__init__.py index a15dcac..087167d 100644 --- a/doubt/models/__init__.py +++ b/src/doubt/models/__init__.py @@ -1,4 +1,4 @@ from .boot import Boot # noqa from .glm import QuantileRegressor # noqa -from .tree import QuantileRegressionTree # noqa from .tree import QuantileRegressionForest # noqa +from .tree import QuantileRegressionTree # noqa diff --git a/src/doubt/models/boot/__init__.py b/src/doubt/models/boot/__init__.py new file mode 100644 index 0000000..0c359d3 --- /dev/null +++ b/src/doubt/models/boot/__init__.py @@ -0,0 +1 @@ +from .boot import Boot # noqa diff --git a/src/doubt/models/boot/boot.py b/src/doubt/models/boot/boot.py new file mode 100644 index 0000000..71288f7 --- /dev/null +++ b/src/doubt/models/boot/boot.py @@ -0,0 +1,367 @@ +"""Bootstrap wrapper for datasets and models""" + +import copy +from types import MethodType +from typing import Callable, Optional, Sequence, Tuple, Union + +import numpy as np + + +class Boot: + """Bootstrap wrapper for datasets and models. + + Datasets can be any sequence of numeric input, from which bootstrapped + statistics can be calculated, with confidence intervals included. + + The models can be any model that is either callable or equipped with + a `predict` method, such as all the models in `scikit-learn`, `pytorch` + and `tensorflow`, and the bootstrapped model can then produce predictions + with prediction intervals. + + The bootstrapped prediction intervals are computed using the an extension + of method from [2] which also takes validation error into account. To + remedy this, the .632+ bootstrap estimate from [1] has been used. Read + more in [3]. + + Args: + input (float array or model): + Either a dataset to calculate bootstrapped statistics on, or an + model for which bootstrapped predictions will be computed. + random_seed (float or None): + The random seed used for bootstrapping. If set to None then no + seed will be set. Defaults to None. 
+ + Examples: + Compute the bootstrap distribution of the mean, with a 95% confidence + interval:: + + >>> from doubt.datasets import FishToxicity + >>> X, y = FishToxicity().split() + >>> boot = Boot(y, random_seed=42) + >>> boot.compute_statistic(np.mean) + (4.064430616740088, array([3.97621225, 4.16582087])) + + Alternatively, we can output the whole bootstrap distribution:: + + >>> boot.compute_statistic(np.mean, n_boots=3, return_all=True) + (4.064430616740088, array([4.05705947, 4.06197577, 4.05728414])) + + Wrap a scikit-learn model and get prediction intervals:: + + >>> from sklearn.linear_model import LinearRegression + >>> from doubt.datasets import PowerPlant + >>> X, y = PowerPlant().split() + >>> linreg = Boot(LinearRegression(), random_seed=42) + >>> linreg = linreg.fit(X, y) + >>> linreg.predict([10, 30, 1000, 50], uncertainty=0.05) + (481.99688920651676, array([473.50425407, 490.14061895])) + + Sources: + [1]: Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements + of statistical learning (Vol. 1, No. 10). New York: Springer + series in statistics. + [2]: Kumar, S., & Srivistava, A. N. (2012). Bootstrap prediction + intervals in non-parametric regression with applications to + anomaly detection. + [3]: https://saattrupdan.github.io/2020-03-01-bootstrap-prediction/ + """ + + def __init__(self, input: object, random_seed: Optional[float] = None): + self.random_seed = random_seed + + # Input is a model + if callable(input) or hasattr(input, "predict"): + self._model = input + self._model_fit_predict = MethodType(_model_fit_predict, self) + self.fit = MethodType(fit, self) + self.predict = MethodType(predict, self) + type(self).__repr__ = MethodType(_model_repr, self) # type: ignore + + # Input is a dataset + elif hasattr(input, "__getitem__"): + self.data = np.asarray(input) + self.compute_statistic = MethodType(compute_statistic, self) + type(self).__repr__ = MethodType(_dataset_repr, self) # type: ignore + + else: + raise TypeError("Input not recognised.") + + +def _model_fit_predict( + self, X_train: np.ndarray, y_train: np.ndarray, X_test: np.ndarray +) -> np.ndarray: + """Fit the underlying model and perform predictions with it. + + This requires `self._model` to be set and that it is either callable or + have a `predict` method. + + Args: + X_train (float matrix): + Feature matrix for training, of shape + (n_train_samples, n_features). + y_train (float array): + Target array, of shape (n_train_samples,). + X_test (float matrix): + Feature matrix for predicting, of shape + (n_test_samples, n_features). + + Returns: + Numpy array: + Predictions, of shape (n_test_samples,) + """ + model = copy.deepcopy(self._model) + model.fit(X_train, y_train) + if callable(model): + return model(X_test) + else: + return model.predict(X_test) + + +def _dataset_repr(self) -> str: + return f"Boot(dataset_shape={self.data.shape}, " f"random_seed={self.random_seed})" + + +def _model_repr(self) -> str: + model_name = self._model.__class__.__name__ + return f"Boot(model={model_name}, random_seed={self.random_seed})" + + +def compute_statistic( + self, + statistic: Callable[[np.ndarray], float], + n_boots: Optional[int] = None, + uncertainty: float = 0.05, + quantiles: Optional[Sequence[float]] = None, + return_all: bool = False, +) -> Union[float, Tuple[float, np.ndarray]]: + """Compute bootstrapped statistic. + + Args: + statistic (numeric array -> float): + The statistic to be computed on bootstrapped samples. + n_boots (int or None): + The number of resamples to bootstrap. 
If None then it is set + to the square root of the data set. Defaults to None + uncertainty (float): + The uncertainty used to compute the confidence interval + of the bootstrapped statistic. Not used if `return_all` is + set to True or if `quantiles` is not None. Defaults to 0.05. + quantiles (sequence of floats or None, optional): + List of quantiles to output, as an alternative to the + `uncertainty` argument, and will not be used if that argument + is set. If None then `uncertainty` is used. Defaults to None. + return_all (bool): + Whether all bootstrapped statistics should be returned instead + of the confidence interval. Defaults to False. + + Returns: + a float or a pair of a float and an array of floats: + The statistic, and if `uncertainty` is set then also + the confidence interval, or if `quantiles` is set then also the + specified quantiles, or if `return_all` is set then also all of the + bootstrapped statistics. + """ + # Initialise random number generator + rng = np.random.default_rng(self.random_seed) + + # Compute the statistic + stat = statistic(self.data) + + # Get the number of data points + n = self.data.shape[0] + + # Set default value of the number of bootstrap samples if `n_boots` is not + # set + if n_boots is None: + n_boots = np.sqrt(n).astype(int) + + # Compute the bootstrapped statistics + statistics = np.empty((n_boots,), dtype=float) + for b in range(n_boots): + boot_idxs = rng.choice(range(n), size=n, replace=True) + statistics[b] = statistic(self.data[boot_idxs]) + + if return_all: + return stat, statistics + else: + # If uncertainty is set then set `quantiles` to be the two ends of the + # confidence interval + if uncertainty is not None: + quantiles = [uncertainty / 2, 1.0 - (uncertainty / 2)] + else: + quantiles = list(quantiles) + + # Compute the quantile values + quantile_vals = np.quantile(statistics, q=quantiles) + return stat, quantile_vals + + +def predict( + self, + X: np.ndarray, + n_boots: Optional[int] = None, + uncertainty: Optional[float] = None, + quantiles: Optional[Sequence[float]] = None, +) -> Tuple[Union[float, np.ndarray], np.ndarray]: + """Compute bootstrapped predictions. + + Args: + X (float array): + The array containing the data set, either of shape (f,) + or (n, f), with n being the number of samples and f being + the number of features. + n_boots (int or None, optional): + The number of resamples to bootstrap. If None then it is set + to the square root of the data set. Defaults to None + uncertainty (float or None, optional): + The uncertainty used to compute the prediction interval + of the bootstrapped prediction. If None then no prediction + intervals are returned. Defaults to None. + quantiles (sequence of floats or None, optional): + List of quantiles to output, as an alternative to the + `uncertainty` argument, and will not be used if that argument + is set. If None then `uncertainty` is used. Defaults to None. + + Returns: + float array or pair of float arrays: + The bootstrapped predictions, and the confidence intervals if + `uncertainty` is not None, or the specified quantiles if + `quantiles` is not None. 
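Concretely, the interval construction implemented below works as follows: the bootstrapped predictions are centred across the bootstrap dimension, every stored residual (the .632+-weighted blend of training and validation residuals computed in fit()) is added to every centred prediction, the requested quantiles of that combined distribution are taken, and the result is offset by the point prediction, i.e. roughly interval(x) ≈ ŷ(x) + quantile_q{(m_b(x) − mean_b m_b(x)) + r_i} over all bootstrap models b and residuals i.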
+ """ + # Initialise random number generator + rng = np.random.default_rng(self.random_seed) + + # Ensure that input feature matrix is a Numpy array + X = np.asarray(X) + + # If `X` is one-dimensional then expand it to two dimensions and save the + # information, so that we can ensure the output is also one-dimensional + onedim = len(X.shape) == 1 + if onedim: + X = np.expand_dims(X, 0) + + # Get the full non-bootstrapped predictions of `X` + preds = self._model(X) if callable(self._model) else self._model.predict(X) + + # If no quantiles should be outputted then simply return the predictions + # of the underlying model + if uncertainty is None and quantiles is None: + return preds + + # Ensure that the underlying model has been fitted before predicting. This + # is only a requirement if `uncertainty` is set, as we need access to + # `self.X_train` + if not hasattr(self, "X_train") or self.X_train is None: + raise RuntimeError( + "This model has not been fitted yet! Call fit() " + "before predicting new samples." + ) + + # Store the number of data points in the training and test datasets + n_train = self.X_train.shape[0] + n_test = X.shape[0] + + # The authors chose the number of bootstrap samples as the square root of + # the number of samples in the training dataset + if n_boots is None: + n_boots = np.sqrt(n_train).astype(int) + + # Compute the m_i's and the validation residuals + bootstrap_preds = np.empty((n_boots, n_test)) + for boot_idx in range(n_boots): + train_idxs = rng.choice(range(n_train), size=n_train, replace=True) + X_train = self.X_train[train_idxs, :] + y_train = self.y_train[train_idxs] + + bootstrap_pred = self._model_fit_predict(X_train, y_train, X) + bootstrap_preds[boot_idx] = bootstrap_pred + + # Centre the bootstrapped predictions across the bootstrap dimension + bootstrap_preds -= np.mean(bootstrap_preds, axis=0) + + # Add up the bootstrap predictions and the hybrid train/val residuals + C = np.array([m + o for m in bootstrap_preds for o in self.residuals]) + + # Calculate the desired quantiles + if quantiles is None and uncertainty is not None: + quantiles = [uncertainty / 2, 1 - uncertainty / 2] + quantile_vals = np.transpose(np.quantile(C, q=quantiles or [], axis=0)) + + # Return the predictions and the desired quantiles + if onedim: + return preds[0], (preds + quantile_vals)[0] + else: + return preds, np.expand_dims(preds, axis=1) + quantile_vals + + +def fit(self, X: np.ndarray, y: np.ndarray, n_boots: Optional[int] = None): + """Fits the model to the data. + + Args: + X (float array): + The array containing the data set, either of shape (f,) + or (n, f), with n being the number of samples and f being + the number of features. + y (float array): + The array containing the target values, of shape (n,) + n_boots (int or None): + The number of resamples to bootstrap. If None then it is set + to the square root of the data set. 
Defaults to None + """ + # Initialise random number generator + rng = np.random.default_rng(self.random_seed) + + # Set the number of data points in the dataset + n = X.shape[0] + + # Set default value of `n_boots` if it is not set + if n_boots is None: + n_boots = np.sqrt(n).astype(int) + + # Ensure that `X` and `y` are Numpy arrays + X = np.asarray(X) + y = np.asarray(y) + + # Store `X` and `y` for predictions + self.X_train = X + self.y_train = y + + # Fit the underlying model and get predictions on the training dataset + self._model.fit(X, y) + preds = self._model(X) if callable(self._model) else self._model.predict(X) + + # Calculate the training residuals and aggregate them into quantiles, to + # enable comparison with the validation residuals + train_residuals = np.quantile(y - preds, q=np.arange(0, 1, 0.01)) + + # Compute the m_i's and the validation residuals + val_residuals_list = [] + for _ in range(n_boots): + train_idxs = rng.choice(range(n), size=n, replace=True) + val_idxs = [idx for idx in range(n) if idx not in train_idxs] + + X_train = X[train_idxs, :] + y_train = y[train_idxs] + X_val = X[val_idxs, :] + y_val = y[val_idxs] + + boot_preds = self._model_fit_predict(X_train, y_train, X_val) + val_residuals_list.append(y_val - boot_preds) + + # Aggregate the validation residuals into quantiles, to enable comparison + # with the training residuals + val_residuals = np.concatenate(val_residuals_list) + val_residuals = np.quantile(val_residuals, q=np.arange(0, 1, 0.01)) + + # Compute the no-information value + permuted = rng.permutation(y) - rng.permutation(preds) + no_info_error = np.mean(np.abs(permuted)) + no_info_val = np.abs(no_info_error - train_residuals) + + # Compute the .632+ bootstrap estimate for the sample noise and bias + generalisation = np.abs(val_residuals.mean() - train_residuals.mean()) + relative_overfitting_rate = np.mean(generalisation / no_info_val) + weight = 0.632 / (1 - 0.368 * relative_overfitting_rate) + self.residuals = (1 - weight) * train_residuals + weight * val_residuals + + return self diff --git a/doubt/models/glm/__init__.py b/src/doubt/models/glm/__init__.py similarity index 100% rename from doubt/models/glm/__init__.py rename to src/doubt/models/glm/__init__.py diff --git a/doubt/models/glm/quantile_loss.py b/src/doubt/models/glm/quantile_loss.py similarity index 69% rename from doubt/models/glm/quantile_loss.py rename to src/doubt/models/glm/quantile_loss.py index 4a9bcac..7c6842b 100644 --- a/doubt/models/glm/quantile_loss.py +++ b/src/doubt/models/glm/quantile_loss.py @@ -1,13 +1,14 @@ -'''Implementation of the quantile loss function''' +"""Implementation of the quantile loss function""" from typing import Sequence + import numpy as np -def quantile_loss(predictions: Sequence[float], - targets: Sequence[float], - quantile: float) -> float: - '''Quantile loss function. +def quantile_loss( + predictions: Sequence[float], targets: Sequence[float], quantile: float +) -> float: + """Quantile loss function. Args: predictions (sequence of floats): @@ -19,7 +20,7 @@ def quantile_loss(predictions: Sequence[float], Returns: float: The quantile loss. 
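For reference, the loss computed below is the standard pinball loss: writing the residual as r = target − prediction and the quantile as q, each sample contributes q * max(r, 0) + (1 − q) * max(−r, 0), and the function returns the mean of these contributions over all samples. At q = 0.5 this reduces to half the mean absolute error.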
- ''' + """ # Convert inputs to NumPy arrays target_arr = np.asarray(targets) prediction_arr = np.asarray(predictions) @@ -28,18 +29,22 @@ def quantile_loss(predictions: Sequence[float], res = target_arr - prediction_arr # Compute the mean quantile loss - loss = np.mean(np.maximum(res, np.zeros_like(res)) * quantile + - np.maximum(-res, np.zeros_like(res)) * (1 - quantile)) + loss = np.mean( + np.maximum(res, np.zeros_like(res)) * quantile + + np.maximum(-res, np.zeros_like(res)) * (1 - quantile) + ) # Ensure that loss is of type float and return it return float(loss) -def smooth_quantile_loss(predictions: Sequence[float], - targets: Sequence[float], - quantile: float, - alpha: float = 0.4) -> float: - '''The smooth quantile loss function from [1]. +def smooth_quantile_loss( + predictions: Sequence[float], + targets: Sequence[float], + quantile: float, + alpha: float = 0.4, +) -> float: + """The smooth quantile loss function from [1]. Args: predictions (sequence of floats): @@ -58,7 +63,7 @@ def smooth_quantile_loss(predictions: Sequence[float], [1]: Songfeng Zheng (2011). Gradient Descent Algorithms for Quantile Regression With Smooth Approximation. International Journal of Machine Learning and Cybernetics. - ''' + """ # Convert inputs to NumPy arrays target_arr = np.asarray(targets) prediction_arr = np.asarray(predictions) @@ -67,8 +72,7 @@ def smooth_quantile_loss(predictions: Sequence[float], residuals = target_arr - prediction_arr # Compute the smoothened mean quantile loss - loss = (quantile * residuals + - alpha * np.log(1 + np.exp(-residuals / alpha))) + loss = quantile * residuals + alpha * np.log(1 + np.exp(-residuals / alpha)) # Ensure that loss is of type float and return it return float(loss.mean()) diff --git a/doubt/models/glm/quantile_regressor.py b/src/doubt/models/glm/quantile_regressor.py similarity index 71% rename from doubt/models/glm/quantile_regressor.py rename to src/doubt/models/glm/quantile_regressor.py index fdd30b6..cfac63f 100644 --- a/doubt/models/glm/quantile_regressor.py +++ b/src/doubt/models/glm/quantile_regressor.py @@ -1,23 +1,18 @@ -'''Quantile regression for generalised linear models''' +"""Quantile regression for generalised linear models""" -from .._model import BaseModel -from .quantile_loss import smooth_quantile_loss -from .quantile_loss import quantile_loss +from typing import Callable, Optional, Sequence, Tuple, Union -from sklearn.linear_model._glm import GeneralizedLinearRegressor -from sklearn.linear_model import LinearRegression -from sklearn.preprocessing import StandardScaler import numpy as np -from typing import Sequence, Tuple, Union, Optional from scipy.optimize import minimize +from sklearn.linear_model import LinearRegression +from sklearn.linear_model._glm import _GeneralizedLinearRegressor +from sklearn.preprocessing import StandardScaler - -FloatMatrix = Sequence[Sequence[float]] -FloatArray = Sequence[float] +from .quantile_loss import quantile_loss, smooth_quantile_loss -class QuantileRegressor(BaseModel): - '''Quantile regression for generalised linear models. +class QuantileRegressor: + """Quantile regression for generalised linear models. This uses BFGS optimisation of the smooth quantile loss from [1]. @@ -55,13 +50,16 @@ class QuantileRegressor(BaseModel): [1]: Songfeng Zheng (2011). Gradient Descent Algorithms for Quantile Regression With Smooth Approximation. International Journal of Machine Learning and Cybernetics. 
- ''' - def __init__(self, - model: Union[LinearRegression, GeneralizedLinearRegressor], - max_iter: Optional[int] = None, - uncertainty: float = 0.05, - quantiles: Optional[Sequence[float]] = None, - alpha: float = 0.4): + """ + + def __init__( + self, + model: Union[LinearRegression, _GeneralizedLinearRegressor], + max_iter: Optional[int] = None, + uncertainty: float = 0.05, + quantiles: Optional[Sequence[float]] = None, + alpha: float = 0.4, + ): self.uncertainty = uncertainty self.alpha = alpha @@ -72,7 +70,7 @@ def __init__(self, # Set `max_iter` to be the model's `max_iter` attribute if it exists, # and otherwise default to 10,000 if max_iter is None: - if hasattr(model, 'max_iter'): + if hasattr(model, "max_iter"): self.max_iter = model.max_iter else: self.max_iter = 10_000 @@ -85,29 +83,27 @@ def __init__(self, else: self.quantiles = list(quantiles) - # Initialise empty inverse link function and weights - self._inverse_link_function = None + # Initialise inverse link function and weights + self._inverse_link_function: Callable self._weights = {q: None for q in self.quantiles} - def _objective_function(self, - beta: np.ndarray, - X: np.ndarray, - y: np.ndarray, - quantile: float, - inverse_link_function: callable) -> float: - '''Function used to optimise the quantile loss''' + def _objective_function( + self, + beta: np.ndarray, + X: np.ndarray, + y: np.ndarray, + quantile: float, + inverse_link_function: Callable, + ) -> float: + """Function used to optimise the quantile loss""" predictions = inverse_link_function(X @ beta) - loss = smooth_quantile_loss(predictions=predictions, - targets=y, - quantile=quantile, - alpha=self.alpha) + loss = smooth_quantile_loss( + predictions=predictions, targets=y, quantile=quantile, alpha=self.alpha + ) return loss - def fit(self, - X: FloatMatrix, - y: FloatArray, - random_seed: Optional[int] = None): - '''Fit the model. + def fit(self, X: np.ndarray, y: np.ndarray, **kwargs): + """Fit the model. Args: X (float matrix): @@ -116,7 +112,7 @@ def fit(self, number of features. y (float array): The target array, of shape (n,). - ''' + """ # Convert inputs to Numpy arrays X_arr = np.asarray(X) y_arr = np.asarray(y) @@ -150,24 +146,22 @@ def fit(self, # Fit all quantile estimates for q in self.quantiles: - # Initialise random seed, which is used by SciPy - if random_seed is not None: - np.random.seed(random_seed) - args = (X_arr, y_arr, q, self._inverse_link_function) - result = minimize(self._objective_function, - beta_init, - args=args, - method='BFGS', - options={'maxiter': self.max_iter}) + result = minimize( + self._objective_function, + beta_init, + args=args, + method="BFGS", + options={"maxiter": self.max_iter}, + ) self._weights[q] = result.x return self - def predict(self, - X: FloatMatrix - ) -> Tuple[Union[float, np.ndarray], np.ndarray]: - '''Compute model predictions. + def predict( + self, X: np.ndarray, **kwargs + ) -> Tuple[Union[float, np.ndarray], np.ndarray]: + """Compute model predictions. Args: X (float matrix): @@ -179,12 +173,12 @@ def predict(self, pair of float arrays: The predictions, of shape (n,), and the prediction intervals, of shape (n, 2). 
- ''' +        """ # Convert inputs to Numpy array X_arr = np.asarray(X) # If input is one-dimensional, then add a dimension to it -        onedim = (len(X_arr.shape) == 1) +        onedim = len(X_arr.shape) == 1 if onedim: X_arr = np.expand_dims(X_arr, 0) @@ -198,8 +192,10 @@ def predict, X_arr = np.concatenate((X_arr, np.ones((X_arr.shape[0], 1))), axis=1) # Get the prediction for the lower- and upper quantiles -        quantile_vals = [self._inverse_link_function(X_arr @ self._weights[q]) -                         for q in self._weights.keys()] +        quantile_vals = [ +            self._inverse_link_function(X_arr @ self._weights[q]) +            for q in self._weights.keys() +        ] # Concatenate the quantiles to get the intervals quantile_vals = np.stack(quantile_vals, axis=1).squeeze() @@ -212,7 +208,7 @@ def predict, return preds, quantile_vals def score(self, X: Sequence[float], y: Sequence[float]) -> float: -        '''Compute either the R^2 value or the negative pinball loss. +        """Compute either the R^2 value or the negative pinball loss. If `uncertainty` is not set in the constructor then the R^2 value will be returned, and otherwise the mean of the two negative pinball losses @@ -232,7 +228,7 @@ def score(self, X: Sequence[float], y: Sequence[float]) -> float: Returns: float: The negative pinball loss. -        ''' +        """ # Convert inputs to Numpy arrays X_arr = np.asarray(X) y_arr = np.asarray(y) @@ -247,13 +243,17 @@ def score(self, X: Sequence[float], y: Sequence[float]) -> float: else: # Get the predictions _, quantile_vals = self.predict(X_arr) -            losses = [quantile_loss(y_arr, quantile_vals[:, i], q) -                      for i, q in enumerate(self.quantiles)] +            losses = [ +                quantile_loss(y_arr, quantile_vals[:, i], q) +                for i, q in enumerate(self.quantiles) +            ] return -np.mean(losses) def __repr__(self) -> str: model_name = self._model.__class__.__name__ -        return (f'QuantileRegressor(model={model_name},\n' -                f'                  quantiles={self.quantiles})\n' -                f'                  alpha={self.alpha})\n' -                f'                  max_iter={self.max_iter}))') +        return ( +            f"QuantileRegressor(model={model_name},\n" +            f"                  quantiles={self.quantiles},\n" +            f"                  alpha={self.alpha},\n" +            f"                  max_iter={self.max_iter})" +        ) diff --git a/src/doubt/models/model.py b/src/doubt/models/model.py new file mode 100644 index 0000000..4a90a55 --- /dev/null +++ b/src/doubt/models/model.py @@ -0,0 +1,23 @@ +"""Base class for estimators.""" + +from typing import Protocol, Tuple, Union + +import numpy as np + + +class Model(Protocol): +    def __init__(self, *args, **kwargs): +        ... + +    def predict( +        self, X: np.ndarray, **kwargs +    ) -> Tuple[Union[float, np.ndarray], np.ndarray]: +        ... + +    def fit(self, X: np.ndarray, y: np.ndarray, **kwargs): +        ... + +    def __call__( +        self, X: np.ndarray, **kwargs +    ) -> Tuple[Union[float, np.ndarray], np.ndarray]: +        ...
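As a quick illustration of how the class above is meant to be used, here is a minimal usage sketch that wraps a scikit-learn LinearRegression in the QuantileRegressor and checks the empirical coverage of the resulting prediction intervals. The import path is inferred from the file location src/doubt/models/glm/quantile_regressor.py and is not shown in the diff, and the data is synthetic, so treat this as a sketch under those assumptions rather than part of the change set.

# Minimal usage sketch. Assumptions: the package is installed so that the module
# is importable under `doubt.models.glm.quantile_regressor`, and the toy data below
# is invented purely for illustration.
import numpy as np
from sklearn.linear_model import LinearRegression

from doubt.models.glm.quantile_regressor import QuantileRegressor

# Toy heteroscedastic regression data
rng = np.random.default_rng(4242)
X = rng.uniform(0, 10, size=(1000, 1))
y = 2.0 * X[:, 0] + rng.normal(scale=1 + X[:, 0] / 5, size=1000)

# With the default uncertainty of 0.05, the fitted quantiles are 0.025 and 0.975
regressor = QuantileRegressor(LinearRegression(), uncertainty=0.05)
regressor.fit(X, y)

# `predict` returns the point predictions and an (n, 2) array of interval bounds
preds, intervals = regressor.predict(X)
coverage = np.mean((y >= intervals[:, 0]) & (y <= intervals[:, 1]))
print(f"Empirical coverage of the 95% interval: {coverage:.2%}")

With the default uncertainty of 0.05, the two interval columns correspond to the 2.5% and 97.5% quantile estimates, as described in the predict docstring above.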
diff --git a/src/doubt/models/tree/__init__.py b/src/doubt/models/tree/__init__.py new file mode 100644 index 0000000..206a9bf --- /dev/null +++ b/src/doubt/models/tree/__init__.py @@ -0,0 +1,2 @@ +from .forest import QuantileRegressionForest # noqa +from .tree import QuantileRegressionTree # noqa diff --git a/doubt/models/tree/forest.py b/src/doubt/models/tree/forest.py similarity index 81% rename from doubt/models/tree/forest.py rename to src/doubt/models/tree/forest.py index 11008f6..4c70a7e 100644 --- a/doubt/models/tree/forest.py +++ b/src/doubt/models/tree/forest.py @@ -1,16 +1,16 @@ -'''Quantile regression forests''' +"""Quantile regression forests""" -from .._model import BaseModel -from .tree import QuantileRegressionTree +from typing import Optional, Sequence, Tuple, Union -from typing import Optional, Union, Tuple, Sequence import numpy as np from joblib import Parallel, delayed from tqdm.auto import tqdm +from .tree import QuantileRegressionTree + -class QuantileRegressionForest(BaseModel): - '''A random forest for regression which can output quantiles as well. +class QuantileRegressionForest: + """A random forest for regression which can output quantiles as well. Args: n_estimators (int, optional): @@ -99,20 +99,23 @@ class QuantileRegressionForest(BaseModel): >>> preds, interval = forest.predict(np.ones(8), uncertainty=0.25) >>> interval[0] < preds < interval[1] True - ''' - def __init__(self, - n_estimators: int = 100, - criterion: str = "mse", - splitter: str = "best", - max_features: Optional[Union[int, float, str]] = None, - max_depth: Optional[int] = None, - min_samples_split: Union[int, float] = 2, - min_samples_leaf: Union[int, float] = 5, - min_weight_fraction_leaf: float = 0., - max_leaf_nodes: Optional[int] = None, - n_jobs: int = -1, - random_seed: Optional[int] = None, - verbose: bool = False): + """ + + def __init__( + self, + n_estimators: int = 100, + criterion: str = "mse", + splitter: str = "best", + max_features: Optional[Union[int, float, str]] = None, + max_depth: Optional[int] = None, + min_samples_split: Union[int, float] = 2, + min_samples_leaf: Union[int, float] = 5, + min_weight_fraction_leaf: float = 0.0, + max_leaf_nodes: Optional[int] = None, + n_jobs: int = -1, + random_seed: Optional[int] = None, + verbose: bool = False, + ): self.n_estimators = n_estimators self.min_samples_leaf = min_samples_leaf @@ -138,26 +141,35 @@ def __init__(self, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, - random_seed=random_seed + random_seed=random_seed, ) ] def __repr__(self) -> str: - txt = 'QuantileRegressionForest(' - attributes = ['n_estimators', 'criterion', 'splitter', 'max_features', - 'max_depth', 'min_samples_split', 'min_samples_leaf', - 'min_weight_fraction_leaf', 'max_leaf_nodes', 'n_jobs', - 'random_seed'] + txt = "QuantileRegressionForest(" + attributes = [ + "n_estimators", + "criterion", + "splitter", + "max_features", + "max_depth", + "min_samples_split", + "min_samples_leaf", + "min_weight_fraction_leaf", + "max_leaf_nodes", + "n_jobs", + "random_seed", + ] for idx, attr in enumerate(attributes): if idx > 0: - txt += ' ' - txt += f'{attr}={getattr(self, attr)}' + txt += " " + txt += f"{attr}={getattr(self, attr)}" if idx < len(attributes) - 1: - txt += ',\n' - return txt + ')' + txt += ",\n" + return txt + ")" - def fit(self, X, y, verbose: Optional[bool] = None): - '''Fit decision trees in parallel. + def fit(self, X, y, **kwargs): + """Fit decision trees in parallel. 
Args: X (array-like or sparse matrix): @@ -171,10 +183,12 @@ def fit(self, X, y, verbose: Optional[bool] = None): Whether extra output should be printed during training. If None then the initialised value of the `verbose` parameter will be used. Defaults to None. - ''' + """ # Set the verbose argument if it has not been set - if verbose is None: + if kwargs.get("verbose") is None: verbose = self.verbose + else: + verbose = kwargs.get("verbose") # Initialise random number generator rng = np.random.default_rng(self.random_seed) @@ -187,7 +201,7 @@ def fit(self, X, y, verbose: Optional[bool] = None): # Set up progress bar if requested if verbose: - itr = tqdm(self._estimators, desc='Fitting trees') + itr = tqdm(self._estimators, desc="Fitting trees") else: itr = self._estimators @@ -203,13 +217,14 @@ def fit(self, X, y, verbose: Optional[bool] = None): return self - def predict(self, - X: Sequence[Union[float, int]], - uncertainty: Optional[float] = None, - quantiles: Optional[Sequence[float]] = None, - verbose: Optional[bool] = None - ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: - '''Predict regression value for X. + def predict( + self, + X: np.ndarray, + uncertainty: Optional[float] = None, + quantiles: Optional[np.ndarray] = None, + verbose: Optional[bool] = None, + ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Predict regression value for X. Args: X (array-like or sparse matrix): @@ -235,32 +250,32 @@ def predict(self, second one being the desired quantiles/intervals, of shape [2, n_samples] if `uncertainty` is not None, and [n_quantiles, n_samples] if `quantiles` is not None. - ''' + """ # Set the verbose argument if it has not been set if verbose is None: verbose = self.verbose # Ensure that X is two-dimensional - onedim = (len(X.shape) == 1) + onedim = len(X.shape) == 1 if onedim: X = np.expand_dims(X, 0) # Set up progress bar if requested if verbose: - itr = tqdm(self._estimators, desc='Getting tree predictions') + itr = tqdm(self._estimators, desc="Getting tree predictions") else: itr = self._estimators with Parallel(n_jobs=self.n_jobs) as parallel: preds = parallel( - delayed(estimator.predict)(X, uncertainty=uncertainty, - quantiles=quantiles) + delayed(estimator.predict)( + X, uncertainty=uncertainty, quantiles=quantiles + ) for estimator in itr ) if uncertainty is not None or quantiles is not None: - quantile_vals = np.stack([interval for _, interval in preds], - axis=0) + quantile_vals = np.stack([interval for _, interval in preds], axis=0) quantile_vals = quantile_vals.mean(0) preds = np.stack([pred for pred, _ in preds]) preds = preds.mean(0) diff --git a/doubt/models/tree/tree.py b/src/doubt/models/tree/tree.py similarity index 83% rename from doubt/models/tree/tree.py rename to src/doubt/models/tree/tree.py index 10c3450..c7e7f94 100644 --- a/doubt/models/tree/tree.py +++ b/src/doubt/models/tree/tree.py @@ -1,25 +1,23 @@ -'''Quantile regression trees''' +"""Quantile regression trees""" +from typing import Optional, Sequence, Tuple, Union + +import numpy as np from sklearn.tree import BaseDecisionTree, DecisionTreeRegressor from sklearn.utils import check_array, check_X_y -from typing import Optional, Union, Sequence, Tuple -import numpy as np -from .utils import weighted_percentile - -FloatArray = Sequence[float] -NumericArray = Sequence[Union[float, int]] +from .utils import weighted_percentile class BaseTreeQuantileRegressor(BaseDecisionTree): - - def predict(self, - X: NumericArray, - uncertainty: Optional[float] = None, - quantiles: 
Optional[Sequence[float]] = None, - check_input: bool = True - ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: - '''Predict regression value for X. + def predict( + self, + X: np.ndarray, + uncertainty: Optional[float] = None, + quantiles: Optional[Sequence[float]] = None, + check_input: bool = True, + ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Predict regression value for X. Args: X (array-like or sparse matrix): @@ -44,9 +42,9 @@ def predict(self, second one being the desired quantiles/intervals, of shape [n_samples, 2] if `uncertainty` is not None, and [n_samples, n_quantiles] if `quantiles` is not None. - ''' + """ # Apply method requires X to be of dtype np.float32 - X = check_array(X, dtype=np.float32, accept_sparse='csc') + X = check_array(X, dtype=np.float32, accept_sparse="csc") preds = super().predict(X, check_input=check_input) if uncertainty is not None or quantiles is not None: @@ -55,7 +53,7 @@ def predict(self, if uncertainty is not None: quantiles = [uncertainty / 2, 1 - (uncertainty / 2)] else: - quantiles = list(quantiles) + quantiles = list(quantiles or []) # Collect the leaves in the tree X_leaves = self.apply(X) @@ -67,22 +65,23 @@ def predict(self, # Populate the quantile values for leaf in unique_leaves: for idx, quantile in enumerate(quantiles): - X_leaf = (X_leaves == leaf) + X_leaf = X_leaves == leaf y_leaf = self.y_train_[self.y_train_leaves_ == leaf] - quantile_vals[X_leaf, idx] = weighted_percentile(y_leaf, - quantile) + quantile_vals[X_leaf, idx] = weighted_percentile(y_leaf, quantile) return preds, quantile_vals else: return preds - def fit(self, - X: NumericArray, - y: NumericArray, - sample_weight: Optional[NumericArray] = None, - check_input: bool = True, - X_idx_sorted: Optional[NumericArray] = None): - '''Build a decision tree classifier from the training set (X, y). + def fit( + self, + X: np.ndarray, + y: np.ndarray, + sample_weight: Optional[np.ndarray] = None, + check_input: bool = True, + X_idx_sorted: Optional[np.ndarray] = None, + ): + """Build a decision tree classifier from the training set (X, y). Args: X (array-like or sparse matrix) @@ -108,7 +107,7 @@ def fit(self, dataset, this allows the ordering to be cached between trees. If None, the data will be sorted here. Don't use this parameter unless you know what to do. Defaults to None. - ''' + """ # y passed from a forest is 2-D. This is to silence the annoying # data-conversion warnings. y = np.asarray(y) @@ -117,9 +116,15 @@ def fit(self, # Apply method requires X to be of dtype np.float32 X, y = check_X_y( - X, y, accept_sparse='csc', dtype=np.float32, multi_output=False) - super().fit(X, y, sample_weight=sample_weight, check_input=check_input, - X_idx_sorted=X_idx_sorted) + X, y, accept_sparse="csc", dtype=np.float32, multi_output=False + ) + super().fit( + X, + y, + sample_weight=sample_weight, + check_input=check_input, + X_idx_sorted=X_idx_sorted, + ) self.y_train_ = y # Stores the leaf nodes that the samples lie in. @@ -128,7 +133,7 @@ def fit(self, class QuantileRegressionTree(DecisionTreeRegressor, BaseTreeQuantileRegressor): - '''A decision tree regressor that provides quantile estimates. + """A decision tree regressor that provides quantile estimates. Args: criterion (string, optional): @@ -203,17 +208,20 @@ class QuantileRegressionTree(DecisionTreeRegressor, BaseTreeQuantileRegressor): y_train_leaves_ (array-like): Cache the leaf nodes that each training sample falls into. y_train_leaves_[i] is the leaf that y_train[i] ends up at. 
- ''' - def __init__(self, - criterion: str = 'mse', - splitter: str = 'best', - max_features: Optional[Union[int, float, str]] = None, - max_depth: Optional[int] = None, - min_samples_split: Union[int, float] = 2, - min_samples_leaf: Union[int, float] = 1, - min_weight_fraction_leaf: float = 0., - max_leaf_nodes: Optional[int] = None, - random_seed: Union[int, np.random.RandomState, None] = None): + """ + + def __init__( + self, + criterion: str = "mse", + splitter: str = "best", + max_features: Optional[Union[int, float, str]] = None, + max_depth: Optional[int] = None, + min_samples_split: Union[int, float] = 2, + min_samples_leaf: Union[int, float] = 1, + min_weight_fraction_leaf: float = 0.0, + max_leaf_nodes: Optional[int] = None, + random_seed: Union[int, np.random.RandomState, None] = None, + ): super().__init__( criterion=criterion, splitter=splitter, @@ -223,4 +231,5 @@ def __init__(self, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, - random_state=random_seed) + random_state=random_seed, + ) diff --git a/doubt/models/tree/utils.py b/src/doubt/models/tree/utils.py similarity index 71% rename from doubt/models/tree/utils.py rename to src/doubt/models/tree/utils.py index 5b73e9a..92b3e37 100644 --- a/doubt/models/tree/utils.py +++ b/src/doubt/models/tree/utils.py @@ -1,17 +1,17 @@ -'''Utility functions used in tree models''' - -import numpy as np -from typing import Sequence, Union, Optional +"""Utility functions used in tree models""" +from typing import Optional -NumericArray = Sequence[Union[float, int]] +import numpy as np -def weighted_percentile(arr: NumericArray, - quantile: float, - weights: Optional[NumericArray] = None, - sorter: Optional[NumericArray] = None): - '''Returns the weighted percentile of an array. +def weighted_percentile( + arr: np.ndarray, + quantile: float, + weights: Optional[np.ndarray] = None, + sorter: Optional[np.ndarray] = None, +): + """Returns the weighted percentile of an array. See [1] for an explanation of this concept. 
@@ -44,48 +44,47 @@ def weighted_percentile(arr: NumericArray, Sources: [1]: https://en.wikipedia.org/wiki/Percentile\ #The_weighted_percentile_method -    ''' +    """ # Ensure that quantile is set properly if quantile > 1 or quantile < 0: -        raise ValueError('The quantile should be between 0 and 1.') +        raise ValueError("The quantile should be between 0 and 1.") # Set weights to be uniform if not specified -    if weights is None: -        weights = np.ones_like(arr) +    weights_arr = np.ones_like(arr) if weights is None else weights -    # Ensure that `arr` and `weights` are numpy arrays +    # Ensure that `arr` and `weights_arr` are numpy arrays arr = np.asarray(arr, dtype=np.float32) -    weights = np.asarray(weights, dtype=np.float32) +    weights_arr = np.asarray(weights_arr, dtype=np.float32) # Ensure that `arr` and `weights` are of the same length -    if len(arr) != len(weights): -        raise ValueError('a and weights should have the same length.') +    if len(arr) != len(weights_arr): +        raise ValueError("`arr` and `weights` should have the same length.") -    # If `sorter` is given , then sort `arr` and `weights` using it +    # If `sorter` is given, then sort `arr` and `weights_arr` using it if sorter is not None: arr = arr[sorter] -        weights = weights[sorter] +        weights_arr = weights_arr[sorter] # Remove all the array (and weight) elements with zero weight -    non_zeros = (weights != 0) +    non_zeros = weights_arr != 0 arr = arr[non_zeros] -    weights = weights[non_zeros] +    weights_arr = weights_arr[non_zeros] # Sort the array if `sorter` is not given if sorter is None: sorted_indices = np.argsort(arr) sorted_arr = arr[sorted_indices] -        sorted_weights = weights[sorted_indices] +        sorted_weights = weights_arr[sorted_indices] else: sorted_arr = arr -        sorted_weights = weights +        sorted_weights = weights_arr # Calculate the partial sum of weights and get the total weight sorted_cum_weights = np.cumsum(sorted_weights) total = sorted_cum_weights[-1] # Calculate the percentile values -    partial_sum = 1. / total +    partial_sum = 1.0 / total partial_sum *= sorted_cum_weights - sorted_weights / 2.0 # Find the spot in `partial_sum` where `quantile` belongs, preserving order @@ -105,5 +104,4 @@ def weighted_percentile(arr: NumericArray, # Add the corresponding proportion from `sorted_arr[start]` to # `sorted_arr[start + 1]`, to `sorted_arr[start]`. -    return sorted_arr[start] + fraction * \ -        (sorted_arr[start + 1] - sorted_arr[start]) +    return sorted_arr[start] + fraction * (sorted_arr[start + 1] - sorted_arr[start]) diff --git a/src/scripts/fix_dot_env_file.py b/src/scripts/fix_dot_env_file.py new file mode 100644 index 0000000..dc58bde --- /dev/null +++ b/src/scripts/fix_dot_env_file.py @@ -0,0 +1,68 @@ +"""Checks related to the .env file in the repository.""" + +import subprocess +from pathlib import Path + +# List of all the environment variables that are desired +DESIRED_ENVIRONMENT_VARIABLES = dict( +    GPG_KEY_ID="Enter GPG key ID or leave empty if you do not want to use it.
Type " + "`gpg --list-secret-keys --keyid-format=long | grep sec | sed -E " + "'s/.*\/([^ ]+).*/\\1/'` to see your key ID:\n> ", # noqa + GIT_NAME="Enter your full name, to be shown in Git commits:\n> ", + GIT_EMAIL="Enter your email, as registered on your Github account:\n> ", + PYPI_API_TOKEN="Enter your PyPI API token, or leave empty if you do not want " + "to use it:\n> ", +) + + +def fix_dot_env_file(): + """Ensures that the .env file exists and contains all desired variables.""" + # Create path to the .env file + env_file_path = Path(".env") + + # Ensure that the .env file exists + env_file_path.touch(exist_ok=True) + + # Otherwise, extract all the lines in the .env file + env_file_lines = env_file_path.read_text().splitlines(keepends=False) + + # Extract all the environment variables in the .env file + env_vars = [line.split("=")[0] for line in env_file_lines] + + # For each of the desired environment variables, check if it exists in the .env + # file + env_vars_missing = [ + env_var + for env_var in DESIRED_ENVIRONMENT_VARIABLES.keys() + if env_var not in env_vars + ] + + # Create all the missing environment variables + with env_file_path.open("a") as f: + for env_var in env_vars_missing: + value = "" + if env_var == "GPG_KEY_ID": + gpg = subprocess.Popen( + ["gpg", "--list-secret-keys", "--keyid-format=long"], + stdout=subprocess.PIPE, + ) + grep = subprocess.Popen( + ["grep", "sec"], stdin=gpg.stdout, stdout=subprocess.PIPE + ) + value = ( + subprocess.check_output( + ["sed", "-E", "s/.*\\/([^ ]+).*/\\1/"], + stdin=grep.stdout, + ) + .decode() + .strip("\n") + ) + gpg.wait() + grep.wait() + if value == "": + value = input(DESIRED_ENVIRONMENT_VARIABLES[env_var]) + f.write(f'{env_var}="{value}"\n') + + +if __name__ == "__main__": + fix_dot_env_file() diff --git a/src/scripts/versioning.py b/src/scripts/versioning.py new file mode 100644 index 0000000..fac4956 --- /dev/null +++ b/src/scripts/versioning.py @@ -0,0 +1,121 @@ +"""Scripts related to updating of version.""" + +import datetime as dt +import re +import subprocess +from pathlib import Path +from typing import Tuple + +import pkg_resources + + +def bump_major(): + """Add one to the major version.""" + major, _, _ = get_current_version() + set_new_version(major + 1, 0, 0) + + +def bump_minor(): + """Add one to the minor version.""" + major, minor, _ = get_current_version() + set_new_version(major, minor + 1, 0) + + +def bump_patch(): + """Add one to the patch version.""" + major, minor, patch = get_current_version() + set_new_version(major, minor, patch + 1) + + +def set_new_version(major: int, minor: int, patch: int): + """Sets a new version. + + Args: + major (int): + The major version. This only changes when the code stops being backwards + compatible. + minor (int): + The minor version. This changes when a backwards compatible change + happened. + patch (init): + The patch version. This changes when the only new changes are bug fixes. 
+     """ +    version = f"{major}.{minor}.{patch}" + +    # Update the version in the `pyproject.toml` file +    pyproject_path = Path("pyproject.toml") +    pyproject = pyproject_path.read_text() +    pyproject = re.sub( +        r'version = "[^"]+"', +        f'version = "{version}"', +        pyproject, +        count=1, +    ) +    pyproject_path.write_text(pyproject) + +    # Get current changelog and ensure that it has an [Unreleased] entry +    changelog_path = Path("CHANGELOG.md") +    changelog = changelog_path.read_text() +    if "[Unreleased]" not in changelog: +        raise RuntimeError("No [Unreleased] entry in CHANGELOG.md.") + +    # Add version to CHANGELOG +    today = dt.date.today().strftime("%Y-%m-%d") +    new_changelog = re.sub(r"\[Unreleased\].*", f"[v{version}] - {today}", changelog) +    changelog_path.write_text(new_changelog) + +    # Add both updated files to version control, then commit and tag the release +    subprocess.run(["git", "add", "pyproject.toml", "CHANGELOG.md"]) +    subprocess.run(["git", "commit", "-m", f"feat: v{version}"]) +    subprocess.run(["git", "tag", f"v{version}"]) + + +def get_current_version() -> Tuple[int, int, int]: +    """Fetch the current version of the package. + +    Returns: +        triple of ints: +            The current version, separated into major, minor and patch versions. +    """ +    version_str = pkg_resources.get_distribution("doubt").version +    major, minor, patch = map(int, version_str.split(".")) +    return major, minor, patch + + +if __name__ == "__main__": +    from argparse import ArgumentParser + +    parser = ArgumentParser() +    parser.add_argument( +        "--major", +        const=True, +        nargs="?", +        default=False, +        help="Bump the major version by one.", +    ) +    parser.add_argument( +        "--minor", +        const=True, +        nargs="?", +        default=False, +        help="Bump the minor version by one.", +    ) +    parser.add_argument( +        "--patch", +        const=True, +        nargs="?", +        default=False, +        help="Bump the patch version by one.", +    ) +    args = parser.parse_args() + +    if args.major + args.minor + args.patch != 1: +        raise RuntimeError( +            "Exactly one of --major, --minor and --patch must be selected." +        ) +    elif args.major: +        bump_major() +    elif args.minor: +        bump_minor() +    elif args.patch: +        bump_patch() diff --git a/tests/.gitkeep b/tests/.gitkeep new file mode 100644 index 0000000..e69de29
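To make the behaviour of set_new_version concrete, the following self-contained sketch replays its two re.sub substitutions on made-up pyproject.toml and CHANGELOG.md contents. The file contents and the 4.3.0 version number are invented for illustration only; no files are touched.

# Standalone sketch of the two substitutions performed by `set_new_version` above,
# run on made-up strings so the effect is visible without modifying any files.
import datetime as dt
import re

pyproject = 'name = "doubt"\nversion = "4.2.0"\n'
changelog = "# Changelog\n\n## [Unreleased]\n- Added a new feature\n"

version = "4.3.0"  # e.g. the result of a minor bump

# Rewrite the version field in pyproject.toml
new_pyproject = re.sub(
    r'version = "[^"]+"', f'version = "{version}"', pyproject, count=1
)

# Turn the [Unreleased] heading into a dated release heading in CHANGELOG.md
today = dt.date.today().strftime("%Y-%m-%d")
new_changelog = re.sub(r"\[Unreleased\].*", f"[v{version}] - {today}", changelog)

print(new_pyproject)  # now contains: version = "4.3.0"
print(new_changelog)  # the [Unreleased] heading becomes: ## [v4.3.0] - <today>

In the repository itself the script is meant to be run directly, for example as `python src/scripts/versioning.py --minor`, which bumps the minor version, rewrites both files, and creates the corresponding commit and tag as shown in the code above.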