Skip to content

Commit

Permalink
feat: stubs for desbordante 2.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
vladyoslav committed Oct 22, 2024
1 parent 0e86d5c commit 9ed02ae
Show file tree
Hide file tree
Showing 32 changed files with 904 additions and 40 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Lint workflow: runs the project's `make lint` rule on every pull request and push.
name: Lint
on:
pull_request:
push:
jobs:
run-linters:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set up python
uses: actions/setup-python@v5
with:
python-version: '3.12'

# Poetry is pinned to an exact version; `make init` then installs the
# project dependencies through it.
- name: Install deps
run: python3 -m pip install poetry==1.8.2 && make init

# The job fails when `make lint` exits with a non-zero status.
- name: Run all linters and formatters
run: make lint
33 changes: 33 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# pre-commit configuration: generic file hygiene hooks plus two local hooks
# that delegate to the Makefile.
# Stop at the first failing hook instead of running the remaining ones.
fail_fast: true
default_language_version:
python: python3.12

# Settings for the hosted CI run of these hooks. The two local `make` hooks are
# skipped there (presumably because they need the local make/poetry toolchain
# -- confirm).
ci:
autoupdate_commit_msg: "chore: update pre-commit hooks"
autofix_commit_msg: "style: pre-commit fixes"
skip: [make-format, make-lint]

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
- id: end-of-file-fixer
- id: mixed-line-ending
- id: requirements-txt-fixer
- id: trailing-whitespace

# Local hooks run commands already available on the developer's machine
# (`language: system`), here the Makefile's format and lint rules.
- repo: local
hooks:
- id: make-format
name: make-format
entry: make format
language: system

- id: make-lint
name: make-lint
entry: make lint
language: system
30 changes: 30 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
.PHONY: init lint format stubs

# Installs the Poetry-managed dependencies, then runs `pre-commit install`
# inside the Poetry environment. The `## ` line below is the text shown by
# `make help`; it must stay directly above the rule name.
## Install dependencies
init:
poetry install
poetry run pre-commit install

# The checks are chained with `&&` so they run one after another and the rule
# fails as soon as any of them fails. A bare `&` would send the earlier
# commands to the background and report only the exit status of the last one,
# letting ruff failures slip through CI.
## Run all formatters and linters in project
lint:
	poetry run ruff check desbordante-stubs \
		&& poetry run ruff format --check desbordante-stubs \
		&& poetry run black --check desbordante-stubs

# The tools are run strictly in sequence with `&&`: all three rewrite the same
# files, so backgrounding them with `&` would let them race and would hide a
# failure of an earlier tool. `ruff check --fix` is pointed at the same
# directory the lint rule checks.
## Reformat code
format:
	poetry run ruff format desbordante-stubs \
		&& poetry run ruff check --fix desbordante-stubs \
		&& poetry run black desbordante-stubs

# Regenerates the stubs with pybind11-stubgen, replaces the existing
# desbordante-stubs directory with the freshly generated stubs/desbordante,
# removes the temporary stubs directory and reformats the result.
# The recursive call goes through $(MAKE) so that command-line flags
# (-n, -j, ...) are passed on to the sub-make.
## Generate stubs
stubs:
	pybind11-stubgen desbordante
	@if [ -d "desbordante-stubs" ]; then rm -rf desbordante-stubs; fi
	@if [ -d "stubs/desbordante" ]; then mv stubs/desbordante desbordante-stubs; fi
	rm -rf stubs
	$(MAKE) format


# Running `make` with no target falls through to `help`.
.DEFAULT_GOAL := help
# See <https://gist.github.com/klmr/575726c7e05d8780505a> for explanation.
# The rule scans the makefile(s) for comment lines starting with `## `, takes
# the line that follows them as the rule name, and prints each name (coloured
# via tput) followed by its description.
help:
@echo "$$(tput setaf 2)Available rules:$$(tput sgr0)";sed -ne"/^## /{h;s/.*//;:d" -e"H;n;s/^## /---/;td" -e"s/:.*//;G;s/\\n## /===/;s/\\n//g;p;}" ${MAKEFILE_LIST}|awk -F === -v n=$$(tput cols) -v i=4 -v a="$$(tput setaf 6)" -v z="$$(tput sgr0)" '{printf"- %s%s%s\n",a,$$1,z;m=split($$2,w,"---");l=n-i;for(j=1;j<=m;j++){l-=length(w[j])+1;if(l<= 0){l=n-i-length(w[j])-1;}printf"%*s%s\n",-i," ",w[j];}}'
28 changes: 28 additions & 0 deletions desbordante-stubs/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
"""
A high-performance data profiling library oriented towards exploratory data analysis
"""

from __future__ import annotations
import typing
from . import ac
from . import afd
from . import aind
from . import ar
from . import cfd
from . import data_types
from . import dd
from . import dynamic_fd_verification
from . import fd
from . import fd_verification
from . import gfd_verification
from . import ind
from . import mfd_verification
from . import od
from . import pfd
from . import statistics
from . import ucc
from . import ucc_verification
Expand All @@ -20,11 +29,21 @@ __all__ = [
"ac",
"afd",
"afd_verification",
"aind",
"ar",
"aucc_verification",
"cfd",
"data_types",
"dd",
"dynamic_fd_verification",
"fd",
"fd_verification",
"gfd_verification",
"ind",
"mfd_verification",
"od",
"od_module",
"pfd",
"statistics",
"ucc",
"ucc_verification",
Expand All @@ -35,10 +54,12 @@ class Algorithm:
"""
Process data.
"""

def get_description(self, option_name: str) -> str:
"""
Get description of an option.
"""

def get_needed_options(self) -> set[str]:
"""
Get names of options the algorithm requires to be set at the moment.
Expand All @@ -47,22 +68,27 @@ class Algorithm:
algorithms' options using keyword arguments of the load_data and execute
methods.
"""

def get_option_type(self, option_name: str) -> tuple:
"""
Get info about the option's type.
"""

def get_opts(self) -> dict[str, typing.Any]:
"""
Get option values represented as the closest Python type
"""

def get_possible_options(self) -> set[str]:
"""
Get names of options the algorithm may request.
"""

def load_data(self, **kwargs) -> None:
"""
Load data for execution
"""

def set_option(self, option_name: str, option_value: typing.Any = None) -> None:
"""
Set option value. Passing None means setting the default value.
Expand All @@ -76,3 +102,5 @@ class ConfigurationError(ValueError):
pass

afd_verification = fd_verification
aucc_verification = ucc_verification
od_module = od
1 change: 1 addition & 0 deletions desbordante-stubs/ac/algorithms.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class AcAlgorithm(desbordante.Algorithm):
iterations_limit: limit for iterations of sampling
ac_seed: seed, needed for choosing a data sample
"""

def __init__(self) -> None: ...
def get_ac_exceptions(self) -> list[desbordante.ac.ACException]: ...
def get_ac_ranges(self) -> list[desbordante.ac.ACRanges]: ...
Expand Down
4 changes: 4 additions & 0 deletions desbordante-stubs/aind/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from __future__ import annotations
from . import algorithms

__all__ = ["algorithms"]
6 changes: 6 additions & 0 deletions desbordante-stubs/aind/algorithms.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from __future__ import annotations
from desbordante.ind.algorithms import Mind
from desbordante.ind.algorithms import Spider
from desbordante.ind.algorithms import Spider as Default

__all__ = ["Default", "Mind", "Spider"]
20 changes: 15 additions & 5 deletions desbordante-stubs/ar/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,26 @@ from __future__ import annotations
import desbordante
from . import algorithms

__all__ = ["ArAlgorithm", "AssociativeRule", "algorithms"]
__all__ = ["ARStrings", "ArAlgorithm", "ArIDs", "algorithms"]

class ArAlgorithm(desbordante.Algorithm):
def get_ars(self) -> list[AssociativeRule]: ...

class AssociativeRule:
class ARStrings:
def __str__(self) -> str: ...
@property
def confidence(self) -> float: ...
@property
def left(self) -> list[str]: ...
@property
def right(self) -> list[str]: ...

class ArAlgorithm(desbordante.Algorithm):
def get_ar_ids(self) -> list[ArIDs]: ...
def get_ars(self) -> list[ARStrings]: ...
def get_itemnames(self) -> list[str]: ...

class ArIDs:
@property
def confidence(self) -> float: ...
@property
def left(self) -> list[int]: ...
@property
def right(self) -> list[int]: ...
1 change: 1 addition & 0 deletions desbordante-stubs/ar/algorithms.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Apriori(desbordante.ar.ArAlgorithm):
minsup: minimum support value (between 0 and 1)
tid_column_index: index of the column where a TID is stored
"""

def __init__(self) -> None: ...

Default = Apriori
21 changes: 21 additions & 0 deletions desbordante-stubs/cfd/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations
import desbordante
from . import algorithms

__all__ = ["CFD", "CfdAlgorithm", "Item", "algorithms"]

class CFD:
    """A single CFD, made of left-hand-side items and one right-hand-side item."""

    def __str__(self) -> str:
        """Return the string form of this CFD."""

    @property
    def lhs_items(self) -> list[Item]:
        """Items on the left-hand side."""

    @property
    def rhs_item(self) -> Item:
        """The item on the right-hand side."""

class CfdAlgorithm(desbordante.Algorithm):
    """Algorithm base class whose results are exposed as ``CFD`` objects."""

    def get_cfds(self) -> list[CFD]:
        """Return the algorithm's CFDs as a list."""

class Item:
    """An (attribute, value) entry used on either side of a ``CFD``."""

    @property
    def attribute(self) -> int:
        """Integer identifying the attribute (presumably a column index)."""

    @property
    def value(self) -> str | None:
        """The value attached to the attribute, or ``None``."""
21 changes: 21 additions & 0 deletions desbordante-stubs/cfd/algorithms.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations
import desbordante.cfd

__all__ = ["Default", "FDFirst"]

class FDFirst(desbordante.cfd.CfdAlgorithm):
    """
    Stub for the FDFirst algorithm; results are read through the inherited
    ``get_cfds`` method.

    Options:
    table: table processed by the algorithm
    columns_number: Number of columns in the part of the dataset if you want to use algo not on the full dataset, but on its part
    cfd_minsup: minimum support value (integer number between 1 and number of tuples in dataset)
    cfd_minconf: cfd minimum confidence value (between 0 and 1)
    tuples_number: Number of tuples in the part of the dataset if you want to use algo not on the full dataset, but on its part
    cfd_max_lhs: cfd max considered LHS size
    cfd_substrategy: CFD lattice traversal strategy to use
    [dfs|bfs]
    """

    def __init__(self) -> None: ...


# Alias: the package's default algorithm is FDFirst.
Default = FDFirst
16 changes: 12 additions & 4 deletions desbordante-stubs/data_types.pyi
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
"""
Contains the types of data supported by Desbordante.
Contains the types of data supported by Desbordante.
Currently only used as tags for Algorithm.get_option_type
Currently only used as tags for Algorithm.get_option_type
"""

from __future__ import annotations

__all__ = ["Table"]
__all__ = ["ColumnCombination", "Table"]

class ColumnCombination:
    """A list of column indices paired with a table index."""

    def __str__(self) -> str:
        """Return the string form of this column combination."""

    @property
    def column_indices(self) -> list[int]:
        """Indices of the columns in the combination."""

    @property
    def table_index(self) -> int:
        """Index of the table (presumably the one the columns belong to)."""

class Table:
    """Tag type with no members of its own."""
8 changes: 8 additions & 0 deletions desbordante-stubs/dd/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from __future__ import annotations
from . import algorithms

__all__ = ["DD", "algorithms"]

class DD:
    """Opaque DD result type; only its string representations are exposed."""

    def __repr__(self) -> str:
        """Return the ``repr`` form of this DD."""

    def __str__(self) -> str:
        """Return the ``str`` form of this DD."""
19 changes: 19 additions & 0 deletions desbordante-stubs/dd/algorithms.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from __future__ import annotations
import desbordante
import desbordante.dd

__all__ = ["Default", "Split"]

class Split(desbordante.Algorithm):
    """
    Stub for the Split algorithm; results are read through ``get_dds``.

    Options:
    table: table processed by the algorithm
    difference_table: CSV table containing difference limits for each column
    num_rows: Use only first N rows of the table
    num_columns: Use only first N columns of the table
    """

    def __init__(self) -> None: ...

    def get_dds(self) -> list[desbordante.dd.DD]:
        """Return the algorithm's results as a list of ``DD`` objects."""


# Alias: the package's default algorithm is Split.
Default = Split
4 changes: 4 additions & 0 deletions desbordante-stubs/dynamic_fd_verification/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from __future__ import annotations
from . import algorithms

__all__ = ["algorithms"]
24 changes: 24 additions & 0 deletions desbordante-stubs/dynamic_fd_verification/algorithms.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations
import desbordante
import desbordante.fd_verification

__all__ = ["Default", "DynamicFDVerifier"]

class DynamicFDVerifier(desbordante.Algorithm):
    """
    Stub for the DynamicFDVerifier algorithm.

    Options:
    table: table processed by the algorithm
    insert: Rows to be inserted into the table using the insert operation
    delete: Rows to be deleted from the table using the delete operation
    update: Rows to be replaced in the table using the update operation
    lhs_indices: LHS column indices
    rhs_indices: RHS column indices
    """

    def __init__(self) -> None: ...

    # Result accessors; semantics follow from their names and return types.
    def fd_holds(self) -> bool: ...

    def get_error(self) -> float: ...

    def get_highlights(self) -> list[desbordante.fd_verification.Highlight]: ...

    def get_num_error_clusters(self) -> int: ...


# Alias: the package's default algorithm is DynamicFDVerifier.
Default = DynamicFDVerifier
Loading

0 comments on commit 9ed02ae

Please sign in to comment.