docs: Introduces docs and ci/cd (#16)

* gha * docs * update readme and index
FBruzzesi · Jun 7, 2024 · 5ee3c67 · 5ee3c67
1 parent f23a4a2
commit 5ee3c67
Show file tree

Hide file tree

Showing 22 changed files with 1,111 additions and 70 deletions.
diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+  # Keep the GitHub Actions used by this repository up to date
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'monthly'
diff --git a/.github/workflows/check-typos.yaml b/.github/workflows/check-typos.yaml
@@ -0,0 +1,20 @@
+name: Check spelling typos
+
+on:
+  workflow_dispatch:  # allows manual runs
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  run-typos:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+
+      - name: Check spelling
+        uses: crate-ci/typos@v1.21.0  # pinned release instead of the moving master branch
+        with:
+          files: .
diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml
@@ -0,0 +1,36 @@
+name: Deploy Documentation
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: write  # needed so `mkdocs gh-deploy` can push the built site
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+      - name: Configure Git Credentials
+        run: |
+          git config user.name github-actions[bot]
+          git config user.email 41898282+github-actions[bot]@users.noreply.github.com
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install uv
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+      - uses: actions/cache@v4  # key rotates weekly: cache_id is the ISO week number (%V)
+        with:
+          key: mkdocs-material-${{ env.cache_id }}
+          path: .cache
+          restore-keys: |
+            mkdocs-material-
+
+      - name: Install dependencies and deploy
+        run: |
+          uv pip install mkdocs-material --system
+          mkdocs gh-deploy --force
diff --git a/.github/workflows/pre-commit-update.yaml b/.github/workflows/pre-commit-update.yaml
@@ -0,0 +1,36 @@
+name: Pre-commit auto-update
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 1 * *"  # Every 1st of the month at 00:00 UTC
+
+permissions:  # only what create-pull-request needs: push a branch, open a PR
+  contents: write
+  pull-requests: write
+
+jobs:
+  auto-update:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: pre-commit install autoupdate
+        run: |
+          pip install pre-commit
+          pre-commit autoupdate
+
+      - name: Commit and push changes
+        uses: peter-evans/create-pull-request@v6
+        with:
+          branch: update-pre-commit-hooks
+          title: 'Update pre-commit hooks'
+          commit-message: 'Update pre-commit hooks'
+          body: |
+            Update versions of pre-commit hooks to latest versions.
diff --git a/.github/workflows/pull-request.yaml b/.github/workflows/pull-request.yaml
@@ -0,0 +1,65 @@
+name: PR Checks
+
+on:
+  pull_request:
+    branches:
+    - main
+
+jobs:
+
+  lint:  # installs ruff and runs `make lint`
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install uv
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - name: Install & run linter
+        run: |
+          uv pip install ruff --system
+          make lint
+  test:  # one run per OS x Python version combination in the matrix below
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ["3.10", "3.11", "3.12"]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install uv  # NOTE(review): relies on curl/sh (and later make) being available on windows-latest — confirm
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - name: Install dependencies and run tests
+        run: |
+          uv pip install -r requirements.txt --system
+          uv pip install pytest pytest-cov pytest-xdist --system
+          make test-cov
+      - name: Install and run mypy  # part of the test job, so it also runs for every matrix combination
+        run: |
+          uv pip install mypy --system
+          mypy sksmithy tests
+
+
+  doc-build:  # checks that the mkdocs site still builds; nothing is deployed here
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install uv
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - name: Install dependencies and check docs can build  # -v verbose, -s strict (warnings abort the build)
+        run: |
+          uv pip install mkdocs-material --system
+          mkdocs build -v -s
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,36 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+    -   id: trailing-whitespace
+    -   id: end-of-file-fixer
+    -   id: requirements-txt-fixer
+    -   id: check-json
+    -   id: check-yaml
+    -   id: check-ast
+    -   id: check-added-large-files
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.7
+    hooks:
+    -   id: ruff  # lint with --fix first, so the formatter runs on the fixed code
+        args: [--fix, sksmithy, tests]
+    -   id: ruff-format
+        args: [sksmithy, tests]
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.10.0
+    hooks:
+    -   id: mypy
+        args: [sksmithy, tests]
+-   repo: https://github.com/Lucas-C/pre-commit-hooks-bandit
+    rev: v1.0.6
+    hooks:
+    -   id: python-bandit-vulnerability-check
+        args: [--skip, "B101", --severity-level, medium, --recursive, sksmithy]
+-   repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.10.0
+    hooks:
+    -   id: python-no-eval
+-   repo: https://github.com/crate-ci/typos
+    rev: v1.21.0
+    hooks:
+    -   id: typos
diff --git a/README.md b/README.md
@@ -2,17 +2,30 @@
 
 # Scikit-learn Smithy
 
-Scikit-learn smithy is a tool that helps you to forge scikit-learn compatible estimator templates with ease.
+Scikit-learn smithy is a tool that helps you to forge scikit-learn compatible estimators with ease.
+
+---
+
+[Documentation](https://fbruzzesi.github.io/sklearn-smithy) | [Repository](https://github.com/fbruzzesi/sklearn-smithy) | [Issue Tracker](https://github.com/fbruzzesi/sklearn-smithy/issues)
+
+---
 
 How can you use it?
 
-- ✅ From a [web UI](https://sklearn-smithy.streamlit.app/) powered by [streamlit](https://streamlit.io/).
-- ✅ As a CLI (command line interface): `smith forge` command (see [installation](#installation) and [commands](#available-cli-commands)).
-- 🚧 As a TUI (terminal user interface): We are not there yet!
+✅ Directly from the web: we have a [web UI](https://sklearn-smithy.streamlit.app/) powered by [streamlit](https://streamlit.io/).
+✅ As a CLI (command line interface) in your terminal (requires [installation](#installation)) powered by [typer](https://typer.tiangolo.com/):
+
+    ```terminal
+    smith forge
+    ```
+
+🚧 As a TUI (terminal user interface): Work in progress!
+
+All these tools will prompt you with a series of questions regarding the estimator you want to create, and then generate the boilerplate code for you.
 
 ## Why ❓
 
-Writing a scikit-learn compatible estimators might be harder than expected.
+Writing scikit-learn compatible estimators might be harder than expected.
 
 While everyone knows about the `fit` and `predict`, there are other behaviours, methods and attributes that
 scikit-learn might be expecting from your estimator depending on:
@@ -27,93 +40,61 @@ questions about it, and then generating the boilerplate code.
 In this way you will be able to fully focus on the core implementation logic, and not on nitty-gritty details
 of the scikit-learn API.
 
-Once the core logic is implemented, the estimator should be ready to test against the _somewhat official_ [`parametrize_with_checks`](https://scikit-learn.org/dev/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks) pytest compatible decorator:
+### Sanity check
+
+Once the core logic is implemented, the estimator should be ready to test against the _somewhat official_
+[`parametrize_with_checks`](https://scikit-learn.org/dev/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks)
+pytest compatible decorator:
 
 ```py
 from sklearn.utils.estimator_checks import parametrize_with_checks
 
-@parametrize_with_checks([YourAwesomeRegressor, MoreAwesomeClassifier, EvenMoreAwesomeTransformer])
+@parametrize_with_checks([
+    YourAwesomeRegressor,
+    MoreAwesomeClassifier,
+    EvenMoreAwesomeTransformer,
+])
 def test_sklearn_compatible_estimator(estimator, check):
     check(estimator)
 ```
 
-## Installation
-
-To use the tool from the terminal, we suggest to install it directly from pypi:
+and it should be compatible with scikit-learn Pipeline, GridSearchCV, etc.
 
-```bash
-python -m pip install sklearn-smithy
-```
+### Official guide
 
-This will make the `smith` command available in your terminal.
+Scikit-learn documentation on how to
+[develop estimators](https://scikit-learn.org/dev/developers/develop.html#developing-scikit-learn-estimators).
 
-## Available CLI commands
+## Installation
 
-The `smith` entrypoint offers two commands:
+sklearn-smithy is available on [pypi](https://pypi.org/project/sklearn-smithy), so you can install it directly from there:
 
 ```bash
-smith --help
+python -m pip install sklearn-smithy
 ```
 
-```terminal
-Usage: smith [OPTIONS] COMMAND [ARGS]...
-
-CLI to generate scikit-learn estimator boilerplate code
-
-...
-
-╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮
-│ forge     Generate a new shiny scikit-learn compatible estimator ✨                    │
-│ version   Display library version.                                                     │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-```
+**Remark:** The minimum Python version supported is 3.10.
 
-and as you can already guess, the `forge` command is the one that will generate the boilerplate code for you.
+This will make the `smith` command available in your terminal, and you should be able to run the following:
 
 ```bash
-smith forge --help
+smith version
 ```
 
-```terminal
-Generate a new shiny scikit-learn compatible estimator ✨
+> sklearn-smithy=...
 
-Depending on the estimator type the following additional information could be required:
+## User guide 📚
 
-* if the estimator is linear (classifier or regression)
-* if the estimator implements `.predict_proba()` method (classifier or outlier detector)
-* if the estimator implements `.decision_function()` method (classifier only)
-
-Finally, the following two questions will be prompt:
-
-* if the estimator should have tags (To know more about tags, check the dedicated scikit-learn documentation
-    at https://scikit-learn.org/dev/developers/develop.html#estimator-tags)
-* in which file the class should be saved (default is `f'{name.lower()}.py'`)
-
-
-╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ *  --name                                           TEXT                                                Name of the estimator [default: None] [required]                                              │
-│ *  --estimator-type                                 [classifier|outlier|regressor|transformer|cluster]  Estimator type [default: None] [required]                                                     │
-│    --required-params                                TEXT                                                List of (comma-separated) required parameters                                                 │
-│    --optional-params                                TEXT                                                List of  (comma-separated) optional parameters                                                │
-│    --sample-weight        --no-sample-weight                                                            Whether or not `.fit()` supports `sample_weight` [default: no-sample-weight]                  │
-│    --linear               --no-linear                                                                   Whether or not the estimator is linear [default: no-linear]                                   │
-│    --predict-proba        --no-predict-proba                                                            Whether or not the estimator implements `predict_proba` method [default: no-predict-proba]    │
-│    --decision-function    --no-decision-function                                                        Whether or not the estimator implements `decision_function` method                            │
-│                                                                                                         [default: no-decision-function]                                                               │
-│    --tags                                           TEXT                                                List of optional extra scikit-learn tags                                                      │
-│    --output-file                                    TEXT                                                Destination file where to save the boilerplate code                                           │
-│    --help                                                                                               Show this message and exit.                                                                   │
-╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-```
+Please refer to the dedicated [user guide](https://fbruzzesi.github.io/sklearn-smithy/user-guide/) documentation section.
 
 ## Origin story
 
-The idea for this tool originated from [scikit-lego #660](https://github.com/koaning/scikit-lego/pull/660), which I cannot better explain than quoting the PR description:
+The idea for this tool originated from [scikit-lego #660](https://github.com/koaning/scikit-lego/pull/660), which I cannot better explain than quoting the PR description itself:
 
 > So the story goes as the following:
 >
 > - The CI/CD fails for scikit-learn==1.5rc1 because of a change in the `check_estimator` internals
 > - In the [scikit-learn issue](https://github.com/scikit-learn/scikit-learn/issues/28966) I got a better picture of how to run test for compatible components
-> - In particular, in [rolling your own estimator](https://scikit-learn.org/dev/developers/develop.html#rolling-your-own-estimator) suggests to use [`parametrize_with_checks`](https://scikit-learn.org/dev/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks), and of course I thought "that is a great idea to avoid dealing manually with each test"
+> - In particular, [rolling your own estimator](https://scikit-learn.org/dev/developers/develop.html#rolling-your-own-estimator) suggests to use [`parametrize_with_checks`](https://scikit-learn.org/dev/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks), and of course I thought "that is a great idea to avoid dealing manually with each test"
 > - Say no more, I enter a rabbit hole to refactor all our tests - which would be fine
 > - Except that these tests failures helped me figure out a few missing parts in the codebase