feat: Initial project implementation #1

Closed · wants to merge 8 commits
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# Project specific
kubedantic_models/
kubedantic_specs/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
60 changes: 60 additions & 0 deletions README.rst
@@ -1,3 +1,6 @@
Kubedantic
======================================================

.. image:: https://img.shields.io/pypi/v/kubedantic.svg
:target: https://pypi.org/project/kubedantic

@@ -16,3 +19,60 @@

.. image:: https://img.shields.io/badge/skeleton-2024-informational
:target: https://blog.jaraco.com/skeleton


Generate Pydantic models from Kubernetes OpenAPI specifications.

Overview
--------

Kubedantic automatically generates Pydantic models from your Kubernetes cluster's OpenAPI specifications using `datamodel-code-generator <https://github.com/koxudaxi/datamodel-code-generator>`_.

Usage
-----

1. Make sure you have a Kubernetes cluster running and ``kubectl`` is configured to access it.
2. Run ``kubedantic`` to generate the models.

.. code-block:: bash

$ kubedantic --output-path <destination>

How it works
------------

Kubedantic does the following (see the sketch after this list):

1. Uses the `kubernetes <https://github.com/kubernetes-client/python>`_ library to fetch the OpenAPI specifications from the cluster.
2. Merges the extracted specifications into two schema files (one for the native types and one for the custom resources).
3. Uses `datamodel-code-generator <https://github.com/koxudaxi/datamodel-code-generator>`_ to generate the Pydantic models from each schema file.
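
A minimal sketch of the same pipeline, using the extractor and parser modules
added in this PR (error handling omitted):

.. code-block:: python

    from pathlib import Path

    from kubedantic.extractor import K8sOpenAPIExtractor
    from kubedantic.parser import K8sOpenAPIParser

    # Fetch the specs (or reuse cached ones) and generate one module per result.
    extractor = K8sOpenAPIExtractor(output_path="kubedantic_specs")
    parser = K8sOpenAPIParser(source=extractor.extract())

    for path, result in sorted(parser.parse().items()):
        out = Path("kubedantic_models").joinpath(*path[1:]).with_suffix(".py")
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(result.body)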

Schema files will be cached locally in the ``kubedantic_specs/`` directory, e.g.:

.. code-block:: bash

kubedantic_specs/
├── k8s.json # Kubernetes native types
└── crd.json # Custom resource definitions

You can control the cache location with the ``--specs-path`` option.

Models will be generated by default in the ``kubedantic_models/`` directory, e.g.:

.. code-block:: bash

kubedantic_models/
├── crd
│ └── io
│ └── argoproj
│ ├── __init__.py
│ └── v1alpha1.py
└── k8s
└── io
└── k8s
└── api
└── apps
├── __init__.py
└── v1.py

You can control the output location with the ``--output-path`` option.
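
The generated modules are ordinary Pydantic models. For example, assuming the
``apps/v1`` tree shown above (the exact module paths and model names depend on
your cluster's API):

.. code-block:: python

    from kubedantic_models.k8s.io.k8s.api.apps.v1 import Deployment

    # Validate a manifest-shaped dict (hypothetical minimal example).
    deployment = Deployment.model_validate(
        {"apiVersion": "apps/v1", "kind": "Deployment", "metadata": {"name": "demo"}}
    )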
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -19,6 +19,8 @@ classifiers = [
]
requires-python = ">=3.8"
dependencies = [
"datamodel-code-generator >= 0.25.5",
"kubernetes",
"pydantic >= 2",
]
dynamic = ["version"]
@@ -50,5 +52,6 @@ docs = [
Homepage = "https://github.com/coherent-oss/kubedantic"

[project.scripts]
kubedantic = "kubedantic.main:main"

[tool.setuptools_scm]
1 change: 1 addition & 0 deletions pytest.ini
@@ -3,6 +3,7 @@ norecursedirs=dist build .tox .eggs fixtures
addopts=
--doctest-modules
--import-mode importlib
--ignore tests/data
consider_namespace_packages=true
filterwarnings=
## upstream
2 changes: 2 additions & 0 deletions ruff.toml
@@ -20,9 +20,11 @@ ignore = [
"ISC001",
"ISC002",
]
exclude = ["tests/data/**/*.py"]

[format]
# Enable preview, required for quote-style = "preserve"
preview = true
# https://docs.astral.sh/ruff/settings/#format-quote-style
quote-style = "preserve"
exclude = ["tests/data/**/*.py"]
138 changes: 138 additions & 0 deletions src/kubedantic/extractor.py
@@ -0,0 +1,138 @@
import json
import logging
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Union

from kubernetes import client, config
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


API_PATH_BY_TITLE: Dict[str, Path] = {
"Kubernetes": Path("k8s"), # Kubernetes API
"Kubernetes CRD Swagger": Path("crd"), # CustomResourceDefinition API
}


class SchemaMetadata(BaseModel):
openapi: str
title: str
version: str

@property
def is_supported(self) -> bool:
return self.openapi.startswith("3.") and self.title in API_PATH_BY_TITLE

@property
def path(self) -> Path:
return API_PATH_BY_TITLE.get(self.title, Path(""))

@classmethod
def from_spec(cls, spec: Dict[str, Any]) -> "SchemaMetadata":
return cls(
openapi=spec["openapi"],
title=spec["info"]["title"],
version=spec["info"]["version"],
)


class Schema(BaseModel):
openapi_schema: Dict[str, Any] = Field(default_factory=dict)
metadata: SchemaMetadata

def to_openapi(self) -> Dict[str, Any]:
return {
"openapi": self.metadata.openapi,
"info": {"title": self.metadata.title, "version": self.metadata.version},
"components": {"schemas": self.openapi_schema},
}


class K8sOpenAPIExtractor:
_client: Optional[client.ApiClient] = None

def __init__(self, output_path: Union[str, Path]):
self.output_path = Path(output_path)
        self.schema_by_path: Dict[Path, Schema] = {}

@property
def client(self) -> client.ApiClient:
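        # Lazily create an API client from the local kubeconfig on first use.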
if self._client is None:
self._client = config.new_client_from_config() # pragma: no cover
return self._client

def _should_skip_path(self, path: str) -> bool:
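        # Keep only versioned group endpoints (v1, v1beta1, ...): drop any query
        # string and skip the bare "version" endpoint, which is not a group spec.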
stem = Path(path.split("?")[0]).stem
return not stem.startswith("v") or stem == "version"

def _add_to_schema_by_path(self, spec: Dict[str, Any]):
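        # Merge the component schemas of each supported spec into a single
        # Schema per target path ("k8s" or "crd").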
spec_metadata = SchemaMetadata.from_spec(spec)

if not spec_metadata.is_supported:
logger.warning("Skipping unsupported spec %s", spec_metadata.title)
return

schema_path = spec_metadata.path

for name, schema in spec["components"].get("schemas", {}).items():
current_schema = self.schema_by_path.get(
schema_path, Schema(metadata=spec_metadata)
)
current_schema.openapi_schema[name] = schema
self.schema_by_path[schema_path] = current_schema

def call_api(self, resource_path: str, method: str = "GET", **kwargs: Any) -> Any:
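        # Issue a raw request against the given resource path and return the
        # decoded JSON body as plain Python objects.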
return self.client.call_api(
resource_path=resource_path,
method=method,
response_type="object",
auth_settings=self.client.configuration.auth_settings(),
_return_http_data_only=True,
**kwargs,
)

def _load_schema_by_path(self):
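        # The /openapi/v3 discovery document lists one spec URL per API group.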
paths = self.call_api(resource_path="/openapi/v3")["paths"]

for name, value in paths.items():
relative_path = value["serverRelativeURL"]

if self._should_skip_path(relative_path):
continue

logger.info("Fetching specs for %s", name)
spec = self.call_api(resource_path=relative_path)

self._add_to_schema_by_path(spec)

def _write_schema(self, path: Path, schemas: Schema) -> Path:
out_path = self.output_path / path.with_suffix(".json")
out_path.parent.mkdir(parents=True, exist_ok=True)

logger.info("Writing spec %s to %s", path, out_path)

with open(out_path, "w") as f:
f.write(json.dumps(schemas.to_openapi(), indent=4, default=str))

return out_path

def _load_specs(self) -> Generator[Path, None, None]:
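        # Treat an existing output path as a cache and reuse the specs found there.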
if self.output_path.exists():
logger.info("Using existing specs")
return self.output_path.glob("**/*.json")

self._load_schema_by_path()

return (
self._write_schema(path, schemas)
for path, schemas in self.schema_by_path.items()
)

def extract(self) -> List[Path]:
"""
Extracts the Kubernetes OpenAPI specs and writes them to the output path.

        :return: The list of paths the specs were written to.
"""
return [path.absolute() for path in self._load_specs()]
78 changes: 78 additions & 0 deletions src/kubedantic/main.py
@@ -0,0 +1,78 @@
import argparse
import logging
import sys
from pathlib import Path
from typing import Dict, Tuple

from datamodel_code_generator.parser.base import Result

from kubedantic.extractor import K8sOpenAPIExtractor
from kubedantic.parser import K8sOpenAPIParser


def _get_options(args):
parser = argparse.ArgumentParser(
description="Generates Python data models from Kubernetes OpenAPI specs.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument(
"--log-level",
"-l",
default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Logging level.",
)
parser.add_argument(
"--output-path",
"-o",
default="kubedantic_models",
help="Output directory where the Python data models will be put at.",
)
parser.add_argument(
"--specs-path",
"-s",
default="kubedantic_specs",
help="Output directory where the Kubernetes OpenAPI specs will be put at.",
)

return parser.parse_args(args)


def _write_result(path: Tuple[str, ...], result: Result, output_path: Path):
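    # Mirror the module path (minus its first component) under output_path.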
output_file = output_path.joinpath(*path[1:]).with_suffix(".py")
logging.info("Generating %s", output_file)
output_file.parent.mkdir(parents=True, exist_ok=True)

with open(output_file, "w") as out_file:
out_file.write(result.body)


def _generate_models(output_path: Path, specs_path: Path):
extractor = K8sOpenAPIExtractor(output_path=specs_path)
parser = K8sOpenAPIParser(source=extractor.extract())

    results: Dict[Tuple[str, ...], Result] = parser.parse()  # type: ignore

for name, result in sorted(results.items()):
_write_result(name, result, output_path)


def run(args):
options = _get_options(args)

log_level = logging.getLevelName(options.log_level)
logging.basicConfig(level=log_level)

output_path = Path(options.output_path)
specs_path = Path(options.specs_path)

_generate_models(output_path, specs_path)


def main(): # pragma: no cover
run(sys.argv[1:])


if __name__ == "__main__": # pragma: no cover
main()