Skip to content

Commit

Permalink
Merge pull request #114 from dlt-hub/feat/detector_improvements
Browse files Browse the repository at this point in the history
detector improvements, add contributing.md, small rendering fixes
  • Loading branch information
sh-rp authored May 23, 2024
2 parents 5556248 + f1d2c65 commit 2592216
Show file tree
Hide file tree
Showing 27 changed files with 175,416 additions and 58 deletions.
11 changes: 11 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Contributing to dlt-init-openapi

To contribute to this Repo, you can do the following:

1. [Join our Slack community](https://dlthub.com/community) and talk to us if you want to extend dlt-init-openapi. Until we have a more comprehensive contribution guide, we're happy to help you get started there.
2. Fork this repo and check it out
3. Install all dependencies with `make dev` (you will need poetry for dependency management)
4. Run the fast tests to verify that all is properly installed with `make test-fast`
5. Make your code changes, and write new tests if you add new features.
6. Format and lint with `make format` and `make lint`
7. Create a PR to this repo.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,11 @@ $ dlt-init-openapi pokemon --path ./path/to/my_spec.yml
- `--output-path PATH`: A path to render the output to
- `--config PATH`: Path to the config file to use (see below)
- `--no-interactive`: Skip endpoint selection and render all paths of the OpenAPI spec.
- `--loglevel`: Set logging level for stdout output, defaults to 20 (INFO).
- `--log-level`: Set logging level for stdout output, defaults to 20 (INFO).
- `--global-limit`: Set a global limit on the generated source.
- `--update-rest-api-source`: Update the locally cached rest_api verified source.
- `--allow-openapi-2`: Allow the use of OpenAPI v2 specs. Migrating the spec to 3.0 is recommended, though.
- `--version`: Show installed version of the generator.
- `--help`: Show this message and exit.

## Config options
Expand Down
31 changes: 14 additions & 17 deletions dlt_init_openapi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from enum import Enum
from importlib.metadata import version
from pathlib import Path
from typing import Optional, cast
from typing import cast

import httpcore
import httpx
Expand Down Expand Up @@ -57,7 +57,10 @@ def render(self, dry: bool = False) -> None:
logger.info("Rendering project")
if self.config.endpoint_filter:
filtered_endpoints = self.config.endpoint_filter(self.openapi.endpoints)
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
if filtered_endpoints:
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
else:
logger.warning("You have not selected any endpoints, all endpoints will be rendered.")
self.renderer.run(self.openapi, dry=dry)
logger.success(f"Rendered project to: {self.config.project_dir}")
logger.info("You can now run your pipeline from this folder with 'python pipeline.py'.")
Expand All @@ -84,30 +87,28 @@ def print_warnings(self) -> None:
logger.warning(w.msg)


def _get_document(*, url: Optional[str] = None, path: Optional[Path] = None, timeout: int = 60) -> bytes:
if url is not None and path is not None:
def _get_document(*, config: Config, timeout: int = 60) -> bytes:
if config.spec_url is not None and config.spec_path is not None:
raise ValueError("Provide URL or Path, not both.")
if url is not None:
logger.info(f"Downloading spec from {url}")
if config.spec_url is not None:
logger.info(f"Downloading spec from {config.spec_url}")
try:
response = httpx.get(url, timeout=timeout)
response = httpx.get(config.spec_url, timeout=timeout)
logger.success("Download complete")
return response.content
except (httpx.HTTPError, httpcore.NetworkError) as e:
raise ValueError("Could not get OpenAPI document from provided URL") from e
elif path is not None:
logger.info(f"Reading spec from {path}")
return Path(path).read_bytes()
elif config.spec_path is not None:
logger.info(f"Reading spec from {config.spec_path}")
return Path(config.spec_path).read_bytes()
else:
raise ValueError("No URL or Path provided")


def _get_project_for_url_or_path( # pylint: disable=too-many-arguments
url: Optional[str],
path: Optional[Path],
config: Config = None,
) -> Project:
doc = _get_document(url=url, path=path)
doc = _get_document(config=config)

renderer_cls = cast(BaseRenderer, import_class_from_string(config.renderer_class))
detector_cls = cast(BaseDetector, import_class_from_string(config.detector_class))
Expand All @@ -123,8 +124,6 @@ def _get_project_for_url_or_path( # pylint: disable=too-many-arguments

def create_new_client(
*,
url: Optional[str] = None,
path: Optional[Path] = None,
config: Config = None,
) -> Project:
"""
Expand All @@ -134,8 +133,6 @@ def create_new_client(
The project.
"""
project = _get_project_for_url_or_path(
url=url,
path=path,
config=config,
)
project.parse()
Expand Down
22 changes: 14 additions & 8 deletions dlt_init_openapi/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,14 @@ def init(
output_path: Optional[pathlib.Path] = typer.Option(None, help="A path to render the output to."),
config_path: Optional[pathlib.Path] = typer.Option(None, "--config", help="Path to the config file to use"),
interactive: bool = typer.Option(True, help="Wether to select needed endpoints interactively"),
loglevel: int = typer.Option(20, help="Set logging level for stdout output, defaults to 20 (INFO)"),
log_level: int = typer.Option(20, help="Set logging level for stdout output, defaults to 20 (INFO)"),
global_limit: int = typer.Option(0, help="Set a global limit on the generated source"),
update_rest_api_source: bool = typer.Option(
False, help="Wether to update the locally cached rest_api verified source"
allow_openapi_2: bool = typer.Option(
False,
"--allow-openapi-2",
help="Allow to use OpenAPI v2. specs. Migration of the spec to 3.0 is recommended though.",
),
update_rest_api_source: bool = typer.Option(False, help="Update the locally cached rest_api verified source."),
version: bool = typer.Option(False, "--version", callback=_print_version, help="Print the version and exit"),
) -> None:
"""Generate a new dlt pipeline"""
Expand All @@ -59,9 +62,10 @@ def init(
output_path=output_path,
config_path=config_path,
interactive=interactive,
loglevel=loglevel,
log_level=log_level,
global_limit=global_limit,
update_rest_api_source=update_rest_api_source,
allow_openapi_2=allow_openapi_2,
)


Expand All @@ -73,16 +77,17 @@ def _init_command_wrapped(
output_path: Optional[pathlib.Path] = None,
config_path: Optional[pathlib.Path] = None,
interactive: bool = True,
loglevel: int = 20,
log_level: int = 20,
global_limit: int = 0,
update_rest_api_source: bool = False,
allow_openapi_2: bool = False,
) -> None:

from dlt_init_openapi import create_new_client

# set up console logging
logger.remove()
logger.add(sys.stdout, level=loglevel)
logger.add(sys.stdout, level=log_level)
logger.success("Starting dlt openapi generator")

if not url and not path:
Expand All @@ -105,6 +110,9 @@ def _init_command_wrapped(
"output_path": output_path,
"endpoint_filter": questionary_endpoint_selection if interactive else None,
"global_limit": global_limit,
"spec_url": url,
"spec_path": path,
"allow_openapi_2": allow_openapi_2,
},
)

Expand All @@ -117,8 +125,6 @@ def _init_command_wrapped(
exit(0)

create_new_client(
url=url,
path=path,
config=config,
)
logger.success("Pipeline created. Learn more at https://dlthub.com/docs. See you next time :)")
Expand Down
6 changes: 3 additions & 3 deletions dlt_init_openapi/cli/cli_endpoint_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def questionary_endpoint_selection(endpoints: EndpointCollection) -> Set[str]:
("italic", f" {endpoint.path}"),
]
choices.append(questionary.Choice(text, endpoint))
if not choices:
raise ValueError("No endpoints found")
selected_endpoints: List[Endpoint] = questionary.checkbox(
"Which resources would you like to generate?", choices
"Which resources would you like to generate? Press enter to continue, "
+ "if you do not select any resources, all of them will be rendered.",
choices,
).ask()

# return resource names of selected endpoints
Expand Down
4 changes: 4 additions & 0 deletions dlt_init_openapi/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,14 @@ class Config(BaseModel):
"""Set a limit on how many items are emitted from a resource"""
parameter_default_value: str = "FILL_ME_IN"
"""default to render for required parameters that do not have a default in the spec"""
allow_openapi_2: bool = False
"""Allow to use OpenAPI 2 specs"""

# internal, do not set via config file
project_dir: Path = None
pipeline_file_name: str = None
spec_url: str = None
spec_path: Path = None

def __init__(self, *args: Any, **kwargs: Any) -> None:
super(Config, self).__init__(*args, **kwargs)
Expand Down
6 changes: 6 additions & 0 deletions dlt_init_openapi/detector/default/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ def detect_security_schemes(self, open_api: OpenapiParser) -> None:
elif global_scheme and not global_scheme.supported:
self._add_warning(UnsupportedSecuritySchemeWarning(global_scheme.type))

# set first auth as global scheme
if open_api.security_schemes and not open_api.detected_global_security_scheme:
global_scheme = list(open_api.security_schemes.values())[0]
if global_scheme.supported:
open_api.detected_global_security_scheme = global_scheme

def detect_resource_names(self, endpoints: EndpointCollection) -> None:
"""iterate all endpoints and find a strategy to select the right resource name"""

Expand Down
18 changes: 9 additions & 9 deletions dlt_init_openapi/detector/default/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
PRIMARY_KEY_SUFFIXES = ["id", "pk"]
PRIMARY_KEY_WORD_SEPARATORS = ["", "-", "_"]

RE_UNIQUE_KEY = re.compile(r"\b(unique|id|identifier)\b", re.IGNORECASE)
RE_UNIQUE_KEY = re.compile(r"^(unique|id|identifier)$", re.IGNORECASE)

# pagination
RE_PAGE_PARAM = re.compile(r"(?i)(page|page_number)", re.IGNORECASE)
RE_TOTAL_PAGE_PROPERTY = re.compile(r"(?i)(total|count)", re.IGNORECASE)
RE_OFFSET_PARAM = re.compile(r"(?i)(start|offset|skip)", re.IGNORECASE)
RE_LIMIT_PARAM = re.compile(r"(?i)(limit|per_page|page_size|size)", re.IGNORECASE)
RE_TOTAL_PROPERTY = re.compile(r"(?i)(total|count|total_count)", re.IGNORECASE)
RE_CURSOR_PARAM = re.compile(r"(?i)(cursor|after|since)", re.IGNORECASE)
RE_CURSOR_PROP = re.compile(r"(?i)(cursor|next_cursor)", re.IGNORECASE)
RE_NEXT_PROPERTY = re.compile(r"(?i)(next|next_url|more)", re.IGNORECASE)
RE_PAGE_PARAM = re.compile(r"^(page|page_number)$", re.IGNORECASE)
RE_TOTAL_PAGE_PROPERTY = re.compile(r"^(total|count|totalPages)$", re.IGNORECASE)
RE_OFFSET_PARAM = re.compile(r"^(start|offset|skip)$", re.IGNORECASE)
RE_LIMIT_PARAM = re.compile(r"^(limit|per_page|page_size|size)$", re.IGNORECASE)
RE_TOTAL_PROPERTY = re.compile(r"^(total|count|total_count|totalRecords|totalItems)$", re.IGNORECASE)
RE_CURSOR_PARAM = re.compile(r"^(cursor|after|since)$", re.IGNORECASE)
RE_CURSOR_PROP = re.compile(r"^(cursor|next_cursor)$", re.IGNORECASE)
RE_NEXT_PROPERTY = re.compile(r"^(next|next_url|more)$", re.IGNORECASE)
RE_MATCH_ALL = re.compile(r".*", re.IGNORECASE)

# content path discovery
Expand Down
15 changes: 14 additions & 1 deletion dlt_init_openapi/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from typing import List


class DltOpenAPIException(Exception):
pass

Expand All @@ -16,7 +19,9 @@ def __init__(self, swagger_detected: bool = False) -> None:
convert_helper = (
"you can convert it to an openapi 3.0 spec by going to https://editor.swagger.io/, "
+ "pasting your spec and selecting 'Edit' -> 'Convert to OpenAPI 3.0' from the Menu "
+ "and then retry with the converted file."
+ "and then retry with the converted file. Alternatively you can run the generator "
+ "with the --allow-openapi-2 flag. The generated result usually improves if you convert "
+ "your spec to 3.0 thouhg."
)

super().__init__(
Expand All @@ -36,3 +41,11 @@ class DltUnparseableSpecException(DltOpenAPITerminalException):
def __init__(self) -> None:

super().__init__("Could not parse selected spec, please provide a valid YAML or JSON document.")


class DltNoEndpointsDiscovered(DltOpenAPITerminalException):
    """Raised when the parsed spec has no endpoint for the enabled HTTP methods."""

    def __init__(self, enabled_methods: List[str]):
        """Build the error message.

        Args:
            enabled_methods: The HTTP methods that were searched for; they are
                interpolated into the message as-is.
        """
        message = (
            f"Did not find any endpoint with http methods {enabled_methods} in provided OpenAPI spec. "
            "Please check your spec if endpoints with these methods exist or add additional methods in your config."
        )
        super().__init__(message)
19 changes: 17 additions & 2 deletions dlt_init_openapi/parser/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class Response:
osp_response: osp.Response
schema: Optional[SchemaWrapper]
status_code: str
description: str
# detected values
detected_payload: Optional[DataPropertyPath] = None
detected_primary_key: Optional[str] = None
Expand Down Expand Up @@ -144,6 +145,13 @@ def default_for_param(self, location: Literal["path", "query"], param_name: str)
return p.default
return self.context.config.parameter_default_value

@property
def render_description(self) -> Optional[str]:
    """Return the description flattened onto a single line, or ``None``.

    Uses ``self.description`` and falls back to ``self.path_description``
    when the former is empty or ``None``.

    Returns:
        The chosen description with every line break (CRLF, lone CR or LF)
        replaced by a single space, or ``None`` when neither attribute holds
        a non-empty value.
    """
    description = self.description or self.path_description
    if not description:
        return None
    # Replace CRLF first so a Windows line break becomes one space, not two.
    # A lone CR must go as well: it still counts as a line break, so leaving it
    # in would defeat the single-line guarantee.
    # NOTE(review): presumably the result is rendered into a one-line
    # comment/docstring in the generated source - confirm against the template.
    return description.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")

@classmethod
def from_operation(
cls,
Expand All @@ -168,11 +176,18 @@ def from_operation(
response_schema = context.response_from_reference(response_ref)
content_schema: Optional[SchemaWrapper] = None
for content_type, media_type in (response_schema.content or {}).items():
if content_type.endswith("json") and media_type.media_type_schema:
if (content_type.endswith("json") or content_type == "*/*") and media_type.media_type_schema:
content_schema = SchemaWrapper.from_reference(media_type.media_type_schema, context)
break

responses.append(Response(osp_response=response_schema, schema=content_schema, status_code=status_code))
responses.append(
Response(
osp_response=response_schema,
schema=content_schema,
status_code=status_code,
description=response_schema.description,
)
)

return cls(
method=method,
Expand Down
26 changes: 17 additions & 9 deletions dlt_init_openapi/parser/openapi_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from loguru import logger
from yaml import BaseLoader

from dlt_init_openapi.exceptions import DltInvalidSpecException, DltOpenAPINot30Exception, DltUnparseableSpecException
from dlt_init_openapi.exceptions import (
DltInvalidSpecException,
DltNoEndpointsDiscovered,
DltOpenAPINot30Exception,
DltUnparseableSpecException,
)
from dlt_init_openapi.parser.config import Config
from dlt_init_openapi.parser.context import OpenapiContext
from dlt_init_openapi.parser.endpoints import EndpointCollection
Expand All @@ -34,22 +39,22 @@ def parse(self, data: bytes) -> None:

self.spec_raw = self._load_yaml_or_json(data)
self.security_schemes = {}

logger.info("Validating spec structure")
try:
spec = osp.OpenAPI.parse_obj(self.spec_raw)
except Exception as e:
raise DltInvalidSpecException() from e
logger.success("Spec validation successful")

# check if this is openapi 3.0
swagger_version = self.spec_raw.get("swagger")
if swagger_version:
raise DltOpenAPINot30Exception(swagger_detected=True)
if not self.config.allow_openapi_2:
# check if this is openapi 3.0
swagger_version = self.spec_raw.get("swagger")
if swagger_version:
raise DltOpenAPINot30Exception(swagger_detected=True)

openapi_version = self.spec_raw.get("openapi")
if not openapi_version or not openapi_version.startswith("3"):
raise DltOpenAPINot30Exception(swagger_detected=False)
openapi_version = self.spec_raw.get("openapi")
if not openapi_version or not openapi_version.startswith("3"):
raise DltOpenAPINot30Exception(swagger_detected=False)

logger.info("Extracting openapi metadata")
self.context = OpenapiContext(self.config, spec, self.spec_raw)
Expand All @@ -73,6 +78,9 @@ def parse(self, data: bytes) -> None:
self.endpoints = EndpointCollection.from_context(self.context)
logger.success(f"Completed parsing endpoints. {len(self.endpoints.endpoints)} endpoints found.")

if len(self.endpoints.endpoints) == 0:
raise DltNoEndpointsDiscovered(self.config.include_methods)

def _load_yaml_or_json(self, data: bytes) -> Dict[str, Any]:
logger.info("Trying to parse spec as JSON")
try:
Expand Down
1 change: 1 addition & 0 deletions dlt_init_openapi/renderer/default/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def run(self, openapi: OpenapiParser, dry: bool = False) -> None:
package_name=self.package_name,
project_name=self.config.project_name,
credentials=self.openapi.detected_global_security_scheme,
config=self.config,
)

if dry:
Expand Down
Loading

0 comments on commit 2592216

Please sign in to comment.