Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

detector improvements, add contributing.md, small rendering fixes #114

Merged
merged 14 commits into from
May 23, 2024
Merged
11 changes: 11 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Contributing to dlt-init-openapi

To contribute to this repo, follow these steps:

1. [Join our slack community](https://dlthub.com/community) and talk to us if you want to extend dlt-init-openapi. Until we have a more comprehensive contribution guide, we're happy to help you get started there.
2. Fork this repo and check it out
3. Install all dependencies with `make dev` (you will need poetry for dependency management)
4. Run the fast tests to verify that all is properly installed with `make test-fast`
5. Make your code changes, and write new tests if you add new features.
6. Format and lint with `make format` and `make lint`
7. Create a PR to this repo.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,11 @@ $ dlt-init-openapi pokemon --path ./path/to/my_spec.yml
- `--output-path PATH`: A path to render the output to
- `--config PATH`: Path to the config file to use (see below)
- `--no-interactive`: Skip endpoint selection and render all paths of the OpenAPI spec.
- `--loglevel`: Set logging level for stdout output, defaults to 20 (INFO).
- `--log-level`: Set logging level for stdout output, defaults to 20 (INFO).
- `--global-limit`: Set a global limit on the generated source.
- `--update-rest-api-source`: Update the locally cached rest_api verified source.
- `--allow-openapi-2`: Allow the use of OpenAPI v2 specs. Migrating the spec to 3.0 is recommended, though.
- `--version`: Show installed version of the generator.
- `--help`: Show this message and exit.

## Config options
Expand Down
31 changes: 14 additions & 17 deletions dlt_init_openapi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from enum import Enum
from importlib.metadata import version
from pathlib import Path
from typing import Optional, cast
from typing import cast

import httpcore
import httpx
Expand Down Expand Up @@ -57,7 +57,10 @@ def render(self, dry: bool = False) -> None:
logger.info("Rendering project")
if self.config.endpoint_filter:
filtered_endpoints = self.config.endpoint_filter(self.openapi.endpoints)
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
if filtered_endpoints:
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
else:
logger.warning("You have not selected any endpoints, all endpoints will be rendered.")
self.renderer.run(self.openapi, dry=dry)
logger.success(f"Rendered project to: {self.config.project_dir}")
logger.info("You can now run your pipeline from this folder with 'python pipeline.py'.")
Expand All @@ -84,30 +87,28 @@ def print_warnings(self) -> None:
logger.warning(w.msg)


def _get_document(*, url: Optional[str] = None, path: Optional[Path] = None, timeout: int = 60) -> bytes:
if url is not None and path is not None:
def _get_document(*, config: Config, timeout: int = 60) -> bytes:
if config.spec_url is not None and config.spec_path is not None:
raise ValueError("Provide URL or Path, not both.")
if url is not None:
logger.info(f"Downloading spec from {url}")
if config.spec_url is not None:
logger.info(f"Downloading spec from {config.spec_url}")
try:
response = httpx.get(url, timeout=timeout)
response = httpx.get(config.spec_url, timeout=timeout)
logger.success("Download complete")
return response.content
except (httpx.HTTPError, httpcore.NetworkError) as e:
raise ValueError("Could not get OpenAPI document from provided URL") from e
elif path is not None:
logger.info(f"Reading spec from {path}")
return Path(path).read_bytes()
elif config.spec_path is not None:
logger.info(f"Reading spec from {config.spec_path}")
return Path(config.spec_path).read_bytes()
else:
raise ValueError("No URL or Path provided")


def _get_project_for_url_or_path( # pylint: disable=too-many-arguments
url: Optional[str],
path: Optional[Path],
config: Config = None,
) -> Project:
doc = _get_document(url=url, path=path)
doc = _get_document(config=config)

renderer_cls = cast(BaseRenderer, import_class_from_string(config.renderer_class))
detector_cls = cast(BaseDetector, import_class_from_string(config.detector_class))
Expand All @@ -123,8 +124,6 @@ def _get_project_for_url_or_path( # pylint: disable=too-many-arguments

def create_new_client(
*,
url: Optional[str] = None,
path: Optional[Path] = None,
config: Config = None,
) -> Project:
"""
Expand All @@ -134,8 +133,6 @@ def create_new_client(
The project.
"""
project = _get_project_for_url_or_path(
url=url,
path=path,
config=config,
)
project.parse()
Expand Down
22 changes: 14 additions & 8 deletions dlt_init_openapi/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,14 @@ def init(
output_path: Optional[pathlib.Path] = typer.Option(None, help="A path to render the output to."),
config_path: Optional[pathlib.Path] = typer.Option(None, "--config", help="Path to the config file to use"),
interactive: bool = typer.Option(True, help="Wether to select needed endpoints interactively"),
loglevel: int = typer.Option(20, help="Set logging level for stdout output, defaults to 20 (INFO)"),
log_level: int = typer.Option(20, help="Set logging level for stdout output, defaults to 20 (INFO)"),
global_limit: int = typer.Option(0, help="Set a global limit on the generated source"),
update_rest_api_source: bool = typer.Option(
False, help="Wether to update the locally cached rest_api verified source"
allow_openapi_2: bool = typer.Option(
False,
"--allow-openapi-2",
help="Allow to use OpenAPI v2. specs. Migration of the spec to 3.0 is recommended though.",
),
update_rest_api_source: bool = typer.Option(False, help="Update the locally cached rest_api verified source."),
version: bool = typer.Option(False, "--version", callback=_print_version, help="Print the version and exit"),
) -> None:
"""Generate a new dlt pipeline"""
Expand All @@ -59,9 +62,10 @@ def init(
output_path=output_path,
config_path=config_path,
interactive=interactive,
loglevel=loglevel,
log_level=log_level,
global_limit=global_limit,
update_rest_api_source=update_rest_api_source,
allow_openapi_2=allow_openapi_2,
)


Expand All @@ -73,16 +77,17 @@ def _init_command_wrapped(
output_path: Optional[pathlib.Path] = None,
config_path: Optional[pathlib.Path] = None,
interactive: bool = True,
loglevel: int = 20,
log_level: int = 20,
global_limit: int = 0,
update_rest_api_source: bool = False,
allow_openapi_2: bool = False,
) -> None:

from dlt_init_openapi import create_new_client

# set up console logging
logger.remove()
logger.add(sys.stdout, level=loglevel)
logger.add(sys.stdout, level=log_level)
logger.success("Starting dlt openapi generator")

if not url and not path:
Expand All @@ -105,6 +110,9 @@ def _init_command_wrapped(
"output_path": output_path,
"endpoint_filter": questionary_endpoint_selection if interactive else None,
"global_limit": global_limit,
"spec_url": url,
"spec_path": path,
"allow_openapi_2": allow_openapi_2,
},
)

Expand All @@ -117,8 +125,6 @@ def _init_command_wrapped(
exit(0)

create_new_client(
url=url,
path=path,
config=config,
)
logger.success("Pipeline created. Learn more at https://dlthub.com/docs. See you next time :)")
Expand Down
6 changes: 3 additions & 3 deletions dlt_init_openapi/cli/cli_endpoint_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def questionary_endpoint_selection(endpoints: EndpointCollection) -> Set[str]:
("italic", f" {endpoint.path}"),
]
choices.append(questionary.Choice(text, endpoint))
if not choices:
raise ValueError("No endpoints found")
selected_endpoints: List[Endpoint] = questionary.checkbox(
"Which resources would you like to generate?", choices
"Which resources would you like to generate? Press enter to continue, "
+ "if you do not select any resources, all of them will be rendered.",
choices,
).ask()

# return resource names of selected endpoints
Expand Down
4 changes: 4 additions & 0 deletions dlt_init_openapi/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,14 @@ class Config(BaseModel):
"""Set a limit on how many items are emitted from a resource"""
parameter_default_value: str = "FILL_ME_IN"
"""default to render for required parameters that do not have a default in the spec"""
allow_openapi_2: bool = False
"""Allow to use OpenAPI 2 specs"""

# internal, do not set via config file
project_dir: Path = None
pipeline_file_name: str = None
spec_url: str = None
spec_path: Path = None

def __init__(self, *args: Any, **kwargs: Any) -> None:
super(Config, self).__init__(*args, **kwargs)
Expand Down
6 changes: 6 additions & 0 deletions dlt_init_openapi/detector/default/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ def detect_security_schemes(self, open_api: OpenapiParser) -> None:
elif global_scheme and not global_scheme.supported:
self._add_warning(UnsupportedSecuritySchemeWarning(global_scheme.type))

# set first auth as global scheme
if open_api.security_schemes and not open_api.detected_global_security_scheme:
global_scheme = list(open_api.security_schemes.values())[0]
if global_scheme.supported:
open_api.detected_global_security_scheme = global_scheme

def detect_resource_names(self, endpoints: EndpointCollection) -> None:
"""iterate all endpoints and find a strategy to select the right resource name"""

Expand Down
18 changes: 9 additions & 9 deletions dlt_init_openapi/detector/default/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
PRIMARY_KEY_SUFFIXES = ["id", "pk"]
PRIMARY_KEY_WORD_SEPARATORS = ["", "-", "_"]

RE_UNIQUE_KEY = re.compile(r"\b(unique|id|identifier)\b", re.IGNORECASE)
RE_UNIQUE_KEY = re.compile(r"^(unique|id|identifier)$", re.IGNORECASE)

# pagination
RE_PAGE_PARAM = re.compile(r"(?i)(page|page_number)", re.IGNORECASE)
RE_TOTAL_PAGE_PROPERTY = re.compile(r"(?i)(total|count)", re.IGNORECASE)
RE_OFFSET_PARAM = re.compile(r"(?i)(start|offset|skip)", re.IGNORECASE)
RE_LIMIT_PARAM = re.compile(r"(?i)(limit|per_page|page_size|size)", re.IGNORECASE)
RE_TOTAL_PROPERTY = re.compile(r"(?i)(total|count|total_count)", re.IGNORECASE)
RE_CURSOR_PARAM = re.compile(r"(?i)(cursor|after|since)", re.IGNORECASE)
RE_CURSOR_PROP = re.compile(r"(?i)(cursor|next_cursor)", re.IGNORECASE)
RE_NEXT_PROPERTY = re.compile(r"(?i)(next|next_url|more)", re.IGNORECASE)
RE_PAGE_PARAM = re.compile(r"^(page|page_number)$", re.IGNORECASE)
RE_TOTAL_PAGE_PROPERTY = re.compile(r"^(total|count|totalPages)$", re.IGNORECASE)
RE_OFFSET_PARAM = re.compile(r"^(start|offset|skip)$", re.IGNORECASE)
RE_LIMIT_PARAM = re.compile(r"^(limit|per_page|page_size|size)$", re.IGNORECASE)
RE_TOTAL_PROPERTY = re.compile(r"^(total|count|total_count|totalRecords|totalItems)$", re.IGNORECASE)
RE_CURSOR_PARAM = re.compile(r"^(cursor|after|since)$", re.IGNORECASE)
RE_CURSOR_PROP = re.compile(r"^(cursor|next_cursor)$", re.IGNORECASE)
RE_NEXT_PROPERTY = re.compile(r"^(next|next_url|more)$", re.IGNORECASE)
RE_MATCH_ALL = re.compile(r".*", re.IGNORECASE)

# content path discovery
Expand Down
15 changes: 14 additions & 1 deletion dlt_init_openapi/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from typing import List


class DltOpenAPIException(Exception):
pass

Expand All @@ -16,7 +19,9 @@ def __init__(self, swagger_detected: bool = False) -> None:
convert_helper = (
"you can convert it to an openapi 3.0 spec by going to https://editor.swagger.io/, "
+ "pasting your spec and selecting 'Edit' -> 'Convert to OpenAPI 3.0' from the Menu "
+ "and then retry with the converted file."
+ "and then retry with the converted file. Alternatively you can run the generator "
+ "with the --allow-openapi-2 flag. The generated result usually improves if you convert "
+ "your spec to 3.0 thouhg."
)

super().__init__(
Expand All @@ -36,3 +41,11 @@ class DltUnparseableSpecException(DltOpenAPITerminalException):
def __init__(self) -> None:

super().__init__("Could not parse selected spec, please provide a valid YAML or JSON document.")


class DltNoEndpointsDiscovered(DltOpenAPITerminalException):
    """Raised when parsing the spec yields no endpoints for the enabled HTTP methods."""

    def __init__(self, enabled_methods: List[str]) -> None:
        """Build the error message.

        Args:
            enabled_methods: The HTTP methods that were searched for; they are
                interpolated into the message shown to the user.
        """
        super().__init__(
            f"Did not find any endpoint with http methods {enabled_methods} in provided OpenAPI spec. "
            + "Please check your spec if endpoints with these methods exist or add additional methods in your config."
        )
19 changes: 17 additions & 2 deletions dlt_init_openapi/parser/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class Response:
osp_response: osp.Response
schema: Optional[SchemaWrapper]
status_code: str
description: str
# detected values
detected_payload: Optional[DataPropertyPath] = None
detected_primary_key: Optional[str] = None
Expand Down Expand Up @@ -144,6 +145,13 @@ def default_for_param(self, location: Literal["path", "query"], param_name: str)
return p.default
return self.context.config.parameter_default_value

@property
def render_description(self) -> Optional[str]:
    """Return the description flattened onto a single line.

    Uses ``self.description`` and falls back to ``self.path_description``.

    Returns:
        The description with every line break replaced by a space, or
        ``None`` when neither attribute holds a non-empty value.
    """
    description = self.description or self.path_description
    if not description:
        return None
    # Handle CRLF and bare CR as well as LF: replacing only "\n" would leave a
    # stray "\r" in text that is meant to occupy a single line.
    return description.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")

@classmethod
def from_operation(
cls,
Expand All @@ -168,11 +176,18 @@ def from_operation(
response_schema = context.response_from_reference(response_ref)
content_schema: Optional[SchemaWrapper] = None
for content_type, media_type in (response_schema.content or {}).items():
if content_type.endswith("json") and media_type.media_type_schema:
if (content_type.endswith("json") or content_type == "*/*") and media_type.media_type_schema:
content_schema = SchemaWrapper.from_reference(media_type.media_type_schema, context)
break

responses.append(Response(osp_response=response_schema, schema=content_schema, status_code=status_code))
responses.append(
Response(
osp_response=response_schema,
schema=content_schema,
status_code=status_code,
description=response_schema.description,
)
)

return cls(
method=method,
Expand Down
26 changes: 17 additions & 9 deletions dlt_init_openapi/parser/openapi_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from loguru import logger
from yaml import BaseLoader

from dlt_init_openapi.exceptions import DltInvalidSpecException, DltOpenAPINot30Exception, DltUnparseableSpecException
from dlt_init_openapi.exceptions import (
DltInvalidSpecException,
DltNoEndpointsDiscovered,
DltOpenAPINot30Exception,
DltUnparseableSpecException,
)
from dlt_init_openapi.parser.config import Config
from dlt_init_openapi.parser.context import OpenapiContext
from dlt_init_openapi.parser.endpoints import EndpointCollection
Expand All @@ -34,22 +39,22 @@ def parse(self, data: bytes) -> None:

self.spec_raw = self._load_yaml_or_json(data)
self.security_schemes = {}

logger.info("Validating spec structure")
try:
spec = osp.OpenAPI.parse_obj(self.spec_raw)
except Exception as e:
raise DltInvalidSpecException() from e
logger.success("Spec validation successful")

# check if this is openapi 3.0
swagger_version = self.spec_raw.get("swagger")
if swagger_version:
raise DltOpenAPINot30Exception(swagger_detected=True)
if not self.config.allow_openapi_2:
# check if this is openapi 3.0
swagger_version = self.spec_raw.get("swagger")
if swagger_version:
raise DltOpenAPINot30Exception(swagger_detected=True)

openapi_version = self.spec_raw.get("openapi")
if not openapi_version or not openapi_version.startswith("3"):
raise DltOpenAPINot30Exception(swagger_detected=False)
openapi_version = self.spec_raw.get("openapi")
if not openapi_version or not openapi_version.startswith("3"):
raise DltOpenAPINot30Exception(swagger_detected=False)

logger.info("Extracting openapi metadata")
self.context = OpenapiContext(self.config, spec, self.spec_raw)
Expand All @@ -73,6 +78,9 @@ def parse(self, data: bytes) -> None:
self.endpoints = EndpointCollection.from_context(self.context)
logger.success(f"Completed parsing endpoints. {len(self.endpoints.endpoints)} endpoints found.")

if len(self.endpoints.endpoints) == 0:
raise DltNoEndpointsDiscovered(self.config.include_methods)

def _load_yaml_or_json(self, data: bytes) -> Dict[str, Any]:
logger.info("Trying to parse spec as JSON")
try:
Expand Down
1 change: 1 addition & 0 deletions dlt_init_openapi/renderer/default/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def run(self, openapi: OpenapiParser, dry: bool = False) -> None:
package_name=self.package_name,
project_name=self.config.project_name,
credentials=self.openapi.detected_global_security_scheme,
config=self.config,
)

if dry:
Expand Down
Loading
Loading