Skip to content

Commit 8931b10

Browse files
authored
Merge pull request #94 from scale-vector/rfix/adds-dlt-config-default-literals
adds dlt config default literals
2 parents d77e363 + 3f6a72e commit 8931b10

35 files changed

+584
-354
lines changed

Makefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ build-library:
5858
poetry build
5959

6060
publish-library: build-library
61-
poetry publish -u __token__
61+
# provide the token via poetry config pypi-token.pypi your-api-token
62+
poetry publish
6263

6364
build-image-tags:
6465
@echo ${IMG}

dlt/cli/_dlt.py

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def init_command_wrapper(pipeline_name: str, destination_name: str, branch: str)
3131
init_command(pipeline_name, destination_name, branch)
3232
except Exception as ex:
3333
click.secho(str(ex), err=True, fg="red")
34+
# TODO: display the stack trace when the debug flag is set
3435

3536

3637
def main() -> None:

dlt/cli/config_toml_writer.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
import tomlkit
33
from tomlkit.items import Table as TOMLTable
44

5-
from dlt.common.configuration.resolve import extract_inner_hint, is_base_configuration_hint
6-
from dlt.common.configuration.specs.base_configuration import BaseConfiguration
5+
from dlt.common.configuration.resolve import extract_inner_hint
6+
from dlt.common.configuration.specs.base_configuration import BaseConfiguration, is_base_configuration_hint
77
from dlt.common.typing import AnyType, is_final_type, is_optional_type
88

99

@@ -41,9 +41,8 @@ def write_spec(toml_table: TOMLTable, config: BaseConfiguration) -> None:
4141

4242

4343
def write_values(toml: tomlkit.TOMLDocument, values: Iterable[WritableConfigValue]) -> None:
44-
# print(values)
45-
toml_table: TOMLTable = toml # type: ignore
4644
for value in values:
45+
toml_table: TOMLTable = toml # type: ignore
4746
for namespace in value.namespaces:
4847
if namespace not in toml_table:
4948
inner_table = tomlkit.table(True)

dlt/cli/init_command.py

+37-60
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212

1313
from dlt.common.git import clone_repo
1414
from dlt.common.configuration.providers.toml import ConfigTomlProvider, SecretsTomlProvider
15-
from dlt.common.configuration.resolve import is_secret_hint
15+
from dlt.common.configuration.specs.base_configuration import is_secret_hint
16+
from dlt.common.configuration.accessors import DLT_SECRETS_VALUE, DLT_CONFIG_VALUE
1617
from dlt.common.exceptions import DltException
1718
from dlt.common.logger import DLT_PKG_NAME
1819
from dlt.common.normalizers.names.snake_case import normalize_schema_name
1920
from dlt.common.destination import DestinationReference
21+
from dlt.common.reflection.utils import set_ast_parents
2022
from dlt.common.schema.exceptions import InvalidSchemaName
2123
from dlt.common.storages.file_storage import FileStorage
2224
from dlt.common.typing import AnyType, is_optional_type
@@ -30,14 +32,13 @@
3032
from dlt.cli.config_toml_writer import WritableConfigValue, write_values
3133

3234

33-
3435
REQUIREMENTS_TXT = "requirements.txt"
3536
PYPROJECT_TOML = "pyproject.toml"
3637

3738

3839
def _clone_init_repo(branch: str) -> Tuple[FileStorage, List[str], str]:
3940
# return tuple is (file storage for cloned repo, list of template files to copy, the default pipeline template script)
40-
# template_dir = "/tmp/tmptz2omtdf" # tempfile.mkdtemp()
41+
# template_dir = "~/src/python-dlt-init-template"
4142
template_dir = tempfile.mkdtemp()
4243
clone_repo("https://github.com/scale-vector/python-dlt-init-template.git", template_dir, branch=branch)
4344

@@ -58,7 +59,8 @@ def _clone_init_repo(branch: str) -> Tuple[FileStorage, List[str], str]:
5859
def _parse_init_script(script_source: str, init_script_name: str) -> PipelineScriptVisitor:
5960
# parse the script first
6061
tree = ast.parse(source=script_source)
61-
visitor = PipelineScriptVisitor(script_source, add_parents=True)
62+
set_ast_parents(tree)
63+
visitor = PipelineScriptVisitor(script_source)
6264
visitor.visit(tree)
6365
if len(visitor.mod_aliases) == 0:
6466
raise CliCommandException("init", f"The pipeline script {init_script_name} does not import dlt or has bizarre import structure")
@@ -94,57 +96,34 @@ def _find_argument_nodes_to_replace(visitor: PipelineScriptVisitor, replace_node
9496
raise CliCommandException("init", f"The pipeline script {init_script_name} is not explicitly passing the '{t_arg_name}' argument to 'pipeline' or 'run' function. In init script the default and configured values are not accepted.")
9597
return transformed_nodes
9698

99+
97100
def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: ModuleType, init_script_name: str) -> Tuple[Dict[str, WritableConfigValue], Dict[str, WritableConfigValue]]:
98101
# all detected secrets with namespaces
99102
required_secrets: Dict[str, WritableConfigValue] = {}
100103
# all detected configs with namespaces
101104
required_config: Dict[str, WritableConfigValue] = {}
102105

103-
# skip sources without spec. those are not imported and most probably are inner functions
104-
known_imported_calls = {name: calls for name, calls in visitor.known_source_calls.items() if name in _SOURCES}
106+
# skip sources without a spec: those are not imported and are most probably inner functions. also skip sources that are never called
107+
# also skip sources that are called only from inside functions: at least one call to the source must have a parent of None (no outer function)
108+
known_imported_sources = {name: _SOURCES[name] for name in visitor.known_sources
109+
if name in _SOURCES and name in visitor.known_source_calls and any(call.parent is None for call in visitor.known_source_calls[name])} # type: ignore
105110

106-
for pipeline_name, call_nodes in known_imported_calls.items():
107-
source_config = _SOURCES.get(pipeline_name).SPEC()
111+
for source_name, source_info in known_imported_sources.items():
112+
source_config = source_info.SPEC()
108113
spec_fields = source_config.get_resolvable_fields()
109-
source_sig = inspect.signature(getattr(script_module, pipeline_name))
110-
# bind all calls
111-
for call_node in call_nodes:
112-
try:
113-
bound_args = source_sig.bind(*call_node.args, **{str(kwd.arg):kwd.value for kwd in call_node.keywords})
114-
bound_args.apply_defaults()
115-
except TypeError as ty_ex:
116-
call_info = visitor.source_segment(call_node)
117-
raise CliCommandException("init", f"In {init_script_name} the source/resource {pipeline_name} call {call_info} looks wrong: {ty_ex}")
118-
# find all the arguments that are not sufficiently bound
119-
for arg_name, arg_node in bound_args.arguments.items():
120-
# check if argument is in spec and is not optional. optional arguments won't be added to config/secrets
121-
arg_type = spec_fields.get(arg_name)
122-
if arg_type and not is_optional_type(arg_type):
123-
value_provided = True
124-
from_placeholder = False
125-
from_secrets = is_secret_hint(arg_type)
126-
if isinstance(arg_node, ast.Constant):
127-
value_provided = ast.literal_eval(arg_node) is not None
128-
if isinstance(arg_node, ast.Attribute) and arg_node.attr == "value":
129-
attr_source = visitor.source_segment(arg_node)
130-
if attr_source.endswith("config.value"):
131-
value_provided = False
132-
from_placeholder = True
133-
if from_secrets:
134-
raise CliCommandException("init", f"The pipeline script {init_script_name} calls source/resource {pipeline_name} where argument {arg_name} is a secret but it requests it via {attr_source}")
135-
if attr_source.endswith("secrets.value"):
136-
value_provided = False
137-
from_placeholder = True
138-
from_secrets = True
139-
# was value provided in the call args?
140-
if not value_provided:
141-
# do we have sufficient information if arg_name is config or secret?
142-
if arg_type is AnyType and not from_placeholder:
143-
raise CliCommandException("init", f"The pipeline script {init_script_name} in source/resource '{pipeline_name}' does not provide enough information if argument '{arg_name}' is a secret or a config value. Use 'dlt.config.value' or 'dlt.secret.value' or (strongly suggested) type the source/resource function signature.")
144-
val_store = required_secrets if from_secrets else required_config
145-
# use full namespaces if we have many sources
146-
namespaces = () if len(known_imported_calls) == 1 else ("sources", pipeline_name)
147-
val_store[pipeline_name + ":" + arg_name] = WritableConfigValue(arg_name, arg_type, namespaces)
114+
for field_name, field_type in spec_fields.items():
115+
val_store = None
116+
# all secrets must go to secrets.toml
117+
if is_secret_hint(field_type):
118+
val_store = required_secrets
119+
# all configs that are required and do not have a default value must go to config.toml
120+
elif not is_optional_type(field_type) and getattr(source_config, field_name) is None:
121+
val_store = required_config
122+
123+
if val_store is not None:
124+
# we are sure that all resources come from a single file, so we can put them in a single namespace
125+
# namespaces = () if len(known_imported_sources) == 1 else ("sources", source_name)
126+
val_store[source_name + ":" + field_name] = WritableConfigValue(field_name, field_type, ())
148127

149128
return required_secrets, required_config
150129

@@ -229,7 +208,7 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
229208
# find all arguments in all calls to replace
230209
transformed_nodes = _find_argument_nodes_to_replace(
231210
visitor,
232-
[("destination", destination_name), ("pipeline_name", pipeline_name)],
211+
[("destination", destination_name), ("pipeline_name", pipeline_name), ("dataset_name", pipeline_name + "_data")],
233212
init_script_name
234213
)
235214

@@ -238,8 +217,10 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
238217

239218
if len(_SOURCES) == 0:
240219
raise CliCommandException("init", f"The pipeline script {init_script_name} is not creating or importing any sources or resources")
220+
241221
for source_q_name, source_config in _SOURCES.items():
242222
if source_q_name not in visitor.known_sources:
223+
print(visitor.known_sources)
243224
raise CliCommandException("init", f"The pipeline script {init_script_name} imports a source/resource {source_config.f.__name__} from module {source_config.module.__name__}. In init scripts you must declare all sources and resources in single file.")
244225

245226
# detect all the required secrets and configs that should go into tomls files
@@ -251,12 +232,6 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
251232
# modify the script
252233
dest_script_source = _rewrite_script(visitor.source, transformed_nodes)
253234

254-
# generate tomls with comments
255-
secrets_prov = SecretsTomlProvider()
256-
write_values(secrets_prov._toml, required_secrets.values())
257-
config_prov = ConfigTomlProvider()
258-
write_values(config_prov._toml, required_config.values())
259-
260235
# welcome message
261236
click.echo()
262237
click.echo("Your new pipeline %s is ready to be customized!" % fmt.bold(pipeline_name))
@@ -281,20 +256,22 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
281256
if dest_storage.has_file(REQUIREMENTS_TXT):
282257
click.echo("Your python dependencies are kept in %s. Please add the dependency for %s as follows:" % (fmt.bold(REQUIREMENTS_TXT), fmt.bold(DLT_PKG_NAME)))
283258
click.echo(req_dep_line)
259+
click.echo("To install dlt with the %s extra using pip:" % fmt.bold(destination_name))
284260
else:
285-
if click.confirm("%s not found. Should I create one?" % REQUIREMENTS_TXT):
286-
requirements_txt = req_dep_line
287-
click.echo("* %s created. Install it with:\npip3 install -r %s" % (fmt.bold(REQUIREMENTS_TXT), REQUIREMENTS_TXT))
288-
else:
289-
click.echo("Do not forget to install dlt with the %s extra using:")
290-
click.echo(f"pip3 install {DLT_PKG_NAME}[{destination_name}]")
261+
requirements_txt = req_dep_line
262+
click.echo("* %s created. Install it with:\npip3 install -r %s" % (fmt.bold(REQUIREMENTS_TXT), REQUIREMENTS_TXT))
291263

292264
# copy files at the very end
293265
for file_name in TEMPLATE_FILES + toml_files:
294266
shutil.copy(clone_storage.make_full_path(file_name), dest_storage.make_full_path(file_name))
295267

296268
# create script
297269
dest_storage.save(dest_pipeline_script, dest_script_source)
270+
# generate tomls with comments
271+
secrets_prov = SecretsTomlProvider()
272+
write_values(secrets_prov._toml, required_secrets.values())
273+
config_prov = ConfigTomlProvider()
274+
write_values(config_prov._toml, required_config.values())
298275
# write toml files
299276
secrets_prov._write_toml()
300277
config_prov._write_toml()

dlt/common/configuration/accessors.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77

88
from dlt.common.configuration.providers.provider import ConfigProvider
99
from dlt.common.configuration.resolve import deserialize_value
10-
from dlt.common.configuration.specs import BaseConfiguration
10+
from dlt.common.configuration.specs.base_configuration import is_base_configuration_hint
1111
from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext
1212
from dlt.common.schema.utils import coerce_value
1313
from dlt.common.typing import AnyType, ConfigValue
1414

15-
15+
DLT_SECRETS_VALUE = "secrets.value"
16+
DLT_CONFIG_VALUE = "config.value"
1617
TConfigAny = TypeVar("TConfigAny", bound=Any)
1718

1819
class _Accessor(abc.ABC):
@@ -33,7 +34,7 @@ def get(self, field: str, expected_type: Type[TConfigAny] = None) -> TConfigAny:
3334
return None
3435
# cast to required type
3536
if expected_type:
36-
if inspect.isclass(expected_type) and issubclass(expected_type, BaseConfiguration):
37+
if is_base_configuration_hint(expected_type):
3738
c = expected_type()
3839
if isinstance(value, dict):
3940
c.update(value)

dlt/common/configuration/inject.py

+6-55
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,23 @@
1-
import re
21
import inspect
32
from makefun import wraps
4-
from types import ModuleType
53
from typing import Callable, Dict, Type, Any, Optional, Tuple, TypeVar, overload
64
from inspect import Signature, Parameter
75

8-
from dlt.common.typing import AnyType, DictStrAny, StrAny, TFun, AnyFun
6+
from dlt.common.typing import DictStrAny, StrAny, TFun, AnyFun
97
from dlt.common.configuration.resolve import resolve_configuration, inject_namespace
10-
from dlt.common.configuration.specs.base_configuration import BaseConfiguration, is_valid_hint, configspec
8+
from dlt.common.configuration.specs.base_configuration import BaseConfiguration
119
from dlt.common.configuration.specs.config_namespace_context import ConfigNamespacesContext
12-
from dlt.common.utils import get_callable_name
10+
from dlt.common.reflection.spec import spec_from_signature
11+
1312

14-
# [^.^_]+ splits by . or _
15-
_SLEEPING_CAT_SPLIT = re.compile("[^.^_]+")
1613
_LAST_DLT_CONFIG = "_dlt_config"
1714
_ORIGINAL_ARGS = "_dlt_orig_args"
1815
TConfiguration = TypeVar("TConfiguration", bound=BaseConfiguration)
1916
# keep a registry of all the decorated functions
2017
_FUNC_SPECS: Dict[int, Type[BaseConfiguration]] = {}
2118

2219

20+
2321
def get_fun_spec(f: AnyFun) -> Type[BaseConfiguration]:
2422
return _FUNC_SPECS.get(id(f))
2523

@@ -50,7 +48,7 @@ def decorator(f: TFun) -> TFun:
5048
namespace_context = ConfigNamespacesContext()
5149

5250
if spec is None:
53-
SPEC = _spec_from_signature(_get_spec_name_from_f(f), inspect.getmodule(f), sig, only_kw)
51+
SPEC = spec_from_signature(f, sig, only_kw)
5452
else:
5553
SPEC = spec
5654

@@ -129,50 +127,3 @@ def last_config(**kwargs: Any) -> BaseConfiguration:
129127

130128
def get_orig_args(**kwargs: Any) -> Tuple[Tuple[Any], DictStrAny]:
131129
return kwargs[_ORIGINAL_ARGS] # type: ignore
132-
133-
134-
def _get_spec_name_from_f(f: AnyFun) -> str:
135-
func_name = get_callable_name(f, "__qualname__").replace("<locals>.", "") # func qual name contains position in the module, separated by dots
136-
137-
def _first_up(s: str) -> str:
138-
return s[0].upper() + s[1:]
139-
140-
return "".join(map(_first_up, _SLEEPING_CAT_SPLIT.findall(func_name))) + "Configuration"
141-
142-
143-
def _spec_from_signature(name: str, module: ModuleType, sig: Signature, kw_only: bool = False) -> Type[BaseConfiguration]:
144-
# synthesize configuration from the signature
145-
fields: Dict[str, Any] = {}
146-
annotations: Dict[str, Any] = {}
147-
148-
for p in sig.parameters.values():
149-
# skip *args and **kwargs, skip typical method params and if kw_only flag is set: accept KEYWORD ONLY args
150-
if p.kind not in (Parameter.VAR_KEYWORD, Parameter.VAR_POSITIONAL) and p.name not in ["self", "cls"] and \
151-
(kw_only and p.kind == Parameter.KEYWORD_ONLY or not kw_only):
152-
field_type = AnyType if p.annotation == Parameter.empty else p.annotation
153-
if is_valid_hint(field_type):
154-
field_default = None if p.default == Parameter.empty else p.default
155-
# try to get type from default
156-
if field_type is AnyType and field_default:
157-
field_type = type(field_default)
158-
# make type optional if explicit None is provided as default
159-
if p.default is None:
160-
field_type = Optional[field_type]
161-
# set annotations
162-
annotations[p.name] = field_type
163-
# set field with default value
164-
fields[p.name] = field_default
165-
166-
# new type goes to the module where sig was declared
167-
fields["__module__"] = module.__name__
168-
# set annotations so they are present in __dict__
169-
fields["__annotations__"] = annotations
170-
# synthesize type
171-
T: Type[BaseConfiguration] = type(name, (BaseConfiguration,), fields)
172-
# add to the module
173-
setattr(module, name, T)
174-
SPEC = configspec(init=False)(T)
175-
# print(f"SYNTHESIZED {SPEC} in {inspect.getmodule(SPEC)} for sig {sig}")
176-
# import dataclasses
177-
# print("\n".join(map(str, dataclasses.fields(SPEC))))
178-
return SPEC

dlt/common/configuration/resolve.py

+1-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import ast
2-
import inspect
32
from collections.abc import Mapping as C_Mapping
43
from typing import Any, Dict, ContextManager, List, Optional, Sequence, Tuple, Type, TypeVar, get_origin
54

@@ -8,7 +7,7 @@
87
from dlt.common.typing import AnyType, StrAny, TSecretValue, is_final_type, is_optional_type, extract_inner_type
98
from dlt.common.schema.utils import coerce_value, py_type_to_sc_type
109

11-
from dlt.common.configuration.specs.base_configuration import BaseConfiguration, CredentialsConfiguration, ContainerInjectableContext, get_config_if_union
10+
from dlt.common.configuration.specs.base_configuration import BaseConfiguration, CredentialsConfiguration, is_secret_hint, get_config_if_union, is_base_configuration_hint, is_context_hint
1211
from dlt.common.configuration.specs.config_namespace_context import ConfigNamespacesContext
1312
from dlt.common.configuration.container import Container
1413
from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext
@@ -68,18 +67,6 @@ def serialize_value(value: Any) -> Any:
6867
return coerce_value("text", value_dt, value)
6968

7069

71-
def is_secret_hint(hint: Type[Any]) -> bool:
72-
return hint is TSecretValue or (inspect.isclass(hint) and issubclass(hint, CredentialsConfiguration))
73-
74-
75-
def is_base_configuration_hint(hint: Type[Any]) -> bool:
76-
return inspect.isclass(hint) and issubclass(hint, BaseConfiguration)
77-
78-
79-
def is_context_hint(hint: Type[Any]) -> bool:
80-
return inspect.isclass(hint) and issubclass(hint, ContainerInjectableContext)
81-
82-
8370
def extract_inner_hint(hint: Type[Any]) -> Type[Any]:
8471
# extract hint from Optional / Literal / NewType hints
8572
inner_hint = extract_inner_type(hint)

0 commit comments

Comments
 (0)