12
12
13
13
from dlt .common .git import clone_repo
14
14
from dlt .common .configuration .providers .toml import ConfigTomlProvider , SecretsTomlProvider
15
- from dlt .common .configuration .resolve import is_secret_hint
15
+ from dlt .common .configuration .specs .base_configuration import is_secret_hint
16
+ from dlt .common .configuration .accessors import DLT_SECRETS_VALUE , DLT_CONFIG_VALUE
16
17
from dlt .common .exceptions import DltException
17
18
from dlt .common .logger import DLT_PKG_NAME
18
19
from dlt .common .normalizers .names .snake_case import normalize_schema_name
19
20
from dlt .common .destination import DestinationReference
21
+ from dlt .common .reflection .utils import set_ast_parents
20
22
from dlt .common .schema .exceptions import InvalidSchemaName
21
23
from dlt .common .storages .file_storage import FileStorage
22
24
from dlt .common .typing import AnyType , is_optional_type
30
32
from dlt .cli .config_toml_writer import WritableConfigValue , write_values
31
33
32
34
33
-
34
35
REQUIREMENTS_TXT = "requirements.txt"
35
36
PYPROJECT_TOML = "pyproject.toml"
36
37
37
38
38
39
def _clone_init_repo (branch : str ) -> Tuple [FileStorage , List [str ], str ]:
39
40
# return tuple is (file storage for cloned repo, list of template files to copy, the default pipeline template script)
40
- # template_dir = "/tmp/tmptz2omtdf" # tempfile.mkdtemp()
41
+ # template_dir = "~/src/python-dlt-init-template"
41
42
template_dir = tempfile .mkdtemp ()
42
43
clone_repo ("https://github.com/scale-vector/python-dlt-init-template.git" , template_dir , branch = branch )
43
44
@@ -58,7 +59,8 @@ def _clone_init_repo(branch: str) -> Tuple[FileStorage, List[str], str]:
58
59
def _parse_init_script (script_source : str , init_script_name : str ) -> PipelineScriptVisitor :
59
60
# parse the script first
60
61
tree = ast .parse (source = script_source )
61
- visitor = PipelineScriptVisitor (script_source , add_parents = True )
62
+ set_ast_parents (tree )
63
+ visitor = PipelineScriptVisitor (script_source )
62
64
visitor .visit (tree )
63
65
if len (visitor .mod_aliases ) == 0 :
64
66
raise CliCommandException ("init" , f"The pipeline script { init_script_name } does not import dlt or has bizarre import structure" )
@@ -94,57 +96,34 @@ def _find_argument_nodes_to_replace(visitor: PipelineScriptVisitor, replace_node
94
96
raise CliCommandException ("init" , f"The pipeline script { init_script_name } is not explicitly passing the '{ t_arg_name } ' argument to 'pipeline' or 'run' function. In init script the default and configured values are not accepted." )
95
97
return transformed_nodes
96
98
99
+
97
100
def _detect_required_configs(visitor: PipelineScriptVisitor, script_module: ModuleType, init_script_name: str) -> Tuple[Dict[str, WritableConfigValue], Dict[str, WritableConfigValue]]:
    """Collect the values that must be written to the secrets and config toml files.

    Only sources that are registered in `_SOURCES`, are known to the visitor,
    and are called at least once with a call node whose `parent` is None
    (i.e. not from within an outer function) are inspected.

    Args:
        visitor: visitor that already walked the init script; its
            `known_sources` and `known_source_calls` are read.
        script_module: not used by this function.
        init_script_name: not used by this function.

    Returns:
        A `(required_secrets, required_config)` tuple. Both dictionaries are
        keyed by `"<source name>:<field name>"`.
    """
    secrets_found: Dict[str, WritableConfigValue] = {}
    configs_found: Dict[str, WritableConfigValue] = {}

    # phase 1: pick the sources worth inspecting. sources without a spec are not imported
    # (most probably inner functions); sources that are never called, or are called only
    # from inside other functions, are skipped as well
    selected_sources = {}
    for candidate in visitor.known_sources:
        if candidate not in _SOURCES:
            continue
        if candidate not in visitor.known_source_calls:
            continue
        source_calls = visitor.known_source_calls[candidate]
        if any(call.parent is None for call in source_calls):  # type: ignore
            selected_sources[candidate] = _SOURCES[candidate]

    # phase 2: classify every resolvable field of each selected source spec
    for source_name, source_info in selected_sources.items():
        spec = source_info.SPEC()
        for field_name, field_type in spec.get_resolvable_fields().items():
            if is_secret_hint(field_type):
                # every secret goes to secrets.toml
                destination = secrets_found
            elif is_optional_type(field_type) or getattr(spec, field_name) is not None:
                # optional configs and configs that already carry a default are not written
                continue
            else:
                # required config without a default value goes to config.toml
                destination = configs_found

            # all resources come from a single file so everything lands in one (empty) namespace
            entry_key = ":".join((source_name, field_name))
            destination[entry_key] = WritableConfigValue(field_name, field_type, ())

    return secrets_found, configs_found
150
129
@@ -229,7 +208,7 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
229
208
# find all arguments in all calls to replace
230
209
transformed_nodes = _find_argument_nodes_to_replace (
231
210
visitor ,
232
- [("destination" , destination_name ), ("pipeline_name" , pipeline_name )],
211
+ [("destination" , destination_name ), ("pipeline_name" , pipeline_name ), ( "dataset_name" , pipeline_name + "_data" ) ],
233
212
init_script_name
234
213
)
235
214
@@ -238,8 +217,10 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
238
217
239
218
if len (_SOURCES ) == 0 :
240
219
raise CliCommandException ("init" , f"The pipeline script { init_script_name } is not creating or importing any sources or resources" )
220
+
241
221
for source_q_name , source_config in _SOURCES .items ():
242
222
if source_q_name not in visitor .known_sources :
223
+ print (visitor .known_sources )
243
224
raise CliCommandException ("init" , f"The pipeline script { init_script_name } imports a source/resource { source_config .f .__name__ } from module { source_config .module .__name__ } . In init scripts you must declare all sources and resources in single file." )
244
225
245
226
# detect all the required secrets and configs that should go into tomls files
@@ -251,12 +232,6 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
251
232
# modify the script
252
233
dest_script_source = _rewrite_script (visitor .source , transformed_nodes )
253
234
254
- # generate tomls with comments
255
- secrets_prov = SecretsTomlProvider ()
256
- write_values (secrets_prov ._toml , required_secrets .values ())
257
- config_prov = ConfigTomlProvider ()
258
- write_values (config_prov ._toml , required_config .values ())
259
-
260
235
# welcome message
261
236
click .echo ()
262
237
click .echo ("Your new pipeline %s is ready to be customized!" % fmt .bold (pipeline_name ))
@@ -281,20 +256,22 @@ def init_command(pipeline_name: str, destination_name: str, branch: str) -> None
281
256
if dest_storage .has_file (REQUIREMENTS_TXT ):
282
257
click .echo ("Your python dependencies are kept in %s. Please add the dependency for %s as follows:" % (fmt .bold (REQUIREMENTS_TXT ), fmt .bold (DLT_PKG_NAME )))
283
258
click .echo (req_dep_line )
259
+ click .echo ("To install dlt with the %s extra using pip:" % fmt .bold (destination_name ))
284
260
else :
285
- if click .confirm ("%s not found. Should I create one?" % REQUIREMENTS_TXT ):
286
- requirements_txt = req_dep_line
287
- click .echo ("* %s created. Install it with:\n pip3 install -r %s" % (fmt .bold (REQUIREMENTS_TXT ), REQUIREMENTS_TXT ))
288
- else :
289
- click .echo ("Do not forget to install dlt with the %s extra using:" )
290
- click .echo (f"pip3 install { DLT_PKG_NAME } [{ destination_name } ]" )
261
+ requirements_txt = req_dep_line
262
+ click .echo ("* %s created. Install it with:\n pip3 install -r %s" % (fmt .bold (REQUIREMENTS_TXT ), REQUIREMENTS_TXT ))
291
263
292
264
# copy files at the very end
293
265
for file_name in TEMPLATE_FILES + toml_files :
294
266
shutil .copy (clone_storage .make_full_path (file_name ), dest_storage .make_full_path (file_name ))
295
267
296
268
# create script
297
269
dest_storage .save (dest_pipeline_script , dest_script_source )
270
+ # generate tomls with comments
271
+ secrets_prov = SecretsTomlProvider ()
272
+ write_values (secrets_prov ._toml , required_secrets .values ())
273
+ config_prov = ConfigTomlProvider ()
274
+ write_values (config_prov ._toml , required_config .values ())
298
275
# write toml files
299
276
secrets_prov ._write_toml ()
300
277
config_prov ._write_toml ()
0 commit comments