Data Catalog "sftp" not working #1880
GiulianoSquarcina
started this conversation in
Idea
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Hello everybody,
I'm trying to read files from a remote server over SSH using the "sftp" protocol support in the Data Catalog, but I get the error "urlopen error unknown url type: sftp". I can access the same server with the Paramiko package, and that works fine.
Could you please help me?
Many thanks in advance
The first two entries in the Data Catalog (the others have the same structure, so they are omitted):
#------------------------------------------------------------------
population:
  type: pandas.CSVDataSet #pandas.ExcelDataSet #pandas.JSONDataSet
  filepath: "sftp:///home/giuliano_squarcina/testOVS/Inputs/DMM_BDL_ANL__FT_POPULATION.csv"
  credentials: cred
  load_args:
    sep: ','
items:
  type: pandas.CSVDataSet
  filepath: "sftp:///home/giuliano_squarcina/testOVS/Inputs/SPINDOX_ANAGRAFICA_ARTICOLI_NEW.txt"
  credentials: cred
  load_args:
    sep: '\t'
    engine: 'python'
#------------------------------------------------------------------
The full error output is reported below:
#------------------------------------------------------------------
[09/27/22 15:39:17] INFO Kedro project ovs-2022-residuo-taglie session.py:343
[09/27/22 15:39:28] INFO Loading data from 'items' (CSVDataSet)... data_catalog.py:343
WARNING No nodes ran. Repeat the previous command to attempt a new run. runner.py:198
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/io/core.py:186 in load │
│ │
│ 183 │ │ self._logger.debug("Loading %s", str(self)) │
│ 184 │ │ │
│ 185 │ │ try: │
│ ❱ 186 │ │ │ return self._load() │
│ 187 │ │ except DataSetError: │
│ 188 │ │ │ raise │
│ 189 │ │ except Exception as exc: │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/extras/datasets/pandas/csv_dataset.py:163 in _load │
│ │
│ 160 │ │ │ return pd.read_csv(load_path, **self._load_args) │
│ 161 │ │ │
│ 162 │ │ load_path = f"{self._protocol}{PROTOCOL_DELIMITER}{load_path}" │
│ ❱ 163 │ │ return pd.read_csv( │
│ 164 │ │ │ load_path, storage_options=self._storage_options, **self._load_args │
│ 165 │ │ ) │
│ 166 │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/util/_decorators.py:211 in wrapper │
│ │
│ 208 │ │ │ │ │ raise TypeError(msg) │
│ 209 │ │ │ │ else: │
│ 210 │ │ │ │ │ kwargs[new_arg_name] = new_arg_value │
│ ❱ 211 │ │ │ return func(*args, **kwargs) │
│ 212 │ │ │
│ 213 │ │ return cast(F, wrapper) │
│ 214 │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/util/_decorators.py:317 in wrapper │
│ │
│ 314 │ │ │ │ │ FutureWarning, │
│ 315 │ │ │ │ │ stacklevel=find_stack_level(inspect.currentframe()), │
│ 316 │ │ │ │ ) │
│ ❱ 317 │ │ │ return func(*args, **kwargs) │
│ 318 │ │ │
│ 319 │ │ return wrapper │
│ 320 │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/parsers/readers.py:950 in read_csv │
│ │
│ 947 │ ) │
│ 948 │ kwds.update(kwds_defaults) │
│ 949 │ │
│ ❱ 950 │ return _read(filepath_or_buffer, kwds) │
│ 951 │
│ 952 │
│ 953 # iterator=True -> TextFileReader │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/parsers/readers.py:605 in _read │
│ │
│ 602 │ _validate_names(kwds.get("names", None)) │
│ 603 │ │
│ 604 │ # Create the parser. │
│ ❱ 605 │ parser = TextFileReader(filepath_or_buffer, **kwds) │
│ 606 │ │
│ 607 │ if chunksize or iterator: │
│ 608 │ │ return parser │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/parsers/readers.py:1442 in __init__ │
│ │
│ 1439 │ │ │ self.options["has_index_names"] = kwds["has_index_names"] │
│ 1440 │ │ │
│ 1441 │ │ self.handles: IOHandles | None = None │
│ ❱ 1442 │ │ self._engine = self._make_engine(f, self.engine) │
│ 1443 │ │
│ 1444 │ def close(self) -> None: │
│ 1445 │ │ if self.handles is not None: │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/parsers/readers.py:1729 in _make_engine │
│ │
│ 1726 │ │ │ if engine == "pyarrow": │
│ 1727 │ │ │ │ is_text = False │
│ 1728 │ │ │ │ mode = "rb" │
│ ❱ 1729 │ │ │ self.handles = get_handle( │
│ 1730 │ │ │ │ f, │
│ 1731 │ │ │ │ mode, │
│ 1732 │ │ │ │ encoding=self.options.get("encoding", None), │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/common.py:714 in get_handle │
│ │
│ 711 │ │ codecs.lookup_error(errors) │
│ 712 │ │
│ 713 │ # open URLs │
│ ❱ 714 │ ioargs = _get_filepath_or_buffer( │
│ 715 │ │ path_or_buf, │
│ 716 │ │ encoding=encoding, │
│ 717 │ │ compression=compression, │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/common.py:364 in _get_filepath_or_buffer │
│ │
│ 361 │ │ │
│ 362 │ │ # assuming storage_options is to be interpreted as headers │
│ 363 │ │ req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) │
│ ❱ 364 │ │ with urlopen(req_info) as req: │
│ 365 │ │ │ content_encoding = req.headers.get("Content-Encoding", None) │
│ 366 │ │ │ if content_encoding == "gzip": │
│ 367 │ │ │ │ # Override compression based on Content-Encoding header │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ pandas/io/common.py:266 in urlopen │
│ │
│ 263 │ """ │
│ 264 │ import urllib.request │
│ 265 │ │
│ ❱ 266 │ return urllib.request.urlopen(*args, **kwargs) │
│ 267 │
│ 268 │
│ 269 def is_fsspec_url(url: FilePath | BaseBuffer) -> bool: │
│ │
│ /usr/lib/python3.8/urllib/request.py:222 in urlopen │
│ │
│ 219 │ │ _opener = opener = build_opener() │
│ 220 │ else: │
│ 221 │ │ opener = _opener │
│ ❱ 222 │ return opener.open(url, data, timeout) │
│ 223 │
│ 224 def install_opener(opener): │
│ 225 │ global _opener │
│ │
│ /usr/lib/python3.8/urllib/request.py:525 in open │
│ │
│ 522 │ │ │ req = meth(req) │
│ 523 │ │ │
│ 524 │ │ sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method( │
│ ❱ 525 │ │ response = self._open(req, data) │
│ 526 │ │ │
│ 527 │ │ # post-process response │
│ 528 │ │ meth_name = protocol+"_response" │
│ │
│ /usr/lib/python3.8/urllib/request.py:547 in _open │
│ │
│ 544 │ │ if result: │
│ 545 │ │ │ return result │
│ 546 │ │ │
│ ❱ 547 │ │ return self._call_chain(self.handle_open, 'unknown', │
│ 548 │ │ │ │ │ │ │ │ 'unknown_open', req) │
│ 549 │ │
│ 550 │ def error(self, proto, *args): │
│ │
│ /usr/lib/python3.8/urllib/request.py:502 in _call_chain │
│ │
│ 499 │ │ handlers = chain.get(kind, ()) │
│ 500 │ │ for handler in handlers: │
│ 501 │ │ │ func = getattr(handler, meth_name) │
│ ❱ 502 │ │ │ result = func(*args) │
│ 503 │ │ │ if result is not None: │
│ 504 │ │ │ │ return result │
│ 505 │
│ │
│ /usr/lib/python3.8/urllib/request.py:1425 in unknown_open │
│ │
│ 1422 class UnknownHandler(BaseHandler): │
│ 1423 │ def unknown_open(self, req): │
│ 1424 │ │ type = req.type │
│ ❱ 1425 │ │ raise URLError('unknown url type: %s' % type) │
│ 1426 │
│ 1427 def parse_keqv_list(l): │
│ 1428 │ """Parse list of key=value strings where keys are not duplicated.""" │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
URLError: <urlopen error unknown url type: sftp>
The above exception was the direct cause of the following exception:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/bin/kedro:10 in <module> │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/framework/cli/cli.py:211 in main │
│ │
│ 208 │ """ │
│ 209 │ _init_plugins() │
│ 210 │ cli_collection = KedroCLI(project_path=Path.cwd()) │
│ ❱ 211 │ cli_collection() │
│ 212 │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ click/core.py:1130 in __call__ │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/framework/cli/cli.py:139 in main │
│ │
│ 136 │ │ ) │
│ 137 │ │ │
│ 138 │ │ try: │
│ ❱ 139 │ │ │ super().main( │
│ 140 │ │ │ │ args=args, │
│ 141 │ │ │ │ prog_name=prog_name, │
│ 142 │ │ │ │ complete_var=complete_var, │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ click/core.py:1055 in main │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ click/core.py:1657 in invoke │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ click/core.py:1404 in invoke │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ click/core.py:760 in invoke │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/framework/cli/project.py:366 in run │
│ │
│ 363 │ node_names = _get_values_as_tuple(node_names) if node_names else node_names │
│ 364 │ │
│ 365 │ with KedroSession.create(env=env, extra_params=params) as session: │
│ ❱ 366 │ │ session.run( │
│ 367 │ │ │ tags=tag, │
│ 368 │ │ │ runner=runner(is_async=is_async), │
│ 369 │ │ │ node_names=node_names, │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/framework/session/session.py:407 in run │
│ │
│ 404 │ │ ) │
│ 405 │ │ │
│ 406 │ │ try: │
│ ❱ 407 │ │ │ run_result = runner.run( │
│ 408 │ │ │ │ filtered_pipeline, catalog, hook_manager, session_id │
│ 409 │ │ │ ) │
│ 410 │ │ │ self._run_called = True │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/runner/runner.py:88 in run │
│ │
│ 85 │ │ │ self._logger.info( │
│ 86 │ │ │ │ "Asynchronous mode is enabled for loading and saving data" │
│ 87 │ │ │ ) │
│ ❱ 88 │ │ self._run(pipeline, catalog, hook_manager, session_id) │
│ 89 │ │ │
│ 90 │ │ self._logger.info("Pipeline execution completed successfully.") │
│ 91 │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/runner/sequential_runner.py:70 in _run │
│ │
│ 67 │ │ │
│ 68 │ │ for exec_index, node in enumerate(nodes): │
│ 69 │ │ │ try: │
│ ❱ 70 │ │ │ │ run_node(node, catalog, hook_manager, self._is_async, session_id) │
│ 71 │ │ │ │ done_nodes.add(node) │
│ 72 │ │ │ except Exception: │
│ 73 │ │ │ │ self._suggest_resume_scenario(pipeline, done_nodes, catalog) │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/runner/runner.py:304 in run_node │
│ │
│ 301 │ if is_async: │
│ 302 │ │ node = _run_node_async(node, catalog, hook_manager, session_id) │
│ 303 │ else: │
│ ❱ 304 │ │ node = _run_node_sequential(node, catalog, hook_manager, session_id) │
│ 305 │ │
│ 306 │ for name in node.confirms: │
│ 307 │ │ catalog.confirm(name) │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/runner/runner.py:388 in _run_node_sequential │
│ │
│ 385 │ │
│ 386 │ for name in node.inputs: │
│ 387 │ │ hook_manager.hook.before_dataset_loaded(dataset_name=name) │
│ ❱ 388 │ │ inputs[name] = catalog.load(name) │
│ 389 │ │ hook_manager.hook.after_dataset_loaded(dataset_name=name, data=inputs[name]) │
│ 390 │ │
│ 391 │ is_async = False │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/io/data_catalog.py:347 in load │
│ │
│ 344 │ │ │ "Loading data from '%s' (%s)...", name, type(dataset).__name__ │
│ 345 │ │ ) │
│ 346 │ │ │
│ ❱ 347 │ │ result = dataset.load() │
│ 348 │ │ │
│ 349 │ │ return result │
│ 350 │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/io/core.py:599 in load │
│ │
│ 596 │ │
│ 597 │ def load(self) -> _DO: │
│ 598 │ │ self.resolve_load_version() # Make sure last load version is set │
│ ❱ 599 │ │ return super().load() │
│ 600 │ │
│ 601 │ def save(self, data: _DI) -> None: │
│ 602 │ │ self._version_cache.clear() │
│ │
│ /mnt/c/Users/giuliano.squarcina/SPINDOX SPA/aHEAD - MGMT - Data and Analytics - │
│ General/Projects/OVS_2022/Python_project/kedro-environment/OVS_venv/lib/python3.8/site-packages/ │
│ kedro/io/core.py:195 in load │
│ │
│ 192 │ │ │ message = ( │
│ 193 │ │ │ │ f"Failed while loading data from data set {str(self)}.\n{str(exc)}" │
│ 194 │ │ │ ) │
│ ❱ 195 │ │ │ raise DataSetError(message) from exc │
│ 196 │ │
│ 197 │ def save(self, data: _DI) -> None: │
│ 198 │ │ """Saves data by delegation to the provided save method. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
DataSetError: Failed while loading data from data set CSVDataSet(filepath=/home/giuliano_squarcina/testOVS/Inputs/SPINDOX_ANAGRAFICA_ARTICOLI_NEW.txt, load_args={'engine': python, 'sep': \t},
protocol=sftp, save_args={'index': False}).
#------------------------------------------------------------------
Beta Was this translation helpful? Give feedback.
All reactions