Patch summary
-------------
dl_coursera/Crawler.py
    When self._uid is falsy, log a warning instead of raising
    UserIDNotFoundException.

dl_coursera/DLTaskGatherer.py
    Import re and add _sanitize_filename(s), which replaces every occurrence
    of the characters < > : " / \ | ? * in s with "_".  _gather_asset now
    passes asset['name'] through _sanitize_filename before handing it to
    self._see().

NOTE(review): the leading whitespace of the context lines below was
reconstructed from the Python structure; confirm it against the target files
(or apply with --ignore-whitespace) -- TODO confirm.
NOTE(review): the old raise is kept as commented-out code; consider deleting
it and explaining the reason in a comment instead.
NOTE(review): logging.warning() logs through the root logger; confirm that
matches how the rest of Crawler.py logs.
NOTE(review): _sanitize_filename has no docstring and does not touch control
characters or trailing dots/spaces; confirm whether that is sufficient for
the intended target file systems.

diff --git a/dl_coursera/Crawler.py b/dl_coursera/Crawler.py
index d0dd566..72b6fa0 100644
--- a/dl_coursera/Crawler.py
+++ b/dl_coursera/Crawler.py
@@ -372,7 +372,8 @@ def _get_uid(s: str):
                 break
 
         if not self._uid:
-            raise UserIDNotFoundException()
+            logging.warning("UserID not found in cookies, but ignoring as it seems unused.")
+            # raise UserIDNotFoundException()
 
     def crawl(self, *, slug, is_spec):
         if not self._loggedin:
diff --git a/dl_coursera/DLTaskGatherer.py b/dl_coursera/DLTaskGatherer.py
index 5e0cbb3..bb80632 100644
--- a/dl_coursera/DLTaskGatherer.py
+++ b/dl_coursera/DLTaskGatherer.py
@@ -2,6 +2,7 @@
 import zipfile
 import io
 import logging
+import re
 
 from .lib.ExploringTree import ExploringTree
 
@@ -10,6 +11,8 @@
 from .resource import load_resource
 from .define import *
 
+def _sanitize_filename(s):
+    return re.sub(r'[<>:"/\\|?*]', '_', s)
 
 def _shorten_slug(x):
     if len(x['slug']) > 40:
@@ -191,4 +194,6 @@ def _gather_cml(self, cml, i, supplement):
             self._gather_asset(asset)
 
     def _gather_asset(self, asset):
-        self._add_dl_task(asset['url'], self._see(asset['name']))
+        self._add_dl_task(
+            asset['url'], self._see(_sanitize_filename(asset['name']))
+        )