From 767fe39a1b647b486a70bdcf49006be64d8ed0df Mon Sep 17 00:00:00 2001 From: Johan Isevind Date: Tue, 30 Dec 2025 20:53:03 +0100 Subject: [PATCH 1/2] fix: sanitize filenames by replacing invalid characters Add _sanitize_filename function to replace invalid filename characters with underscores when gathering assets. This prevents filesystem errors when downloading files with names containing special characters like <, >, :, ", /, \, |, ?, or *. --- dl_coursera/DLTaskGatherer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dl_coursera/DLTaskGatherer.py b/dl_coursera/DLTaskGatherer.py index 5e0cbb3..bb80632 100644 --- a/dl_coursera/DLTaskGatherer.py +++ b/dl_coursera/DLTaskGatherer.py @@ -2,6 +2,7 @@ import zipfile import io import logging +import re from .lib.ExploringTree import ExploringTree @@ -10,6 +11,8 @@ from .resource import load_resource from .define import * +def _sanitize_filename(s): + return re.sub(r'[<>:"/\\|?*]', '_', s) def _shorten_slug(x): if len(x['slug']) > 40: @@ -191,4 +194,6 @@ def _gather_cml(self, cml, i, supplement): self._gather_asset(asset) def _gather_asset(self, asset): - self._add_dl_task(asset['url'], self._see(asset['name'])) + self._add_dl_task( + asset['url'], self._see(_sanitize_filename(asset['name'])) + ) From c7195efac1f30721feb4804107d0b7364b8d3528 Mon Sep 17 00:00:00 2001 From: Johan Isevind Date: Tue, 30 Dec 2025 20:54:27 +0100 Subject: [PATCH 2/2] fix(crawler): ignore missing UserID in cookies instead of throwing exception The UserID check was causing failures when cookies didn't contain the UID, but this value appears to be unused in the crawling process. Changed to log a warning instead of raising an exception to allow crawling to continue. 
--- dl_coursera/Crawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dl_coursera/Crawler.py b/dl_coursera/Crawler.py index d0dd566..72b6fa0 100644 --- a/dl_coursera/Crawler.py +++ b/dl_coursera/Crawler.py @@ -372,7 +372,8 @@ def _get_uid(s: str): break if not self._uid: - raise UserIDNotFoundException() + logging.warning("UserID not found in cookies, but ignoring as it seems unused.") + # raise UserIDNotFoundException() def crawl(self, *, slug, is_spec): if not self._loggedin: