From 846f8c3fe70d7afa11b1e06d7ae5c739d9abc44d Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Tue, 19 May 2026 02:50:43 +0500 Subject: [PATCH 1/3] Created Rezipping feature of SCORM at export and import --- openedxscorm/scormxblock.py | 117 ++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index 6ae123e..a115180 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -380,6 +380,123 @@ def extract_package(self, package_file): ContentFile(scorm_zipfile.read(zipinfo.filename)), ) + def add_xml_to_node(self, node): + """ + Export hook. The platform sets ``runtime.export_fs`` to the OLX + export filesystem before calling this. We re-zip the extracted + package out of ``default_storage`` into that filesystem so the + block roundtrips through OLX export/import. Outside an export + context ``export_fs`` is None and this is a pass-through. + """ + super().add_xml_to_node(node) + + export_fs = getattr(self.runtime, "export_fs", None) + if export_fs is None: + return + if not self.package_meta or "sha1" not in self.package_meta: + return + if not self.path_exists(self.extract_folder_path): + return + + zip_relpath = f"scorm/{self.url_name}/package.zip" + try: + export_fs.makedirs(os.path.dirname(zip_relpath), recreate=True) + with export_fs.open(zip_relpath, "wb") as out: + self._rezip_extracted_package(out) + except Exception: # pylint: disable=broad-except + logger.exception("Failed to bundle SCORM package into OLX export") + return + + node.set("scorm_package_path", zip_relpath) + + @classmethod + def parse_xml(cls, node, runtime, keys): + """ + Import hook. After the base class restores fields from XML + attributes (so ``package_meta`` and therefore ``extract_folder_path`` + are populated), look for the package zip the exporter wrote into + the OLX tarball and re-extract it into ``default_storage``. + """ + block = super().parse_xml(node, runtime, keys) + + zip_relpath = node.get("scorm_package_path") + resources_fs = getattr(runtime, "resources_fs", None) + if not zip_relpath or resources_fs is None: + return block + if not resources_fs.exists(zip_relpath): + return block + + # During OLX import the block's ``scope_ids.usage_id`` is keyed to + # the source course (the one being imported FROM). The block is + # then persisted under the target course's keys. + # ``extract_folder_base_path`` hashes ``scope_ids.usage_id`` to + # locate files on disk, so extracting under the source key lands + # files where the rendered block (keyed to the target course) + # cannot find them. Re-key the block in place for the duration + # of extraction so files end up where they will be looked up. + target_usage_id = cls._target_usage_id_for_import(node, runtime) + original_scope_ids = block.scope_ids + if target_usage_id is not None and target_usage_id != original_scope_ids.usage_id: + block.scope_ids = original_scope_ids._replace(usage_id=target_usage_id) + + try: + with resources_fs.open(zip_relpath, "rb") as zip_file: + block.extract_package(zip_file) + except Exception: # pylint: disable=broad-except + logger.exception("Failed to rehydrate SCORM package from OLX import") + finally: + if block.scope_ids is not original_scope_ids: + block.scope_ids = original_scope_ids + + return block + + @classmethod + def _target_usage_id_for_import(cls, node, runtime): + """ + During OLX import, ``runtime.id_generator`` is a + ``CourseImportLocationManager`` that carries ``target_course_id`` + — the course the block will be persisted under. Combine it with + the preserved ``url_name`` (which becomes ``block_id``) to get + the usage_id the rendered block will eventually see. Returns + ``None`` for runtimes that don't expose this (no-op fallback). + """ + id_generator = getattr(runtime, "id_generator", None) + target_course_id = getattr(id_generator, "target_course_id", None) + if target_course_id is None: + return None + url_name = node.get("url_name") + if not url_name: + return None + try: + return target_course_id.make_usage_key(node.tag, url_name) + except Exception: # pylint: disable=broad-except + return None + + def _rezip_extracted_package(self, out_file): + """ + Stream a fresh zip of the extracted SCORM tree into ``out_file``. + Files are read from ``self.storage`` and written through ``zf.open`` + in chunks so the whole archive is never held in memory at once. + """ + root = self.extract_folder_path + with zipfile.ZipFile(out_file, "w", zipfile.ZIP_DEFLATED, allowZip64=True) as zf: + stack = [root] + while stack: + cur = stack.pop() + try: + dirs, files = self.storage.listdir(cur) + except (FileNotFoundError, NotImplementedError): + continue + for d in dirs: + stack.append(os.path.join(cur, d)) + for f in files: + src_path = os.path.join(cur, f) + arcname = os.path.relpath(src_path, root) + with self.storage.open(src_path, "rb") as fh: + with zf.open(arcname, "w", force_zip64=True) as zout: + for chunk in iter(lambda fh=fh: fh.read(1024 * 1024), b""): + zout.write(chunk) + @property def index_page_url(self): if not self.package_meta or not self.index_page_path: From 0faecbead9e68600c162cbd0dfb033cb4c08ba16 Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Tue, 19 May 2026 14:24:46 +0500 Subject: [PATCH 2/3] Fixed issue of scorm block not surviving course-rerun --- ..._141139_ali.abbas02_olx_roundtrip_spike.md | 1 + openedxscorm/scormxblock.py | 77 ++++++++++++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 changelog.d/20260519_141139_ali.abbas02_olx_roundtrip_spike.md diff --git a/changelog.d/20260519_141139_ali.abbas02_olx_roundtrip_spike.md b/changelog.d/20260519_141139_ali.abbas02_olx_roundtrip_spike.md new file mode 100644 index 0000000..5f6eccc --- /dev/null +++ b/changelog.d/20260519_141139_ali.abbas02_olx_roundtrip_spike.md @@ -0,0 +1 @@ +- [Bugfix] SCORM blocks now survive OLX export/import and course rerun. Export bundles the package zip into the OLX tarball; import re-extracts it under the target course's storage path; rerun and previously broken blocks self-heal on first render by locating a sibling storage bucket with the same SHA1. (by @Syed-Ali-Abbas-568) \ No newline at end of file diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index a115180..fa5f850 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -231,7 +231,7 @@ def assets_proxy(self, request, suffix): Response object containing the content of the requested file with the appropriate content type. """ file_name = os.path.basename(suffix) - file_path = self.find_file_path(file_name) + file_path = self.find_file_path(file_name) file_type, _ = mimetypes.guess_type(file_name) with self.storage.open(file_path) as response: file_content = response.read() @@ -497,10 +497,85 @@ def _rezip_extracted_package(self, out_file): for chunk in iter(lambda fh=fh: fh.read(1024 * 1024), b""): zout.write(chunk) + def _rehydrate_extract_folder_if_missing(self): + """ + Self-healing: if our extract folder is empty but we know our sha1, + search ``scorm_location()`` for another bucket containing the + same sha1 directory and copy its tree into our extract folder. + + This covers operations that duplicate block metadata without + touching storage — notably course rerun, which clones blocks at + the modulestore level and never invokes ``parse_xml``. Cached + per block instance via ``_extract_folder_verified`` so repeated + calls during a single request (e.g. many ``assets_proxy`` hits) + don't keep listing storage. + """ + if getattr(self, "_extract_folder_verified", False): + return + self._extract_folder_verified = True + + if not self.package_meta or "sha1" not in self.package_meta: + return + if self.path_exists(self.extract_folder_path): + return + + sha1 = self.package_meta["sha1"] + scorm_root = self.scorm_location() + + try: + bucket_dirs, _ = self.storage.listdir(scorm_root) + except (FileNotFoundError, NotImplementedError): + return + + own_bucket = os.path.basename(self.extract_folder_base_path) + for bucket in bucket_dirs: + if bucket == own_bucket: + continue + candidate = os.path.join(scorm_root, bucket, sha1) + if not self.path_exists(candidate): + continue + try: + self._copy_storage_tree(candidate, self.extract_folder_path) + logger.info( + "Rehydrated SCORM extract folder for %s from %s", + self.scope_ids.usage_id, candidate, + ) + except Exception: # pylint: disable=broad-except + logger.exception( + "Failed to rehydrate SCORM extract folder for %s from %s", + self.scope_ids.usage_id, candidate, + ) + return + + def _copy_storage_tree(self, src_root, dst_root): + """ + Recursively copy every file under ``src_root`` to ``dst_root`` + using the Django storage API. Both paths are storage-relative — + we never touch the local filesystem directly, so this works on + S3 and filesystem backends alike. + """ + stack = [src_root] + while stack: + cur = stack.pop() + try: + dirs, files = self.storage.listdir(cur) + except (FileNotFoundError, NotImplementedError): + continue + for d in dirs: + stack.append(os.path.join(cur, d)) + for f in files: + src_path = os.path.join(cur, f) + relpath = os.path.relpath(src_path, src_root) + dst_path = os.path.join(dst_root, relpath) + with self.storage.open(src_path, "rb") as src_fh: + self.storage.save(dst_path, ContentFile(src_fh.read())) + @property def index_page_url(self): if not self.package_meta or not self.index_page_path: return "" + + self._rehydrate_extract_folder_if_missing() # Serve assets by proxying them through the LMS by default if self.xblock_settings.get("PROXY_ASSETS_LMS", True): From 2a4c2dd7b9d1a58d7f711b94945140ee86eacd08 Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Wed, 3 Jun 2026 15:45:39 +0500 Subject: [PATCH 3/3] Addressed comments and cleaned up code --- openedxscorm/scormxblock.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index fa5f850..f4a8b23 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -8,6 +8,7 @@ import mimetypes import urllib +from django.core.cache import cache from django.core.files.base import ContentFile from django.core.files.storage import default_storage from django.db.models import Q @@ -485,7 +486,7 @@ def _rezip_extracted_package(self, out_file): cur = stack.pop() try: dirs, files = self.storage.listdir(cur) - except (FileNotFoundError, NotImplementedError): + except FileNotFoundError: continue for d in dirs: stack.append(os.path.join(cur, d)) @@ -505,18 +506,24 @@ def _rehydrate_extract_folder_if_missing(self): This covers operations that duplicate block metadata without touching storage — notably course rerun, which clones blocks at - the modulestore level and never invokes ``parse_xml``. Cached - per block instance via ``_extract_folder_verified`` so repeated - calls during a single request (e.g. many ``assets_proxy`` hits) - don't keep listing storage. + the modulestore level and never invokes ``parse_xml``. + + Idempotency is enforced via Django's cache, keyed by + ``scope_ids.usage_id``. Course rerun and OLX import both mint a + new ``usage_id``, so the cache key naturally misses and a fresh + verification fires once on first render of the new block. On + success the key is set and subsequent renders short-circuit + without listing storage. """ - if getattr(self, "_extract_folder_verified", False): + cache_key = f"scorm_xblock:extract_verified:{self.scope_ids.usage_id}" + if cache.get(cache_key): return - self._extract_folder_verified = True if not self.package_meta or "sha1" not in self.package_meta: return + if self.path_exists(self.extract_folder_path): + cache.set(cache_key, True, timeout=None) return sha1 = self.package_meta["sha1"] @@ -524,7 +531,7 @@ def _rehydrate_extract_folder_if_missing(self): try: bucket_dirs, _ = self.storage.listdir(scorm_root) - except (FileNotFoundError, NotImplementedError): + except FileNotFoundError: return own_bucket = os.path.basename(self.extract_folder_base_path) @@ -540,6 +547,7 @@ def _rehydrate_extract_folder_if_missing(self): "Rehydrated SCORM extract folder for %s from %s", self.scope_ids.usage_id, candidate, ) + cache.set(cache_key, True, timeout=None) except Exception: # pylint: disable=broad-except logger.exception( "Failed to rehydrate SCORM extract folder for %s from %s", @@ -559,7 +567,7 @@ def _copy_storage_tree(self, src_root, dst_root): cur = stack.pop() try: dirs, files = self.storage.listdir(cur) - except (FileNotFoundError, NotImplementedError): + except FileNotFoundError: continue for d in dirs: stack.append(os.path.join(cur, d))