From 3ff38b2375013bfbf8908ae9fd6bacfea1180c4c Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Mon, 4 May 2026 11:33:00 +0500 Subject: [PATCH 1/2] feat: mirror SCORM zip to course contentstore for OLX export/import Open edX's OLX export bundles contentstore assets into the export tarball, but SCORM zips are unpacked into Django's default_storage and never make it into the bundle. As a result, a course exported and re-imported into a new course keeps the SCORM block's metadata but not its content. PR #71 made this more visible by keying the extraction directory on sha1(usage_key) instead of block_id; that fix prevented cross-course corruption but left imported blocks pointing at an empty directory. Add an opt-in sync that closes the gap without fighting PR #71: - studio_submit: after extract_package, mirror the original zip into the current course's contentstore as scorm_packages/.zip (locked). - index_page_url and assets_proxy: before serving content, if package_meta is set but the unpacked tree is missing from default_storage, fetch the zip from the current course's contentstore and re-extract it. Gated behind XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"], defaulting to False; legacy behavior is unchanged unless an operator opts in. Imports of xmodule.contentstore are lazy and every contentstore call is wrapped in try/except so uploads still succeed when the contentstore is unavailable (tests, non-platform environments). Co-Authored-By: Claude Opus 4.7 --- README.rst | 20 +++ changelog.d/20260504_062815_ali.abbas02.md | 1 + openedxscorm/scormxblock.py | 151 ++++++++++++++++++++- 3 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 changelog.d/20260504_062815_ali.abbas02.md diff --git a/README.rst b/README.rst index 6e2214b..5f902b7 100644 --- a/README.rst +++ b/README.rst @@ -138,6 +138,26 @@ You may define the following additional settings in ``XBLOCK_SETTINGS["ScormXBlo * ``S3_QUERY_AUTH`` (default: ``True``): boolean flag (``True`` or ``False``) for query string authentication in S3 urls. If your bucket is public, set this value to ``False``. But be aware that in such case your SCORM assets will be publicly available to everyone. * ``S3_EXPIRES_IN`` (default: 604800): time duration (in seconds) for the presigned URLs to stay valid. The default is one week. +Course export/import (contentstore sync) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, SCORM zips are unpacked into Django's ``default_storage``, which is separate from the contentstore that Open edX bundles into OLX export tarballs. As a consequence, when a course containing a SCORM block is exported and re-imported into a new course, the imported block keeps its metadata (filename, sha1, size) but the actual content does not play because the unpacked files are absent from the new course's storage. + +To survive course export/import, the XBlock can mirror the original SCORM zip into the course's contentstore on save and rehydrate the unpacked tree from the contentstore on first access after an import. Enable it with: + +.. code-block:: python + + XBLOCK_SETTINGS["ScormXBlock"] = { + "CONTENTSTORE_SYNC_ENABLED": True, + } + +When enabled: + +* On upload, the original zip is written to the current course's contentstore as ``scorm_packages/.zip`` (locked). Course export bundles contentstore assets into the OLX tarball; course import copies them under the new ``course_key`` automatically. +* On read, if ``package_meta`` is set but the extracted tree is missing from ``default_storage``, the XBlock fetches the zip from the current course's contentstore and re-extracts it into ``default_storage``. + +This composes with the per-block extraction path introduced in PR #71 (extraction is still keyed by ``sha1(usage_key)``, so courses cannot share extraction directories). Pre-existing blocks already have a populated ``default_storage`` cache, so the rehydrate branch never fires for them. The feature is off by default; if anything misbehaves in your environment, flipping it back to ``False`` restores the prior behavior exactly. + These settings may be added to Tutor by creating a `plugin `__: .. code-block:: python diff --git a/changelog.d/20260504_062815_ali.abbas02.md b/changelog.d/20260504_062815_ali.abbas02.md new file mode 100644 index 0000000..5288be8 --- /dev/null +++ b/changelog.d/20260504_062815_ali.abbas02.md @@ -0,0 +1 @@ +- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @ali.abbas02) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index 6ae123e..921a567 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -1,3 +1,4 @@ +import io import json import hashlib import os @@ -230,8 +231,12 @@ def assets_proxy(self, request, suffix): ------- Response object containing the content of the requested file with the appropriate content type. """ + # If this block was just imported via OLX, the unpacked files may be + # missing from default_storage. Rehydrate from the contentstore before + # serving. No-op when the feature is disabled or the tree exists. + self._rehydrate_from_contentstore() file_name = os.path.basename(suffix) - file_path = self.find_file_path(file_name) + file_path = self.find_file_path(file_name) file_type, _ = mimetypes.guess_type(file_name) with self.storage.open(file_path) as response: file_content = response.read() @@ -299,6 +304,10 @@ def studio_submit(self, request, _suffix): try: self.extract_package(package_file) self.update_package_fields() + # Mirror the original zip into the course's contentstore so that + # OLX export bundles it and OLX import re-creates it under the new + # course_key. Best-effort and feature-flagged off by default. + self._save_zip_to_contentstore(package_file) except ScormError as e: response["errors"].append(e.args[0]) @@ -384,7 +393,12 @@ def extract_package(self, package_file): def index_page_url(self): if not self.package_meta or not self.index_page_path: return "" - + + # If this block was just imported via OLX, the unpacked files may be + # missing from default_storage. Rehydrate from the contentstore before + # building the URL. No-op when the feature is disabled or the tree exists. + self._rehydrate_from_contentstore() + # Serve assets by proxying them through the LMS by default if self.xblock_settings.get("PROXY_ASSETS_LMS", True): return f"{self.proxy_base_url}/{self.index_page_path}" @@ -884,6 +898,139 @@ def workbench_scenarios(): ), ] + @property + def contentstore_sync_enabled(self): + """ + Whether to mirror the SCORM zip to the course's contentstore on save and + rehydrate it from the contentstore on read. + + Disabled by default; opt in by setting + XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True. + """ + return bool(self.xblock_settings.get("CONTENTSTORE_SYNC_ENABLED", False)) + + def _contentstore_asset_path(self): + """ + Filename used when storing the SCORM zip in the course's contentstore. + + The path is keyed by the package sha1 so that OLX export/import carries + the same asset name into the new course. + """ + sha1 = self.package_meta.get("sha1") if self.package_meta else None + if not sha1: + return None + return f"scorm_packages/{sha1}.zip" + + def _save_zip_to_contentstore(self, package_file): + """ + Mirror the original SCORM zip into the course's contentstore. + + Best-effort: any failure (missing imports, no course context, contentstore + error) is logged and swallowed so that the upload still succeeds. + """ + if not self.contentstore_sync_enabled: + return + try: + from xmodule.contentstore.django import contentstore + from xmodule.contentstore.content import StaticContent + except ImportError: + logger.warning( + "xmodule.contentstore unavailable; skipping SCORM zip mirror" + ) + return + + course_key = getattr(self.runtime, "course_id", None) + asset_path = self._contentstore_asset_path() + if course_key is None or asset_path is None: + return + + try: + asset_key = StaticContent.compute_location(course_key, asset_path) + package_file.seek(0) + data = package_file.read() + package_file.seek(0) + content = StaticContent( + asset_key, + asset_path, + "application/zip", + data, + locked=True, + ) + contentstore().save(content) + logger.info("Mirrored SCORM zip to contentstore at %s", asset_key) + except Exception: + logger.exception("Failed to mirror SCORM zip to contentstore") + + def _fetch_zip_from_contentstore(self): + """ + Fetch the original SCORM zip bytes from the course's contentstore. + + Returns the bytes payload, or None when the feature is disabled, the + contentstore is unavailable, or no asset is found for this block. + """ + if not self.contentstore_sync_enabled: + return None + try: + from xmodule.contentstore.django import contentstore + from xmodule.contentstore.content import StaticContent + except ImportError: + return None + try: + from xmodule.exceptions import NotFoundError + except ImportError: + NotFoundError = None # type: ignore + + course_key = getattr(self.runtime, "course_id", None) + asset_path = self._contentstore_asset_path() + if course_key is None or asset_path is None: + return None + + try: + asset_key = StaticContent.compute_location(course_key, asset_path) + content = contentstore().find(asset_key) + return content.data + except Exception as exc: + if NotFoundError is not None and isinstance(exc, NotFoundError): + return None + logger.exception("Failed to fetch SCORM zip from contentstore") + return None + + def _is_extracted_tree_missing(self): + """ + Return True when package_meta is set but the extracted tree is empty. + + After OLX import the metadata is restored but the unpacked files are not, + so this is the signal to rehydrate from the contentstore. + """ + if not self.package_meta: + return False + return not self.path_exists(self.extract_folder_path) + + def _rehydrate_from_contentstore(self): + """ + Re-extract the SCORM zip from the contentstore back into default_storage. + + No-op unless the feature is enabled, package_meta is present, and the + extracted tree is missing. Returns True on a successful rehydration. + """ + if not self.contentstore_sync_enabled: + return False + if not self._is_extracted_tree_missing(): + return False + data = self._fetch_zip_from_contentstore() + if not data: + return False + try: + self.extract_package(io.BytesIO(data)) + logger.info( + "Rehydrated SCORM package from contentstore for usage_id=%s", + self.scope_ids.usage_id, + ) + return True + except Exception: + logger.exception("Failed to rehydrate SCORM package from contentstore") + return False + @property def storage(self): """ From 615ee7ac93ed2622a62b80c457df84f204e5ee35 Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Mon, 4 May 2026 14:16:45 +0500 Subject: [PATCH 2/2] Updated Changelog Entry --- ... => 20260504_141617_ali.abbas02_contentstore_sync_branch.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename changelog.d/{20260504_062815_ali.abbas02.md => 20260504_141617_ali.abbas02_contentstore_sync_branch.md} (91%) diff --git a/changelog.d/20260504_062815_ali.abbas02.md b/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md similarity index 91% rename from changelog.d/20260504_062815_ali.abbas02.md rename to changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md index 5288be8..5ac6cd9 100644 --- a/changelog.d/20260504_062815_ali.abbas02.md +++ b/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md @@ -1 +1 @@ -- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @ali.abbas02) +- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @Syed-Ali-Abbas-568) \ No newline at end of file