From 3ff38b2375013bfbf8908ae9fd6bacfea1180c4c Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Mon, 4 May 2026 11:33:00 +0500 Subject: [PATCH 1/7] feat: mirror SCORM zip to course contentstore for OLX export/import Open edX's OLX export bundles contentstore assets into the export tarball, but SCORM zips are unpacked into Django's default_storage and never make it into the bundle. As a result, a course exported and re-imported into a new course keeps the SCORM block's metadata but not its content. PR #71 made this more visible by keying the extraction directory on sha1(usage_key) instead of block_id; that fix prevented cross-course corruption but left imported blocks pointing at an empty directory. Add an opt-in sync that closes the gap without fighting PR #71: - studio_submit: after extract_package, mirror the original zip into the current course's contentstore as scorm_packages/.zip (locked). - index_page_url and assets_proxy: before serving content, if package_meta is set but the unpacked tree is missing from default_storage, fetch the zip from the current course's contentstore and re-extract it. Gated behind XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"], defaulting to False; legacy behavior is unchanged unless an operator opts in. Imports of xmodule.contentstore are lazy and every contentstore call is wrapped in try/except so uploads still succeed when the contentstore is unavailable (tests, non-platform environments). Co-Authored-By: Claude Opus 4.7 --- README.rst | 20 +++ changelog.d/20260504_062815_ali.abbas02.md | 1 + openedxscorm/scormxblock.py | 151 ++++++++++++++++++++- 3 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 changelog.d/20260504_062815_ali.abbas02.md diff --git a/README.rst b/README.rst index 6e2214b..5f902b7 100644 --- a/README.rst +++ b/README.rst @@ -138,6 +138,26 @@ You may define the following additional settings in ``XBLOCK_SETTINGS["ScormXBlo * ``S3_QUERY_AUTH`` (default: ``True``): boolean flag (``True`` or ``False``) for query string authentication in S3 urls. If your bucket is public, set this value to ``False``. But be aware that in such case your SCORM assets will be publicly available to everyone. * ``S3_EXPIRES_IN`` (default: 604800): time duration (in seconds) for the presigned URLs to stay valid. The default is one week. +Course export/import (contentstore sync) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, SCORM zips are unpacked into Django's ``default_storage``, which is separate from the contentstore that Open edX bundles into OLX export tarballs. As a consequence, when a course containing a SCORM block is exported and re-imported into a new course, the imported block keeps its metadata (filename, sha1, size) but the actual content does not play because the unpacked files are absent from the new course's storage. + +To survive course export/import, the XBlock can mirror the original SCORM zip into the course's contentstore on save and rehydrate the unpacked tree from the contentstore on first access after an import. Enable it with: + +.. code-block:: python + + XBLOCK_SETTINGS["ScormXBlock"] = { + "CONTENTSTORE_SYNC_ENABLED": True, + } + +When enabled: + +* On upload, the original zip is written to the current course's contentstore as ``scorm_packages/.zip`` (locked). Course export bundles contentstore assets into the OLX tarball; course import copies them under the new ``course_key`` automatically. +* On read, if ``package_meta`` is set but the extracted tree is missing from ``default_storage``, the XBlock fetches the zip from the current course's contentstore and re-extracts it into ``default_storage``. + +This composes with the per-block extraction path introduced in PR #71 (extraction is still keyed by ``sha1(usage_key)``, so courses cannot share extraction directories). Pre-existing blocks already have a populated ``default_storage`` cache, so the rehydrate branch never fires for them. The feature is off by default; if anything misbehaves in your environment, flipping it back to ``False`` restores the prior behavior exactly. + These settings may be added to Tutor by creating a `plugin `__: .. code-block:: python diff --git a/changelog.d/20260504_062815_ali.abbas02.md b/changelog.d/20260504_062815_ali.abbas02.md new file mode 100644 index 0000000..5288be8 --- /dev/null +++ b/changelog.d/20260504_062815_ali.abbas02.md @@ -0,0 +1 @@ +- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @ali.abbas02) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index 6ae123e..921a567 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -1,3 +1,4 @@ +import io import json import hashlib import os @@ -230,8 +231,12 @@ def assets_proxy(self, request, suffix): ------- Response object containing the content of the requested file with the appropriate content type. """ + # If this block was just imported via OLX, the unpacked files may be + # missing from default_storage. Rehydrate from the contentstore before + # serving. No-op when the feature is disabled or the tree exists. + self._rehydrate_from_contentstore() file_name = os.path.basename(suffix) - file_path = self.find_file_path(file_name) + file_path = self.find_file_path(file_name) file_type, _ = mimetypes.guess_type(file_name) with self.storage.open(file_path) as response: file_content = response.read() @@ -299,6 +304,10 @@ def studio_submit(self, request, _suffix): try: self.extract_package(package_file) self.update_package_fields() + # Mirror the original zip into the course's contentstore so that + # OLX export bundles it and OLX import re-creates it under the new + # course_key. Best-effort and feature-flagged off by default. + self._save_zip_to_contentstore(package_file) except ScormError as e: response["errors"].append(e.args[0]) @@ -384,7 +393,12 @@ def extract_package(self, package_file): def index_page_url(self): if not self.package_meta or not self.index_page_path: return "" - + + # If this block was just imported via OLX, the unpacked files may be + # missing from default_storage. Rehydrate from the contentstore before + # building the URL. No-op when the feature is disabled or the tree exists. + self._rehydrate_from_contentstore() + # Serve assets by proxying them through the LMS by default if self.xblock_settings.get("PROXY_ASSETS_LMS", True): return f"{self.proxy_base_url}/{self.index_page_path}" @@ -884,6 +898,139 @@ def workbench_scenarios(): ), ] + @property + def contentstore_sync_enabled(self): + """ + Whether to mirror the SCORM zip to the course's contentstore on save and + rehydrate it from the contentstore on read. + + Disabled by default; opt in by setting + XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True. + """ + return bool(self.xblock_settings.get("CONTENTSTORE_SYNC_ENABLED", False)) + + def _contentstore_asset_path(self): + """ + Filename used when storing the SCORM zip in the course's contentstore. + + The path is keyed by the package sha1 so that OLX export/import carries + the same asset name into the new course. + """ + sha1 = self.package_meta.get("sha1") if self.package_meta else None + if not sha1: + return None + return f"scorm_packages/{sha1}.zip" + + def _save_zip_to_contentstore(self, package_file): + """ + Mirror the original SCORM zip into the course's contentstore. + + Best-effort: any failure (missing imports, no course context, contentstore + error) is logged and swallowed so that the upload still succeeds. + """ + if not self.contentstore_sync_enabled: + return + try: + from xmodule.contentstore.django import contentstore + from xmodule.contentstore.content import StaticContent + except ImportError: + logger.warning( + "xmodule.contentstore unavailable; skipping SCORM zip mirror" + ) + return + + course_key = getattr(self.runtime, "course_id", None) + asset_path = self._contentstore_asset_path() + if course_key is None or asset_path is None: + return + + try: + asset_key = StaticContent.compute_location(course_key, asset_path) + package_file.seek(0) + data = package_file.read() + package_file.seek(0) + content = StaticContent( + asset_key, + asset_path, + "application/zip", + data, + locked=True, + ) + contentstore().save(content) + logger.info("Mirrored SCORM zip to contentstore at %s", asset_key) + except Exception: + logger.exception("Failed to mirror SCORM zip to contentstore") + + def _fetch_zip_from_contentstore(self): + """ + Fetch the original SCORM zip bytes from the course's contentstore. + + Returns the bytes payload, or None when the feature is disabled, the + contentstore is unavailable, or no asset is found for this block. + """ + if not self.contentstore_sync_enabled: + return None + try: + from xmodule.contentstore.django import contentstore + from xmodule.contentstore.content import StaticContent + except ImportError: + return None + try: + from xmodule.exceptions import NotFoundError + except ImportError: + NotFoundError = None # type: ignore + + course_key = getattr(self.runtime, "course_id", None) + asset_path = self._contentstore_asset_path() + if course_key is None or asset_path is None: + return None + + try: + asset_key = StaticContent.compute_location(course_key, asset_path) + content = contentstore().find(asset_key) + return content.data + except Exception as exc: + if NotFoundError is not None and isinstance(exc, NotFoundError): + return None + logger.exception("Failed to fetch SCORM zip from contentstore") + return None + + def _is_extracted_tree_missing(self): + """ + Return True when package_meta is set but the extracted tree is empty. + + After OLX import the metadata is restored but the unpacked files are not, + so this is the signal to rehydrate from the contentstore. + """ + if not self.package_meta: + return False + return not self.path_exists(self.extract_folder_path) + + def _rehydrate_from_contentstore(self): + """ + Re-extract the SCORM zip from the contentstore back into default_storage. + + No-op unless the feature is enabled, package_meta is present, and the + extracted tree is missing. Returns True on a successful rehydration. + """ + if not self.contentstore_sync_enabled: + return False + if not self._is_extracted_tree_missing(): + return False + data = self._fetch_zip_from_contentstore() + if not data: + return False + try: + self.extract_package(io.BytesIO(data)) + logger.info( + "Rehydrated SCORM package from contentstore for usage_id=%s", + self.scope_ids.usage_id, + ) + return True + except Exception: + logger.exception("Failed to rehydrate SCORM package from contentstore") + return False + @property def storage(self): """ From 615ee7ac93ed2622a62b80c457df84f204e5ee35 Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Mon, 4 May 2026 14:16:45 +0500 Subject: [PATCH 2/7] Updated Changelog Entry --- ... => 20260504_141617_ali.abbas02_contentstore_sync_branch.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename changelog.d/{20260504_062815_ali.abbas02.md => 20260504_141617_ali.abbas02_contentstore_sync_branch.md} (91%) diff --git a/changelog.d/20260504_062815_ali.abbas02.md b/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md similarity index 91% rename from changelog.d/20260504_062815_ali.abbas02.md rename to changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md index 5288be8..5ac6cd9 100644 --- a/changelog.d/20260504_062815_ali.abbas02.md +++ b/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md @@ -1 +1 @@ -- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @ali.abbas02) +- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @Syed-Ali-Abbas-568) \ No newline at end of file From d0abdaaea20ef53655d804a5fa42d4aee211903e Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Thu, 7 May 2026 11:17:26 +0500 Subject: [PATCH 3/7] feat: sweep unreferenced SCORM zips from contentstore on upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The contentstore mirror added in 3ff38b2 keeps every uploaded zip at scorm_packages/.zip. Re-uploading produces a new sha1, so repeated edits leave the previous zips behind. Older zips are intentionally retained until they are no longer referenced (a published block whose draft has moved ahead still needs its old zip), but nothing reaped them once they were truly unreferenced. Hook the cleanup into studio_submit itself: after saving the new zip, walk the course's SCORM blocks across both draft and published branches, union their package_meta sha1s, and delete any scorm_packages/.zip the union doesn't cover. The current upload's sha1 is always pinned into the reference set so the just-saved zip is never touched, even before the new package_meta is persisted by the modulestore. Pattern is strict 40-hex sha1 under scorm_packages/, so unrelated contentstore assets are never inspected. Keeps the package a pure XBlock — no Django app registration, no new management command, no operator action required. Co-Authored-By: Claude Opus 4.7 --- README.rst | 1 + .../20260507_120000_ali.abbas02_scorm_gc.md | 1 + openedxscorm/scormxblock.py | 110 ++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 changelog.d/20260507_120000_ali.abbas02_scorm_gc.md diff --git a/README.rst b/README.rst index 5f902b7..7384928 100644 --- a/README.rst +++ b/README.rst @@ -155,6 +155,7 @@ When enabled: * On upload, the original zip is written to the current course's contentstore as ``scorm_packages/.zip`` (locked). Course export bundles contentstore assets into the OLX tarball; course import copies them under the new ``course_key`` automatically. * On read, if ``package_meta`` is set but the extracted tree is missing from ``default_storage``, the XBlock fetches the zip from the current course's contentstore and re-extracts it into ``default_storage``. +* Each upload also sweeps the course for ``scorm_packages/.zip`` assets that no SCORM block on either the draft or the published branch references, and deletes them. The current upload's sha1 is always pinned into the reference set, so the just-saved zip is never touched. Cleanup is best-effort and silent: if the modulestore or contentstore call fails, the asset is left in place and the upload still succeeds. This composes with the per-block extraction path introduced in PR #71 (extraction is still keyed by ``sha1(usage_key)``, so courses cannot share extraction directories). Pre-existing blocks already have a populated ``default_storage`` cache, so the rehydrate branch never fires for them. The feature is off by default; if anything misbehaves in your environment, flipping it back to ``False`` restores the prior behavior exactly. diff --git a/changelog.d/20260507_120000_ali.abbas02_scorm_gc.md b/changelog.d/20260507_120000_ali.abbas02_scorm_gc.md new file mode 100644 index 0000000..01c0d67 --- /dev/null +++ b/changelog.d/20260507_120000_ali.abbas02_scorm_gc.md @@ -0,0 +1 @@ +- [Improvement] When ``CONTENTSTORE_SYNC_ENABLED`` is on, every SCORM upload now sweeps the course's contentstore for ``scorm_packages/.zip`` assets that no draft or published SCORM block references, and deletes them. The current upload's sha1 is always pinned into the reference set, so the just-saved zip is never touched. Best-effort: any failure is logged and ignored so the upload still succeeds. (by @Syed-Ali-Abbas-568) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index 921a567..533ac35 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -308,6 +308,10 @@ def studio_submit(self, request, _suffix): # OLX export bundles it and OLX import re-creates it under the new # course_key. Best-effort and feature-flagged off by default. self._save_zip_to_contentstore(package_file) + # Reap any scorm_packages/*.zip in this course's contentstore that + # no draft or published SCORM block still references. Keeps the + # mirror from accumulating stale uploads over time. + self._gc_unreferenced_contentstore_zips() except ScormError as e: response["errors"].append(e.args[0]) @@ -995,6 +999,112 @@ def _fetch_zip_from_contentstore(self): logger.exception("Failed to fetch SCORM zip from contentstore") return None + _SCORM_ASSET_PATTERN = re.compile(r"^scorm_packages[/_]([0-9a-fA-F]{40})\.zip$") + + def _gc_unreferenced_contentstore_zips(self): + """ + Delete scorm_packages/.zip assets in this course's contentstore + that no SCORM block still references in either the draft or the + published branch. + + Runs at the tail of studio_submit so cleanup happens organically as + authors edit, with no operator action required. The current upload's + sha1 is always pinned into the reference set, even if the new + package_meta has not yet been persisted by the modulestore. Best- + effort: every failure is swallowed so the upload still succeeds. + """ + if not self.contentstore_sync_enabled: + return + try: + from xmodule.contentstore.django import contentstore + from xmodule.contentstore.content import StaticContent + from xmodule.modulestore import ModuleStoreEnum + from xmodule.modulestore.django import modulestore + except ImportError: + return + + course_key = getattr(self.runtime, "course_id", None) + if course_key is None: + return + + try: + cs = contentstore() + referenced = self._collect_course_scorm_sha1s( + modulestore(), course_key, ModuleStoreEnum + ) + current = (self.package_meta or {}).get("sha1") + if current: + referenced.add(current.lower()) + + asset_docs, _ = cs.get_all_content_for_course(course_key) + for doc in asset_docs: + sha1 = self._scorm_sha1_from_asset_doc(doc) + if sha1 is None or sha1 in referenced: + continue + asset_key = StaticContent.compute_location( + course_key, f"scorm_packages/{sha1}.zip" + ) + try: + cs.delete(asset_key) + logger.info( + "Deleted unreferenced SCORM zip %s from contentstore", + asset_key, + ) + except Exception: + logger.exception( + "Failed to delete unreferenced SCORM zip %s", asset_key + ) + except Exception: + logger.exception( + "SCORM contentstore GC failed; leaving assets in place" + ) + + @staticmethod + def _collect_course_scorm_sha1s(store, course_key, ModuleStoreEnum): + referenced = set() + for revision in ( + ModuleStoreEnum.RevisionOption.published_only, + ModuleStoreEnum.RevisionOption.draft_only, + ): + try: + blocks = store.get_items( + course_key, + qualifiers={"category": "scorm"}, + revision=revision, + ) + except TypeError: + blocks = store.get_items( + course_key, qualifiers={"category": "scorm"} + ) + for block in blocks: + meta = getattr(block, "package_meta", None) or {} + sha1 = meta.get("sha1") + if sha1: + referenced.add(sha1.lower()) + return referenced + + @classmethod + def _scorm_sha1_from_asset_doc(cls, doc): + candidates = [] + if isinstance(doc, dict): + asset_id = doc.get("_id") + if isinstance(asset_id, dict): + candidates.append(asset_id.get("name", "") or "") + elif asset_id is not None: + candidates.append(getattr(asset_id, "block_id", "") or "") + candidates.append(getattr(asset_id, "path", "") or "") + candidates.append(doc.get("displayname", "") or "") + else: + candidates.append(getattr(doc, "name", "") or "") + candidates.append(getattr(doc, "block_id", "") or "") + for name in candidates: + if not name: + continue + match = cls._SCORM_ASSET_PATTERN.match(name) + if match: + return match.group(1).lower() + return None + def _is_extracted_tree_missing(self): """ Return True when package_meta is set but the extracted tree is empty. From 815b1270847e517b2d749ecc03cd8eae76473dbf Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Wed, 13 May 2026 09:35:18 +0500 Subject: [PATCH 4/7] fix: cap contentstore SCORM zips to 1 published + 1 draft per block _gc_unreferenced_contentstore_zips runs inside studio_submit before the XBlock framework persists the new package_meta to the modulestore, so store.get_items(..., revision=draft_only) returned the block being edited with its previous sha1. That stale sha1 stayed in the referenced set alongside the new sha1, and repeated draft uploads accumulated zips in the contentstore. Substitute the in-memory draft sha1 for the current block when collecting referenced sha1s so the stale value drops out and the older draft zip gets reclaimed. Per block, the contentstore now retains at most one published + one draft scorm_packages/.zip. Co-Authored-By: Claude Opus 4.7 --- ...0260512_150000_ali.abbas02_scorm_gc_cap.md | 1 + openedxscorm/scormxblock.py | 53 +++++++++++---- openedxscorm/tests.py | 64 +++++++++++++++++++ 3 files changed, 106 insertions(+), 12 deletions(-) create mode 100644 changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md diff --git a/changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md b/changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md new file mode 100644 index 0000000..9adafc7 --- /dev/null +++ b/changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md @@ -0,0 +1 @@ +- [Bugfix] When ``CONTENTSTORE_SYNC_ENABLED`` is on, repeated draft uploads on the same SCORM block now reclaim the previous draft zip from the contentstore instead of accumulating one zip per upload. The contentstore retains at most one published and one draft ``scorm_packages/.zip`` per block. The fix substitutes the in-memory ``package_meta.sha1`` for the block being edited when collecting referenced sha1s, because field changes are not yet persisted to the modulestore at the point the GC runs inside ``studio_submit``. (by @Syed-Ali-Abbas-568) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index 533ac35..aa1d5ac 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -1008,10 +1008,14 @@ def _gc_unreferenced_contentstore_zips(self): published branch. Runs at the tail of studio_submit so cleanup happens organically as - authors edit, with no operator action required. The current upload's - sha1 is always pinned into the reference set, even if the new - package_meta has not yet been persisted by the modulestore. Best- - effort: every failure is swallowed so the upload still succeeds. + authors edit, with no operator action required. The in-memory + package_meta of the block being edited is supplied as a draft-branch + override to _collect_course_scorm_sha1s, so the previous (stale) + draft sha1 is not kept alive in the reference set. Effect: per block + the contentstore retains at most one published zip and one draft zip + — repeated draft uploads on the same block reclaim the older zips + instead of accumulating them. Best-effort: every failure is + swallowed so the upload still succeeds. """ if not self.contentstore_sync_enabled: return @@ -1029,10 +1033,14 @@ def _gc_unreferenced_contentstore_zips(self): try: cs = contentstore() + current = (self.package_meta or {}).get("sha1") referenced = self._collect_course_scorm_sha1s( - modulestore(), course_key, ModuleStoreEnum + modulestore(), + course_key, + ModuleStoreEnum, + override_usage_id=self.scope_ids.usage_id, + override_draft_sha1=current, ) - current = (self.package_meta or {}).get("sha1") if current: referenced.add(current.lower()) @@ -1060,11 +1068,24 @@ def _gc_unreferenced_contentstore_zips(self): ) @staticmethod - def _collect_course_scorm_sha1s(store, course_key, ModuleStoreEnum): + def _collect_course_scorm_sha1s( + store, + course_key, + ModuleStoreEnum, + override_usage_id=None, + override_draft_sha1=None, + ): + # When called from inside studio_submit, the new package_meta is still + # in memory on the calling block — the modulestore copy is from the + # previous upload. Substitute the in-memory draft sha1 for that block + # so the stale value doesn't keep its now-orphan zip alive. referenced = set() - for revision in ( - ModuleStoreEnum.RevisionOption.published_only, - ModuleStoreEnum.RevisionOption.draft_only, + override_usage_id_str = ( + str(override_usage_id) if override_usage_id is not None else None + ) + for revision_name, revision in ( + ("published", ModuleStoreEnum.RevisionOption.published_only), + ("draft", ModuleStoreEnum.RevisionOption.draft_only), ): try: blocks = store.get_items( @@ -1077,8 +1098,16 @@ def _collect_course_scorm_sha1s(store, course_key, ModuleStoreEnum): course_key, qualifiers={"category": "scorm"} ) for block in blocks: - meta = getattr(block, "package_meta", None) or {} - sha1 = meta.get("sha1") + if ( + revision_name == "draft" + and override_usage_id_str is not None + and override_draft_sha1 + and str(getattr(block.scope_ids, "usage_id", "")) == override_usage_id_str + ): + sha1 = override_draft_sha1 + else: + meta = getattr(block, "package_meta", None) or {} + sha1 = meta.get("sha1") if sha1: referenced.add(sha1.lower()) return referenced diff --git a/openedxscorm/tests.py b/openedxscorm/tests.py index 3f025c3..6df547d 100644 --- a/openedxscorm/tests.py +++ b/openedxscorm/tests.py @@ -266,3 +266,67 @@ def test_scorm_data_has_user_info_in_student_view(self): "cmi.core.student_name", ] self.assertTrue(key in block.scorm_data for key in student_info_keys) + + def test_collect_course_scorm_sha1s_override_replaces_stale_draft(self): + # The draft branch returns a stale sha1 for the block currently being + # edited because field changes aren't persisted to the modulestore + # until studio_submit returns. The override must substitute the + # in-memory sha1 so the previous draft zip falls out of the + # referenced set and gets reclaimed. + block_x_published = mock.Mock() + block_x_published.scope_ids.usage_id = "usage_X" + block_x_published.package_meta = {"sha1": "published_x"} + + block_x_draft_stale = mock.Mock() + block_x_draft_stale.scope_ids.usage_id = "usage_X" + block_x_draft_stale.package_meta = {"sha1": "stale_draft_x"} + + block_y_draft = mock.Mock() + block_y_draft.scope_ids.usage_id = "usage_Y" + block_y_draft.package_meta = {"sha1": "draft_y"} + + store = mock.Mock() + + def fake_get_items(_course_key, qualifiers, revision): + if revision == "published_only": + return [block_x_published] + if revision == "draft_only": + return [block_x_draft_stale, block_y_draft] + return [] + + store.get_items = mock.Mock(side_effect=fake_get_items) + + ModuleStoreEnum = mock.Mock() + ModuleStoreEnum.RevisionOption.published_only = "published_only" + ModuleStoreEnum.RevisionOption.draft_only = "draft_only" + + result = ScormXBlock._collect_course_scorm_sha1s( + store, + "course-key", + ModuleStoreEnum, + override_usage_id="usage_X", + override_draft_sha1="fresh_draft_x", + ) + + self.assertIn("published_x", result) + self.assertIn("fresh_draft_x", result) + self.assertIn("draft_y", result) + self.assertNotIn("stale_draft_x", result) + + def test_collect_course_scorm_sha1s_no_override_keeps_modulestore_value(self): + block_x_draft = mock.Mock() + block_x_draft.scope_ids.usage_id = "usage_X" + block_x_draft.package_meta = {"sha1": "draft_x"} + + store = mock.Mock() + store.get_items = mock.Mock(return_value=[block_x_draft]) + + ModuleStoreEnum = mock.Mock() + ModuleStoreEnum.RevisionOption.published_only = "published_only" + ModuleStoreEnum.RevisionOption.draft_only = "draft_only" + + result = ScormXBlock._collect_course_scorm_sha1s( + store, "course-key", ModuleStoreEnum + ) + + self.assertEqual(result, {"draft_x"}) From bf6a8812039918b751b02a4c7f5139fc496123dd Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Wed, 13 May 2026 10:52:17 +0500 Subject: [PATCH 5/7] Addressed comments and cleaned up code --- README.rst | 22 +++++++++---------- ...17_ali.abbas02_contentstore_sync_branch.md | 1 - .../20260507_120000_ali.abbas02_scorm_gc.md | 1 - ...0260512_150000_ali.abbas02_scorm_gc_cap.md | 1 - ...101219_ali.abbas02_contentstore_sync_v2.md | 1 + openedxscorm/scormxblock.py | 14 ++++-------- 6 files changed, 15 insertions(+), 25 deletions(-) delete mode 100644 changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md delete mode 100644 changelog.d/20260507_120000_ali.abbas02_scorm_gc.md delete mode 100644 changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md create mode 100644 changelog.d/20260513_101219_ali.abbas02_contentstore_sync_v2.md diff --git a/README.rst b/README.rst index 7384928..3d727c0 100644 --- a/README.rst +++ b/README.rst @@ -141,9 +141,14 @@ You may define the following additional settings in ``XBLOCK_SETTINGS["ScormXBlo Course export/import (contentstore sync) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -By default, SCORM zips are unpacked into Django's ``default_storage``, which is separate from the contentstore that Open edX bundles into OLX export tarballs. As a consequence, when a course containing a SCORM block is exported and re-imported into a new course, the imported block keeps its metadata (filename, sha1, size) but the actual content does not play because the unpacked files are absent from the new course's storage. +By default, SCORM zips are unpacked into Django's ``default_storage``, which +is not bundled into OLX export tarballs. A course exported and re-imported +into a new course keeps the SCORM block's metadata but cannot play the +package, because the unpacked files are absent from the new course's storage. -To survive course export/import, the XBlock can mirror the original SCORM zip into the course's contentstore on save and rehydrate the unpacked tree from the contentstore on first access after an import. Enable it with: +To make blocks survive OLX export/import, the XBlock can mirror each uploaded +zip into the course's contentstore (which IS bundled into OLX exports) and +re-extract it from there on first access after an import. Enable it with: .. code-block:: python @@ -151,16 +156,9 @@ To survive course export/import, the XBlock can mirror the original SCORM zip in "CONTENTSTORE_SYNC_ENABLED": True, } -When enabled: - -* On upload, the original zip is written to the current course's contentstore as ``scorm_packages/.zip`` (locked). Course export bundles contentstore assets into the OLX tarball; course import copies them under the new ``course_key`` automatically. -* On read, if ``package_meta`` is set but the extracted tree is missing from ``default_storage``, the XBlock fetches the zip from the current course's contentstore and re-extracts it into ``default_storage``. -* Each upload also sweeps the course for ``scorm_packages/.zip`` assets that no SCORM block on either the draft or the published branch references, and deletes them. The current upload's sha1 is always pinned into the reference set, so the just-saved zip is never touched. Cleanup is best-effort and silent: if the modulestore or contentstore call fails, the asset is left in place and the upload still succeeds. - -This composes with the per-block extraction path introduced in PR #71 (extraction is still keyed by ``sha1(usage_key)``, so courses cannot share extraction directories). Pre-existing blocks already have a populated ``default_storage`` cache, so the rehydrate branch never fires for them. The feature is off by default; if anything misbehaves in your environment, flipping it back to ``False`` restores the prior behavior exactly. - -These settings may be added to Tutor by creating a `plugin `__: - +The feature is off by default because it touches an external system +(contentstore) on every SCORM upload and read; flipping it back to ``False`` +restores the prior behavior exactly. .. code-block:: python from tutor import hooks diff --git a/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md b/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md deleted file mode 100644 index 5ac6cd9..0000000 --- a/changelog.d/20260504_141617_ali.abbas02_contentstore_sync_branch.md +++ /dev/null @@ -1 +0,0 @@ -- [Feature] Optionally mirror the SCORM zip into the course contentstore on upload and rehydrate the unpacked tree from the contentstore on first access. This makes blocks survive OLX course export/import without breaking the extraction path keyed by `sha1(usage_key)` introduced in PR #71. Disabled by default; enable with `XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True`. (by @Syed-Ali-Abbas-568) \ No newline at end of file diff --git a/changelog.d/20260507_120000_ali.abbas02_scorm_gc.md b/changelog.d/20260507_120000_ali.abbas02_scorm_gc.md deleted file mode 100644 index 01c0d67..0000000 --- a/changelog.d/20260507_120000_ali.abbas02_scorm_gc.md +++ /dev/null @@ -1 +0,0 @@ -- [Improvement] When ``CONTENTSTORE_SYNC_ENABLED`` is on, every SCORM upload now sweeps the course's contentstore for ``scorm_packages/.zip`` assets that no draft or published SCORM block references, and deletes them. The current upload's sha1 is always pinned into the reference set, so the just-saved zip is never touched. Best-effort: any failure is logged and ignored so the upload still succeeds. (by @Syed-Ali-Abbas-568) diff --git a/changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md b/changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md deleted file mode 100644 index 9adafc7..0000000 --- a/changelog.d/20260512_150000_ali.abbas02_scorm_gc_cap.md +++ /dev/null @@ -1 +0,0 @@ -- [Bugfix] When ``CONTENTSTORE_SYNC_ENABLED`` is on, repeated draft uploads on the same SCORM block now reclaim the previous draft zip from the contentstore instead of accumulating one zip per upload. The contentstore retains at most one published and one draft ``scorm_packages/.zip`` per block. The fix substitutes the in-memory ``package_meta.sha1`` for the block being edited when collecting referenced sha1s, because field changes are not yet persisted to the modulestore at the point the GC runs inside ``studio_submit``. (by @Syed-Ali-Abbas-568) diff --git a/changelog.d/20260513_101219_ali.abbas02_contentstore_sync_v2.md b/changelog.d/20260513_101219_ali.abbas02_contentstore_sync_v2.md new file mode 100644 index 0000000..5820b77 --- /dev/null +++ b/changelog.d/20260513_101219_ali.abbas02_contentstore_sync_v2.md @@ -0,0 +1 @@ +- [Feature] Optional contentstore synchronization for SCORM uploads and imports. When `CONTENTSTORE_SYNC_ENABLED` is enabled, uploaded SCORM packages are mirrored into the course contentstore and automatically rehydrated on first access after OLX export/import, preserving compatibility with extraction paths keyed by `sha1(usage_key)`. Uploads also now clean up unreferenced SCORM package zips, ensuring each block retains at most one published and one draft asset in the contentstore. Cleanup is performed on a best-effort basis and never blocks uploads if deletion fails. (by @Syed-Ali-Abbas-568) \ No newline at end of file diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index aa1d5ac..02636fe 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -231,10 +231,6 @@ def assets_proxy(self, request, suffix): ------- Response object containing the content of the requested file with the appropriate content type. """ - # If this block was just imported via OLX, the unpacked files may be - # missing from default_storage. Rehydrate from the contentstore before - # serving. No-op when the feature is disabled or the tree exists. - self._rehydrate_from_contentstore() file_name = os.path.basename(suffix) file_path = self.find_file_path(file_name) file_type, _ = mimetypes.guess_type(file_name) @@ -911,7 +907,7 @@ def contentstore_sync_enabled(self): Disabled by default; opt in by setting XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True. """ - return bool(self.xblock_settings.get("CONTENTSTORE_SYNC_ENABLED", False)) + return self.xblock_settings.get("CONTENTSTORE_SYNC_ENABLED", False) def _contentstore_asset_path(self): """ @@ -929,8 +925,6 @@ def _save_zip_to_contentstore(self, package_file): """ Mirror the original SCORM zip into the course's contentstore. - Best-effort: any failure (missing imports, no course context, contentstore - error) is logged and swallowed so that the upload still succeeds. """ if not self.contentstore_sync_enabled: return @@ -993,9 +987,9 @@ def _fetch_zip_from_contentstore(self): asset_key = StaticContent.compute_location(course_key, asset_path) content = contentstore().find(asset_key) return content.data - except Exception as exc: - if NotFoundError is not None and isinstance(exc, NotFoundError): - return None + except NotFoundError: + return None + except Exception: logger.exception("Failed to fetch SCORM zip from contentstore") return None From 5a1ec7b33db19e849ad976e4010ab4acc072dbc7 Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Wed, 13 May 2026 10:56:43 +0500 Subject: [PATCH 6/7] Cleaned up ai generated tests --- openedxscorm/tests.py | 64 ------------------------------------------- 1 file changed, 64 deletions(-) diff --git a/openedxscorm/tests.py b/openedxscorm/tests.py index 6df547d..3f025c3 100644 --- a/openedxscorm/tests.py +++ b/openedxscorm/tests.py @@ -266,67 +266,3 @@ def test_scorm_data_has_user_info_in_student_view(self): "cmi.core.student_name", ] self.assertTrue(key in block.scorm_data for key in student_info_keys) - - def test_collect_course_scorm_sha1s_override_replaces_stale_draft(self): - # The draft branch returns a stale sha1 for the block currently being - # edited because field changes aren't persisted to the modulestore - # until studio_submit returns. The override must substitute the - # in-memory sha1 so the previous draft zip falls out of the - # referenced set and gets reclaimed. - block_x_published = mock.Mock() - block_x_published.scope_ids.usage_id = "usage_X" - block_x_published.package_meta = {"sha1": "published_x"} - - block_x_draft_stale = mock.Mock() - block_x_draft_stale.scope_ids.usage_id = "usage_X" - block_x_draft_stale.package_meta = {"sha1": "stale_draft_x"} - - block_y_draft = mock.Mock() - block_y_draft.scope_ids.usage_id = "usage_Y" - block_y_draft.package_meta = {"sha1": "draft_y"} - - store = mock.Mock() - - def fake_get_items(_course_key, qualifiers, revision): - if revision == "published_only": - return [block_x_published] - if revision == "draft_only": - return [block_x_draft_stale, block_y_draft] - return [] - - store.get_items = mock.Mock(side_effect=fake_get_items) - - ModuleStoreEnum = mock.Mock() - ModuleStoreEnum.RevisionOption.published_only = "published_only" - ModuleStoreEnum.RevisionOption.draft_only = "draft_only" - - result = ScormXBlock._collect_course_scorm_sha1s( - store, - "course-key", - ModuleStoreEnum, - override_usage_id="usage_X", - override_draft_sha1="fresh_draft_x", - ) - - self.assertIn("published_x", result) - self.assertIn("fresh_draft_x", result) - self.assertIn("draft_y", result) - self.assertNotIn("stale_draft_x", result) - - def test_collect_course_scorm_sha1s_no_override_keeps_modulestore_value(self): - block_x_draft = mock.Mock() - block_x_draft.scope_ids.usage_id = "usage_X" - block_x_draft.package_meta = {"sha1": "draft_x"} - - store = mock.Mock() - store.get_items = mock.Mock(return_value=[block_x_draft]) - - ModuleStoreEnum = mock.Mock() - ModuleStoreEnum.RevisionOption.published_only = "published_only" - ModuleStoreEnum.RevisionOption.draft_only = "draft_only" - - result = ScormXBlock._collect_course_scorm_sha1s( - store, "course-key", ModuleStoreEnum - ) - - self.assertEqual(result, {"draft_x"}) From 40a85d6d8764d54ce42782ef8018fe274a9cbb16 Mon Sep 17 00:00:00 2001 From: Syed Ali Abbas Date: Thu, 14 May 2026 10:30:33 +0500 Subject: [PATCH 7/7] Resolved Comments --- openedxscorm/scormxblock.py | 42 ++++++++++--------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/openedxscorm/scormxblock.py b/openedxscorm/scormxblock.py index 02636fe..66544de 100644 --- a/openedxscorm/scormxblock.py +++ b/openedxscorm/scormxblock.py @@ -22,6 +22,11 @@ from xblock.completable import CompletableXBlockMixin from xblock.exceptions import JsonHandlerError from xblock.fields import Scope, String, Float, Boolean, Dict, DateTime, Integer +from xmodule.contentstore.content import StaticContent +from xmodule.contentstore.django import contentstore +from xmodule.exceptions import NotFoundError +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore try: # Older Open edX releases (Redwood and earlier) install a backported version of @@ -928,14 +933,6 @@ def _save_zip_to_contentstore(self, package_file): """ if not self.contentstore_sync_enabled: return - try: - from xmodule.contentstore.django import contentstore - from xmodule.contentstore.content import StaticContent - except ImportError: - logger.warning( - "xmodule.contentstore unavailable; skipping SCORM zip mirror" - ) - return course_key = getattr(self.runtime, "course_id", None) asset_path = self._contentstore_asset_path() @@ -963,20 +960,11 @@ def _fetch_zip_from_contentstore(self): """ Fetch the original SCORM zip bytes from the course's contentstore. - Returns the bytes payload, or None when the feature is disabled, the - contentstore is unavailable, or no asset is found for this block. + Returns the bytes payload, or None when the feature is disabled or no + asset is found for this block. """ if not self.contentstore_sync_enabled: return None - try: - from xmodule.contentstore.django import contentstore - from xmodule.contentstore.content import StaticContent - except ImportError: - return None - try: - from xmodule.exceptions import NotFoundError - except ImportError: - NotFoundError = None # type: ignore course_key = getattr(self.runtime, "course_id", None) asset_path = self._contentstore_asset_path() @@ -993,8 +981,6 @@ def _fetch_zip_from_contentstore(self): logger.exception("Failed to fetch SCORM zip from contentstore") return None - _SCORM_ASSET_PATTERN = re.compile(r"^scorm_packages[/_]([0-9a-fA-F]{40})\.zip$") - def _gc_unreferenced_contentstore_zips(self): """ Delete scorm_packages/.zip assets in this course's contentstore @@ -1013,13 +999,6 @@ def _gc_unreferenced_contentstore_zips(self): """ if not self.contentstore_sync_enabled: return - try: - from xmodule.contentstore.django import contentstore - from xmodule.contentstore.content import StaticContent - from xmodule.modulestore import ModuleStoreEnum - from xmodule.modulestore.django import modulestore - except ImportError: - return course_key = getattr(self.runtime, "course_id", None) if course_key is None: @@ -1106,8 +1085,9 @@ def _collect_course_scorm_sha1s( referenced.add(sha1.lower()) return referenced - @classmethod - def _scorm_sha1_from_asset_doc(cls, doc): + @staticmethod + def _scorm_sha1_from_asset_doc(doc): + pattern = re.compile(r"^scorm_packages[/_]([0-9a-fA-F]{40})\.zip$") candidates = [] if isinstance(doc, dict): asset_id = doc.get("_id") @@ -1123,7 +1103,7 @@ def _scorm_sha1_from_asset_doc(cls, doc): for name in candidates: if not name: continue - match = cls._SCORM_ASSET_PATTERN.match(name) + match = pattern.match(name) if match: return match.group(1).lower() return None