Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,27 @@ You may define the following additional settings in ``XBLOCK_SETTINGS["ScormXBlo
* ``S3_QUERY_AUTH`` (default: ``True``): boolean flag (``True`` or ``False``) for query string authentication in S3 urls. If your bucket is public, set this value to ``False``. But be aware that in such case your SCORM assets will be publicly available to everyone.
* ``S3_EXPIRES_IN`` (default: 604800): time duration (in seconds) for the presigned URLs to stay valid. The default is one week.

These settings may be added to Tutor by creating a `plugin <https://docs.tutor.overhang.io/plugins/>`__:
Course export/import (contentstore sync)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

By default, SCORM zips are unpacked into Django's ``default_storage``, which
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ahmed-arb Thoughts on keeping this turned off by default?

is not bundled into OLX export tarballs. A course exported and re-imported
into a new course keeps the SCORM block's metadata but cannot play the
package, because the unpacked files are absent from the new course's storage.

To make blocks survive OLX export/import, the XBlock can mirror each uploaded
zip into the course's contentstore (which IS bundled into OLX exports) and
re-extract it from there on first access after an import. Enable it with:

.. code-block:: python

XBLOCK_SETTINGS["ScormXBlock"] = {
"CONTENTSTORE_SYNC_ENABLED": True,
}

The feature is off by default because it touches an external system
(contentstore) on every SCORM upload and read; flipping it back to ``False``
restores the prior behavior exactly.
.. code-block:: python

from tutor import hooks
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- [Feature] Optional contentstore synchronization for SCORM uploads and imports. When `CONTENTSTORE_SYNC_ENABLED` is enabled, uploaded SCORM packages are mirrored into the course contentstore and automatically rehydrated on first access after OLX export/import, preserving compatibility with extraction paths keyed by `sha1(usage_key)`. Uploads also now clean up unreferenced SCORM package zips, ensuring each block retains at most one published and one draft asset in the contentstore. Cleanup is performed on a best-effort basis and never blocks uploads if deletion fails. (by @Syed-Ali-Abbas-568)
284 changes: 282 additions & 2 deletions openedxscorm/scormxblock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import json
import hashlib
import os
Expand Down Expand Up @@ -231,7 +232,7 @@ def assets_proxy(self, request, suffix):
Response object containing the content of the requested file with the appropriate content type.
"""
file_name = os.path.basename(suffix)
file_path = self.find_file_path(file_name)
file_path = self.find_file_path(file_name)
file_type, _ = mimetypes.guess_type(file_name)
with self.storage.open(file_path) as response:
file_content = response.read()
Expand Down Expand Up @@ -299,6 +300,14 @@ def studio_submit(self, request, _suffix):
try:
self.extract_package(package_file)
self.update_package_fields()
# Mirror the original zip into the course's contentstore so that
# OLX export bundles it and OLX import re-creates it under the new
# course_key. Best-effort and feature-flagged off by default.
self._save_zip_to_contentstore(package_file)
# Reap any scorm_packages/*.zip in this course's contentstore that
# no draft or published SCORM block still references. Keeps the
# mirror from accumulating stale uploads over time.
self._gc_unreferenced_contentstore_zips()
except ScormError as e:
response["errors"].append(e.args[0])

Expand Down Expand Up @@ -384,7 +393,12 @@ def extract_package(self, package_file):
def index_page_url(self):
if not self.package_meta or not self.index_page_path:
return ""


# If this block was just imported via OLX, the unpacked files may be
# missing from default_storage. Rehydrate from the contentstore before
# building the URL. No-op when the feature is disabled or the tree exists.
self._rehydrate_from_contentstore()

# Serve assets by proxying them through the LMS by default
if self.xblock_settings.get("PROXY_ASSETS_LMS", True):
return f"{self.proxy_base_url}/{self.index_page_path}"
Expand Down Expand Up @@ -884,6 +898,272 @@ def workbench_scenarios():
),
]

@property
def contentstore_sync_enabled(self):
"""
Whether to mirror the SCORM zip to the course's contentstore on save and
rehydrate it from the contentstore on read.

Disabled by default; opt in by setting
XBLOCK_SETTINGS["ScormXBlock"]["CONTENTSTORE_SYNC_ENABLED"] = True.
"""
return self.xblock_settings.get("CONTENTSTORE_SYNC_ENABLED", False)

def _contentstore_asset_path(self):
"""
Filename used when storing the SCORM zip in the course's contentstore.

The path is keyed by the package sha1 so that OLX export/import carries
the same asset name into the new course.
"""
sha1 = self.package_meta.get("sha1") if self.package_meta else None
if not sha1:
return None
return f"scorm_packages/{sha1}.zip"

def _save_zip_to_contentstore(self, package_file):
"""
Mirror the original SCORM zip into the course's contentstore.

"""
if not self.contentstore_sync_enabled:
return
try:
from xmodule.contentstore.django import contentstore
Comment thread
Syed-Ali-Abbas-568 marked this conversation as resolved.
Outdated
from xmodule.contentstore.content import StaticContent
except ImportError:
logger.warning(
"xmodule.contentstore unavailable; skipping SCORM zip mirror"
)
return

course_key = getattr(self.runtime, "course_id", None)
asset_path = self._contentstore_asset_path()
if course_key is None or asset_path is None:
return

try:
asset_key = StaticContent.compute_location(course_key, asset_path)
package_file.seek(0)
data = package_file.read()
package_file.seek(0)
content = StaticContent(
asset_key,
asset_path,
"application/zip",
data,
locked=True,
)
contentstore().save(content)
logger.info("Mirrored SCORM zip to contentstore at %s", asset_key)
except Exception:
logger.exception("Failed to mirror SCORM zip to contentstore")

def _fetch_zip_from_contentstore(self):
"""
Fetch the original SCORM zip bytes from the course's contentstore.

Returns the bytes payload, or None when the feature is disabled, the
contentstore is unavailable, or no asset is found for this block.
"""
if not self.contentstore_sync_enabled:
return None
try:
from xmodule.contentstore.django import contentstore
from xmodule.contentstore.content import StaticContent
except ImportError:
return None
try:
from xmodule.exceptions import NotFoundError
except ImportError:
NotFoundError = None # type: ignore

course_key = getattr(self.runtime, "course_id", None)
asset_path = self._contentstore_asset_path()
if course_key is None or asset_path is None:
return None

try:
asset_key = StaticContent.compute_location(course_key, asset_path)
content = contentstore().find(asset_key)
return content.data
except NotFoundError:
return None
except Exception:
logger.exception("Failed to fetch SCORM zip from contentstore")
return None

_SCORM_ASSET_PATTERN = re.compile(r"^scorm_packages[/_]([0-9a-fA-F]{40})\.zip$")
Comment thread
Syed-Ali-Abbas-568 marked this conversation as resolved.
Outdated

def _gc_unreferenced_contentstore_zips(self):
"""
Delete scorm_packages/<sha1>.zip assets in this course's contentstore
that no SCORM block still references in either the draft or the
published branch.

Runs at the tail of studio_submit so cleanup happens organically as
authors edit, with no operator action required. The in-memory
package_meta of the block being edited is supplied as a draft-branch
override to _collect_course_scorm_sha1s, so the previous (stale)
draft sha1 is not kept alive in the reference set. Effect: per block
the contentstore retains at most one published zip and one draft zip
— repeated draft uploads on the same block reclaim the older zips
instead of accumulating them. Best-effort: every failure is
swallowed so the upload still succeeds.
"""
if not self.contentstore_sync_enabled:
return
try:
from xmodule.contentstore.django import contentstore
from xmodule.contentstore.content import StaticContent
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore
except ImportError:
return

course_key = getattr(self.runtime, "course_id", None)
if course_key is None:
return

try:
cs = contentstore()
current = (self.package_meta or {}).get("sha1")
referenced = self._collect_course_scorm_sha1s(
modulestore(),
course_key,
ModuleStoreEnum,
override_usage_id=self.scope_ids.usage_id,
override_draft_sha1=current,
)
if current:
referenced.add(current.lower())

asset_docs, _ = cs.get_all_content_for_course(course_key)
for doc in asset_docs:
sha1 = self._scorm_sha1_from_asset_doc(doc)
if sha1 is None or sha1 in referenced:
continue
asset_key = StaticContent.compute_location(
course_key, f"scorm_packages/{sha1}.zip"
)
try:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: lets try to avoid nested try excepts

cs.delete(asset_key)
logger.info(
"Deleted unreferenced SCORM zip %s from contentstore",
asset_key,
)
except Exception:
logger.exception(
"Failed to delete unreferenced SCORM zip %s", asset_key
)
except Exception:
logger.exception(
"SCORM contentstore GC failed; leaving assets in place"
)

@staticmethod
def _collect_course_scorm_sha1s(
store,
course_key,
ModuleStoreEnum,
override_usage_id=None,
override_draft_sha1=None,
):
# When called from inside studio_submit, the new package_meta is still
# in memory on the calling block — the modulestore copy is from the
# previous upload. Substitute the in-memory draft sha1 for that block
# so the stale value doesn't keep its now-orphan zip alive.
referenced = set()
override_usage_id_str = (
str(override_usage_id) if override_usage_id is not None else None
)
for revision_name, revision in (
("published", ModuleStoreEnum.RevisionOption.published_only),
("draft", ModuleStoreEnum.RevisionOption.draft_only),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a really cool approach. Lets also document this for future reference.

):
try:
blocks = store.get_items(
course_key,
qualifiers={"category": "scorm"},
revision=revision,
)
except TypeError:
blocks = store.get_items(
course_key, qualifiers={"category": "scorm"}
)
for block in blocks:
if (
revision_name == "draft"
and override_usage_id_str is not None
and override_draft_sha1
and str(getattr(block.scope_ids, "usage_id", "")) == override_usage_id_str
):
sha1 = override_draft_sha1
else:
meta = getattr(block, "package_meta", None) or {}
sha1 = meta.get("sha1")
if sha1:
referenced.add(sha1.lower())
return referenced

@classmethod
def _scorm_sha1_from_asset_doc(cls, doc):
candidates = []
if isinstance(doc, dict):
asset_id = doc.get("_id")
if isinstance(asset_id, dict):
candidates.append(asset_id.get("name", "") or "")
elif asset_id is not None:
candidates.append(getattr(asset_id, "block_id", "") or "")
candidates.append(getattr(asset_id, "path", "") or "")
candidates.append(doc.get("displayname", "") or "")
else:
candidates.append(getattr(doc, "name", "") or "")
candidates.append(getattr(doc, "block_id", "") or "")
for name in candidates:
if not name:
continue
match = cls._SCORM_ASSET_PATTERN.match(name)
if match:
return match.group(1).lower()
return None

def _is_extracted_tree_missing(self):
"""
Return True when package_meta is set but the extracted tree is empty.

After OLX import the metadata is restored but the unpacked files are not,
so this is the signal to rehydrate from the contentstore.
"""
if not self.package_meta:
return False
return not self.path_exists(self.extract_folder_path)

def _rehydrate_from_contentstore(self):
"""
Re-extract the SCORM zip from the contentstore back into default_storage.

No-op unless the feature is enabled, package_meta is present, and the
extracted tree is missing. Returns True on a successful rehydration.
"""
if not self.contentstore_sync_enabled:
return False
if not self._is_extracted_tree_missing():
return False
data = self._fetch_zip_from_contentstore()
if not data:
return False
try:
self.extract_package(io.BytesIO(data))
logger.info(
"Rehydrated SCORM package from contentstore for usage_id=%s",
self.scope_ids.usage_id,
)
return True
except Exception:
logger.exception("Failed to rehydrate SCORM package from contentstore")
return False

@property
def storage(self):
"""
Expand Down
Loading