Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 32 additions & 12 deletions cms/djangoapps/modulestore_migrator/tasks.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,60 @@
"""
Tasks for the modulestore_migrator
"""
from __future__ import annotations

import mimetypes
import os
import typing as t
from dataclasses import dataclass
from datetime import datetime, timezone
from datetime import datetime, timezone, UTC
from enum import Enum
from gettext import ngettext

from celery import shared_task
from celery.utils.log import get_task_logger
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
from edx_django_utils.monitoring import set_code_owner_attribute_from_module
from lxml import etree
from lxml.etree import _ElementTree as XmlTree
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import (
BlockUsageLocator,
CourseLocator,
LibraryContainerLocator,
LibraryLocator,
LibraryLocatorV2,
LibraryUsageLocatorV2,
)
from openedx_content import api as content_api
from openedx_content.models_api import (
Collection,
Component,
ComponentType,
LearningPackage,
PublishableEntity,
PublishableEntityVersion,
)
from user_tasks.tasks import UserTask, UserTaskStatus
from xblock.core import XBlock
from xblock.plugin import PluginMissingError

from common.djangoapps.split_modulestore_django.models import SplitModulestoreCourseIndex
from common.djangoapps.util.date_utils import DEFAULT_DATE_TIME_FORMAT, strftime_localized
from openedx.core.djangoapps.content_libraries import api as libraries_api
from openedx.core.djangoapps.content_libraries.api import get_library
from openedx.core.djangoapps.content_staging import api as staging_api
from xmodule.modulestore import exceptions as modulestore_exceptions
from xmodule.modulestore.django import modulestore

from . import data, models
from .api.read_api import get_migration_blocks, get_migrations
from .constants import CONTENT_STAGING_PURPOSE_TEMPLATE
from .data import CompositionLevel, RepeatHandlingStrategy, SourceContextKey

Check failure on line 57 in cms/djangoapps/modulestore_migrator/tasks.py

View workflow job for this annotation

GitHub Actions / Quality Others (ubuntu-24.04, 3.12, 20)

ruff (I001)

cms/djangoapps/modulestore_migrator/tasks.py:4:1: I001 Import block is un-sorted or un-formatted help: Organize imports

log = get_task_logger(__name__)

Expand Down Expand Up @@ -341,6 +341,7 @@
openedx_content equivalent.
"""
migration = source_data.migration
migration_start_time = datetime.now(UTC)
migration_context = _MigrationContext(
used_component_keys=set(
LibraryUsageLocatorV2(target_library.key, block_type, block_id) # type: ignore[abstract]
Expand All @@ -367,14 +368,35 @@
repeat_handling_strategy=RepeatHandlingStrategy(migration.repeat_handling_strategy),
preserve_url_slugs=migration.preserve_url_slugs,
created_by=status.user_id,
created_at=datetime.now(timezone.utc), # noqa: UP017
created_at=migration_start_time,
)
with content_api.bulk_draft_changes_for(migration.target.id) as change_log:
with content_api.bulk_draft_changes_for(
learning_package_id=migration.target.id,
changed_by=migration_context.created_by,
changed_at=migration_start_time,
) as change_log:
root_migrated_node = _migrate_node(
context=migration_context,
source_node=root_node,
)
change_log.save()
# Publishing is not allowed inside bulk_draft_changes_for(), so publish
# everything that was modified now that the context has exited. We use the
# change log to identify which drafts to publish. If the context produced
# no records, it deletes the change log on exit (clearing its PK), in which
# case there's nothing to publish and we return None so callers don't try
# to associate the deleted change log with the migration.
if not change_log.pk:
return None, root_migrated_node
if change_log.records.exists():
drafts_to_publish = content_api.get_all_drafts(migration.target.id).filter(
entity_id__in=change_log.records.values_list("entity_id", flat=True),
)
content_api.publish_from_drafts(
migration.target.id,
draft_qset=drafts_to_publish,
published_by=migration_context.created_by,
# published_at will be later than 'migration_start_time' as _migrate_node() may have taken quite a while.
)
return change_log, root_migrated_node


Expand Down Expand Up @@ -421,6 +443,7 @@
library_key: LibraryLocatorV2,
title: str,
course_name: str | None = None,
created_by: int | None = None,
) -> Collection:
"""
Creates a collection in the given library
Expand All @@ -446,6 +469,7 @@
collection_key=modified_key,
title=f"{title}{f'_{attempt}' if attempt > 0 else ''}",
description=description,
created_by=created_by,
)
except libraries_api.LibraryCollectionAlreadyExists:
attempt += 1
Expand Down Expand Up @@ -705,6 +729,7 @@
library_key=target_library_locator,
title=legacy_root_list[i].display_name,
course_name=legacy_root_list[i].display_name if source_data.source.key.is_course else None,
created_by=user_id,
)
)
_populate_collection(user_id, migration)
Expand Down Expand Up @@ -894,11 +919,7 @@
created_by=context.created_by,
).publishable_entity_version

# Publish the container
libraries_api.publish_container_changes(
container.container_key,
context.created_by,
)
# Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure.
context.used_container_slugs.add(container.container_key.container_id)
return container_publishable_entity_version, None

Expand Down Expand Up @@ -969,11 +990,10 @@

# Create the new component version for it
component_version = libraries_api.set_library_block_olx(
target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids,
target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids, created_by=context.created_by,
)

# Publish the component
libraries_api.publish_component_changes(target_key, context.created_by)
# Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure.
context.used_component_keys.add(target_key)
return component_version.publishable_entity_version, None

Expand Down
10 changes: 6 additions & 4 deletions cms/djangoapps/modulestore_migrator/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,9 @@ def test_migrate_component_success(self):
"problem", result.componentversion.component.component_type.name
)

# The component is published
self.assertFalse(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009
# The component is left as a draft; publishing is the caller's responsibility
# (handled in _import_structure after bulk_draft_changes_for exits).
self.assertTrue(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009

def test_migrate_component_failure(self):
"""
Expand Down Expand Up @@ -802,8 +803,9 @@ def test_migrate_container_different_container_types(self):

container_version = result.containerversion
self.assertEqual(container_version.title, f"Test {block_type.title()}") # noqa: PT009
# The container is published
self.assertFalse(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long
# The container is left as a draft; publishing is the caller's
# responsibility (handled in _import_structure after bulk_draft_changes_for exits).
self.assertTrue(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long

def test_migrate_container_same_title(self):
"""
Expand Down
4 changes: 4 additions & 0 deletions openedx/core/djangoapps/content_libraries/api/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,9 @@ def set_library_block_olx(
usage_key: LibraryUsageLocatorV2,
new_olx_str: str,
paths_to_media: dict | None = None,
# The following arg can be removed after https://github.com/openedx/openedx-core/pull/573 lands
# then we can presumably just get the acting user from the bulk_draft_changes_for context
created_by: int | None = None,
) -> ComponentVersion:
"""
Replace the OLX source of the given XBlock.
Expand Down Expand Up @@ -488,6 +491,7 @@ def set_library_block_olx(
'block.xml': new_olx_media.pk,
},
created=now,
created_by=created_by,
)

return new_component_version
Expand Down
30 changes: 28 additions & 2 deletions openedx/core/djangoapps/content_libraries/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ def send_change_events_for_modified_entities(
# .. event_implemented_name: LIBRARY_BLOCK_CREATED
# .. event_type: org.openedx.content_authoring.library_block.created.v1
LIBRARY_BLOCK_CREATED.send_event(library_block=event_data)

if change.restored:
# This block was previously soft-deleted and is now un-deleted. It may have tags or collections.
# It would be best to expand the LIBRARY_BLOCK_CREATED event to include the "restored" flag, but in
# the interests of minimizing breaking event changes for now we'll just emit a
# CONTENT_OBJECT_ASSOCIATIONS_CHANGED event to ensure relevant search index records get updated.
# .. event_implemented_name: CONTENT_OBJECT_ASSOCIATIONS_CHANGED
# .. event_type: org.openedx.content_authoring.content.object.associations.changed.v1
CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event(
content_object=ContentObjectChangedData(
object_id=str(block_key),
changes=["collections", "tags"],
),
)
elif change.old_version and change.new_version is None:
# .. event_implemented_name: LIBRARY_BLOCK_DELETED
# .. event_type: org.openedx.content_authoring.library_block.deleted.v1
Expand All @@ -148,6 +162,14 @@ def send_change_events_for_modified_entities(
# .. event_implemented_name: LIBRARY_CONTAINER_CREATED
# .. event_type: org.openedx.content_authoring.content_library.container.created.v1
LIBRARY_CONTAINER_CREATED.send_event(library_container=event_data)
if change.restored:
# Same reasoning as above for LIBRARY_BLOCK_CREATED:
CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event(
content_object=ContentObjectChangedData(
object_id=str(container_key),
changes=["collections", "tags"],
),
)
elif change.old_version and change.new_version is None:
# .. event_implemented_name: LIBRARY_CONTAINER_DELETED
# .. event_type: org.openedx.content_authoring.content_library.container.deleted.v1
Expand All @@ -171,7 +193,7 @@ def send_change_events_for_modified_entities(
container_key_str=str(container_key),
old_version_id=change.old_version_id,
new_version_id=change.new_version_id,
)
).forget() # Best practice: free celery result using forget() after calling delay()
else:
log.error("Unknown publishable entity type: %s", entity)
continue
Expand Down Expand Up @@ -738,7 +760,11 @@ def dispatch_and_wait(task_fn: Task, wait_for_full_completion: bool = False, **k
result: AsyncResult = task_fn.delay(**kwargs)
# Try waiting a bit for the task to finish before we complete the request:
try:
result.get(timeout=10)
# We use `disable_sync_subtasks=False` because some of our tasks emit
# events whose handlers then spawn additional tasks which are sometimes
# synchronous. Ideal usage of celery would be to "chain" the tasks
# instead of spawning subtasks, but that would require a major refactor.
result.get(timeout=10, disable_sync_subtasks=False)
except CeleryTimeout:
pass
# This is fine! The search index is still being updated, and/or other
Expand Down
Loading