26 changes: 26 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
@@ -53,6 +53,8 @@
searchable_doc_tags,
searchable_doc_tags_for_collection,
searchable_doc_units,
searchable_doc_subsections,
searchable_doc_sections,
)

log = logging.getLogger(__name__)
@@ -497,6 +499,12 @@ def index_container_batch(batch, num_done, library_key) -> int:
doc = searchable_doc_for_container(container_key)
doc.update(searchable_doc_tags(container_key))
doc.update(searchable_doc_collections(container_key))
container_type = lib_api.ContainerType(container_key.container_type)
match container_type:
case lib_api.ContainerType.Unit:
doc.update(searchable_doc_subsections(container_key))
case lib_api.ContainerType.Subsection:
doc.update(searchable_doc_sections(container_key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing container {container.key}: {err}")
@@ -864,6 +872,24 @@ def upsert_item_units_index_docs(opaque_key: OpaqueKey):
_update_index_docs([doc])


def upsert_item_subsections_index_docs(opaque_key: OpaqueKey):
"""
Updates the subsections data in documents for the given Course/Library block
"""
doc = {Fields.id: meili_id_from_opaque_key(opaque_key)}
doc.update(searchable_doc_subsections(opaque_key))
_update_index_docs([doc])


def upsert_item_sections_index_docs(opaque_key: OpaqueKey):
"""
Updates the sections data in documents for the given Course/Library block
"""
doc = {Fields.id: meili_id_from_opaque_key(opaque_key)}
doc.update(searchable_doc_sections(opaque_key))
_update_index_docs([doc])


def upsert_collection_tags_index_docs(collection_key: LibraryCollectionLocator):
"""
Updates the tags data in documents for the given library collection
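The dispatch added to index_container_batch() above only attaches parent-container data where the library hierarchy provides it: units gain a "subsections" field, subsections gain a "sections" field, and sections get no extra field. The following is a minimal, self-contained sketch of that dispatch; the ContainerType enum and the hard-coded field shapes are stand-ins for lib_api.ContainerType and the searchable_doc_subsections / searchable_doc_sections helpers, not the real implementations.

from enum import Enum


class ContainerType(Enum):
    """Stand-in for lib_api.ContainerType (assumed members: Unit, Subsection, Section)."""
    Unit = "unit"
    Subsection = "subsection"
    Section = "section"


def extra_parent_fields(container_type: ContainerType) -> dict:
    """Mirror the match statement in index_container_batch(): which parent field each container doc gets."""
    match container_type:
        case ContainerType.Unit:
            # stands in for doc.update(searchable_doc_subsections(container_key))
            return {"subsections": {"display_name": [], "key": []}}
        case ContainerType.Subsection:
            # stands in for doc.update(searchable_doc_sections(container_key))
            return {"sections": {"display_name": [], "key": []}}
        case _:
            # sections (and anything else) get no parent-container field
            return {}


print(extra_parent_fields(ContainerType.Unit))     # {'subsections': {'display_name': [], 'key': []}}
print(extra_parent_fields(ContainerType.Section))  # {}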
73 changes: 54 additions & 19 deletions openedx/core/djangoapps/content/search/documents.py
@@ -67,10 +67,15 @@ class Fields:
collections = "collections"
collections_display_name = "display_name"
collections_key = "key"
# Units (dictionary) that this object belongs to.
# Containers (dictionaries) that this object belongs to.
units = "units"
units_display_name = "display_name"
units_key = "key"
subsections = "subsections"
sections = "sections"
containers_display_name = "display_name"
containers_key = "key"

sections_display_name = "display_name"
sections_key = "key"

# The "content" field is a dictionary of arbitrary data, depending on the block_type.
# It comes from each XBlock's index_dictionary() method (if present) plus some processing.
@@ -97,6 +102,8 @@ class Fields:

# List of children keys
child_usage_keys = "child_usage_keys"
# List of children display names
child_display_names = "child_display_names"

# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
@@ -376,9 +383,9 @@ def _collections_for_content_object(object_id: OpaqueKey) -> dict:
return result


def _units_for_content_object(object_id: OpaqueKey) -> dict:
def _containers_for_content_object(object_id: OpaqueKey, container_type: str) -> dict:
"""
Given an XBlock, course, library, etc., get the units for its index doc.
Given an XBlock, course, library, etc., get the containers that it is part of for its index doc.

e.g. for something in Units "UNIT_A" and "UNIT_B", this would return:
{
@@ -388,38 +395,39 @@ def _units_for_content_object(object_id: OpaqueKey) -> dict:
}
}

If the object is in no collections, returns:
If the object is in no containers, returns:
{
"collections": {
"units": {
"display_name": [],
"key": [],
},
}
"""
container_field = getattr(Fields, container_type)
result = {
Fields.units: {
Fields.units_display_name: [],
Fields.units_key: [],
container_field: {
Fields.containers_display_name: [],
Fields.containers_key: [],
}
}

# Gather the containers associated with this object
units = None
containers = None
try:
if isinstance(object_id, UsageKey):
units = lib_api.get_containers_contains_component(object_id)
if isinstance(object_id, OpaqueKey):
containers = lib_api.get_containers_contains_item(object_id)
else:
log.warning(f"Unexpected key type for {object_id}")

except ObjectDoesNotExist:
log.warning(f"No library item found for {object_id}")

if not units:
if not containers:
return result

for unit in units:
result[Fields.units][Fields.units_display_name].append(unit.display_name)
result[Fields.units][Fields.units_key].append(str(unit.container_key))
for container in containers:
result[container_field][Fields.containers_display_name].append(container.display_name)
result[container_field][Fields.containers_key].append(str(container.container_key))

return result

@@ -521,7 +529,29 @@ def searchable_doc_units(opaque_key: OpaqueKey) -> dict:
like Meilisearch or Elasticsearch, with the units data for the given content object.
"""
doc = searchable_doc_for_key(opaque_key)
doc.update(_units_for_content_object(opaque_key))
doc.update(_containers_for_content_object(opaque_key, "units"))

return doc


def searchable_doc_sections(opaque_key: OpaqueKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the sections data for the given content object.
"""
doc = searchable_doc_for_key(opaque_key)
doc.update(_containers_for_content_object(opaque_key, "sections"))

return doc


def searchable_doc_subsections(opaque_key: OpaqueKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the subsections data for the given content object.
"""
doc = searchable_doc_for_key(opaque_key)
doc.update(_containers_for_content_object(opaque_key, "subsections"))

return doc

@@ -674,13 +704,17 @@ def get_child_keys(children) -> list[str]:
for child in children
]

def get_child_names(children) -> list[str]:
return [child.display_name for child in children]

doc.update({
Fields.display_name: container.display_name,
Fields.created: container.created.timestamp(),
Fields.modified: container.modified.timestamp(),
Fields.num_children: len(draft_children),
Fields.content: {
Fields.child_usage_keys: get_child_keys(draft_children)
Fields.child_usage_keys: get_child_keys(draft_children),
Fields.child_display_names: get_child_names(draft_children),
},
Fields.publish_status: publish_status,
Fields.last_published: container.last_published.timestamp() if container.last_published else None,
Expand All @@ -699,6 +733,7 @@ def get_child_keys(children) -> list[str]:
Fields.published_num_children: len(published_children),
Fields.published_content: {
Fields.child_usage_keys: get_child_keys(published_children),
Fields.child_display_names: get_child_names(published_children),
},
}

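All of the parent-container fields built by _containers_for_content_object() share one shape: a single key named after the parent container type ("units", "subsections", or "sections"), holding parallel lists of display names and keys. The sketch below reproduces that shape under that assumption; the helper name and locator strings are illustrative only, not values from the real code.

def parent_container_fields(field_name: str, parents: list[tuple[str, str]]) -> dict:
    """Build the {field_name: {"display_name": [...], "key": [...]}} structure described above."""
    result = {field_name: {"display_name": [], "key": []}}
    for display_name, key in parents:
        result[field_name]["display_name"].append(display_name)
        result[field_name]["key"].append(key)
    return result


# A unit that belongs to two subsections (keys are illustrative, not real locators):
doc = {"id": "lctorg1libunitunit-1-e4527f7c"}
doc.update(parent_container_fields(
    "subsections",
    [
        ("Subsection 1", "lct:org1:lib:subsection:subsection-1"),
        ("Subsection 2", "lct:org1:lib:subsection:subsection-2"),
    ],
))
assert doc["subsections"]["display_name"] == ["Subsection 1", "Subsection 2"]
assert doc["subsections"]["key"] == [
    "lct:org1:lib:subsection:subsection-1",
    "lct:org1:lib:subsection:subsection-2",
]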
6 changes: 6 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
@@ -47,6 +47,8 @@
upsert_collection_tags_index_docs,
upsert_item_collections_index_docs,
upsert_item_units_index_docs,
upsert_item_sections_index_docs,
upsert_item_subsections_index_docs,
)
from .tasks import (
delete_library_block_index_doc,
@@ -266,6 +268,10 @@ def content_object_associations_changed_handler(**kwargs) -> None:
upsert_item_collections_index_docs(opaque_key)
if not content_object.changes or "units" in content_object.changes:
upsert_item_units_index_docs(opaque_key)
if not content_object.changes or "sections" in content_object.changes:
upsert_item_sections_index_docs(opaque_key)
if not content_object.changes or "subsections" in content_object.changes:
upsert_item_subsections_index_docs(opaque_key)


@receiver(LIBRARY_CONTAINER_CREATED)
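The handler additions follow the existing pattern: an empty changes list on the event payload means "refresh every association", otherwise only the named association types are re-upserted. The sketch below mimics that gating with a stand-in payload class; the real payload class is defined in openedx_events, and the helper names are only reported as strings here, not imported.

from dataclasses import dataclass, field


@dataclass
class ContentObjectChanged:
    """Stand-in for the event payload; the real class lives in openedx_events."""
    object_id: str
    changes: list[str] = field(default_factory=list)


def helpers_to_run(content_object: ContentObjectChanged) -> list[str]:
    """Return which upsert helpers the handler would call; an empty changes list means "refresh all"."""
    triggered = []
    for change, helper in [
        ("collections", "upsert_item_collections_index_docs"),
        ("units", "upsert_item_units_index_docs"),
        ("sections", "upsert_item_sections_index_docs"),
        ("subsections", "upsert_item_subsections_index_docs"),
    ]:
        if not content_object.changes or change in content_object.changes:
            triggered.append(helper)
    return triggered


print(helpers_to_run(ContentObjectChanged("lb:org1:lib:problem:p1", changes=["subsections"])))
# ['upsert_item_subsections_index_docs']
print(helpers_to_run(ContentObjectChanged("lb:org1:lib:problem:p1")))
# all four helpers: no specific changes listed, so every association is refreshed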
71 changes: 56 additions & 15 deletions openedx/core/djangoapps/content/search/tests/test_api.py
@@ -256,7 +256,10 @@ def setUp(self) -> None:
"display_name": "Unit 1",
# description is not set for containers
"num_children": 0,
"content": {"child_usage_keys": []},
"content": {
"child_usage_keys": [],
"child_display_names": [],
},
"publish_status": "never",
"context_key": "lib:org1:lib",
"org": "org1",
@@ -276,7 +279,10 @@ def setUp(self) -> None:
"display_name": "Subsection 1",
# description is not set for containers
"num_children": 0,
"content": {"child_usage_keys": []},
"content": {
"child_usage_keys": [],
"child_display_names": [],
},
"publish_status": "never",
"context_key": "lib:org1:lib",
"org": "org1",
@@ -296,7 +302,10 @@ def setUp(self) -> None:
"display_name": "Section 1",
# description is not set for containers
"num_children": 0,
"content": {"child_usage_keys": []},
"content": {
"child_usage_keys": [],
"child_display_names": [],
},
"publish_status": "never",
"context_key": "lib:org1:lib",
"org": "org1",
@@ -336,9 +345,11 @@ def test_reindex_meilisearch(self, mock_meilisearch) -> None:
doc_unit = copy.deepcopy(self.unit_dict)
doc_unit["tags"] = {}
doc_unit["collections"] = {'display_name': [], 'key': []}
doc_unit["subsections"] = {"display_name": [], "key": []}
doc_subsection = copy.deepcopy(self.subsection_dict)
doc_subsection["tags"] = {}
doc_subsection["collections"] = {'display_name': [], 'key': []}
doc_subsection["sections"] = {'display_name': [], 'key': []}
doc_section = copy.deepcopy(self.section_dict)
doc_section["tags"] = {}
doc_section["collections"] = {'display_name': [], 'key': []}
@@ -376,9 +387,11 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch) -> None:
doc_unit = copy.deepcopy(self.unit_dict)
doc_unit["tags"] = {}
doc_unit["collections"] = {"display_name": [], "key": []}
doc_unit["subsections"] = {"display_name": [], "key": []}
doc_subsection = copy.deepcopy(self.subsection_dict)
doc_subsection["tags"] = {}
doc_subsection["collections"] = {'display_name': [], 'key': []}
doc_subsection["sections"] = {'display_name': [], 'key': []}
doc_section = copy.deepcopy(self.section_dict)
doc_section["tags"] = {}
doc_section["collections"] = {'display_name': [], 'key': []}
@@ -983,14 +996,21 @@ def test_delete_index_container(self, container_type, mock_meilisearch) -> None:
container_dict["id"],
)

@ddt.data(
"unit",
"subsection",
"section",
)
@override_settings(MEILISEARCH_ENABLED=True)
def test_index_library_container_metadata(self, mock_meilisearch) -> None:
def test_index_library_container_metadata(self, container_type, mock_meilisearch) -> None:
"""
Test indexing a Library Container.
"""
api.upsert_library_container_index_doc(self.unit.container_key)
container = getattr(self, container_type)
container_dict = getattr(self, f"{container_type}_dict")
api.upsert_library_container_index_doc(container.container_key)

mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.unit_dict])
mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([container_dict])

@ddt.data(
("unit", "lctorg1libunitunit-1-e4527f7c"),
@@ -1050,7 +1070,10 @@ def test_block_in_units(self, mock_meilisearch) -> None:
new_unit_dict = {
**self.unit_dict,
"num_children": 1,
'content': {'child_usage_keys': [self.doc_problem1["usage_key"]]}
'content': {
'child_usage_keys': [self.doc_problem1["usage_key"]],
'child_display_names': [self.doc_problem1["display_name"]],
}
}

assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
@@ -1071,16 +1094,25 @@ def test_units_in_subsection(self, mock_meilisearch) -> None:
None,
)

# TODO verify subsections in units

doc_block_with_subsections = {
"id": self.unit_dict["id"],
"subsections": {
"display_name": [self.subsection.display_name],
"key": [self.subsection_key],
},
}
new_subsection_dict = {
**self.subsection_dict,
"num_children": 1,
'content': {'child_usage_keys': [self.unit_key]}
'content': {
'child_usage_keys': [self.unit_key],
'child_display_names': [self.unit.display_name]
}
}
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 1
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_block_with_subsections]),
call([new_subsection_dict]),
],
any_order=True,
@@ -1095,16 +1127,25 @@ def test_section_in_subsections(self, mock_meilisearch) -> None:
None,
)

# TODO verify section in subsections

doc_block_with_sections = {
"id": self.subsection_dict["id"],
"sections": {
"display_name": [self.section.display_name],
"key": [self.section_key],
},
}
new_section_dict = {
**self.section_dict,
"num_children": 1,
'content': {'child_usage_keys': [self.subsection_key]}
'content': {
'child_usage_keys': [self.subsection_key],
'child_display_names': [self.subsection.display_name],
}
}
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 1
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_block_with_sections]),
call([new_section_dict]),
],
any_order=True,
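The updated container tests expect two index updates per parent/child change: the child document gains the parent-container field, and the parent document gains the child usage keys and display names. The snippet below spells out that pair of updates for the subsection/unit case; it mirrors the assertions above rather than calling any search code, and all identifiers are illustrative.

unit_key = "lct:org1:lib:unit:unit-1"                    # illustrative locator string
subsection_key = "lct:org1:lib:subsection:subsection-1"  # illustrative locator string

# Update pushed for the child (unit) document: only its id plus the new parent field.
doc_unit_update = {
    "id": "lctorg1libunitunit-1-e4527f7c",  # what meili_id_from_opaque_key(unit_key) might produce
    "subsections": {
        "display_name": ["Subsection 1"],
        "key": [subsection_key],
    },
}

# Update pushed for the parent (subsection) document, abbreviated to the fields
# the new assertions cover (the real test spreads the full subsection_dict).
doc_subsection_update = {
    "num_children": 1,
    "content": {
        "child_usage_keys": [unit_key],
        "child_display_names": ["Unit 1"],
    },
}

assert doc_unit_update["subsections"]["key"] == [subsection_key]
assert doc_subsection_update["content"]["child_display_names"] == ["Unit 1"]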