Skip to content

Commit

Permalink
feat: add SearchAfterMixin for ES search_after capability
Browse files: browse the repository at this point in the history
  • Loading branch information
Ali-D-Akbar committed Jan 8, 2025
1 parent d63d119 commit 2262eb3
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions course_discovery/apps/course_metadata/models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1149,6 +1149,62 @@ def search(cls, query, queryset=None):
return filtered_queryset


class SearchAfterMixin:
    """
    Represents objects that have a search method to query Elasticsearch and load by primary key.
    Supports paginated results using `search_after`.
    """

    @classmethod
    def search(cls, query, queryset=None, search_after=None, size=10000, sort_fields=None):
        """
        Queries the search index with optional pagination using `search_after`.

        The matching documents' primary keys are collected from Elasticsearch and used to
        filter `queryset`. The returned queryset is filtered with `pk__in`, so it does not
        carry the Elasticsearch sort order.

        Args:
            query (str): Elasticsearch querystring (e.g., `title:intro*`).
            queryset (models.QuerySet): Base queryset to filter, defaults to objects.all().
            search_after (list): Pointer to the last sort value of the previous page.
            size (int): Number of results per page. 10k is the default MAX_RESULT_WINDOW size.
            sort_fields (list): Fields to sort by, defaults to ['id']. An empty value is
                treated the same as None.

        Returns:
            tuple: Filtered QuerySet and `search_after` values for the next page.
                The second element is None when the wildcard shortcut is taken, when the
                Elasticsearch request fails, or when the page has no hits.
        """
        query = clean_query(query)

        if queryset is None:
            queryset = cls.objects.all()

        if query == '(*)':
            # Early-exit optimization. Wildcard searching is very expensive in Elasticsearch. And since we just
            # want everything, we don't need to actually query Elasticsearch at all.
            # Note that this path ignores `search_after`/`size`: the whole queryset is returned in one go.
            return queryset, None

        logger.info(f"Attempting Elasticsearch document search against query: {query}")
        es_document, *_ = registry.get_documents(models=(cls,))
        dsl_query = ESDSLQ('query_string', query=query, analyze_wildcard=True)

        # Treat an empty list/tuple like None. Calling `.sort()` with no fields would leave the
        # search unsorted, and the `meta.sort` cursor read below only exists on sorted hits.
        if not sort_fields:
            sort_fields = ['id']
        search = es_document.search().query(dsl_query).sort(*sort_fields).extra(size=size)

        if search_after:
            search = search.extra(search_after=search_after)

        try:
            results = search.execute()
        except RequestError as exp:
            # Best effort: a rejected request is logged and handled as an empty page.
            logger.warning('Elasticsearch request failed. Got exception: %r', exp)
            results = []

        ids = {result.pk for result in results}
        # The sort values of the last hit are the cursor for the next page.
        next_search_after = results[-1].meta.sort if results else None

        logger.info(f'{len(ids)} records extracted from Elasticsearch query "{query}"')
        filtered_queryset = queryset.filter(pk__in=ids)
        # NOTE: len() evaluates the queryset here, so the rows are fetched before returning.
        logger.info(f'Filtered queryset of length {len(filtered_queryset)} extracted against query "{query}"')
        return filtered_queryset, next_search_after

Check warning on line 1205 in course_discovery/apps/course_metadata/models.py

View check run for this annotation

Codecov / codecov/patch

course_discovery/apps/course_metadata/models.py#L1202-L1205

Added lines #L1202 - L1205 were not covered by tests


class Collaborator(TimeStampedModel):
"""
Collaborator model, defining any collaborators who helped write course content.
Expand Down

0 comments on commit 2262eb3

Please sign in to comment.