Skip to content

Commit a1a9b14

Browse files
committed
✨(backend) keep ordering from fulltext search in results
Keep ordering by score from Find API on search/ results when the fulltext search is enabled. Refactor pagination to work with a list instead of a queryset. Fix Changelog. Signed-off-by: Fabre Florian <[email protected]>
1 parent 38f99fb commit a1a9b14

File tree

8 files changed

+302
-23
lines changed

8 files changed

+302
-23
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ and this project adheres to
1515
- ✨(front) update to ui-kit v2
1616
- 🌐 add dutch translation
1717
- ✨(back) add resource server routes
18+
- ✨(backend) add async indexation of items on save (or access save)
19+
- ✨(backend) add throttle mechanism to limit indexation job
20+
- ✨(api) modify items/search endpoint to use indexed items in Find
1821

1922
### Fixed
2023

src/backend/core/api/viewsets.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,7 @@ def tree(self, request, pk=None):
961961
)
962962

963963
# pylint: disable-next=too-many-arguments,too-many-positional-arguments
964-
def _fulltext_search_queryset(self, queryset, indexer, request, text):
964+
def _fulltext_search(self, queryset, indexer, request, text):
965965
"""
966966
Returns the response (paginated when applicable) listing the items matched by the fulltext search of Find, kept in the order returned by Find
967967
"""
@@ -975,10 +975,27 @@ def _fulltext_search_queryset(self, queryset, indexer, request, text):
975975
token=token,
976976
visited=get_visited_items_ids_of(queryset, user),
977977
page=1,
978-
page_size=200,
978+
page_size=100,
979979
)
980980

981-
return queryset.filter(pk__in=results)
981+
queryset = queryset.filter(pk__in=results)
982+
queryset = self.annotate_user_roles(queryset)
983+
queryset = self.annotate_is_favorite(queryset)
984+
985+
files_by_uuid = {str(d.pk): d for d in queryset}
986+
ordered_files = [files_by_uuid[id] for id in results if id in files_by_uuid]
987+
988+
page = self.paginate_queryset(ordered_files)
989+
990+
if page is not None:
991+
items = self._compute_parents(page)
992+
serializer = self.get_serializer(items, many=True)
993+
result = self.get_paginated_response(serializer.data)
994+
return result
995+
996+
items = self._compute_parents(ordered_files)
997+
serializer = self.get_serializer(items, many=True)
998+
return drf.response.Response(serializer.data)
982999

9831000
@drf.decorators.action(
9841001
detail=False,
@@ -1036,7 +1053,7 @@ def search(self, request, *args, **kwargs):
10361053
if indexer:
10371054
# When the indexer is configured pop "title" from queryset search and use
10381055
# fulltext results instead.
1039-
queryset = self._fulltext_search_queryset(
1056+
return self._fulltext_search(
10401057
queryset,
10411058
indexer,
10421059
request,

src/backend/core/management/commands/index.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def handle(self, *args, **options):
2222
indexer = get_file_indexer()
2323

2424
if not indexer:
25-
raise CommandError("The indexer is not enabled or properly configured.")
25+
logger.warning("The indexer is not enabled or properly configured.")
26+
return
2627

2728
logger.info("Starting to regenerate Find index...")
2829
start = time.perf_counter()

src/backend/core/services/search_indexers.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_ancestor_to_descendants_map(items):
4444
"""
4545
Given a list of items, return a mapping of ancestor_path -> set of descendant_paths.
4646
47-
Each path is assumed to use materialized path format with fixed-length segments.
47+
Each path is assumed to be a list of uids.
4848
4949
Args:
5050
items (list of Item): List of items.
@@ -55,11 +55,11 @@ def get_ancestor_to_descendants_map(items):
5555
ancestor_map = defaultdict(set)
5656

5757
for item in items:
58-
path = str(item.path)
59-
ancestors = path.split(".")
58+
fullpath = str(item.path)
59+
ancestors = item.path
6060
for i in range(1, len(ancestors) + 1):
6161
ancestor = ".".join(ancestors[:i])
62-
ancestor_map[ancestor].add(path)
62+
ancestor_map[ancestor].add(fullpath)
6363

6464
return ancestor_map
6565

@@ -187,10 +187,9 @@ def index(self, queryset=None):
187187

188188
last_id = items_batch[-1].id
189189
accesses_by_item_path = get_batch_accesses_by_users_and_teams(items_batch)
190-
serialize = self.serialize_item
191190

192191
serialized_batch = [
193-
serialize(item, accesses_by_item_path) for item in items_batch
192+
self.serialize_item(item, accesses_by_item_path) for item in items_batch
194193
]
195194

196195
self.push(serialized_batch)
@@ -242,8 +241,6 @@ def search(self, text, token, visited=(), page=1, page_size=50):
242241
"services": ["drive"],
243242
"page_number": page,
244243
"page_size": page_size,
245-
"order_by": "updated_at",
246-
"order_direction": "desc",
247244
},
248245
token=token,
249246
)
@@ -317,7 +314,7 @@ def serialize_item(self, item, accesses):
317314
"users": list(accesses.get(doc_path, {}).get("users", set())),
318315
"groups": list(accesses.get(doc_path, {}).get("teams", set())),
319316
"reach": str(item.link_reach),
320-
"size": item.size,
317+
"size": item.size or 0,
321318
"is_active": not is_deleted,
322319
}
323320

src/backend/core/tests/commands/test_index.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
Unit test for `index` command.
33
"""
44

5+
import logging
56
from operator import itemgetter
67
from unittest import mock
78

8-
from django.core.management import CommandError, call_command
9+
from django.core.management import call_command
910
from django.db import transaction
1011

1112
import pytest
@@ -67,11 +68,21 @@ def test_index():
6768

6869
@pytest.mark.django_db
6970
@pytest.mark.usefixtures("indexer_settings")
70-
def test_index_improperly_configured(indexer_settings):
71+
def test_index_improperly_configured(indexer_settings, caplog):
7172
"""The command should log a warning and index nothing if the indexer is not configured"""
7273
indexer_settings.SEARCH_INDEXER_CLASS = None
7374

74-
with pytest.raises(CommandError) as err:
75-
call_command("index")
75+
with transaction.atomic():
76+
factories.ItemFactory(
77+
upload_bytes=b"This is a text file content",
78+
mimetype="text/plain",
79+
type=models.ItemTypeChoices.FILE,
80+
)
81+
82+
with caplog.at_level(logging.WARN):
83+
with mock.patch.object(SearchIndexer, "push") as mock_push:
84+
call_command("index")
85+
86+
mock_push.assert_not_called()
7687

77-
assert str(err.value) == "The indexer is not enabled or properly configured."
88+
assert "The indexer is not enabled or properly configured." in caplog.messages

0 commit comments

Comments
 (0)