Skip to content

Commit e3f3a93

Browse files
authored
Merge pull request #213 from nexB/update-search
Remove search_vector field and index and create SearchVector per request
2 parents 168a86d + 3814375 commit e3f3a93

File tree

7 files changed

+263
-114
lines changed

7 files changed

+263
-114
lines changed

Diff for: packagedb/api.py

+57-75
Original file line numberDiff line numberDiff line change
@@ -8,49 +8,37 @@
88
#
99

1010
import logging
11+
12+
import django_filters
1113
from django.core.exceptions import ValidationError
12-
from django.db.models import OuterRef
13-
from django.db.models import Q
14-
from django.db.models import Subquery
14+
from django.db.models import OuterRef, Q, Subquery
15+
from django_filters.filters import Filter, OrderingFilter
1516
from django_filters.rest_framework import FilterSet
16-
from django_filters.filters import Filter
17-
from django_filters.filters import OrderingFilter
18-
import django_filters
19-
2017
from packageurl import PackageURL
2118
from packageurl.contrib.django.utils import purl_to_lookups
22-
from rest_framework import status
23-
from rest_framework import viewsets
19+
from rest_framework import status, viewsets
2420
from rest_framework.decorators import action
2521
from rest_framework.response import Response
22+
from univers.version_constraint import InvalidConstraintsError
23+
from univers.version_range import RANGE_CLASS_BY_SCHEMES, VersionRange
24+
from univers.versions import InvalidVersion
2625

27-
from matchcode.api import MultipleCharFilter
28-
from matchcode.api import MultipleCharInFilter
26+
from matchcode.api import MultipleCharFilter, MultipleCharInFilter
2927
# UnusedImport here!
3028
# But importing the mappers and visitors module triggers routes registration
3129
from minecode import visitors # NOQA
3230
from minecode import priority_router
33-
from minecode.models import PriorityResourceURI
34-
from minecode.models import ScannableURI
31+
from minecode.models import PriorityResourceURI, ScannableURI
3532
from minecode.route import NoRouteAvailable
36-
from packagedb.models import Package
37-
from packagedb.models import PackageContentType
38-
from packagedb.models import PackageSet
39-
from packagedb.models import Resource
40-
from packagedb.serializers import DependentPackageSerializer
41-
from packagedb.serializers import ResourceAPISerializer
42-
from packagedb.serializers import PackageAPISerializer
43-
from packagedb.serializers import PackageSetAPISerializer
44-
from packagedb.serializers import PartySerializer
45-
from packagedb.package_managers import get_api_package_name
46-
from packagedb.package_managers import get_version_fetcher
47-
from packagedb.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE
48-
49-
from univers import versions
50-
from univers.version_range import RANGE_CLASS_BY_SCHEMES
51-
from univers.versions import InvalidVersion
52-
from univers.version_range import VersionRange
53-
from univers.version_constraint import InvalidConstraintsError
33+
from packagedb.filters import PackageSearchFilter
34+
from packagedb.models import Package, PackageContentType, PackageSet, Resource
35+
from packagedb.package_managers import (VERSION_API_CLASSES_BY_PACKAGE_TYPE,
36+
get_api_package_name,
37+
get_version_fetcher)
38+
from packagedb.serializers import (DependentPackageSerializer,
39+
PackageAPISerializer,
40+
PackageSetAPISerializer, PartySerializer,
41+
ResourceAPISerializer)
5442

5543
logger = logging.getLogger(__name__)
5644

@@ -84,21 +72,21 @@ def filter(self, qs, value):
8472
return qs.filter(package=package)
8573

8674

87-
class ResourceFilter(FilterSet):
75+
class ResourceFilterSet(FilterSet):
8876
package = PackageResourceUUIDFilter(label='Package UUID')
8977
purl = PackageResourcePurlFilter(label='Package pURL')
9078
md5 = MultipleCharInFilter(
91-
help_text="Exact MD5. Multi-value supported.",
79+
help_text='Exact MD5. Multi-value supported.',
9280
)
9381
sha1 = MultipleCharInFilter(
94-
help_text="Exact SHA1. Multi-value supported.",
82+
help_text='Exact SHA1. Multi-value supported.',
9583
)
9684

9785

9886
class ResourceViewSet(viewsets.ReadOnlyModelViewSet):
9987
queryset = Resource.objects.select_related('package')
10088
serializer_class = ResourceAPISerializer
101-
filterset_class = ResourceFilter
89+
filterset_class = ResourceFilterSet
10290
lookup_field = 'sha1'
10391

10492
@action(detail=False, methods=['post'])
@@ -169,70 +157,63 @@ def filter_by_checksums(self, request, *args, **kwargs):
169157
return self.get_paginated_response(serializer.data)
170158

171159

172-
class MultiplePackageURLFilter(Filter):
160+
class MultiplePackageURLFilter(MultipleCharFilter):
173161
def filter(self, qs, value):
174-
try:
175-
request = self.parent.request
176-
except AttributeError:
177-
return None
162+
if not value:
163+
# Even though not a noop, no point filtering if empty.
164+
return qs
178165

179-
values = request.GET.getlist(self.field_name)
180-
if all(v == '' for v in values):
166+
if self.is_noop(qs, value):
181167
return qs
182168

183-
values = {item for item in values}
169+
if all(v == '' for v in value):
170+
return qs
184171

185172
q = Q()
186-
for val in values:
173+
for val in value:
187174
lookups = purl_to_lookups(val)
188175
if not lookups:
189176
continue
190-
191177
q.add(Q(**lookups), Q.OR)
192178

193-
if not q:
194-
return qs.none()
195-
196-
return qs.filter(q)
197-
198-
199-
class PackageSearchFilter(Filter):
200-
def filter(self, qs, value):
201-
try:
202-
request = self.parent.request
203-
except AttributeError:
204-
return None
205-
206-
if not value:
207-
return qs
179+
if q:
180+
qs = self.get_method(qs)(q)
181+
else:
182+
qs = qs.none()
208183

209-
return Package.objects.filter(search_vector=value)
184+
return qs.distinct() if self.distinct else qs
210185

211186

212-
class PackageFilter(FilterSet):
187+
class PackageFilterSet(FilterSet):
213188
type = django_filters.CharFilter(
214-
lookup_expr="iexact",
215-
help_text="Exact type. (case-insensitive)",
189+
lookup_expr='iexact',
190+
help_text='Exact type. (case-insensitive)',
216191
)
217192
namespace = django_filters.CharFilter(
218-
lookup_expr="iexact",
219-
help_text="Exact namespace. (case-insensitive)",
193+
lookup_expr='iexact',
194+
help_text='Exact namespace. (case-insensitive)',
220195
)
221196
name = MultipleCharFilter(
222-
lookup_expr="iexact",
223-
help_text="Exact name. Multi-value supported. (case-insensitive)",
197+
lookup_expr='iexact',
198+
help_text='Exact name. Multi-value supported. (case-insensitive)',
224199
)
225200
version = MultipleCharFilter(
226-
help_text="Exact version. Multi-value supported.",
201+
help_text='Exact version. Multi-value supported.',
227202
)
228203
md5 = MultipleCharInFilter(
229-
help_text="Exact MD5. Multi-value supported.",
204+
help_text='Exact MD5. Multi-value supported.',
230205
)
231206
sha1 = MultipleCharInFilter(
232-
help_text="Exact SHA1. Multi-value supported.",
207+
help_text='Exact SHA1. Multi-value supported.',
208+
)
209+
purl = MultiplePackageURLFilter(
210+
label='Package URL',
211+
)
212+
search = PackageSearchFilter(
213+
label='Search',
214+
field_name='name',
215+
lookup_expr='icontains',
233216
)
234-
purl = MultiplePackageURLFilter(label='Package URL')
235-
search = PackageSearchFilter(label='Search')
236217

237218
sort = OrderingFilter(fields=[
238219
'type',
@@ -250,6 +231,7 @@ class PackageFilter(FilterSet):
250231
class Meta:
251232
model = Package
252233
fields = (
234+
'search',
253235
'type',
254236
'namespace',
255237
'name',
@@ -270,7 +252,7 @@ class PackageViewSet(viewsets.ReadOnlyModelViewSet):
270252
queryset = Package.objects.prefetch_related('dependencies', 'parties')
271253
serializer_class = PackageAPISerializer
272254
lookup_field = 'uuid'
273-
filterset_class = PackageFilter
255+
filterset_class = PackageFilterSet
274256

275257
@action(detail=True, methods=['get'])
276258
def latest_version(self, request, *args, **kwargs):
@@ -429,7 +411,7 @@ def index_packages(self, request, *args, **kwargs):
429411
packages = request.data.get('packages') or []
430412
queued_packages = []
431413
unqueued_packages = []
432-
supported_ecosystems = ["maven", "npm"]
414+
supported_ecosystems = ['maven', 'npm']
433415

434416
unique_purls, unsupported_packages, unsupported_vers = get_resolved_purls(packages, supported_ecosystems)
435417

Diff for: packagedb/filters.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import shlex
11+
12+
import django_filters
13+
from django.core.exceptions import FieldError
14+
from django.db.models import Q
15+
16+
# The function and classes in this file are from https://github.com/nexB/scancode.io/blob/main/scanpipe/filters.py
17+
18+
19+
def parse_query_string_to_lookups(query_string, default_lookup_expr, default_field):
    """
    Parse a query string and convert it into queryset lookups using Q objects.

    The ``query_string`` is split into shell-like terms, so a quoted phrase is
    kept as a single term. Each term is either:

    - a bare value, matched on ``default_field`` using ``default_lookup_expr``
    - a ``field:value`` pair, where the field name may end with a lookup
      symbol (``=``, ``^``, ``$``, ``~``, ``>``, ``<``) selecting the lookup
      expression, and may start with ``-`` to negate the term.

    All terms are combined with AND. An empty ``query_string`` returns an
    empty ``Q`` object. A ``query_string`` with unbalanced quotes is split on
    whitespace instead of raising ``ValueError``.
    """
    lookups = Q()
    if not query_string:
        return lookups

    lookup_types = {
        "=": "iexact",
        "^": "istartswith",
        "$": "iendswith",
        "~": "icontains",
        ">": "gt",
        "<": "lt",
    }
    lookup_symbols = tuple(lookup_types)

    try:
        terms = shlex.split(query_string)
    except ValueError:
        # shlex raises ValueError on unbalanced quotes (e.g. `name:"foo`).
        # A malformed search string is user input, not a programming error:
        # degrade to a plain whitespace split rather than crash the request.
        terms = query_string.split()

    for term in terms:
        lookup_expr = default_lookup_expr
        negated = False

        if ":" in term:
            field_name, search_value = term.split(":", maxsplit=1)

            # The lookup symbol is a suffix: strip it before checking for the
            # negation prefix so that `-name=:foo` is handled correctly.
            if field_name.endswith(lookup_symbols):
                lookup_expr = lookup_types[field_name[-1]]
                field_name = field_name[:-1]

            if field_name.startswith("-"):
                field_name = field_name[1:]
                negated = True
        else:
            search_value = term
            field_name = default_field

        lookups &= Q(**{f"{field_name}__{lookup_expr}": search_value}, _negated=negated)

    return lookups
55+
56+
57+
class QuerySearchFilter(django_filters.CharFilter):
    """
    Character filter that understands the advanced search query syntax.

    The submitted value is translated to ``Q`` lookups by
    ``parse_query_string_to_lookups``, using this filter's ``lookup_expr`` and
    ``field_name`` as the defaults for terms that do not name a field.
    """

    def filter(self, qs, value):
        """
        Return ``qs`` filtered by the lookups parsed from ``value``.

        An empty ``value`` returns ``qs`` unchanged. A query that triggers a
        ``FieldError`` when applied returns an empty queryset.
        """
        if not value:
            return qs

        parsed_lookups = parse_query_string_to_lookups(
            query_string=value,
            default_lookup_expr=self.lookup_expr,
            default_field=self.field_name,
        )

        try:
            filtered = qs.filter(parsed_lookups)
        except FieldError:
            filtered = qs.none()
        return filtered
74+
75+
76+
class PackageSearchFilter(QuerySearchFilter):
    """
    Search filter for packages.

    The value is dispatched, in order, to: a Package URL lookup when it starts
    with ``pkg:``, the query-syntax search of the parent class when it
    contains a ``:`` but no ``://``, and otherwise a match of the raw value
    against several package fields.
    """

    def filter(self, qs, value):
        """Return ``qs`` filtered according to the search ``value``."""
        if not value:
            return qs

        looks_like_purl = value.startswith("pkg:")
        if looks_like_purl:
            return qs.for_package_url(value)

        # A `:` that is not part of a URL scheme separator is treated as the
        # `field:value` query syntax.
        uses_query_syntax = ":" in value and "://" not in value
        if uses_query_syntax:
            return super().filter(qs, value)

        # Plain text: match the value on any one of these fields.
        expr = self.lookup_expr
        any_field_matches = Q()
        for name in ("type", "namespace", "name", "version", "download_url"):
            any_field_matches = any_field_matches | Q(**{f"{name}__{expr}": value})

        return qs.filter(any_field_matches)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Generated by Django 4.1.2 on 2023-11-07 00:32
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
    """Remove the ``search_vector`` field of ``package`` and its GIN index."""

    # This migration is applied on top of migration 0079 of ``packagedb``.
    dependencies = [
        ("packagedb", "0079_alter_package_name_alter_package_namespace_and_more"),
    ]

    # The index is removed first, then the field.
    operations = [
        migrations.RemoveIndex(model_name="package", name="packagedb_p_search__8d33bb_gin"),
        migrations.RemoveField(model_name="package", name="search_vector"),
    ]

Diff for: packagedb/models.py

-4
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,6 @@ class Package(
531531
),
532532
)
533533

534-
search_vector = SearchVectorField(null=True)
535-
536534
objects = PackageQuerySet.as_manager()
537535

538536
# TODO: Think about ordering, unique together, indexes, etc.
@@ -550,8 +548,6 @@ class Meta:
550548
)
551549
]
552550
indexes = [
553-
# GIN index for search performance increase
554-
GinIndex(fields=['search_vector']),
555551
# multicolumn index for search on a whole `purl`
556552
models.Index(fields=[
557553
'type', 'namespace', 'name', 'version', 'qualifiers', 'subpath'

Diff for: packagedb/signals.py

-21
This file was deleted.

0 commit comments

Comments
 (0)