Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

508 collect api default sort #513

Merged
merged 5 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Next Release
`/api/scan_queue/index_package_scan/` endpoint.
`/api/scan_queue/update_status/` is now an action on a ScannableURI.
https://github.com/aboutcode-org/purldb/issues/504

- The packages collected via the `/api/collect/` endpoint can be ordered in an
ascending or descending fashion on fields using the ``sort`` query parameter.
This parameter takes in the same fields as ``sort`` from `/api/packages/`.
https://github.com/aboutcode-org/purldb/issues/508

v5.0.1
---------
Expand Down
26 changes: 26 additions & 0 deletions docs/source/purldb/rest_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,25 @@ For example:

curl -X GET "$api_url?$payload" -H "$content_type"

The packages list can be ordered by the following fields:

- ``type``
- ``namespace``
- ``name``
- ``version``
- ``qualifiers``
- ``subpath``
- ``download_url``
- ``filename``
- ``size``
- ``release_date``

To sort a field in a descending fashion, prefix the field name with ``-``.
Packages can be sorted by multiple fields.

For example:

``GET /api/packages/?sort=type,-size``

package details
^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -867,6 +886,13 @@ package. Find all addon pipelines `here. <https://scancodeio.readthedocs.io/en/l
"history": "https://public.purldb.io/api/packages/4f3a57de-e367-43c6-a7f1-51633d0ecd45/history/"
}

The ordering of the packages returned by ``/api/collect/`` can be set using the
``sort`` query parameter.

``GET /api/collect/?purl=pkg:npm/[email protected]&sort=qualifiers,-size``

The same sort fields as ``/api/packages/`` are also used here.

collect actions
---------------

Expand Down
33 changes: 19 additions & 14 deletions packagedb/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,20 @@ def filter(self, qs, value):
return qs.distinct() if self.distinct else qs


# Package fields that results can be ordered by. This single list backs the
# `sort` OrderingFilter of PackageFilterSet and the sort-field validation done
# by is_supported_sort_field().
PACKAGE_FILTER_SORT_FIELDS = [
    # Package URL components
    "type", "namespace", "name", "version", "qualifiers", "subpath",
    # Download and file attributes
    "download_url", "filename", "size", "release_date",
]


class PackageFilterSet(FilterSet):
type = django_filters.CharFilter(
lookup_expr='iexact',
Expand Down Expand Up @@ -332,18 +346,7 @@ class PackageFilterSet(FilterSet):
lookup_expr='icontains',
)

sort = OrderingFilter(fields=[
'type',
'namespace',
'name',
'version',
'qualifiers',
'subpath',
'download_url',
'filename',
'size',
'release_date'
])
sort = OrderingFilter(fields=PACKAGE_FILTER_SORT_FIELDS)

class Meta:
model = Package
Expand Down Expand Up @@ -826,6 +829,7 @@ def list(self, request, format=None):

validated_data = serializer.validated_data
purl = validated_data.get('purl')
sort = validated_data.get('sort') or ['-version',]

kwargs = dict()
# We want this request to have high priority since the user knows the
Expand All @@ -839,7 +843,7 @@ def list(self, request, format=None):
kwargs["addon_pipelines"] = addon_pipelines

lookups = purl_to_lookups(purl)
packages = Package.objects.filter(**lookups)
packages = Package.objects.filter(**lookups).order_by(*sort)
if packages.count() == 0:
try:
errors = priority_router.process(purl, **kwargs)
Expand All @@ -850,14 +854,15 @@ def list(self, request, format=None):
return Response(message, status=status.HTTP_400_BAD_REQUEST)

lookups = purl_to_lookups(purl)
packages = Package.objects.filter(**lookups)
packages = Package.objects.filter(**lookups).order_by(*sort)
if packages.count() == 0:
message = {}
if errors:
message = {
'status': f'error(s) occurred when fetching metadata for {purl}: {errors}'
}
return Response(message, status=status.HTTP_400_BAD_REQUEST)

for package in packages:
get_source_package_and_add_to_package_set(package)

Expand Down
49 changes: 36 additions & 13 deletions packagedb/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,8 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

from django.http import HttpRequest
from django.urls import reverse_lazy
from packagedb.models import DependentPackage
from packagedb.models import Package
from packagedb.models import PackageSet
from packagedb.models import PackageWatch
from packagedb.models import Party
from packagedb.models import Resource

from packageurl import PackageURL
from rest_framework.exceptions import ValidationError
from rest_framework.serializers import BooleanField
Expand All @@ -29,6 +23,13 @@
from rest_framework.serializers import Serializer
from rest_framework.serializers import SerializerMethodField

from packagedb.models import DependentPackage
from packagedb.models import Package
from packagedb.models import PackageSet
from packagedb.models import PackageWatch
from packagedb.models import Party
from packagedb.models import Resource


class ResourceAPISerializer(HyperlinkedModelSerializer):
package = HyperlinkedRelatedField(
Expand Down Expand Up @@ -382,19 +383,34 @@ class Meta:
fields = ['depth', 'watch_interval', 'is_active']


class CommaListField(ListField):
    """
    ListField that also accepts comma-separated values.

    The incoming value may be a single ``str`` such as ``"type,-size"`` or a
    list/tuple whose ``str`` items may themselves be comma-separated, such as
    ``["type,-size", "name"]``. In both cases the value handed to
    ``ListField`` is a flat list of the individual items.
    """

    def to_internal_value(self, data):
        """
        Split comma-separated strings found in ``data`` and return the result
        of ``ListField.to_internal_value()`` on the flattened list.
        """
        if isinstance(data, str):
            # Split the string itself: iterating over a str yields single
            # characters, not the comma-separated items.
            data = data.split(',')
        elif isinstance(data, (list, tuple)):
            # NOTE(review): query-string input presumably reaches this method
            # as a list of strings (e.g. ['type,-size']) -- confirm against
            # the DRF ListField.get_value() behavior for QueryDict input.
            split_data = []
            for datum in data:
                if isinstance(datum, str):
                    split_data.extend(datum.split(','))
                else:
                    # Leave non-string items untouched for ListField to validate.
                    split_data.append(datum)
            data = split_data
        return super().to_internal_value(data)


class CollectPackageSerializer(Serializer):
purl = CharField(help_text="PackageURL strings in canonical form.")
source_purl = CharField(
required=False,
help_text="Source PackageURL.",
)

addon_pipelines = ListField(
child=CharField(),
required=False,
allow_empty=True,
help_text="Addon pipelines to run on the package.",
)
sort = CommaListField(
required=False,
help_text="Fields to sort Package results by.",
)

def validate_purl(self, value):
try:
Expand All @@ -412,12 +428,13 @@ def validate_source_purl(self, value):
return value

def validate_addon_pipelines(self, value):
    """
    Return ``value`` unchanged when every addon pipeline in it is supported.

    Raise a ValidationError naming the unsupported pipelines otherwise.
    """
    # Compute the unsupported pipelines once and raise once.
    invalid_pipelines = [
        pipe for pipe in value if not is_supported_addon_pipeline(pipe)
    ]
    if invalid_pipelines:
        raise ValidationError(
            f'Error unsupported addon pipelines: {",".join(invalid_pipelines)}'
        )
    return value

def validate_sort(self, value):
    """
    Return ``value`` unchanged when every sort field in it is supported.

    Raise a ValidationError naming the unsupported sort fields otherwise.
    """
    unsupported = []
    for sort_field in value:
        if not is_supported_sort_field(sort_field):
            unsupported.append(sort_field)

    if not unsupported:
        return value

    raise ValidationError(f'Error unsupported sort fields: {",".join(unsupported)}')


Expand Down Expand Up @@ -518,3 +535,9 @@ class PurltoGitRepoResponseSerializer(Serializer):
def is_supported_addon_pipeline(addon_pipeline):
    """
    Return True if ``addon_pipeline`` is listed in minecode's
    SUPPORTED_ADDON_PIPELINES.
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably this avoids a circular import -- confirm.
    from minecode import model_utils

    supported_pipelines = model_utils.SUPPORTED_ADDON_PIPELINES
    return addon_pipeline in supported_pipelines


def is_supported_sort_field(field):
    """
    Return True if ``field`` is one of PACKAGE_FILTER_SORT_FIELDS, optionally
    prefixed with a single ``-`` to request descending order.
    """
    from packagedb.api import PACKAGE_FILTER_SORT_FIELDS

    # Strip at most one leading `-`. Using `lstrip('-')` would strip every
    # leading dash and accept values such as `--size`, which are later passed
    # as-is to `order_by()` where they are not a valid ordering.
    if field.startswith('-'):
        field = field[1:]
    return field in PACKAGE_FILTER_SORT_FIELDS
63 changes: 61 additions & 2 deletions packagedb/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,6 @@ def setUp(self):
'size': 100,
}
self.package = Package.objects.create(**self.package_data)
self.package.refresh_from_db()
self.scannableuri = ScannableURI.objects.create(
package=self.package,
uri=self.package_download_url,
Expand Down Expand Up @@ -906,7 +905,6 @@ def setUp(self):
'size': 100,
}
self.package2 = Package.objects.create(**self.package_data2)
self.package2.refresh_from_db()
self.scannableuri2 = ScannableURI.objects.create(
package=self.package2,
uri=self.package_download_url2,
Expand All @@ -919,6 +917,55 @@ def setUp(self):
self.scan_request_date2 = timezone.now()
self.scannableuri2.scan_request_date = self.scan_request_date2

self.package_download_url3 = 'http://clone.org/clone1.zip'
self.package_data3 = {
'type': 'pypi',
'namespace': '',
'name': 'clone',
'version': '1',
'qualifiers': '',
'subpath': '',
'download_url': self.package_download_url3,
'filename': 'clone1.zip',
'sha1': 'clone1',
'md5': '',
'size': 100,
}
self.package3 = Package.objects.create(**self.package_data3)

self.package_download_url4 = 'http://clone.org/clone1-src.zip'
self.package_data4 = {
'type': 'pypi',
'namespace': '',
'name': 'clone',
'version': '1',
'qualifiers': 'package=src',
'subpath': '',
'download_url': self.package_download_url4,
'filename': 'clone1-src.zip',
'sha1': 'clone1-src',
'md5': '',
'size': 50,
}
self.package4 = Package.objects.create(**self.package_data4)

self.package_download_url5 = 'http://clone.org/clone1-all.zip'
self.package_data5 = {
'type': 'pypi',
'namespace': '',
'name': 'clone',
'version': '1',
'qualifiers': 'package=all',
'subpath': '',
'download_url': self.package_download_url5,
'filename': 'clone1-all.zip',
'sha1': 'clone1-all',
'md5': '',
'size': 25,
}
self.package5 = Package.objects.create(**self.package_data5)


def test_package_live(self):
purl_str = 'pkg:maven/org.apache.twill/[email protected]'
download_url = 'https://repo1.maven.org/maven2/org/apache/twill/twill-core/0.12.0/twill-core-0.12.0.jar'
Expand Down Expand Up @@ -993,6 +1040,18 @@ def test_package_live_works_with_purl2vcs(self):
self.check_expected_results(
result, expected, fields_to_remove=fields_to_remove, regen=FIXTURES_REGEN)

def test_collect_sort(self):
    """
    Check that `/api/collect/` orders the returned packages by ``size``,
    ascending with ``sort=size`` and descending with ``sort=-size``.
    """
    purl_str = 'pkg:pypi/clone@1'

    response = self.client.get(f'/api/collect/?purl={purl_str}&sort=size')
    sizes = [package_data['size'] for package_data in response.data]
    # With fewer than two packages the ordering checks below would pass
    # without checking anything.
    self.assertGreater(len(sizes), 1)
    for previous_size, size in zip(sizes, sizes[1:]):
        self.assertLess(previous_size, size)

    response = self.client.get(f'/api/collect/?purl={purl_str}&sort=-size')
    sizes = [package_data['size'] for package_data in response.data]
    self.assertGreater(len(sizes), 1)
    for previous_size, size in zip(sizes, sizes[1:]):
        self.assertGreater(previous_size, size)

def test_package_api_index_packages_endpoint(self):
priority_resource_uris_count = PriorityResourceURI.objects.all().count()
self.assertEqual(0, priority_resource_uris_count)
Expand Down