From 0869c19626192efb16d440a17b9e8880d7905921 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 23 Jul 2024 15:42:37 -0700 Subject: [PATCH 1/5] Sort collect endpoint results by most recent version first #508 Signed-off-by: Jono Yang --- packagedb/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packagedb/api.py b/packagedb/api.py index f0e128ea..483dfdbb 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -850,7 +850,7 @@ def list(self, request, format=None): return Response(message, status=status.HTTP_400_BAD_REQUEST) lookups = purl_to_lookups(purl) - packages = Package.objects.filter(**lookups) + packages = Package.objects.filter(**lookups).order_by('-version') if packages.count() == 0: message = {} if errors: From 83b84447ec4fe8a16b3ba1b66ea39e0ad9f76a36 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Thu, 1 Aug 2024 18:34:00 -0700 Subject: [PATCH 2/5] Add sort parameter to /api/collect/ endpoint #508 * Create CommaListField Signed-off-by: Jono Yang --- packagedb/api.py | 31 ++++++++++++++++------------- packagedb/serializers.py | 43 ++++++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/packagedb/api.py b/packagedb/api.py index 483dfdbb..33caa193 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -301,6 +301,20 @@ def filter(self, qs, value): return qs.distinct() if self.distinct else qs +PACKAGE_FILTER_SORT_FIELDS = [ + 'type', + 'namespace', + 'name', + 'version', + 'qualifiers', + 'subpath', + 'download_url', + 'filename', + 'size', + 'release_date', +] + + class PackageFilterSet(FilterSet): type = django_filters.CharFilter( lookup_expr='iexact', @@ -332,18 +346,7 @@ class PackageFilterSet(FilterSet): lookup_expr='icontains', ) - sort = OrderingFilter(fields=[ - 'type', - 'namespace', - 'name', - 'version', - 'qualifiers', - 'subpath', - 'download_url', - 'filename', - 'size', - 'release_date' - ]) + sort = OrderingFilter(fields=PACKAGE_FILTER_SORT_FIELDS) class Meta: model = Package @@ -838,7 +841,6 @@ def list(self, request, format=None): if addon_pipelines := validated_data.get('addon_pipelines', []): kwargs["addon_pipelines"] = addon_pipelines - lookups = purl_to_lookups(purl) packages = Package.objects.filter(**lookups) if packages.count() == 0: try: @@ -850,7 +852,8 @@ def list(self, request, format=None): return Response(message, status=status.HTTP_400_BAD_REQUEST) lookups = purl_to_lookups(purl) - packages = Package.objects.filter(**lookups).order_by('-version') + sort = validated_data.get('sort', []) + packages = Package.objects.filter(**lookups).order_by(*sort) if packages.count() == 0: message = {} if errors: diff --git a/packagedb/serializers.py b/packagedb/serializers.py index 90b6322c..2689c415 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -7,14 +7,8 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from django.http import HttpRequest from django.urls import reverse_lazy -from packagedb.models import DependentPackage -from packagedb.models import Package -from packagedb.models import PackageSet -from packagedb.models import PackageWatch -from packagedb.models import Party -from packagedb.models import Resource + from packageurl import PackageURL from rest_framework.exceptions import ValidationError from rest_framework.serializers import BooleanField @@ -29,6 +23,13 @@ from rest_framework.serializers import Serializer from rest_framework.serializers import SerializerMethodField +from packagedb.models import DependentPackage +from packagedb.models import Package +from packagedb.models import PackageSet +from packagedb.models import PackageWatch +from packagedb.models import Party +from packagedb.models import Resource + class ResourceAPISerializer(HyperlinkedModelSerializer): package = HyperlinkedRelatedField( @@ -382,19 +383,34 @@ class Meta: fields = ['depth', 'watch_interval', 'is_active'] +class CommaListField(ListField): + """ListField that allows also a str of comma-separated values as value.""" + + def to_internal_value(self, data): + if isinstance(data, str): + split_data = [] + for datum in data: + split_data.extend(datum.split(',')) + data = split_data + return super().to_internal_value(data) + + class CollectPackageSerializer(Serializer): purl = CharField(help_text="PackageURL strings in canonical form.") source_purl = CharField( required=False, help_text="Source PackageURL.", ) - addon_pipelines = ListField( child=CharField(), required=False, allow_empty=True, help_text="Addon pipelines to run on the package.", ) + sort = CommaListField( + required=False, + help_text="Fields to sort Package results by.", + ) def validate_purl(self, value): try: @@ -420,6 +436,12 @@ def validate_addon_pipelines(self, value): return value + def validate_sort(self, value): + invalid_sort_fields = [field for field in value if not is_supported_sort_field(field)] + if invalid_sort_fields: + raise ValidationError(f'Error unsupported sort fields: {",".join(invalid_sort_fields)}') + + return value class PackageVersSerializer(Serializer): purl = CharField() @@ -518,3 +540,8 @@ class PurltoGitRepoResponseSerializer(Serializer): def is_supported_addon_pipeline(addon_pipeline): from minecode.model_utils import SUPPORTED_ADDON_PIPELINES return addon_pipeline in SUPPORTED_ADDON_PIPELINES + + +def is_supported_sort_field(field): + from packagedb.api import PACKAGE_FILTER_SORT_FIELDS + return field in PACKAGE_FILTER_SORT_FIELDS From 7b6cac1e707bc46be1955538e55d25893a06d95c Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 2 Aug 2024 12:26:35 -0700 Subject: [PATCH 3/5] Add test for sort on /api/collect #508 Signed-off-by: Jono Yang --- packagedb/api.py | 6 ++-- packagedb/serializers.py | 16 ++++----- packagedb/tests/test_api.py | 68 +++++++++++++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 14 deletions(-) diff --git a/packagedb/api.py b/packagedb/api.py index 33caa193..2404b1d1 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -829,6 +829,7 @@ def list(self, request, format=None): validated_data = serializer.validated_data purl = validated_data.get('purl') + sort = validated_data.get('sort', []) kwargs = dict() # We want this request to have high priority since the user knows the @@ -841,7 +842,8 @@ def list(self, request, format=None): if addon_pipelines := validated_data.get('addon_pipelines', []): kwargs["addon_pipelines"] = addon_pipelines - packages = Package.objects.filter(**lookups) + lookups = purl_to_lookups(purl) + packages = Package.objects.filter(**lookups).order_by(*sort) if packages.count() == 0: try: errors = priority_router.process(purl, **kwargs) @@ -852,7 +854,6 @@ def list(self, request, format=None): return Response(message, status=status.HTTP_400_BAD_REQUEST) lookups = purl_to_lookups(purl) - sort = validated_data.get('sort', []) packages = Package.objects.filter(**lookups).order_by(*sort) if packages.count() == 0: message = {} @@ -861,6 +862,7 @@ def list(self, request, format=None): 'status': f'error(s) occurred when fetching metadata for {purl}: {errors}' } return Response(message, status=status.HTTP_400_BAD_REQUEST) + for package in packages: get_source_package_and_add_to_package_set(package) diff --git a/packagedb/serializers.py b/packagedb/serializers.py index 2689c415..0bb22b03 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -428,21 +428,16 @@ def validate_source_purl(self, value): return value def validate_addon_pipelines(self, value): - invalid_pipelines = [ - pipe for pipe in value if not is_supported_addon_pipeline(pipe)] - if invalid_pipelines: - raise ValidationError( - f'Error unsupported addon pipelines: {",".join(invalid_pipelines)}') - + if invalid_pipelines := [pipe for pipe in value if not is_supported_addon_pipeline(pipe)]: + raise ValidationError(f'Error unsupported addon pipelines: {",".join(invalid_pipelines)}') return value def validate_sort(self, value): - invalid_sort_fields = [field for field in value if not is_supported_sort_field(field)] - if invalid_sort_fields: + if invalid_sort_fields := [field for field in value if not is_supported_sort_field(field)]: raise ValidationError(f'Error unsupported sort fields: {",".join(invalid_sort_fields)}') - return value + class PackageVersSerializer(Serializer): purl = CharField() vers = CharField(required=False) @@ -544,4 +539,5 @@ def is_supported_addon_pipeline(addon_pipeline): def is_supported_sort_field(field): from packagedb.api import PACKAGE_FILTER_SORT_FIELDS - return field in PACKAGE_FILTER_SORT_FIELDS + # A field could have a leading `-` + return field.lstrip('-') in PACKAGE_FILTER_SORT_FIELDS diff --git a/packagedb/tests/test_api.py b/packagedb/tests/test_api.py index c451925c..b97f6250 100644 --- a/packagedb/tests/test_api.py +++ b/packagedb/tests/test_api.py @@ -878,7 +878,6 @@ def setUp(self): 'size': 100, } self.package = Package.objects.create(**self.package_data) - self.package.refresh_from_db() self.scannableuri = ScannableURI.objects.create( package=self.package, uri=self.package_download_url, @@ -906,7 +905,6 @@ def setUp(self): 'size': 100, } self.package2 = Package.objects.create(**self.package_data2) - self.package2.refresh_from_db() self.scannableuri2 = ScannableURI.objects.create( package=self.package2, uri=self.package_download_url2, @@ -919,6 +917,55 @@ def setUp(self): self.scan_request_date2 = timezone.now() self.scannableuri2.scan_request_date = self.scan_request_date2 + self.package_download_url3 = 'http://clone.org/clone1.zip' + self.package_data3 = { + 'type': 'pypi', + 'namespace': '', + 'name': 'clone', + 'version': '1', + 'qualifiers': '', + 'subpath': '', + 'download_url': self.package_download_url3, + 'filename': 'clone1.zip', + 'sha1': 'clone1', + 'md5': '', + 'size': 100, + } + self.package3 = Package.objects.create(**self.package_data3) + + self.package_download_url4 = 'http://clone.org/clone1-src.zip' + self.package_data4 = { + 'type': 'pypi', + 'namespace': '', + 'name': 'clone', + 'version': '1', + 'qualifiers': 'package=src', + 'subpath': '', + 'download_url': self.package_download_url4, + 'filename': 'clone1-src.zip', + 'sha1': 'clone1-src', + 'md5': '', + 'size': 50, + } + self.package4 = Package.objects.create(**self.package_data4) + + self.package_download_url5 = 'http://clone.org/clone1-all.zip' + self.package_data5 = { + 'type': 'pypi', + 'namespace': '', + 'name': 'clone', + 'version': '1', + 'qualifiers': 'package=all', + 'subpath': '', + 'download_url': self.package_download_url5, + 'filename': 'clone1-all.zip', + 'sha1': 'clone1-all', + 'md5': '', + 'size': 25, + } + self.package5 = Package.objects.create(**self.package_data5) + + def test_package_live(self): purl_str = 'pkg:maven/org.apache.twill/twill-core@0.12.0' download_url = 'https://repo1.maven.org/maven2/org/apache/twill/twill-core/0.12.0/twill-core-0.12.0.jar' @@ -993,6 +1040,23 @@ def test_package_live_works_with_purl2vcs(self): self.check_expected_results( result, expected, fields_to_remove=fields_to_remove, regen=FIXTURES_REGEN) + def test_collect_sort(self): + purl_str = 'pkg:pypi/clone@1' + response = self.client.get(f'/api/collect/?purl={purl_str}&sort=size') + for i, package_data in enumerate(response.data[1:], start=1): + prev_package_data = response.data[i-1] + self.assertTrue(prev_package_data['size'] < package_data['size']) + + response = self.client.get(f'/api/collect/?purl={purl_str}&sort=-size') + for i, package_data in enumerate(response.data[1:], start=1): + prev_package_data = response.data[i-1] + self.assertTrue(prev_package_data['size'] > package_data['size']) + + response = self.client.get(f'/api/collect/?purl={purl_str}&sort=-size') + for i, package_data in enumerate(response.data[1:], start=1): + prev_package_data = response.data[i-1] + self.assertTrue(prev_package_data['size'] > package_data['size']) + def test_package_api_index_packages_endpoint(self): priority_resource_uris_count = PriorityResourceURI.objects.all().count() self.assertEqual(0, priority_resource_uris_count) From 86af3ad3ce314221b31b98730d46215d239063c5 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 2 Aug 2024 12:45:01 -0700 Subject: [PATCH 4/5] Update CHANGELOG.rst and api doc #508 Signed-off-by: Jono Yang --- CHANGELOG.rst | 5 ++++- docs/source/purldb/rest_api.rst | 26 ++++++++++++++++++++++++++ packagedb/tests/test_api.py | 5 ----- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d1583659..32135a76 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -19,7 +19,10 @@ Next Release `/api/scan_queue/index_package_scan/` endpoint. `/api/scan_queue/update_status/` is now an action on a ScannableURI. https://github.com/aboutcode-org/purldb/issues/504 - +- The packages collected via the `/api/collect/` endpoint can be ordered in an + ascending or descending fashion on fields using the ``sort`` query parameter. + This parameter takes in the same fields as ``sort`` from `/api/packages/`. + https://github.com/aboutcode-org/purldb/issues/508 v5.0.1 --------- diff --git a/docs/source/purldb/rest_api.rst b/docs/source/purldb/rest_api.rst index db7ff7a9..84d9e9f9 100644 --- a/docs/source/purldb/rest_api.rst +++ b/docs/source/purldb/rest_api.rst @@ -221,6 +221,25 @@ For example: curl -X GET "$api_url?$payload" -H "$content_type" +The packages list can be ordered by the following fields: + + - ``type`` + - ``namespace`` + - ``name`` + - ``version`` + - ``qualifiers`` + - ``subpath`` + - ``download_url`` + - ``filename`` + - ``size`` + - ``release_date`` + +To sort a field in a descending fashion, prefix the field name with ``-``. +Packages can be sorted by multiple fields. + +For example: + +``GET /api/packages/?sort=type,-size`` package details ^^^^^^^^^^^^^^^ @@ -867,6 +886,13 @@ package. Find all addon pipelines `here. package_data['size']) - response = self.client.get(f'/api/collect/?purl={purl_str}&sort=-size') - for i, package_data in enumerate(response.data[1:], start=1): - prev_package_data = response.data[i-1] - self.assertTrue(prev_package_data['size'] > package_data['size']) - def test_package_api_index_packages_endpoint(self): priority_resource_uris_count = PriorityResourceURI.objects.all().count() self.assertEqual(0, priority_resource_uris_count) From a9f7d75d66c5333b6d9be18baae197b288f135d4 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 2 Aug 2024 14:59:54 -0700 Subject: [PATCH 5/5] Set default sort on collect endpoint #508 Signed-off-by: Jono Yang --- packagedb/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packagedb/api.py b/packagedb/api.py index 2404b1d1..104d95a7 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -829,7 +829,7 @@ def list(self, request, format=None): validated_data = serializer.validated_data purl = validated_data.get('purl') - sort = validated_data.get('sort', []) + sort = validated_data.get('sort') or ['-version',] kwargs = dict() # We want this request to have high priority since the user knows the