Skip to content

Commit d4094f5

Browse files
authored
Merge pull request #503 from nexB/502-queue-priority-update
502 queue priority update
2 parents 275d6da + 95097c8 commit d4094f5

File tree

14 files changed

+149
-48
lines changed

14 files changed

+149
-48
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ Next Release
99
``approximate_directory_structure_index``, ``exact_file_index``,
1010
``exact_package_archive_index``, ``cditems``, ``on_demand_queue`` have been
1111
removed.
12+
- The `/api/collect/` and `/api/collect/index_packages/` API endpoints have been
13+
updated such that Package scan and processing requests made with purls with
14+
versions are processed ahead of those made with versionless purls. https://github.com/nexB/purldb/issues/502
1215

1316

1417
v5.0.0

docs/source/purldb/rest_api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,7 @@ index_packages
875875
876876
Take a list of ``packages`` (where each item is a dictionary containing either PURL
877877
or versionless PURL along with vers range, optionally with source package PURL)
878-
and index it.
878+
and index it. PURLs with versions are processed ahead of versionless PURLs.
879879
Also each package can have list of ``addon_pipelines`` to run on the package.
880880
Find all addon pipelines `here. <https://scancodeio.readthedocs.io/en/latest/built-in-pipelines.html>`_
881881

minecode/management/commands/priority_queue.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,18 @@ def process_request(priority_resource_uri, _priority_router=priority_router):
109109
purl_to_visit = priority_resource_uri.uri
110110
source_purl = priority_resource_uri.source_uri
111111
addon_pipelines = priority_resource_uri.addon_pipelines
112+
priority = priority_resource_uri.priority
112113

113114
try:
114115
if TRACE:
115116
logger.debug('visit_uri: uri: {}'.format(purl_to_visit))
116117
kwargs = dict()
117118
if source_purl:
118-
kwargs["source_purl"] = source_purl
119+
kwargs['source_purl'] = source_purl
119120
if addon_pipelines:
120-
kwargs["addon_pipelines"] = addon_pipelines
121+
kwargs['addon_pipelines'] = addon_pipelines
122+
if priority:
123+
kwargs['priority'] = priority
121124
errors = _priority_router.process(purl_to_visit, **kwargs)
122125
if TRACE:
123126
new_uris_to_visit = list(new_uris_to_visit or [])

minecode/model_utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,11 @@
4444
)
4545

4646

47-
def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, reindex_uri=False, priority=100):
47+
def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, priority=0, reindex_uri=False):
4848
"""
49-
Add a Package `package` to the scan queue to run the list of provided `pipelines`
49+
Add a Package `package` to the scan queue to run the list of provided
50+
`pipelines` with a given `priority`. A ScannableURI with a `priority` of 100
51+
will be processed before a ScannableURI with a `priority` of 0.
5052
5153
If `reindex_uri` is True, force rescanning of the package
5254
"""
@@ -226,7 +228,7 @@ def merge_or_create_package(scanned_package, visit_level, override=False):
226228
227229
If ``scanned_package`` does not exist in the PackageDB, create a new entry in
228230
the PackageDB for ``scanned_package``.
229-
231+
230232
If ``override`` is True, then all existing empty values of the PackageDB package are replaced by
231233
a non-empty value of the provided override.
232234
"""

minecode/visitors/conan.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def get_download_info(conandata, version):
9999
return download_url, sha256
100100

101101

102-
def map_conan_package(package_url, pipelines):
102+
def map_conan_package(package_url, pipelines, priority=0):
103103
"""
104104
Add a conan `package_url` to the PackageDB.
105105
@@ -134,7 +134,7 @@ def map_conan_package(package_url, pipelines):
134134

135135
# Submit package for scanning
136136
if db_package:
137-
add_package_to_scan_queue(db_package, pipelines)
137+
add_package_to_scan_queue(db_package, pipelines, priority)
138138

139139
return error
140140

@@ -154,11 +154,12 @@ def process_request(purl_str, **kwargs):
154154
package_url = PackageURL.from_string(purl_str)
155155
addon_pipelines = kwargs.get('addon_pipelines', [])
156156
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
157+
priority = kwargs.get('priority', 0)
157158

158159
if not package_url.version:
159160
return
160161

161-
error_msg = map_conan_package(package_url, pipelines)
162+
error_msg = map_conan_package(package_url, pipelines, priority)
162163

163164
if error_msg:
164165
return error_msg

minecode/visitors/debian.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ def process_request(purl_str, **kwargs):
337337
source_purl = kwargs.get("source_purl", None)
338338
addon_pipelines = kwargs.get('addon_pipelines', [])
339339
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
340+
priority = kwargs.get('priority', 0)
340341

341342
try:
342343
package_url = PackageURL.from_string(purl_str)
@@ -351,15 +352,16 @@ def process_request(purl_str, **kwargs):
351352
has_version = bool(package_url.version)
352353
if has_version:
353354
error = map_debian_metadata_binary_and_source(
354-
package_url=package_url,
355+
package_url=package_url,
355356
source_package_url=source_package_url,
356357
pipelines=pipelines,
358+
priority=priority,
357359
)
358360

359361
return error
360362

361363

362-
def map_debian_package(debian_package, package_content, pipelines):
364+
def map_debian_package(debian_package, package_content, pipelines, priority=0):
363365
"""
364366
Add a debian `package_url` to the PackageDB.
365367
@@ -372,7 +374,7 @@ def map_debian_package(debian_package, package_content, pipelines):
372374
error = ''
373375

374376
purl = debian_package.package_url
375-
if package_content == PackageContentType.BINARY:
377+
if package_content == PackageContentType.BINARY:
376378
download_url = debian_package.binary_archive_url
377379
elif package_content == PackageContentType.SOURCE_ARCHIVE:
378380
download_url = debian_package.source_archive_url
@@ -427,7 +429,7 @@ def map_debian_package(debian_package, package_content, pipelines):
427429

428430
# Submit package for scanning
429431
if db_package:
430-
add_package_to_scan_queue(db_package, pipelines)
432+
add_package_to_scan_queue(db_package, pipelines, priority)
431433

432434
return db_package, error
433435

@@ -507,13 +509,13 @@ def update_license_copyright_fields(package_from, package_to, replace=True):
507509
setattr(package_to, field, value)
508510

509511

510-
def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines):
512+
def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines, priority=0):
511513
"""
512514
Get metadata for the binary and source release of the Debian package
513515
`package_url` and save it to the PackageDB.
514516
515517
Return an error string for errors that occur, or empty string if there is no error.
516-
"""
518+
"""
517519
error = ''
518520

519521
if "repository_url" in package_url.qualifiers:
@@ -522,7 +524,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
522524
base_url = UBUNTU_BASE_URL
523525
else:
524526
base_url = DEBIAN_BASE_URL
525-
527+
526528
if "api_data_url" in package_url.qualifiers:
527529
metadata_base_url = package_url.qualifiers["api_data_url"]
528530
elif package_url.namespace == 'ubuntu':
@@ -544,6 +546,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
544546
debian_package,
545547
PackageContentType.BINARY,
546548
pipelines,
549+
priority,
547550
)
548551
if emsg:
549552
error += emsg
@@ -552,7 +555,8 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
552555
source_package, emsg = map_debian_package(
553556
debian_package,
554557
PackageContentType.SOURCE_ARCHIVE,
555-
pipelines,
558+
pipelines,
559+
priority,
556560
)
557561
if emsg:
558562
error += emsg
@@ -594,7 +598,7 @@ def from_purls(cls, package_urls):
594598
def package_archive_version(self):
595599
"""
596600
Get the useful part of the debian package version used in
597-
source, binary, metadata and copyright URLs optionally.
601+
source, binary, metadata and copyright URLs optionally.
598602
"""
599603
debvers = DebVersion.from_string(self.package_url.version)
600604
if debvers.revision != "0":
@@ -679,7 +683,7 @@ def package_copyright_url(self):
679683
copyright_file_string = "_copyright"
680684
if self.package_url.namespace == "ubuntu":
681685
copyright_file_string = "/copyright"
682-
686+
683687
metadata_version = self.package_archive_version
684688
if not self.source_package_url:
685689
metadata_package_name = self.package_url.name

minecode/visitors/generic.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
logger.setLevel(logging.INFO)
2828

2929

30-
def map_generic_package(package_url, pipelines):
30+
def map_generic_package(package_url, pipelines, priority=0):
3131
"""
3232
Add a generic `package_url` to the PackageDB.
3333
@@ -52,7 +52,11 @@ def map_generic_package(package_url, pipelines):
5252

5353
# Submit package for scanning
5454
if db_package:
55-
add_package_to_scan_queue(db_package, pipelines)
55+
add_package_to_scan_queue(
56+
package=db_package,
57+
pipelines=pipelines,
58+
priority=priority,
59+
)
5660

5761
return error
5862

@@ -67,6 +71,7 @@ def process_request(purl_str, **kwargs):
6771

6872
addon_pipelines = kwargs.get('addon_pipelines', [])
6973
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
74+
priority = kwargs.get('priority', 0)
7075

7176
try:
7277
package_url = PackageURL.from_string(purl_str)
@@ -79,7 +84,7 @@ def process_request(purl_str, **kwargs):
7984
error = f'package_url {purl_str} does not contain a download_url qualifier'
8085
return error
8186

82-
error_msg = map_generic_package(package_url, pipelines)
87+
error_msg = map_generic_package(package_url, pipelines, priority)
8388

8489
if error_msg:
8590
return error_msg
@@ -97,7 +102,7 @@ def packagedata_from_dict(package_data):
97102
return PackageData.from_data(cleaned_package_data)
98103

99104

100-
def map_fetchcode_supported_package(package_url, pipelines):
105+
def map_fetchcode_supported_package(package_url, pipelines, priority=0):
101106
"""
102107
Add a `package_url` supported by fetchcode to the PackageDB.
103108
@@ -122,7 +127,11 @@ def map_fetchcode_supported_package(package_url, pipelines):
122127

123128
# Submit package for scanning
124129
if db_package:
125-
add_package_to_scan_queue(db_package, pipelines)
130+
add_package_to_scan_queue(
131+
package=db_package,
132+
pipelines=pipelines,
133+
priority=priority,
134+
)
126135

127136
return error
128137

@@ -176,14 +185,15 @@ def process_request_fetchcode_generic(purl_str, **kwargs):
176185

177186
addon_pipelines = kwargs.get('addon_pipelines', [])
178187
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
188+
priority = kwargs.get('priority', 0)
179189

180190
try:
181191
package_url = PackageURL.from_string(purl_str)
182192
except ValueError as e:
183193
error = f"error occurred when parsing {purl_str}: {e}"
184194
return error
185195

186-
error_msg = map_fetchcode_supported_package(package_url, pipelines)
196+
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)
187197

188198
if error_msg:
189199
return error_msg

minecode/visitors/github.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,13 +198,15 @@ def process_request_dir_listed(purl_str, **kwargs):
198198

199199
addon_pipelines = kwargs.get('addon_pipelines', [])
200200
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
201+
priority = kwargs.get('priority', 0)
202+
201203
try:
202204
package_url = PackageURL.from_string(purl_str)
203205
except ValueError as e:
204206
error = f"error occurred when parsing {purl_str}: {e}"
205207
return error
206208

207-
error_msg = map_fetchcode_supported_package(package_url, pipelines)
209+
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)
208210

209211
if error_msg:
210212
return error_msg

minecode/visitors/gnu.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,13 @@ def process_request(purl_str, **kwargs):
3535

3636
addon_pipelines = kwargs.get('addon_pipelines', [])
3737
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
38+
priority = kwargs.get('priority', 0)
3839

3940
package_url = PackageURL.from_string(purl_str)
4041
if not package_url.version:
4142
return
4243

43-
error_msg = map_fetchcode_supported_package(package_url, pipelines)
44+
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)
4445

4546
if error_msg:
4647
return error_msg

minecode/visitors/maven.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -242,12 +242,12 @@ def merge_ancestors(ancestor_pom_texts, package):
242242
return package
243243

244244

245-
def map_maven_package(package_url, package_content, pipelines, reindex_metadata=False):
245+
def map_maven_package(package_url, package_content, pipelines, priority=0, reindex_metadata=False):
246246
"""
247247
Add a maven `package_url` to the PackageDB.
248248
249249
Return an error string if errors have occurred in the process.
250-
250+
251251
If ``reindex_metadata`` is True, only reindex metadata and DO NOT rescan the full package.
252252
"""
253253
from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
@@ -315,16 +315,20 @@ def map_maven_package(package_url, package_content, pipelines, reindex_metadata=
315315
msg = f'Failed to retrieve JAR: {package_url}'
316316
error += msg + '\n'
317317
logger.error(msg)
318-
319-
if not reindex_metadata:
318+
319+
if not reindex_metadata:
320320
# Submit package for scanning
321321
if db_package:
322-
add_package_to_scan_queue(package=db_package, pipelines=pipelines)
322+
add_package_to_scan_queue(
323+
package=db_package,
324+
pipelines=pipelines,
325+
priority=priority
326+
)
323327

324328
return db_package, error
325329

326330

327-
def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
331+
def map_maven_binary_and_source(package_url, pipelines, priority=0, reindex_metadata=False):
328332
"""
329333
Get metadata for the binary and source release of the Maven package
330334
`package_url` and save it to the PackageDB.
@@ -336,6 +340,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
336340
package_url=package_url,
337341
package_content=PackageContentType.BINARY,
338342
pipelines=pipelines,
343+
priority=priority,
339344
reindex_metadata=reindex_metadata,
340345
)
341346
if emsg:
@@ -347,6 +352,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
347352
package_url=source_package_url,
348353
package_content=PackageContentType.SOURCE_ARCHIVE,
349354
pipelines=pipelines,
355+
priority=priority,
350356
reindex_metadata=reindex_metadata,
351357
)
352358
if emsg:
@@ -433,7 +439,7 @@ def process_request(purl_str, **kwargs):
433439

434440
addon_pipelines = kwargs.get('addon_pipelines', [])
435441
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
436-
442+
priority = kwargs.get('priority', 0)
437443

438444
try:
439445
package_url = PackageURL.from_string(purl_str)
@@ -444,7 +450,12 @@ def process_request(purl_str, **kwargs):
444450
has_version = bool(package_url.version)
445451
if has_version:
446452
reindex_metadata=kwargs.get("reindex_metadata", False)
447-
error = map_maven_binary_and_source(package_url, pipelines, reindex_metadata=reindex_metadata)
453+
error = map_maven_binary_and_source(
454+
package_url,
455+
pipelines,
456+
reindex_metadata=reindex_metadata,
457+
priority=priority,
458+
)
448459
else:
449460
error = map_maven_packages(package_url, pipelines)
450461

0 commit comments

Comments
 (0)