Skip to content

Commit

Permalink
Merge pull request #503 from nexB/502-queue-priority-update
Browse files Browse the repository at this point in the history
502 queue priority update
  • Loading branch information
JonoYang authored Jul 19, 2024
2 parents 275d6da + 95097c8 commit d4094f5
Show file tree
Hide file tree
Showing 14 changed files with 149 additions and 48 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ Next Release
``approximate_directory_structure_index``, ``exact_file_index``,
``exact_package_archive_index``, ``cditems``, ``on_demand_queue`` have been
removed.
- The ``/api/collect/`` and ``/api/collect/index_packages/`` API endpoints have been
updated such that Package scan and processing requests made with purls with
versions are processed ahead of those made with versionless purls. https://github.com/nexB/purldb/issues/502


v5.0.0
Expand Down
2 changes: 1 addition & 1 deletion docs/source/purldb/rest_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ index_packages
Take a list of ``packages`` (where each item is a dictionary containing either a PURL
or a versionless PURL along with a vers range, optionally with a source package PURL)
and index it.
and index it. PURLs with versions are processed ahead of versionless PURLs.
Each package can also have a list of ``addon_pipelines`` to run on the package.
Find all addon pipelines `here <https://scancodeio.readthedocs.io/en/latest/built-in-pipelines.html>`_.
Expand Down
7 changes: 5 additions & 2 deletions minecode/management/commands/priority_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,18 @@ def process_request(priority_resource_uri, _priority_router=priority_router):
purl_to_visit = priority_resource_uri.uri
source_purl = priority_resource_uri.source_uri
addon_pipelines = priority_resource_uri.addon_pipelines
priority = priority_resource_uri.priority

try:
if TRACE:
logger.debug('visit_uri: uri: {}'.format(purl_to_visit))
kwargs = dict()
if source_purl:
kwargs["source_purl"] = source_purl
kwargs['source_purl'] = source_purl
if addon_pipelines:
kwargs["addon_pipelines"] = addon_pipelines
kwargs['addon_pipelines'] = addon_pipelines
if priority:
kwargs['priority'] = priority
errors = _priority_router.process(purl_to_visit, **kwargs)
if TRACE:
new_uris_to_visit = list(new_uris_to_visit or [])
Expand Down
8 changes: 5 additions & 3 deletions minecode/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@
)


def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, reindex_uri=False, priority=100):
def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, priority=0, reindex_uri=False):
"""
Add a Package `package` to the scan queue to run the list of provided `pipelines`
Add a Package `package` to the scan queue to run the list of provided
`pipelines` with a given `priority`. A ScannableURI with a `priority` of 100
will be processed before a ScannableURI with a `priority` of 0.
If `reindex_uri` is True, force rescanning of the package
"""
Expand Down Expand Up @@ -226,7 +228,7 @@ def merge_or_create_package(scanned_package, visit_level, override=False):
If ``scanned_package`` does not exist in the PackageDB, create a new entry in
the PackageDB for ``scanned_package``.
If ``override`` is True, then all existing empty values of the PackageDB package are replaced by
a non-empty value of the provided override.
"""
Expand Down
7 changes: 4 additions & 3 deletions minecode/visitors/conan.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def get_download_info(conandata, version):
return download_url, sha256


def map_conan_package(package_url, pipelines):
def map_conan_package(package_url, pipelines, priority=0):
"""
Add a conan `package_url` to the PackageDB.
Expand Down Expand Up @@ -134,7 +134,7 @@ def map_conan_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(db_package, pipelines, priority)

return error

Expand All @@ -154,11 +154,12 @@ def process_request(purl_str, **kwargs):
package_url = PackageURL.from_string(purl_str)
addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

if not package_url.version:
return

error_msg = map_conan_package(package_url, pipelines)
error_msg = map_conan_package(package_url, pipelines, priority)

if error_msg:
return error_msg
24 changes: 14 additions & 10 deletions minecode/visitors/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def process_request(purl_str, **kwargs):
source_purl = kwargs.get("source_purl", None)
addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
Expand All @@ -351,15 +352,16 @@ def process_request(purl_str, **kwargs):
has_version = bool(package_url.version)
if has_version:
error = map_debian_metadata_binary_and_source(
package_url=package_url,
package_url=package_url,
source_package_url=source_package_url,
pipelines=pipelines,
priority=priority,
)

return error


def map_debian_package(debian_package, package_content, pipelines):
def map_debian_package(debian_package, package_content, pipelines, priority=0):
"""
Add a debian `package_url` to the PackageDB.
Expand All @@ -372,7 +374,7 @@ def map_debian_package(debian_package, package_content, pipelines):
error = ''

purl = debian_package.package_url
if package_content == PackageContentType.BINARY:
if package_content == PackageContentType.BINARY:
download_url = debian_package.binary_archive_url
elif package_content == PackageContentType.SOURCE_ARCHIVE:
download_url = debian_package.source_archive_url
Expand Down Expand Up @@ -427,7 +429,7 @@ def map_debian_package(debian_package, package_content, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(db_package, pipelines, priority)

return db_package, error

Expand Down Expand Up @@ -507,13 +509,13 @@ def update_license_copyright_fields(package_from, package_to, replace=True):
setattr(package_to, field, value)


def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines):
def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines, priority=0):
"""
Get metadata for the binary and source release of the Debian package
`package_url` and save it to the PackageDB.
Return an error string for errors that occur, or empty string if there is no error.
"""
"""
error = ''

if "repository_url" in package_url.qualifiers:
Expand All @@ -522,7 +524,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
base_url = UBUNTU_BASE_URL
else:
base_url = DEBIAN_BASE_URL

if "api_data_url" in package_url.qualifiers:
metadata_base_url = package_url.qualifiers["api_data_url"]
elif package_url.namespace == 'ubuntu':
Expand All @@ -544,6 +546,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
debian_package,
PackageContentType.BINARY,
pipelines,
priority,
)
if emsg:
error += emsg
Expand All @@ -552,7 +555,8 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url, pipel
source_package, emsg = map_debian_package(
debian_package,
PackageContentType.SOURCE_ARCHIVE,
pipelines,
pipelines,
priority,
)
if emsg:
error += emsg
Expand Down Expand Up @@ -594,7 +598,7 @@ def from_purls(cls, package_urls):
def package_archive_version(self):
"""
Get the useful part of the debian package version used in
source, binary, metadata and copyright URLs optionally.
source, binary, metadata and copyright URLs optionally.
"""
debvers = DebVersion.from_string(self.package_url.version)
if debvers.revision != "0":
Expand Down Expand Up @@ -679,7 +683,7 @@ def package_copyright_url(self):
copyright_file_string = "_copyright"
if self.package_url.namespace == "ubuntu":
copyright_file_string = "/copyright"

metadata_version = self.package_archive_version
if not self.source_package_url:
metadata_package_name = self.package_url.name
Expand Down
22 changes: 16 additions & 6 deletions minecode/visitors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
logger.setLevel(logging.INFO)


def map_generic_package(package_url, pipelines):
def map_generic_package(package_url, pipelines, priority=0):
"""
Add a generic `package_url` to the PackageDB.
Expand All @@ -52,7 +52,11 @@ def map_generic_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority,
)

return error

Expand All @@ -67,6 +71,7 @@ def process_request(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
Expand All @@ -79,7 +84,7 @@ def process_request(purl_str, **kwargs):
error = f'package_url {purl_str} does not contain a download_url qualifier'
return error

error_msg = map_generic_package(package_url, pipelines)
error_msg = map_generic_package(package_url, pipelines, priority)

if error_msg:
return error_msg
Expand All @@ -97,7 +102,7 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_fetchcode_supported_package(package_url, pipelines):
def map_fetchcode_supported_package(package_url, pipelines, priority=0):
"""
Add a `package_url` supported by fetchcode to the PackageDB.
Expand All @@ -122,7 +127,11 @@ def map_fetchcode_supported_package(package_url, pipelines):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package, pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority,
)

return error

Expand Down Expand Up @@ -176,14 +185,15 @@ def process_request_fetchcode_generic(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_fetchcode_supported_package(package_url, pipelines)
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)

if error_msg:
return error_msg
4 changes: 3 additions & 1 deletion minecode/visitors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,15 @@ def process_request_dir_listed(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_fetchcode_supported_package(package_url, pipelines)
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)

if error_msg:
return error_msg
3 changes: 2 additions & 1 deletion minecode/visitors/gnu.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ def process_request(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
priority = kwargs.get('priority', 0)

package_url = PackageURL.from_string(purl_str)
if not package_url.version:
return

error_msg = map_fetchcode_supported_package(package_url, pipelines)
error_msg = map_fetchcode_supported_package(package_url, pipelines, priority)

if error_msg:
return error_msg
27 changes: 19 additions & 8 deletions minecode/visitors/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,12 @@ def merge_ancestors(ancestor_pom_texts, package):
return package


def map_maven_package(package_url, package_content, pipelines, reindex_metadata=False):
def map_maven_package(package_url, package_content, pipelines, priority=0, reindex_metadata=False):
"""
Add a maven `package_url` to the PackageDB.
Return an error string if errors have occurred in the process.
if ``reindex_metadata`` is True, only reindex metadata and DO NOT rescan the full package.
"""
from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
Expand Down Expand Up @@ -315,16 +315,20 @@ def map_maven_package(package_url, package_content, pipelines, reindex_metadata=
msg = f'Failed to retrieve JAR: {package_url}'
error += msg + '\n'
logger.error(msg)
if not reindex_metadata:

if not reindex_metadata:
# Submit package for scanning
if db_package:
add_package_to_scan_queue(package=db_package, pipelines=pipelines)
add_package_to_scan_queue(
package=db_package,
pipelines=pipelines,
priority=priority
)

return db_package, error


def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
def map_maven_binary_and_source(package_url, pipelines, priority=0, reindex_metadata=False):
"""
Get metadata for the binary and source release of the Maven package
`package_url` and save it to the PackageDB.
Expand All @@ -336,6 +340,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
package_url=package_url,
package_content=PackageContentType.BINARY,
pipelines=pipelines,
priority=priority,
reindex_metadata=reindex_metadata,
)
if emsg:
Expand All @@ -347,6 +352,7 @@ def map_maven_binary_and_source(package_url, pipelines, reindex_metadata=False):
package_url=source_package_url,
package_content=PackageContentType.SOURCE_ARCHIVE,
pipelines=pipelines,
priority=priority,
reindex_metadata=reindex_metadata,
)
if emsg:
Expand Down Expand Up @@ -433,7 +439,7 @@ def process_request(purl_str, **kwargs):

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

priority = kwargs.get('priority', 0)

try:
package_url = PackageURL.from_string(purl_str)
Expand All @@ -444,7 +450,12 @@ def process_request(purl_str, **kwargs):
has_version = bool(package_url.version)
if has_version:
reindex_metadata=kwargs.get("reindex_metadata", False)
error = map_maven_binary_and_source(package_url, pipelines, reindex_metadata=reindex_metadata)
error = map_maven_binary_and_source(
package_url,
pipelines,
reindex_metadata=reindex_metadata,
priority=priority,
)
else:
error = map_maven_packages(package_url, pipelines)

Expand Down
Loading

0 comments on commit d4094f5

Please sign in to comment.