Skip to content

Commit e7cfcda

Browse files
authored
Refactor run_scancode to handle errors along success #1018 (#1019)
Signed-off-by: tdruez <[email protected]>
1 parent 27f6c2b commit e7cfcda

24 files changed

+5812
-25808
lines changed

CHANGELOG.rst

+10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
Changelog
22
=========
33

4+
v32.8.0 (unreleased)
5+
--------------------
6+
7+
- Refactor run_scancode to not fail on scan errors happening at the resource level,
8+
such as a timeout. Project error messages are created instead.
9+
https://github.com/nexB/scancode.io/issues/1018
10+
11+
- Add support for the SCANCODEIO_SCAN_FILE_TIMEOUT setting in the scan_package pipeline.
12+
https://github.com/nexB/scancode.io/issues/1018
13+
414
v32.7.0 (2023-10-25)
515
--------------------
616

docker-compose.dev.yml

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
version: "3"
2+
3+
# Mount the local scanpipe/ directory in the containers
4+
5+
# This can be used to refresh fixtures from the docker container:
6+
# $ docker compose -f docker-compose.yml -f docker-compose.dev.yml up
7+
# $ docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web bash
8+
# $ SCANCODEIO_TEST_FIXTURES_REGEN=1 ./manage.py test
9+
10+
services:
11+
web:
12+
volumes:
13+
- ./scanpipe:/opt/scancodeio/scanpipe
14+
15+
worker:
16+
volumes:
17+
- ./scanpipe:/opt/scancodeio/scanpipe

docs/application-settings.rst

-13
Original file line numberDiff line numberDiff line change
@@ -132,19 +132,6 @@ synchronously in the web server process.
132132

133133
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
134134

135-
SCANCODE_TOOLKIT_RUN_SCAN_ARGS
136-
------------------------------
137-
138-
Use this setting to provide any default options for running ScanCode-toolkit.
139-
140-
.. note::
141-
Refer to `ScanCode-toolkit Available Options <https://scancode-toolkit.readthedocs.io/en/latest/cli-reference/list-options.html>`_
142-
for the full list of available options.
143-
144-
The following example explicitly defines a timeout value of 60::
145-
146-
SCANCODE_TOOLKIT_RUN_SCAN_ARGS="timeout=60,"
147-
148135
SCANCODEIO_TASK_TIMEOUT
149136
-----------------------
150137

scancodeio/settings.py

-2
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,6 @@
7878
"SCANCODEIO_CONFIG_FILE", default="scancode-config.yml"
7979
)
8080

81-
SCANCODE_TOOLKIT_RUN_SCAN_ARGS = env.dict("SCANCODE_TOOLKIT_RUN_SCAN_ARGS", default={})
82-
8381
SCANCODEIO_LOG_LEVEL = env.str("SCANCODEIO_LOG_LEVEL", "INFO")
8482

8583
# Set the number of parallel processes to use for ScanCode related scan execution.

scanpipe/pipelines/scan_package.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from django.core.serializers.json import DjangoJSONEncoder
2626

2727
from commoncode.hash import multi_checksums
28-
from scancode import ScancodeError
2928

3029
from scanpipe.pipelines import Pipeline
3130
from scanpipe.pipes import input
@@ -99,14 +98,18 @@ def run_scancode(self):
9998
if license_score := self.project.get_env("scancode_license_score"):
10099
run_scan_args["license_score"] = license_score
101100

102-
errors = scancode.run_scan(
101+
scanning_errors = scancode.run_scan(
103102
location=str(self.project.codebase_path),
104103
output_file=self.scan_output_location,
105104
run_scan_args=run_scan_args,
106105
)
107106

108-
if errors:
109-
raise ScancodeError(errors)
107+
for resource_path, errors in scanning_errors.items():
108+
self.project.add_error(
109+
description="\n".join(errors),
110+
model=self.pipeline_name,
111+
details={"path": resource_path},
112+
)
110113

111114
if not scan_output_path.exists():
112115
raise FileNotFoundError("ScanCode output not available.")

scanpipe/pipes/scancode.py

+14-21
Original file line numberDiff line numberDiff line change
@@ -466,38 +466,31 @@ def get_pretty_params(args):
466466

467467

468468
def run_scan(location, output_file, run_scan_args):
469-
"""
470-
Scan the `location` content and write the results into an `output_file`.
471-
If `raise_on_error` is enabled, a ScancodeError will be raised if an error occurs
472-
during the scan.
473-
"""
474-
run_args = settings.SCANCODE_TOOLKIT_RUN_SCAN_ARGS.copy()
475-
# The run_scan_args should override any values provided in the settings
476-
run_args.update(run_scan_args)
477-
478-
if "timeout" in run_args:
479-
run_args["timeout"] = int(run_args.get("timeout"))
480-
481-
success, results = scancode_run_scan(
469+
"""Scan the `location` content and write the results into an `output_file`."""
470+
_success, results = scancode_run_scan(
482471
input=shlex.quote(location),
483472
processes=get_max_workers(keep_available=1),
484473
quiet=True,
485474
verbose=False,
486475
return_results=True,
487476
echo_func=None,
488-
pretty_params=get_pretty_params(run_args),
489-
**run_args,
477+
pretty_params=get_pretty_params(run_scan_args),
478+
timeout=settings.SCANCODEIO_SCAN_FILE_TIMEOUT,
479+
**run_scan_args,
490480
)
491481

492-
if success:
482+
# ``_success`` will be False if any scanning errors occur, but we still want
483+
# to generate the results output in that case.
484+
if results:
493485
Path(output_file).write_text(json.dumps(results, indent=2))
494-
return
495486

496-
errors = {}
487+
# Capture scan errors logged at the files level.
488+
scanning_errors = {}
497489
for file in results.get("files", []):
498-
if scan_errors := file.get("scan_errors"):
499-
errors[file.get("path")] = scan_errors
500-
return errors
490+
if errors := file.get("scan_errors"):
491+
scanning_errors[file.get("path")] = errors
492+
493+
return scanning_errors
501494

502495

503496
def get_virtual_codebase(project, input_location):

0 commit comments

Comments
 (0)