Skip to content

Commit e458a1a

Browse files
committed
Merge main and fix conflicts #1524
Signed-off-by: tdruez <[email protected]>
2 parents 2292804 + ce227cd commit e458a1a

File tree

9 files changed

+274
-20
lines changed

9 files changed

+274
-20
lines changed

CHANGELOG.rst

+4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ v34.9.4 (unreleased)
4747
sheets with a dedicated VULNERABILITIES sheet.
4848
https://github.com/aboutcode-org/scancode.io/issues/1519
4949

50+
- Add a ``report`` management command that generates XLSX reports for
51+
multiple projects at once using labels and searching by project name.
52+
https://github.com/aboutcode-org/scancode.io/issues/1524
53+
5054
v34.9.3 (2024-12-31)
5155
--------------------
5256

docs/command-line-interface.rst

+64-5
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section::
6868
list-project
6969
output
7070
purldb-scan-worker
71+
report
7172
reset-project
7273
run
7374
show-pipeline
@@ -174,6 +175,10 @@ Required arguments (one of):
174175
| project-2 | pkg:deb/debian/[email protected] |
175176
+----------------+---------------------------------+
176177

178+
.. tip::
179+
In place of a local path, a download URL to the CSV file is supported for the
180+
``--input-list`` argument.
181+
177182
Optional arguments:
178183

179184
- ``--project-name-suffix`` Optional custom suffix to append to project names.
@@ -194,14 +199,15 @@ Optional arguments:
194199
Example: Processing Multiple Docker Images
195200
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
196201

197-
Assume multiple Docker images are available in a directory named ``local-data/`` on
202+
Suppose you have multiple Docker images stored in a directory named ``local-data/`` on
198203
the host machine.
199-
To process these images with the ``analyze_docker_image`` pipeline using asynchronous
200-
execution::
204+
To process these images using the ``analyze_docker_image`` pipeline with asynchronous
205+
execution, you can use this command::
201206

202207
$ docker compose run --rm \
203-
--volume local-data/:/input-data:ro \
204-
web scanpipe batch-create input-data/ \
208+
--volume local-data/:/input-data/:ro \
209+
web scanpipe batch-create \
210+
--input-directory /input-data/ \
205211
--pipeline analyze_docker_image \
206212
--label "Docker" \
207213
--execute --async
@@ -224,6 +230,19 @@ Each Docker image in the ``local-data/`` directory will result in the creation o
224230
project with the specified pipeline (``analyze_docker_image``) executed by worker
225231
services.
226232

233+
Example: Processing Multiple Develop to Deploy Mappings
234+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
235+
236+
To process an input list CSV file with the ``map_deploy_to_develop`` pipeline using
237+
asynchronous execution::
238+
239+
$ docker compose run --rm \
240+
web scanpipe batch-create \
241+
--input-list https://url/input_list.csv \
242+
--pipeline map_deploy_to_develop \
243+
--label "d2d_mapping" \
244+
--execute --async
245+
227246
`$ scanpipe list-pipeline [--verbosity {0,1,2,3}]`
228247
--------------------------------------------------
229248

@@ -375,6 +394,46 @@ your outputs on the host machine when running with Docker.
375394
.. tip:: To specify a CycloneDX spec version (defaults to latest), use the syntax
376395
``cyclonedx:VERSION`` as format value. For example: ``--format cyclonedx:1.5``.
377396

397+
.. _cli_report:
398+
399+
`$ scanpipe report --sheet SHEET`
400+
---------------------------------
401+
402+
Generates an XLSX report of selected projects based on the provided criteria.
403+
404+
Required arguments:
405+
406+
- ``--sheet {package,dependency,resource,relation,message,todo}``
407+
Specifies the sheet to include in the XLSX report. Available choices are based on
408+
predefined object types.
409+
410+
Optional arguments:
411+
412+
- ``--output-directory OUTPUT_DIRECTORY``
413+
The path to the directory where the report file will be created. If not provided,
414+
the report file will be created in the current working directory.
415+
416+
- ``--search SEARCH``
417+
Filter projects by searching for the provided string in their name.
418+
419+
- ``--label LABELS``
420+
Filter projects by the provided label(s). Multiple labels can be provided by using
421+
this argument multiple times.
422+
423+
.. note::
424+
Either ``--label`` or ``--search`` must be provided to select projects.
425+
426+
Example usage:
427+
428+
1. Generate a report for all projects tagged with "d2d" and include the **TODOS**
429+
worksheet::
430+
431+
$ scanpipe report --sheet todo --label d2d
432+
433+
2. Generate a report for projects whose names contain the word "audit" and include the
434+
**PACKAGES** worksheet::
435+
436+
$ scanpipe report --sheet package --search audit
378437

379438
.. _cli_check_compliance:
380439

scanpipe/forms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ class ProjectReportForm(BaseProjectActionForm):
295295
("codebaseresource", "Resources"),
296296
("codebaserelation", "Relations"),
297297
("projectmessage", "Messages"),
298-
("todos", "TODOs"),
298+
("todo", "TODOs"),
299299
],
300300
required=True,
301301
initial="discoveredpackage",

scanpipe/management/commands/batch-create.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@
2727
from django.core.management import CommandError
2828
from django.core.management.base import BaseCommand
2929

30+
import requests
31+
3032
from scanpipe.management.commands import CreateProjectCommandMixin
3133
from scanpipe.management.commands import PipelineCommandMixin
34+
from scanpipe.pipes import fetch
3235

3336

3437
class Command(CreateProjectCommandMixin, PipelineCommandMixin, BaseCommand):
@@ -54,7 +57,8 @@ def add_arguments(self, parser):
5457
"Path to a CSV file with project names and input URLs. "
5558
"The first column must contain project names, and the second column "
5659
"should list comma-separated input URLs (e.g., Download URL, PURL, or "
57-
"Docker reference)."
60+
"Docker reference). "
61+
"In place of a local path, a download URL to the CSV file is supported."
5862
),
5963
)
6064
parser.add_argument(
@@ -110,7 +114,16 @@ def handle_input_directory(self, **options):
110114
self.created_project_count += 1
111115

112116
def handle_input_list(self, **options):
113-
input_file = Path(options["input_list"])
117+
input_file = options["input_list"]
118+
119+
if input_file.startswith("http"):
120+
try:
121+
download = fetch.fetch_http(input_file)
122+
except requests.exceptions.RequestException as e:
123+
raise CommandError(e)
124+
input_file = download.path
125+
126+
input_file = Path(input_file)
114127
if not input_file.exists():
115128
raise CommandError(f"The {input_file} file does not exist.")
116129

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
from pathlib import Path
24+
from timeit import default_timer as timer
25+
26+
from django.core.management import CommandError
27+
from django.core.management.base import BaseCommand
28+
29+
import xlsxwriter
30+
31+
from aboutcode.pipeline import humanize_time
32+
from scanpipe.models import Project
33+
from scanpipe.pipes import filename_now
34+
from scanpipe.pipes import output
35+
36+
37+
class Command(BaseCommand):
    """
    Generate one XLSX report for a single object type across several projects.

    Projects are selected with ``--label`` (repeatable) and/or ``--search``
    (case-insensitive match on the project name). At least one of the two
    must be provided. The report contains a single worksheet whose rows are
    prefixed with the project they belong to.
    """

    help = "Report of selected projects."

    # Worksheet title for each ``--sheet`` choice, so the tab name reflects
    # the reported object type. "TODOS" matches the title used for the
    # per-project todos sheet.
    worksheet_names = {
        "package": "PACKAGES",
        "dependency": "DEPENDENCIES",
        "resource": "RESOURCES",
        "relation": "RELATIONS",
        "message": "MESSAGES",
        "todo": "TODOS",
    }

    def add_arguments(self, parser):
        """Register the report-specific command line arguments on ``parser``."""
        super().add_arguments(parser)
        parser.add_argument(
            "--output-directory",
            help=(
                "The path to the directory where the report file will be created. "
                "If not provided, the report file will be created in the current "
                "working directory."
            ),
        )
        parser.add_argument(
            "--sheet",
            required=True,
            choices=list(output.object_type_to_model_name),
            help="Specifies the sheet to include in the XLSX report.",
        )
        parser.add_argument(
            "--search",
            help="Select projects searching for the provided string in their name.",
        )
        parser.add_argument(
            "--label",
            action="append",
            dest="labels",
            default=[],
            help=(
                "Filter projects by the provided label(s). Multiple labels can be "
                "provided by using this argument multiple times."
            ),
        )

    def handle(self, *args, **options):
        """
        Select the projects, then write the requested sheet to an XLSX file.

        Raises ``CommandError`` when neither ``--label`` nor ``--search`` is
        provided, or when no project matches the provided criteria.
        """
        start_time = timer()
        self.verbosity = options["verbosity"]

        output_directory = options["output_directory"]
        labels = options["labels"]
        search = options["search"]
        sheet = options["sheet"]
        model_name = output.object_type_to_model_name.get(sheet)

        if not (labels or search):
            raise CommandError(
                "You must provide either --label or --search to select projects."
            )

        project_qs = Project.objects.all()
        if labels:
            # Filtering across the labels relation yields one row per matching
            # label; distinct() prevents a project carrying several of the
            # requested labels from being counted more than once.
            project_qs = project_qs.filter(labels__name__in=labels).distinct()
        if search:
            project_qs = project_qs.filter(name__icontains=search)
        project_count = project_qs.count()

        if not project_count:
            raise CommandError("No projects found for the provided criteria.")

        if self.verbosity > 0:
            msg = f"{project_count} project(s) will be included in the report."
            self.stdout.write(msg, self.style.SUCCESS)

        # ``project=None`` is passed so the scoping to the selected projects
        # is done by the explicit ``project__in`` filter below.
        worksheet_queryset = output.get_queryset(project=None, model_name=model_name)
        worksheet_queryset = worksheet_queryset.filter(project__in=project_qs)

        filename = f"scancodeio-report-{filename_now()}.xlsx"
        if output_directory:
            output_file = Path(output_directory) / filename
        else:
            output_file = Path(filename)

        # Name the worksheet after the requested sheet rather than always
        # labelling it "TODOS".
        worksheet_name = self.worksheet_names.get(sheet, sheet.upper())

        with xlsxwriter.Workbook(output_file) as workbook:
            output.queryset_to_xlsx_worksheet(
                worksheet_queryset,
                workbook,
                exclude_fields=output.XLSX_EXCLUDE_FIELDS,
                prepend_fields=["project"],
                worksheet_name=worksheet_name,
            )

        run_time = timer() - start_time
        if self.verbosity > 0:
            msg = f"Report generated at {output_file} in {humanize_time(run_time)}."
            self.stdout.write(msg, self.style.SUCCESS)

scanpipe/pipes/output.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def get_queryset(project, model_name):
9696
CodebaseRelation.objects.select_related("from_resource", "to_resource")
9797
),
9898
"projectmessage": ProjectMessage.objects.all(),
99-
"todos": CodebaseResource.objects.files().status(flag.REQUIRES_REVIEW),
99+
"todo": CodebaseResource.objects.files().status(flag.REQUIRES_REVIEW),
100100
}
101101

102102
queryset = querysets.get(model_name)
@@ -309,6 +309,11 @@ def to_json(project):
309309
"codebaseresource": "resource",
310310
"codebaserelation": "relation",
311311
"projectmessage": "message",
312+
"todo": "todo",
313+
}
314+
315+
object_type_to_model_name = {
316+
value: key for key, value in model_name_to_object_type.items()
312317
}
313318

314319

@@ -469,6 +474,16 @@ def _adapt_value_for_xlsx(fieldname, value, maximum_length=32767, _adapt=True):
469474
return value, error
470475

471476

477+
XLSX_EXCLUDE_FIELDS = [
478+
"extra_data",
479+
"package_data",
480+
"license_detections",
481+
"other_license_detections",
482+
"license_clues",
483+
"affected_by_vulnerabilities",
484+
]
485+
486+
472487
def to_xlsx(project):
473488
"""
474489
Generate output for the provided ``project`` in XLSX format.
@@ -479,15 +494,8 @@ def to_xlsx(project):
479494
with possible error messages for a row when converting the data to XLSX
480495
exceed the limits of what can be stored in a cell.
481496
"""
497+
exclude_fields = XLSX_EXCLUDE_FIELDS.copy()
482498
output_file = project.get_output_file_path("results", "xlsx")
483-
exclude_fields = [
484-
"extra_data",
485-
"package_data",
486-
"license_detections",
487-
"other_license_detections",
488-
"license_clues",
489-
"affected_by_vulnerabilities",
490-
]
491499

492500
if not project.policies_enabled:
493501
exclude_fields.append("compliance_alert")
@@ -572,7 +580,7 @@ def add_vulnerabilities_sheet(workbook, project):
572580

573581

574582
def add_todos_sheet(workbook, project, exclude_fields):
575-
todos_queryset = get_queryset(project, "todos")
583+
todos_queryset = get_queryset(project, "todo")
576584
if todos_queryset:
577585
queryset_to_xlsx_worksheet(
578586
todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"

0 commit comments

Comments
 (0)