Skip to content

Commit c83f677

Browse files
authored
Move the BasePipeline class to a new aboutcode.pipeline module #1351 (#1357)
Signed-off-by: tdruez <[email protected]>
1 parent 4da3913 commit c83f677

10 files changed

+244
-175
lines changed

CHANGELOG.rst

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ v34.7.2 (unreleased)
4141
- Fix an issue with conflicting groups checkbox id in the Add pipeline modal.
4242
https://github.com/nexB/scancode.io/issues/1353
4343

44+
- Move the BasePipeline class to a new `aboutcode.pipeline` module.
45+
https://github.com/nexB/scancode.io/issues/1351
46+
4447
v34.7.1 (2024-07-15)
4548
--------------------
4649

aboutcode/pipeline/__init__.py

+213
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
import logging
24+
import traceback
25+
from datetime import datetime
26+
from datetime import timezone
27+
from pydoc import getdoc
28+
from pydoc import splitdoc
29+
from timeit import default_timer as timer
30+
31+
# Module-level logger named after this module; ``BasePipeline.log`` emits each
# timestamped message through it in addition to appending it to the Run log.
logger = logging.getLogger(__name__)
32+
33+
34+
def group(*groups):
    """
    Return a decorator that labels the decorated object with ``groups``.

    The labels are stored as a set on the ``groups`` attribute of the object.
    When the object already carries a ``groups`` attribute, the new labels are
    merged with the existing ones through ``union``.
    """

    def decorator(obj):
        try:
            current_groups = obj.groups
        except AttributeError:
            # First time this object is labeled: start a fresh set.
            obj.groups = set(groups)
        else:
            obj.groups = current_groups.union(groups)
        return obj

    return decorator
45+
46+
47+
def humanize_time(seconds):
    """
    Format a duration expressed in ``seconds`` as a human-readable string.

    The result always starts with the number of seconds rounded to an integer.
    A converted value is appended in parentheses when the duration is above one
    minute (minutes) or above one hour (hours). Durations above one day get the
    days value followed by the hours value.
    """
    parts = [f"{seconds:.0f} seconds"]

    if seconds > 86400:
        parts.append(f" ({seconds / 86400:.1f} days)")

    # This check is independent of the days one above: hours are reported for
    # any duration above one hour, minutes only between one minute and one hour.
    if seconds > 3600:
        parts.append(f" ({seconds / 3600:.1f} hours)")
    elif seconds > 60:
        parts.append(f" ({seconds / 60:.1f} minutes)")

    return "".join(parts)
59+
60+
61+
class BasePipeline:
    """
    Common foundation for pipeline implementations.

    Subclasses declare their steps through a ``steps`` classmethod. The
    ``execute`` method runs those steps in order, logging progress on the
    attached Run.
    """

    # An add-on pipeline cannot be the first pipeline to run.
    is_addon = False

    def __init__(self, run):
        """Keep the ``run`` along with its project, pipeline name, and project env."""
        self.run = run
        self.project = run.project
        self.pipeline_name = run.pipeline_name
        self.env = self.project.get_env()

    @classmethod
    def steps(cls):
        """Declare the steps of the pipeline. Subclasses must override this."""
        raise NotImplementedError

    @classmethod
    def get_steps(cls, groups=None):
        """
        Return the steps declared by the ``steps`` class method.

        Without ``groups``, the value returned by ``steps`` is passed through
        untouched. With ``groups``, a tuple is returned that keeps the steps
        carrying no group label plus the steps sharing at least one label with
        ``groups``.

        Raise a ``TypeError`` when ``steps`` is not callable.
        """
        if not callable(cls.steps):
            raise TypeError("Use a ``steps(cls)`` classmethod to declare the steps.")

        declared_steps = cls.steps()
        if groups is None:
            return declared_steps

        def is_selected(step):
            step_groups = getattr(step, "groups", [])
            if not step_groups:
                # Steps without a label are always part of the pipeline.
                return True
            return bool(set(step_groups).intersection(groups))

        return tuple(filter(is_selected, declared_steps))

    @classmethod
    def get_initial_steps(cls):
        """
        Return a tuple of extra steps to run before the declared steps.

        This base implementation provides no extra steps and returns ``None``.
        """
        return None

    @classmethod
    def get_doc(cls):
        """Return the documentation string of this pipeline class."""
        documentation = getdoc(cls)
        return documentation

    @classmethod
    def get_graph(cls):
        """Return one dict (name, doc, groups) for each step of the pipeline."""
        graph = []
        for step in cls.get_steps():
            node = {
                "name": step.__name__,
                "doc": getdoc(step),
                "groups": getattr(step, "groups", []),
            }
            graph.append(node)
        return graph

    @classmethod
    def get_info(cls):
        """
        Return a dict combining the pipeline summary, description, steps graph,
        and available groups.
        """
        doc_summary, doc_description = splitdoc(cls.get_doc())
        info = {"summary": doc_summary, "description": doc_description}
        info["steps"] = cls.get_graph()
        info["available_groups"] = cls.get_available_groups()
        return info

    @classmethod
    def get_summary(cls):
        """Return the summary part of the pipeline documentation string."""
        info = cls.get_info()
        return info["summary"]

    @classmethod
    def get_available_groups(cls):
        """Return the sorted list of unique group names used by the steps."""
        group_names = set()
        for step in cls.get_steps():
            group_names.update(getattr(step, "groups", []))
        return sorted(group_names)

    def log(self, message):
        """
        Prefix ``message`` with a local timestamp, then send it to the module
        logger and append it to the Run log.
        """
        local_now = datetime.now(timezone.utc).astimezone()
        # ``%f`` renders microseconds; dropping 3 digits keeps milliseconds.
        stamp = local_now.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        entry = f"{stamp} {message}"
        logger.info(entry)
        self.run.append_to_log(entry)

    @staticmethod
    def output_from_exception(exception):
        """
        Return an error message built from ``exception``: its text, its cause
        when the cause text differs, and its formatted traceback.
        """
        sections = [f"{exception}\n\n"]

        cause = exception.__cause__
        if cause and str(cause) != str(exception):
            sections.append(f"Cause: {cause}\n\n")

        traceback_formatted = "".join(traceback.format_tb(exception.__traceback__))
        sections.append(f"Traceback:\n{traceback_formatted}")

        return "".join(sections)

    def execute(self):
        """
        Run the steps of this pipeline in their declared order.

        Initial steps, when provided, run before the declared ones. When the
        Run has selected steps, any step whose name is not among them is logged
        as skipped; skipped steps still count in the ``index/total`` progress.

        Return ``(0, "")`` on completion, or ``(1, error_output)`` as soon as a
        step raises an exception.
        """
        self.log(f"Pipeline [{self.pipeline_name}] starting")

        steps = self.get_steps(groups=self.run.selected_groups)
        selected_steps = self.run.selected_steps

        initial_steps = self.get_initial_steps()
        if initial_steps:
            steps = initial_steps + steps

        steps_count = len(steps)
        pipeline_start_time = timer()

        for position, step in enumerate(steps, start=1):
            name = step.__name__

            is_excluded = selected_steps and name not in selected_steps
            if is_excluded:
                self.log(f"Step [{name}] skipped")
                continue

            self.run.set_current_step(f"{position}/{steps_count} {name}")
            self.log(f"Step [{name}] starting")
            step_start_time = timer()

            try:
                step(self)
            except Exception as exception:
                # Any failing step aborts the whole pipeline.
                self.log("Pipeline failed")
                return 1, self.output_from_exception(exception)

            elapsed = timer() - step_start_time
            self.log(f"Step [{name}] completed in {humanize_time(elapsed)}")

        # Reset the `current_step` field on completion.
        self.run.set_current_step("")
        total_elapsed = timer() - pipeline_start_time
        self.log(f"Pipeline completed in {humanize_time(total_elapsed)}")

        return 0, ""

docs/conf.py

+2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111
# documentation root, use os.path.abspath to make it absolute, like shown here.
1212

1313
import os
14+
import sys
1415

1516
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "scancodeio.settings")
17+
# NOTE(review): presumably puts the parent directory first on the import path so
# the documentation build can import top-level packages such as ``aboutcode`` —
# confirm. ``os.path.abspath`` resolves "../." against the current working
# directory, not against the location of this file.
sys.path.insert(0, os.path.abspath("../."))
1618

1719
# -- Project information -----------------------------------------------------
1820

0 commit comments

Comments
 (0)