diff --git a/scripts/labels/__init__.py b/scripts/labels/__init__.py index 4259749345..7d1d7e4de0 100644 --- a/scripts/labels/__init__.py +++ b/scripts/labels/__init__.py @@ -5,3 +5,31 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # ------------------------------------------------------------------------- +""" +This library ships reusable components and user-facing tools to verify, +generate, and adapt the checker labels in the CodeChecker configuration +structure. +""" +# Load the interpreter injection first. +from . import codechecker + +from . import \ + checker_labels, \ + exception, \ + http_, \ + output, \ + projects, \ + transformer, \ + util + + +__all__ = [ + "checker_labels", + "codechecker", + "exception", + "http_", + "output", + "projects", + "transformer", + "util", +] diff --git a/scripts/labels/label_tool/__main__.py b/scripts/labels/__main__.py similarity index 83% rename from scripts/labels/label_tool/__main__.py rename to scripts/labels/__main__.py index 1142df073f..44dad256f0 100755 --- a/scripts/labels/label_tool/__main__.py +++ b/scripts/labels/__main__.py @@ -12,7 +12,9 @@ try: + from .doc_url.generate_tool import __main__ as doc_url_generate from .doc_url.verify_tool import __main__ as doc_url_verify + from .severity.generate_tool import __main__ as severity_generate except ModuleNotFoundError as e: import traceback traceback.print_exc() @@ -41,18 +43,20 @@ def args() -> argparse.ArgumentParser: dest="subcommand", required=True) - def add_subparser(command: str, package): + def add_subparser(package): subparser = subparsers.add_parser( - command, + list(globals().keys())[list(globals().values()).index(package)], prog=package.__package__, help=package.short_help, description=package.description, epilog=package.epilogue, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - subparser = package.args(subparser) + subparser = package.arg_parser(subparser) subparser.set_defaults(__main=package.main) - 
add_subparser("doc_url_verify", doc_url_verify) + add_subparser(doc_url_generate) + add_subparser(doc_url_verify) + add_subparser(severity_generate) return parser diff --git a/scripts/labels/label_tool/checker_labels.py b/scripts/labels/checker_labels.py similarity index 94% rename from scripts/labels/label_tool/checker_labels.py rename to scripts/labels/checker_labels.py index 520dc46e08..362af5c816 100644 --- a/scripts/labels/label_tool/checker_labels.py +++ b/scripts/labels/checker_labels.py @@ -43,7 +43,7 @@ def _load_json(path: pathlib.Path) -> Dict: def _save_json(path: pathlib.Path, data: Dict): try: with path.open("w") as file: - json.dump(data, file, indent=2) + json.dump(data, file, indent=2, sort_keys=True) file.write('\n') except OSError: import traceback @@ -128,7 +128,14 @@ def update_checker_labels(analyser: str, label_indices = {checker: indices[0] if len(indices) == 1 else None for checker, indices in label_indices.items()} for checker, new_label in updates.items(): - checker_labels = label_cfg[checker] + try: + checker_labels = label_cfg[checker] + except KeyError: + label_cfg[checker] = [] + label_indices[checker] = None + + checker_labels = label_cfg[checker] + idx = label_indices[checker] e = f"{key}:{new_label}" if idx is not None: diff --git a/scripts/labels/label_tool/codechecker.py b/scripts/labels/codechecker.py similarity index 78% rename from scripts/labels/label_tool/codechecker.py rename to scripts/labels/codechecker.py index 74fb2bc65f..2a7876bd77 100644 --- a/scripts/labels/label_tool/codechecker.py +++ b/scripts/labels/codechecker.py @@ -21,15 +21,11 @@ def codechecker_src_root() -> Optional[pathlib.Path]: """ try: this_file = pathlib.Path(__file__).resolve(strict=True) - labels_idx = find_if(this_file.parents, - lambda p: p.stem == "labels") - if not labels_idx: + scripts_idx = find_if(this_file.parents, + lambda p: p.stem == "scripts") + if not scripts_idx: return None - - if this_file.parents[labels_idx + 1].stem == "scripts": - 
return this_file.parents[labels_idx + 2] - - return None + return this_file.parents[scripts_idx + 1] except Exception: import traceback traceback.print_exc() @@ -37,6 +33,16 @@ def codechecker_src_root() -> Optional[pathlib.Path]: return None +def default_checker_label_dir() -> Optional[pathlib.Path]: + """ + Returns the directory where the configuration labels for checkers are + stored. + """ + codechecker_root = codechecker_src_root() + return codechecker_root / "config" / "labels" / "analyzers" \ + if codechecker_root else None + + def inject_codechecker_to_interpreter(): """ Adds the built CodeChecker package relative to the root of the working diff --git a/scripts/labels/compiler_warnings.py b/scripts/labels/compiler_warnings.py deleted file mode 100644 index 1a128d2e90..0000000000 --- a/scripts/labels/compiler_warnings.py +++ /dev/null @@ -1,71 +0,0 @@ -# FIXME: Subsume into the newer label_tool package. -import argparse -import json -import urllib3 -import xml.etree.ElementTree as ET - - -def cli_args(): - parser = argparse.ArgumentParser() - - parser.add_argument( - '--label-file', - required=True, - help='Path to the label file which will be inserted the checker ' - 'documentation URLs.') - - return parser.parse_args() - - -def main(): - """ Get CodeChecker labels for compiler warnings analyzer. """ - args = cli_args() - - url = 'https://clang.llvm.org/docs/DiagnosticsReference.html' - - http = urllib3.PoolManager() - r = http.request('GET', url) - data = r.data.replace(b' ', b' ') - root = ET.fromstring(data) - - with open(args.label_file, 'rb') as f: - labels_data = json.load(f) - - labels = labels_data["labels"] - - for section in root.findall('.//*[@class="section"]'): - perm = section.find('.//*[@title="Permalink to this headline"]') - if perm is None: - continue - - backref = section.find('.//*[@class="toc-backref"]') - name = backref.text[2:].lower() # Remove -W and convert to lower case. 
- if name: - checker_name = f"clang-diagnostic-{name}" - else: - checker_name = "clang-diagnostic" - - if checker_name not in labels: - labels[checker_name] = [] - - anchor = perm.attrib['href'].lstrip('#') - if not any(lbl.startswith("doc_url:") for lbl in labels[checker_name]): - labels[checker_name].append(f"doc_url:{url}#{anchor}") - - is_error = section.find('.//*[@class="error"]') is not None - if not any(lbl.startswith("severity:") - for lbl in labels[checker_name]): - if is_error: - severity = "HIGH" - else: - severity = "MEDIUM" - - labels[checker_name].append(f"severity:{severity}") - - labels_data["labels"] = dict(sorted(labels.items())) - with open(args.label_file, 'w', encoding='utf-8') as f: - json.dump(labels_data, f, indent=2) - - -if __name__ == "__main__": - main() diff --git a/scripts/labels/cppcheck.py b/scripts/labels/cppcheck.py deleted file mode 100644 index 5aaedfdf5d..0000000000 --- a/scripts/labels/cppcheck.py +++ /dev/null @@ -1,60 +0,0 @@ -import json -import subprocess -from typing import Optional -import xml.etree.ElementTree as ET - - -def get_severity_label_for_cppcheck(cppcheck_severity: Optional[str]) -> str: - """ - Get CodeChecker severity for a cppcheck. - - Cppcheck severity levels: - * error: when code is executed there is either undefined behavior or - other error, such as a memory leak or resource leak. - * warning: when code is executed there might be undefined behavior - * style: stylistic issues, such as unused functions, redundant code. - * performance: run time performance suggestions based on common - knowledge. - * portability: portability warnings. Implementation defined behavior. - * information: configuration problems. 
- """ - severity = "UNSPECIFIED" - - if cppcheck_severity == "error": - severity = "HIGH" - elif cppcheck_severity == "warning": - severity = "MEDIUM" - elif cppcheck_severity == "style": - severity = "STYLE" - elif cppcheck_severity in ["performance", "portability", "information"]: - severity = "LOW" - - return f"severity:{severity}" - - -def main(): - """ Get CodeChecker labels for pylint analyzer. """ - out = subprocess.check_output( - ["cppcheck", "--errorlist"], - stderr=subprocess.STDOUT, - universal_newlines=True, - encoding="utf-8", - errors="ignore") - - root = ET.fromstring(out) - - labels = {} - for error in root.find("errors"): - checker_name = error.get("id") - cppcheck_severity = error.get("severity") - labels[checker_name] = [ - get_severity_label_for_cppcheck(cppcheck_severity)] - - print(json.dumps({ - "analyzer": "cppcheck", - "labels": labels - }, sort_keys=True, indent=2)) - - -if __name__ == "__main__": - main() diff --git a/scripts/labels/label_tool/doc_url/__init__.py b/scripts/labels/doc_url/__init__.py similarity index 93% rename from scripts/labels/label_tool/doc_url/__init__.py rename to scripts/labels/doc_url/__init__.py index 48d748dec8..363876efb0 100644 --- a/scripts/labels/label_tool/doc_url/__init__.py +++ b/scripts/labels/doc_url/__init__.py @@ -10,11 +10,13 @@ configuration. """ from . import \ + generators, \ output, \ verifiers __all__ = [ + "generators", "output", "verifiers", ] diff --git a/scripts/labels/doc_url/generate_tool/__init__.py b/scripts/labels/doc_url/generate_tool/__init__.py new file mode 100644 index 0000000000..4d5b09d7c2 --- /dev/null +++ b/scripts/labels/doc_url/generate_tool/__init__.py @@ -0,0 +1,17 @@ +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +This subpackage implements logic that is primarily user-facing, as opposed to +reusable library-like components. +""" +from . import \ + tool + + +__all__ = [ + "tool", +] diff --git a/scripts/labels/doc_url/generate_tool/__main__.py b/scripts/labels/doc_url/generate_tool/__main__.py new file mode 100755 index 0000000000..e0831a42fe --- /dev/null +++ b/scripts/labels/doc_url/generate_tool/__main__.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Implementation of the user-facing entry point to the script.""" +import argparse +from functools import partial +import os +import pathlib +import sys +from typing import List, Optional, Set + +from tabulate import tabulate + +from ...checker_labels import SingleLabels, get_checker_labels, \ + update_checker_labels +from ...codechecker import default_checker_label_dir +from ...exception import EngineError +from ...output import Settings as GlobalOutputSettings, \ + error, log, trace, coloured, emoji +from ...util import merge_if_no_collision, plural +from ..generators import analyser_selection +from ..output import Settings as OutputSettings +from . import tool + + +short_help: str = """ +Auto-generate 'doc_url' labels for checkers based on a "Table of Contents" +(ToC) structure. 
+""" +description: str = ( + """ +Automatically generates the 'doc_url' labels which point the users to the +documentation of a checker from a known and available, analyser-specific +(this tool does not support a "generic" execution pattern) "Table of Contents" +(ToC) structure. + +The tool's output is primarily engineered to be human readable (with the added +sprinkle of colours and emojis). +If the output is not sent to an interactive terminal, the output switches to +the creation of a machine-readable output. + +The return code of this tool is indicative of errors encountered during +execution. +'0' is returned for no errors (success), '1' indicates general errors, +'2' indicates configuration errors. +In every other case, the return value is the OR of a bitmask: +""" + f""" +If there was a checker which already had a 'doc_url' but now the ToC points to +a new location, the '{tool.ReturnFlags.HadUpdate}' bit will be set. +If there were checkers without a 'doc_url' (or without any labels at all) but +available in the ToC and thus given a 'doc_url', the +'{tool.ReturnFlags.HadNew}' bit will be set. +If there are checkers with 'doc_url' labels that are no longer available in the +ToC, the '{tool.ReturnFlags.HadGone}' bit will be set. +(Note that this does NOT mean that the documentation URL would be invalid!) +In case after the analysis there are still checkers which do not have a +'doc_url' at all, the '{tool.ReturnFlags.RemainsMissing}' bit will be set. 
+""" +) +epilogue: str = "" + + +def arg_parser(parser: Optional[argparse.ArgumentParser]) \ + -> argparse.ArgumentParser: + if not parser: + parser = argparse.ArgumentParser( + prog=__package__, + description=description, + epilog=epilogue, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument( + "checker_label_dir", + metavar="LABEL_DIR", + nargs='?', + default=default_checker_label_dir(), + type=pathlib.PurePath, + help=""" +The configuration directory where the checker labels are available. +""") + + parser.add_argument( + "-f", "--fix", + dest="apply_fixes", + action="store_true", + help=""" +Apply the updated or generated 'doc_url's back into the input configuration +file. +""") + + filters = parser.add_argument_group("filter arguments") + + filters.add_argument( + "--analysers", "--analyzers", + metavar="ANALYSER", + nargs='*', + type=str, + help=""" +Filter for only the specified analysers before executing the verification. +Each analyser's configuration is present in exactly one JSON file, named +'.json'. +If 'None' is given, automatically run for every found configuration file. +""") + + output = parser.add_argument_group("output control arguments", """ +These optional arguments allow enabling additional verbosity for the output +of the program. +By default, the tool tries to be the most concise possible, and only report +meaningful findings and encountered errors. +""") + + output.add_argument( + "-v", "--verbose", + dest="verbose", + action="store_true", + help=""" +Shortcut to enable all verbosity options in this group that increase the +useful information presented on the output. +Does not enable any trace or debug information. +""") + + output.add_argument( + "--report-missing", + dest="report_missing", + action="store_true", + help=""" +If set, the output will contain an additional list that details which checkers +remain in the configuration file without an appropriate 'doc_url' label +("MISSING"). 
+""") + + output.add_argument( + "--report-ok", + dest="report_ok", + action="store_true", + help=""" +If set, the output will contain the "OK" reports for checkers which +documentation URL is already the same as would be generated by this tool. +""") + + output.add_argument( + "-vd", "--verbose-debug", + dest="verbose_debug", + action="store_true", + help="Emit additional trace and debug output.") + + output.add_argument( + "-vv", "--very-verbose", + dest="very_verbose", + action="store_true", + help=""" +Shortcut to enable all verbosity options, including trace and debug +information. +""") + + return parser + + +def _handle_package_args(args: argparse.Namespace): + if not args.checker_label_dir: + log("%sFATAL: Failed to find the checker label configuration " + "directory, and it was not specified. " + "Please specify!", + emoji(":no_entry: ")) + raise argparse.ArgumentError(None, + "positional argument 'checker_label_dir'") + OutputSettings.set_report_missing(args.report_missing or + args.verbose or + args.very_verbose) + OutputSettings.set_report_ok(args.report_ok or + args.verbose or + args.very_verbose) + GlobalOutputSettings.set_trace(args.verbose_debug or args.very_verbose) + + +def _emit_collision_error(analyser: str, + checker: str, + existing_fix: str, + new_fix: str): + error("%s%s/%s: %s [%s] =/= [%s]", emoji(":collision: "), + analyser, checker, + coloured("FIX COLLISION", "red"), + existing_fix, new_fix) + + +def main(args: argparse.Namespace) -> Optional[int]: + try: + _handle_package_args(args) + except argparse.ArgumentError as arg_err: + # Simulate argparse's return code of parse_args. 
+ raise SystemExit(2) from arg_err + + rc = 0 + statistics: List[tool.Statistics] = [] + trace("Checking checker labels from '%s'", args.checker_label_dir) + + args.checker_label_dir = pathlib.Path(args.checker_label_dir) + if not args.checker_label_dir.is_dir(): + error("'%s' is not a directory!", args.checker_label_dir) + return 1 + + # FIXME: pathlib.Path.walk() is only available Python >= 3.12. + for root, _, files in os.walk(args.checker_label_dir): + root = pathlib.Path(root) + + for file in sorted(files): + file = pathlib.Path(file) + if file.suffix != ".json": + continue + analyser = file.stem + if args.analysers and analyser not in args.analysers: + continue + + path = root / file + log("%sLoading '%s'... ('%s')", + emoji(":magnifying_glass_tilted_left: "), + analyser, + path) + try: + labels = get_checker_labels(analyser, path, "doc_url") + except Exception: + import traceback + traceback.print_exc() + + error("Failed to obtain checker labels for '%s'!", analyser) + continue + + geners = list(analyser_selection.select_generator(analyser)) + if not geners: + log("%sSkipped '%s', no generator implementation!", + emoji(":no_littering: "), + analyser) + continue + + urls: SingleLabels = {} + conflicts: Set[str] = set() + for generator_class in geners: + log("%sGenerating '%s' as '%s' (%s)...", + emoji(":thought_balloon: "), + analyser, + generator_class.kind, + generator_class) + try: + status, generated_urls, statistic = tool.execute( + analyser, + generator_class, + labels, + ) + statistics.append(statistic) + rc = int(tool.ReturnFlags(rc) | status) + except EngineError: + import traceback + traceback.print_exc() + + error("Failed to execute generator '%s' (%s)", + generator_class.kind, generator_class) + rc = int(tool.ReturnFlags(rc) | + tool.ReturnFlags.GeneralError) + continue + + merge_if_no_collision(urls, generated_urls, conflicts, + partial(_emit_collision_error, analyser)) + if args.apply_fixes and urls: + log("%sUpdating %s %s for '%s'... 
('%s')", + emoji(":writing_hand: "), + coloured(f"{len(urls)}", "green"), + plural(urls, "checker", "checkers"), + analyser, + path) + try: + update_checker_labels(analyser, path, "doc_url", urls) + except Exception: + import traceback + traceback.print_exc() + + error("Failed to write checker labels for '%s'!", + analyser) + continue + + log(tabulate(tabular_data=statistics, + headers=tuple(map(lambda s: s.replace('_', ' '), + tool.Statistics._fields)), + tablefmt="fancy_outline" if sys.stderr.isatty() + else "outline"), + file=sys.stderr) + + log("%s", repr(tool.ReturnFlags(rc))) + return rc + + +if __name__ == "__main__": + def _main(): + args = arg_parser(None).parse_args() + sys.exit(main(args) or 0) + _main() diff --git a/scripts/labels/doc_url/generate_tool/tool.py b/scripts/labels/doc_url/generate_tool/tool.py new file mode 100644 index 0000000000..1ba31d307b --- /dev/null +++ b/scripts/labels/doc_url/generate_tool/tool.py @@ -0,0 +1,261 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Provides implementations for the tool's pipeline.""" +from collections import deque +from enum import IntFlag, auto as Enumerator +import sys +from typing import List, NamedTuple, Optional, Tuple, Type, cast + +from ...checker_labels import SingleLabels +from ...output import Settings as GlobalOutputSettings, log, coloured, emoji +from ...util import plural +from ..generators.base import Base +from ..output import Settings as OutputSettings + + +class Statistics(NamedTuple): + """ + The result of the execution of one generator. 
+ """ + + Analyser: str + Generator: str + Checkers: int + Skipped: Optional[int] + Missing: Optional[int] + OK: Optional[int] + Updated: Optional[int] + Gone: Optional[int] + New: Optional[int] + All_Changed: int + Not_Found: Optional[int] + + +class ReturnFlags(IntFlag): + """ + A bit flag structure indicating the return value of the tool's `execute` + function. + """ + # pylint: disable=invalid-name + + # Zero indicates an all-success, but `Enumerator()` starts from 1. + + # Reserved flags used for other purposes external to the tool. + GeneralError = Enumerator() + ConfigurationOrArgsError = Enumerator() + + HadUpdate = Enumerator() + HadNew = Enumerator() + HadGone = Enumerator() + RemainsMissing = Enumerator() + + +def run_generator(generator: Base, urls: SingleLabels) \ + -> Tuple[List[str], SingleLabels, SingleLabels, List[str]]: + analyser = generator.analyser + ok: List[str] = [] + updated: SingleLabels = {} + new: SingleLabels = {} + gone: List[str] = [] + + generation_result: SingleLabels = dict(generator.generate()) + for checker in sorted(urls.keys() | generation_result.keys()): + if generator.skip(checker): + if GlobalOutputSettings.trace(): + log("%s%s/%s: %s", + emoji(":screwdriver: "), + analyser, checker, + coloured("SKIP", "light_magenta"), + file=sys.stderr) + continue + + existing_url, new_url = \ + urls.get(checker), generation_result.get(checker) + + if not existing_url: + if new_url: + new[checker] = new_url + log("%s%s/%s: %s [%s]", + emoji(":magic_wand: "), + analyser, checker, + coloured("NEW", "magenta"), + new_url, + file=sys.stdout) + else: + if OutputSettings.report_missing(): + log("%s%s/%s: %s []", + emoji(":white_question_mark: "), + analyser, checker, + coloured("MISSING", "yellow"), + file=sys.stdout) + elif existing_url == new_url: + ok.append(checker) + if OutputSettings.report_ok(): + log("%s%s/%s: %s [%s]", + emoji(":check_box_with_check: "), + analyser, checker, + coloured("OK", "green"), + existing_url, + file=sys.stdout) 
+ elif new_url: + updated[checker] = new_url + log("%s%s/%s: %s [%s] -> [%s]", + emoji(":sparkles: "), + analyser, checker, + coloured("UPDATED", "yellow"), + existing_url, new_url, + file=sys.stdout) + else: + gone.append(checker) + log("%s%s/%s: %s [%s]", + emoji(":ghost: "), + analyser, checker, + coloured("GONE", "red"), + existing_url, + file=sys.stdout) + + return ok, updated, new, gone + + +def print_generation(analyser: str, + original_urls: SingleLabels, + ok: List[str], + updated: SingleLabels, + new: SingleLabels): + if not updated and not new: + log("%s%s: Documentation for all %s %s is OK.", + emoji(":magnifying_glass_tilted_left::check_mark_button: "), + analyser, + coloured(f"{len(ok)}", "green"), + plural(ok, "checker", "checkers"), + ) + else: + if updated: + log("%s%s: %s %s changed documentation URL. (%s kept previous.)", + emoji(":magnifying_glass_tilted_left::warning: "), + analyser, + coloured(f"{len(updated)}", "yellow"), + plural(updated, "checker", "checkers"), + coloured(f"{len(ok)}", "green") + if ok else coloured("0", "red"), + ) + if new: + log("%s%s: %s new %s did not have a `doc_url` label previously!", + emoji(":magnifying_glass_tilted_left:" + ":magnifying_glass_tilted_right: "), + analyser, + coloured(f"{len(new)}", "magenta"), + plural(new, "checker", "checkers"), + ) + + for checker in sorted((ok if OutputSettings.report_ok() else []) + + list(updated.keys()) + + list(new.keys())): + is_ok = (checker in ok) if OutputSettings.report_ok() else False + is_updated = checker in updated + icon = ":globe_showing_Europe-Africa: " if is_ok \ + else ":bookmark: " if is_updated \ + else ":world_map: " + colour = "green" if is_ok \ + else "yellow" if is_updated \ + else "magenta" + url = original_urls[checker] if is_ok \ + else updated[checker] if is_updated \ + else new[checker] + + log(" %s· %s [%s]", emoji(icon), coloured(checker, colour), url) + + +def print_gone(analyser: str, + gone: SingleLabels): + if not gone: + return + + 
log("%s%s: %s %s documentation gone.", + emoji(":magnifying_glass_tilted_left::bar_chart: "), + analyser, + coloured(f"{len(gone)}", "red"), + plural(len(gone), "checker's", "checkers'"), + ) + deque((log(" %s· %s [%s]", + emoji(":skull_and_crossbones: "), + coloured(checker, "red"), + gone[checker]) + for checker in sorted(gone)), + maxlen=0) + + +def print_missing(analyser: str, + missing: List[str]): + if not OutputSettings.report_missing(): + log("%s%s: %s %s will not have a `doc_url` label!", + emoji(":magnifying_glass_tilted_left:" + ":magnifying_glass_tilted_right: "), + analyser, + coloured(f"{len(missing)}", "yellow"), + plural(missing, "checker", "checkers"), + ) + if OutputSettings.report_missing(): + deque((log(" %s· %s ", + emoji(":bookmark: "), + coloured(checker, "yellow")) + for checker in sorted(missing)), + maxlen=0) + + +def execute(analyser: str, generator_class: Type, labels: SingleLabels) \ + -> Tuple[ReturnFlags, SingleLabels, Statistics]: + """ + Runs one instance of the generation for a specific analyser. 
+ """ + status = cast(ReturnFlags, 0) + generator = generator_class(analyser) + missing = [checker for checker in labels if not labels[checker]] + stats = Statistics(Analyser=analyser, + Generator=generator_class.kind, + Checkers=len(labels), + Skipped=None, + Missing=len(missing) if missing else None, + OK=None, + Updated=None, + Gone=None, + New=None, + All_Changed=0, + Not_Found=len(missing) if missing else None, + ) + urls: SingleLabels = {} + ok, updated, new, gone = run_generator(generator_class(analyser), labels) + print_generation(analyser, labels, ok, updated, new) + urls.update(updated) + urls.update(new) + + ok = set(ok) + new = set(new) + gone = set(gone) + to_skip = {checker for checker + in (labels.keys() | ok | new | gone) + if generator.skip(checker)} + + print_gone(analyser, {checker: labels[checker] + for checker in gone - to_skip}) + remaining_missing = list(labels.keys() - ok - updated.keys() - to_skip) + print_missing(analyser, remaining_missing) + stats = stats._replace(Skipped=len(to_skip) if to_skip else None, + OK=len(ok) if ok else None, + Updated=len(updated) if updated else None, + Gone=len(gone) if gone else None, + New=len(new) if new else None, + All_Changed=len(urls), + Not_Found=len(remaining_missing), + ) + status |= (ReturnFlags.HadUpdate if updated else 0) \ + | (ReturnFlags.HadNew if new else 0) \ + | (ReturnFlags.HadGone if gone else 0) \ + | (ReturnFlags.RemainsMissing if remaining_missing else 0) + + return status, urls, stats diff --git a/scripts/labels/doc_url/generators/__init__.py b/scripts/labels/doc_url/generators/__init__.py new file mode 100644 index 0000000000..16cfceb7d1 --- /dev/null +++ b/scripts/labels/doc_url/generators/__init__.py @@ -0,0 +1,17 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Implements the logic for analyser-specific generation of documentation URLs. +""" +from .analyser_selection import select_generator +from .base import Base + +__all__ = [ + "select_generator", + "Base", +] diff --git a/scripts/labels/doc_url/generators/analyser_selection.py b/scripts/labels/doc_url/generators/analyser_selection.py new file mode 100644 index 0000000000..b9412c1b5e --- /dev/null +++ b/scripts/labels/doc_url/generators/analyser_selection.py @@ -0,0 +1,38 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Selects the appropriate generator engine for the analyser configuration. +""" +from typing import Dict, Iterable, Tuple, Type, Union + +from .clang_diagnostic import ClangDiagnosticGenerator +from .clang_tidy import ClangTidyGenerator +from .clangsa import ClangSAGenerator +from .markdownlint import MarkdownlintGenerator + + +AnalyserGenerators: Dict[str, Union[Type, Tuple[Type, ...]]] = { + "clangsa": ClangSAGenerator, + "clang-tidy": (ClangDiagnosticGenerator, ClangTidyGenerator,), + "mdl": MarkdownlintGenerator, +} + + +def select_generator(analyser: str) -> Iterable[Type]: + """ + Dispatches the `analyser` to one of the generator classes and returns + which class(es) should be used for the label generation. 
+ """ + generators = AnalyserGenerators.get(analyser) + if not generators: + return iter(()) + if not isinstance(generators, tuple): + generators = (generators,) + AnalyserGenerators[analyser] = generators[0] + + return iter(generators) diff --git a/scripts/labels/doc_url/generators/base.py b/scripts/labels/doc_url/generators/base.py new file mode 100644 index 0000000000..ff931a5483 --- /dev/null +++ b/scripts/labels/doc_url/generators/base.py @@ -0,0 +1,37 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Describes the base interface for the generation of documentation URL labels. +""" +from typing import Iterable, Optional, Tuple + + +class Base: + kind = "abstract" + + def __init__(self, analyser: str): + self.analyser = analyser + + def skip(self, _checker: str) -> bool: + """ + Returns ``True`` if the result for `checker` from the current generator + should be discarded. + """ + return False + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + """ + Returns a generator that can be consumed in order to obtain + ``(checker, url)`` pairs, one documentation URL for each encountered + checker. + The exact details are analyser-specific! + + A ``None`` in the place of ``url`` indicates that the ``checker`` was + encountered, but no viable URL could be generated for it. 
+ """ + return iter(()) diff --git a/scripts/labels/doc_url/generators/clang_diagnostic.py b/scripts/labels/doc_url/generators/clang_diagnostic.py new file mode 100644 index 0000000000..d8df3f1fea --- /dev/null +++ b/scripts/labels/doc_url/generators/clang_diagnostic.py @@ -0,0 +1,41 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Clang compiler diagnostics (implemented through CodeChecker as Clang-Tidy +checks.) +""" +from typing import Iterable, Optional, Tuple + +from ... import http_ as http +from ...projects.llvm import clang_diagnostic +from .base import Base + + +class ClangDiagnosticGenerator(Base): + """ + Generates documentation URLs for Clang diagnostics from the Sphinx-based + documentation metastructure. 
+ """ + + kind = "clang-diagnostic" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._http = http.HTMLAcquirer() + + def skip(self, checker: str) -> bool: + return not checker.startswith("clang-diagnostic") + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + for checker, _, section in clang_diagnostic \ + .get_clang_diagnostic_documentation(self._http): + anchor = section.find(".//a[@class=\"headerlink\"]") \ + .attrib["href"] \ + .lstrip('#') + + yield checker, f"{clang_diagnostic.URL}#{anchor}" diff --git a/scripts/labels/doc_url/generators/clang_tidy.py b/scripts/labels/doc_url/generators/clang_tidy.py new file mode 100644 index 0000000000..d948b2cabe --- /dev/null +++ b/scripts/labels/doc_url/generators/clang_tidy.py @@ -0,0 +1,48 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Clang-Tidy.""" +from typing import Iterable, Optional, Tuple + +from ... import http_ as http +from .base import Base + + +class ClangTidyGenerator(Base): + """ + Generates documentation URLs for Clang-Tidy checkers from the Sphinx-based + documentation table of contents. 
+ """ + + kind = "clang-tidy" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._http = http.HTMLAcquirer() + self.documentation_root = \ + "https://clang.llvm.org/extra/clang-tidy/checks" + self.toc_url = f"{self.documentation_root}/list.html" + + def skip(self, checker: str) -> bool: + return checker.startswith("clang-diagnostic") \ + or checker.startswith("clang-analyzer") + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + dom = self._http.get_dom(self.toc_url) + if dom is None: + return iter(()) + + for link in dom.xpath("//a[contains(@class, \"reference\") and " + "descendant::span[@class=\"doc\"]]"): + checker = link.text_content() + url = link.attrib["href"] + if self.skip(checker): + continue + + yield checker, f"{self.documentation_root}/{url}" + + return iter(()) diff --git a/scripts/labels/doc_url/generators/clangsa.py b/scripts/labels/doc_url/generators/clangsa.py new file mode 100644 index 0000000000..06dd8d594e --- /dev/null +++ b/scripts/labels/doc_url/generators/clangsa.py @@ -0,0 +1,65 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Clang Static Analyzer.""" +from typing import Dict, Iterable, Optional, Tuple + +from ... import http_ as http +from .base import Base + + +class ClangSAGenerator(Base): + """ + Generates documentation URLs for Clang SA checkers from the Sphinx-based + documentation metastructure. 
+ """ + + kind = "clangsa" + + def __init__(self, analyser: str): + super().__init__(analyser) + self._http = http.HTMLAcquirer() + self.toc_url = "https://clang.llvm.org/docs/analyzer/checkers.html" + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + dom = self._http.get_dom(self.toc_url) + if dom is None: + return iter(()) + + sections: Dict[str, str] = {} + for section in dom.xpath( + "//section[descendant::a[@class=\"toc-backref\"]]"): + anchor = section.find(".//a[@class=\"headerlink\"]") \ + .attrib["href"] \ + .lstrip('#') + header = list(section.find(".//a[@class=\"toc-backref\"]") + .itertext()) + section_num, checker_name_parts = header[0], header[1].split(" ") + checker_name = checker_name_parts[0] + # languages = checker_name_parts[1] \ + # .split('(')[0] \ + # .split(')')[0] \ + # .split(", ") + + if '.' not in checker_name: + continue + if sum((1 for c in section_num if c == '.')) != 4: + continue + sections[checker_name] = anchor + + # Some sections are for larger groups in the text, such as the list of + # "Experimental checkers", or for the description of a group like + # "core". + non_checker_keys = {k for k in sections + if [k2 for k2 in sections + if k2.lower() != k.lower() and + k2.lower().startswith(f"{k.lower()}.")] + } + + for header in sorted(sections.keys() - non_checker_keys): + yield header, f"{self.toc_url}#{sections[header]}" + return iter(()) diff --git a/scripts/labels/doc_url/generators/markdownlint.py b/scripts/labels/doc_url/generators/markdownlint.py new file mode 100644 index 0000000000..f09b5b6793 --- /dev/null +++ b/scripts/labels/doc_url/generators/markdownlint.py @@ -0,0 +1,41 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Markdownlint.""" +from typing import Iterable, Optional, Tuple + +from ... import http_ as http +from ...exception import EngineError +from ...projects import markdownlint +from .base import Base + + +class MarkdownlintGenerator(Base): + """ + Generates documentation URLs for Markdownlint rules based on its Markdown + documentation file. + """ + + kind = "markdownlint" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._http = http.HTMLAcquirer() + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + try: + version = markdownlint.get_markdownlint_latest_release(self._http) + except Exception as e: + raise EngineError( + "Failed to obtain the Markdownlint documentation") from e + + url = "https://github.com/markdownlint/markdownlint/blob/" \ + f"{version}" \ + "/docs/RULES.md" + for checker, anchor in markdownlint \ + .get_markdownlint_rules(self._http, url): + yield checker, f"{url}#{anchor}" diff --git a/scripts/labels/label_tool/doc_url/output.py b/scripts/labels/doc_url/output.py similarity index 100% rename from scripts/labels/label_tool/doc_url/output.py rename to scripts/labels/doc_url/output.py diff --git a/scripts/labels/label_tool/doc_url/verifiers/__init__.py b/scripts/labels/doc_url/verifiers/__init__.py similarity index 95% rename from scripts/labels/label_tool/doc_url/verifiers/__init__.py rename to scripts/labels/doc_url/verifiers/__init__.py index 3d2938b0b5..e24cd75d8c 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/__init__.py +++ b/scripts/labels/doc_url/verifiers/__init__.py @@ -13,6 +13,7 @@ from .generic import Outcome, \ HTTPStatusCodeVerifier, HTMLAnchorVerifier from .status import Status +from . 
import generic __all__ = [ @@ -21,4 +22,5 @@ "HTTPStatusCodeVerifier", "HTMLAnchorVerifier", "Status", + "generic" ] diff --git a/scripts/labels/label_tool/doc_url/verifiers/analyser_selection.py b/scripts/labels/doc_url/verifiers/analyser_selection.py similarity index 96% rename from scripts/labels/label_tool/doc_url/verifiers/analyser_selection.py rename to scripts/labels/doc_url/verifiers/analyser_selection.py index cfb6a109cc..1267d46eab 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/analyser_selection.py +++ b/scripts/labels/doc_url/verifiers/analyser_selection.py @@ -34,6 +34,7 @@ def select(labels: SingleLabels) -> Type: else HTTPStatusCodeVerifier +# Set an analyser to explicit None to disable the default "generic" behaviour. AnalyserVerifiers: Dict[str, Union[Type, Tuple[Type, ...]]] = defaultdict( lambda: _Generic, { diff --git a/scripts/labels/label_tool/doc_url/verifiers/clang_diagnostic.py b/scripts/labels/doc_url/verifiers/clang_diagnostic.py similarity index 98% rename from scripts/labels/label_tool/doc_url/verifiers/clang_diagnostic.py rename to scripts/labels/doc_url/verifiers/clang_diagnostic.py index 69f06305b7..0d282e6464 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/clang_diagnostic.py +++ b/scripts/labels/doc_url/verifiers/clang_diagnostic.py @@ -13,9 +13,9 @@ import urllib.parse from ... 
import http_ as http, transformer +from ...projects.llvm import fetch_llvm_release_versions from ...transformer import Version from .generic import HTMLAnchorVerifier -from .llvm import fetch_llvm_release_versions from .status import Status diff --git a/scripts/labels/label_tool/doc_url/verifiers/clang_tidy.py b/scripts/labels/doc_url/verifiers/clang_tidy.py similarity index 91% rename from scripts/labels/label_tool/doc_url/verifiers/clang_tidy.py rename to scripts/labels/doc_url/verifiers/clang_tidy.py index 0d70fcdec5..d04904c6a5 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/clang_tidy.py +++ b/scripts/labels/doc_url/verifiers/clang_tidy.py @@ -9,9 +9,9 @@ from typing import Optional from ... import transformer +from ...projects.llvm import fetch_llvm_release_versions from ...transformer import Version from .generic import HTTPStatusCodeVerifier -from .llvm import fetch_llvm_release_versions from .status import Status @@ -55,6 +55,10 @@ def skip(self, checker: str, url: str) -> Status: # Clang-Tidy checks, but their documentation is in a completely # different structure. return Status.SKIP + if checker.startswith("clang-analyzer"): + # Clang-Tidy allows calling the Clang Static Analyzer internally, + # but we do not support this through CodeChecker. + return Status.SKIP if not url: return Status.MISSING return Status.OK diff --git a/scripts/labels/label_tool/doc_url/verifiers/clangsa.py b/scripts/labels/doc_url/verifiers/clangsa.py similarity index 98% rename from scripts/labels/label_tool/doc_url/verifiers/clangsa.py rename to scripts/labels/doc_url/verifiers/clangsa.py index 6610c01047..1c13de7994 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/clangsa.py +++ b/scripts/labels/doc_url/verifiers/clangsa.py @@ -10,9 +10,9 @@ import urllib.parse from ... 
import http_ as http, transformer +from ...projects.llvm import fetch_llvm_release_versions from ...transformer import Version from .generic import HTMLAnchorVerifier -from .llvm import fetch_llvm_release_versions from .status import Status diff --git a/scripts/labels/label_tool/doc_url/verifiers/generic.py b/scripts/labels/doc_url/verifiers/generic.py similarity index 98% rename from scripts/labels/label_tool/doc_url/verifiers/generic.py rename to scripts/labels/doc_url/verifiers/generic.py index 4072c751a9..20fa545c10 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/generic.py +++ b/scripts/labels/doc_url/verifiers/generic.py @@ -32,9 +32,10 @@ def __init__(self, analyser: str): def skip(self, _checker: str, _url: str) -> Status: """ Returns `Status.OK` if the current verifier is capable of verifying the - `checker`. `Status.SKIP` is returned in case the `checker` is - unverifiable due to a pattern, and `Status.MISSING` is returned if - it is unverifiable due to its lack of `url`. + `checker`. + `Status.SKIP` is returned in case the `checker` is unverifiable due to + a pattern, and `Status.MISSING` is returned if it is unverifiable due + to its lack of `url`. """ return Status.OK diff --git a/scripts/labels/label_tool/doc_url/verifiers/status.py b/scripts/labels/doc_url/verifiers/status.py similarity index 95% rename from scripts/labels/label_tool/doc_url/verifiers/status.py rename to scripts/labels/doc_url/verifiers/status.py index 022a5a5b30..a062ab3272 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/status.py +++ b/scripts/labels/doc_url/verifiers/status.py @@ -11,23 +11,23 @@ class Status(Enum): """The outcome of an attempt at verifying a checker's documentation.""" + # The result could not be determined. UNKNOWN = Enumerator() - """The result could not be determined.""" - SKIP = Enumerator() """ The verifier engine skipped verifying the checker. 
This is an internal indicator used for "multi-pass" verifications, and it is not normally reported to the user. """ + SKIP = Enumerator() - MISSING = Enumerator() """ The verification could not execute because the documentation data is empty. """ + MISSING = Enumerator() - OK = Enumerator() """Successful.""" + OK = Enumerator() - NOT_OK = Enumerator() """Not successful. (Deterministic result.)""" + NOT_OK = Enumerator() diff --git a/scripts/labels/label_tool/doc_url/verify_tool/__init__.py b/scripts/labels/doc_url/verify_tool/__init__.py similarity index 100% rename from scripts/labels/label_tool/doc_url/verify_tool/__init__.py rename to scripts/labels/doc_url/verify_tool/__init__.py diff --git a/scripts/labels/label_tool/doc_url/verify_tool/__main__.py b/scripts/labels/doc_url/verify_tool/__main__.py similarity index 74% rename from scripts/labels/label_tool/doc_url/verify_tool/__main__.py rename to scripts/labels/doc_url/verify_tool/__main__.py index 3717805e9d..9cf7129cc9 100755 --- a/scripts/labels/label_tool/doc_url/verify_tool/__main__.py +++ b/scripts/labels/doc_url/verify_tool/__main__.py @@ -9,6 +9,7 @@ """Implementation of the user-facing entry point to the script.""" import argparse import fnmatch +from functools import partial import os import pathlib import sys @@ -21,10 +22,11 @@ from ...checker_labels import SingleLabels, get_checker_labels, \ update_checker_labels -from ...codechecker import codechecker_src_root +from ...codechecker import default_checker_label_dir +from ...exception import EngineError from ...output import Settings as GlobalOutputSettings, \ error, log, trace, coloured, emoji -from ...util import plural +from ...util import merge_if_no_collision, plural from ..output import Settings as OutputSettings from ..verifiers import analyser_selection from . import tool @@ -53,30 +55,26 @@ The return code of this tool is indicative of errors encountered during execution. 
-'0' is returned for no errors (success), '1' indicate general -errors, '2' indicate configuration errors. +'0' is returned for no errors (success), '1' indicates general errors, +'2' indicates configuration errors. In every other case, the return value is the OR of a bitmask: """ f""" Having found checkers without a 'doc_url' label will set the bit -'{tool.ReturnFlags.HAD_MISSING}'. +'{tool.ReturnFlags.HadMissing}'. Having found checkers that have a "Not OK" label will set the bit -'{tool.ReturnFlags.HAD_NOT_OK}'. +'{tool.ReturnFlags.HadNotOK}'. Having found checkers that were "Not OK" but managed to obtain a fixed, -working URL will set the bit '{tool.ReturnFlags.HAD_FOUND}'. +working URL will set the bit '{tool.ReturnFlags.HadFound}'. Having found checkers that were "Not OK" and failed the attempted -automatic fixing routing will set the bit '{tool.ReturnFlags.HAD_GONE}'. +automatic fixing routing will set the bit '{tool.ReturnFlags.HadGone}'. """ ) epilogue: str = "" -def args(parser: Optional[argparse.ArgumentParser]) -> argparse.ArgumentParser: - def default_checker_label_dir() -> Optional[pathlib.Path]: - codechecker_root = codechecker_src_root() - return codechecker_root / "config" / "labels" / "analyzers" \ - if codechecker_root else None - +def arg_parser(parser: Optional[argparse.ArgumentParser]) \ + -> argparse.ArgumentParser: if not parser: parser = argparse.ArgumentParser( prog=__package__, @@ -235,45 +233,55 @@ def default_checker_label_dir() -> Optional[pathlib.Path]: return parser -def _handle_package_args(args_: argparse.Namespace): - if not args_.checker_label_dir: +def _handle_package_args(args: argparse.Namespace): + if not args.checker_label_dir: log("%sFATAL: Failed to find the checker label configuration " "directory, and it was not specified. 
" "Please specify!", emoji(":no_entry: ")) raise argparse.ArgumentError(None, "positional argument 'checker_label_dir'") - if args_.jobs < 0: + if args.jobs < 0: log("%sFATAL: There can not be a non-positive number of jobs.", emoji(":no_entry: ")) raise argparse.ArgumentError(None, "-j/--jobs") - OutputSettings.set_report_missing(args_.report_missing or - args_.verbose or - args_.very_verbose) - OutputSettings.set_report_ok(args_.report_ok or - args_.verbose or - args_.very_verbose) - GlobalOutputSettings.set_trace(args_.verbose_debug or args_.very_verbose) - - -def main(args_: argparse.Namespace) -> Optional[int]: + OutputSettings.set_report_missing(args.report_missing or + args.verbose or + args.very_verbose) + OutputSettings.set_report_ok(args.report_ok or + args.verbose or + args.very_verbose) + GlobalOutputSettings.set_trace(args.verbose_debug or args.very_verbose) + + +def _emit_collision_error(analyser: str, + checker: str, + existing_fix: str, + new_fix: str): + error("%s%s/%s: %s [%s] =/= [%s]", emoji(":collision: "), + analyser, checker, + coloured("FIX COLLISION", "red"), + existing_fix, new_fix) + + +def main(args: argparse.Namespace) -> Optional[int]: try: - _handle_package_args(args_) - except argparse.ArgumentError as err: + _handle_package_args(args) + except argparse.ArgumentError as arg_err: # Simulate argparse's return code of parse_args(). 
- raise SystemExit(2) from err + raise SystemExit(2) from arg_err rc = 0 statistics: List[tool.Statistics] = [] - trace("Checking checker labels from '%s'", args_.checker_label_dir) + trace("Checking checker labels from '%s'", args.checker_label_dir) - args_.checker_label_dir = pathlib.Path(args_.checker_label_dir) - if not args_.checker_label_dir.is_dir(): - error("'%s' is not a directory!", args_.checker_label_dir) + args.checker_label_dir = pathlib.Path(args.checker_label_dir) + if not args.checker_label_dir.is_dir(): + error("'%s' is not a directory!", args.checker_label_dir) return 1 # FIXME: pathlib.Path.walk() is only available Python >= 3.12. - for root, _, files in os.walk(args_.checker_label_dir): + for root, _, files in os.walk(args.checker_label_dir): root = pathlib.Path(root) for file in sorted(files): @@ -281,7 +289,7 @@ def main(args_: argparse.Namespace) -> Optional[int]: if file.suffix != ".json": continue analyser = file.stem - if args_.analysers and analyser not in args_.analysers: + if args.analysers and analyser not in args.analysers: continue path = root / file @@ -298,57 +306,53 @@ def main(args_: argparse.Namespace) -> Optional[int]: error("Failed to obtain checker labels for '%s'!", analyser) continue - if args_.checkers: + if args.checkers: labels = {checker: url for checker, url in labels.items() - for filter_ in args_.checkers + for filter_ in args.checkers if fnmatch.fnmatchcase(checker, filter_)} if not labels: filt = " or match the \"--checkers\" %s" + \ - plural(args_.checkers, "filter", "filters") \ - if args_.checkers else "" + plural(args.checkers, "filter", "filters") \ + if args.checkers else "" log(f'{emoji(":cup_with_straw: ")}' f'No checkers are configured{filt}.') continue - process_count = clamp(1, args_.jobs, len(labels)) \ - if len(labels) > 2 * args_.jobs else 1 + process_count = clamp(1, args.jobs, len(labels)) \ + if len(labels) > 2 * args.jobs else 1 fixes: SingleLabels = {} conflicts: Set[str] = set() - for verifier 
in analyser_selection.select_verifier(analyser, -                                                             labels): +        for verifier_class in analyser_selection \ +                .select_verifier(analyser, labels): log("%sVerifying '%s' as '%s' (%s)...", emoji(":thought_balloon: "), analyser, -                verifier.kind, verifier) -            status, local_fixes, statistic = tool.execute( -                analyser, -                verifier, -                labels, -                process_count, -                args_.skip_fixes, -                args_.reset_to_upstream, -            ) -            statistics.append(statistic) -            rc = int(tool.ReturnFlags(rc) | status) - -            for checker in local_fixes.keys() - conflicts: -                fix = local_fixes[checker] -                try: -                    existing_fix = fixes[checker] -                    if existing_fix != fix: -                        error("%s%s/%s: %s [%s] =/= [%s]", -                              emoji(":collision: "), -                              analyser, checker, -                              coloured("FIX COLLISION", "red"), -                              existing_fix, fix -                              ) -                        conflicts.add(checker) -                        del fixes[checker] -                except KeyError: -                    fixes[checker] = fix - -        if args_.apply_fixes and fixes: +            verifier_class.kind, verifier_class) +            try: +                status, local_fixes, statistic = tool.execute( +                    analyser, +                    verifier_class, +                    labels, +                    process_count, +                    args.skip_fixes, +                    args.reset_to_upstream, +                ) +                statistics.append(statistic) +                rc = int(tool.ReturnFlags(rc) | status) +            except EngineError: +                import traceback +                traceback.print_exc() + +                error("Failed to execute verifier '%s' (%s)", +                      verifier_class.kind, verifier_class) +                rc = int(tool.ReturnFlags(rc) | +                         tool.ReturnFlags.GeneralError) +                continue + +            merge_if_no_collision(fixes, local_fixes, conflicts, +                                  partial(_emit_collision_error, analyser)) +        if args.apply_fixes and fixes: log("%sUpdating %s %s for '%s'...
('%s')", emoji(":writing_hand: "), coloured(len(fixes), "green"), @@ -358,6 +362,9 @@ def main(args_: argparse.Namespace) -> Optional[int]: try: update_checker_labels(analyser, path, "doc_url", fixes) except Exception: + import traceback + traceback.print_exc() + error("Failed to write checker labels for '%s'!", analyser) continue @@ -375,6 +382,6 @@ def main(args_: argparse.Namespace) -> Optional[int]: if __name__ == "__main__": def _main(): - _args = args(None).parse_args() - sys.exit(main(_args) or 0) + args = arg_parser(None).parse_args() + sys.exit(main(args) or 0) _main() diff --git a/scripts/labels/label_tool/doc_url/verify_tool/action.py b/scripts/labels/doc_url/verify_tool/action.py similarity index 100% rename from scripts/labels/label_tool/doc_url/verify_tool/action.py rename to scripts/labels/doc_url/verify_tool/action.py diff --git a/scripts/labels/label_tool/doc_url/verify_tool/report.py b/scripts/labels/doc_url/verify_tool/report.py similarity index 91% rename from scripts/labels/label_tool/doc_url/verify_tool/report.py rename to scripts/labels/doc_url/verify_tool/report.py index d938c0d8fd..e4c1ea2a88 100644 --- a/scripts/labels/label_tool/doc_url/verify_tool/report.py +++ b/scripts/labels/doc_url/verify_tool/report.py @@ -25,7 +25,7 @@ def print_verifications(analyser: str, emoji(":magnifying_glass_tilted_left:" ":magnifying_glass_tilted_right: "), analyser, - coloured(len(missing), "yellow"), + coloured(f"{len(missing)}", "yellow"), plural(missing, "checker", "checkers"), plural(missing, "does", "do"), ) @@ -41,16 +41,16 @@ def print_verifications(analyser: str, log("%s%s: All %s %s successfully verified.", emoji(":magnifying_glass_tilted_left::check_mark_button: "), analyser, - coloured(len(ok), "green"), + coloured(f"{len(ok)}", "green"), plural(ok, "checker", "checkers"), ) else: log("%s%s: %s %s failed documentation verification. 
(%s succeeded.)", emoji(":magnifying_glass_tilted_left::warning: "), analyser, - coloured(len(not_ok), "red"), + coloured(f"{len(not_ok)}", "red"), plural(not_ok, "checker", "checkers"), - coloured(len(ok), "green") + coloured(f"{len(ok)}", "green") if ok else coloured("0", "red"), ) @@ -81,7 +81,7 @@ def print_resets(analyser: str, analyser, coloured(attempted, "magenta"), plural(attempted, "checker's", "checkers'"), - coloured(len(new_urls), "cyan") + coloured(f"{len(new_urls)}", "cyan") if new_urls else coloured("0", "red"), ) deque((log(" %s· %s [%s]", @@ -93,7 +93,6 @@ def print_resets(analyser: str, def print_fixes(analyser: str, - _urls: SingleLabels, found: SingleLabels, gone: SingleLabels): if not gone: @@ -101,7 +100,7 @@ def print_fixes(analyser: str, log("%s%s: Found new documentation for all %s %s.", emoji(":magnifying_glass_tilted_left::telescope: "), analyser, - coloured(len(found), "green"), + coloured(f"{len(found)}", "green"), plural(len(found), "checker", "checkers"), ) else: @@ -109,16 +108,16 @@ def print_fixes(analyser: str, log("%s%s: All %s %s gone.", emoji(":magnifying_glass_tilted_left::headstone: "), analyser, - coloured(len(gone), "red"), + coloured(f"{len(gone)}", "red"), plural(len(gone), "checker", "checkers"), ) else: log("%s%s: %s %s gone. 
(Found %s.)", emoji(":magnifying_glass_tilted_left::bar_chart: "), analyser, - coloured(len(gone), "red"), + coloured(f"{len(gone)}", "red"), plural(len(gone), "checker", "checkers"), - coloured(len(found), "green") + coloured(f"{len(found)}", "green") if found else coloured("0", "red") ) diff --git a/scripts/labels/label_tool/doc_url/verify_tool/tool.py b/scripts/labels/doc_url/verify_tool/tool.py similarity index 88% rename from scripts/labels/label_tool/doc_url/verify_tool/tool.py rename to scripts/labels/doc_url/verify_tool/tool.py index dce24b1567..7d4ad3c222 100644 --- a/scripts/labels/label_tool/doc_url/verify_tool/tool.py +++ b/scripts/labels/doc_url/verify_tool/tool.py @@ -40,16 +40,18 @@ class ReturnFlags(IntFlag): A bit flag structure indicating the return value of the execution of the tool's `execute` function. """ + # pylint: disable=invalid-name + # Zero indicates an all-success, but `Enumerator()` starts from 1. # Reserved flags used for other purposes external to the tool. 
- GENERAL_ERROR = Enumerator() - CONFIGURATION_OR_ARGS_ERROR = Enumerator() + GeneralError = Enumerator() + ConfigurationOrArgsError = Enumerator() - HAD_MISSING = Enumerator() - HAD_NOT_OK = Enumerator() - HAD_FOUND = Enumerator() - HAD_GONE = Enumerator() + HadMissing = Enumerator() + HadNotOK = Enumerator() + HadFound = Enumerator() + HadGone = Enumerator() def execute(analyser: str, @@ -99,21 +101,21 @@ def execute(analyser: str, OK=len(ok) if ok else None, Not_OK=len(not_ok) if not_ok else None, ) - status = status | (ReturnFlags.HAD_MISSING if missing else 0) + status |= (ReturnFlags.HadMissing if missing else 0) if not_ok: - status |= ReturnFlags.HAD_NOT_OK + status |= ReturnFlags.HadNotOK if not skip_fixes: found, gone = action.run_fixes( pool, {checker: labels[checker] for checker in labels.keys() & not_ok} ) - report.print_fixes(analyser, labels, found, gone) + report.print_fixes(analyser, found, gone) urls_to_save.update(found) stats = stats._replace(Found=len(found) if found else None, Gone=len(gone) if gone else None, ) - status = status | (ReturnFlags.HAD_FOUND if found else 0) \ - | (ReturnFlags.HAD_GONE if gone else 0) + status |= (ReturnFlags.HadFound if found else 0) \ + | (ReturnFlags.HadGone if gone else 0) return status, urls_to_save, stats diff --git a/scripts/labels/doc_url_generate.py b/scripts/labels/doc_url_generate.py deleted file mode 100644 index f149a987a8..0000000000 --- a/scripts/labels/doc_url_generate.py +++ /dev/null @@ -1,138 +0,0 @@ -# FIXME: Subsume into the newer label_tool/doc_url package! 
-import argparse -import json -import sys -import urllib3 -import xml.etree.ElementTree as ET -from collections import OrderedDict - - -def clangsa(label_file): - url = 'https://clang.llvm.org/docs/analyzer/checkers.html' - - http = urllib3.PoolManager() - r = http.request('GET', url) - root = ET.fromstring(r.data) - - checker_anchors = [] - for x in root.findall('.//{*}a[@title="Permalink to this headline"]'): - checker_anchors.append(x.attrib['href'].lstrip('#')) - - with open(label_file, encoding='utf-8') as f: - checkers = json.load(f)['labels'].keys() - - docs = {} - for checker in checkers: - c = checker.lower().replace('.', '-') - # next() evaluates the generator immediately. - # pylint: disable=cell-var-from-loop - anchor = next(filter( - lambda anchor: anchor.startswith(c), checker_anchors), None) - - if anchor: - docs[checker] = f'{url}#{anchor}' - - return docs - - -def clang_tidy(label_file): - url = 'https://clang.llvm.org/extra/clang-tidy/checks/list.html' - - http = urllib3.PoolManager() - r = http.request('GET', url) - root = ET.fromstring(r.data) - - checker_anchors = [] - for x in root.findall('.//{*}a[@class="reference external"]'): - checker_anchors.append(x.attrib['href']) - - with open(label_file, encoding='utf-8') as f: - checkers = json.load(f)['labels'].keys() - - url = url[:url.rfind('/') + 1] - docs = {} - for checker in checkers: - # next() evaluates the generator immediately. 
- # pylint: disable=cell-var-from-loop - anchor = next(filter( - lambda anchor: anchor.startswith(checker), checker_anchors), None) - - if anchor: - docs[checker] = f'{url}{anchor}' - - return docs - - -def get_labels_with_docs(label_file, docs): - with open(label_file, encoding='utf-8') as f: - labels = json.load(f, object_pairs_hook=OrderedDict) - - for checker, label in labels['labels'].items(): - if checker in docs: - while True: - try: - x = next(filter(lambda x: x.startswith('doc_url'), label)) - label.remove(x) - except StopIteration: - break - - label.append(f'doc_url:{docs[checker]}') - else: - x = next(filter(lambda x: x.startswith('doc_url'), label), None) - info = f'Previous URL: {x[x.find(":") + 1:]}' if x \ - else 'No previous URL.' - - print( - f'Documentation URL not found for {checker}. {info}', - file=sys.stderr) - - label.sort() - - return labels - - -analyzer_doc_getter = { - 'clangsa': clangsa, - 'clang-tidy': clang_tidy -} - - -def cli_args(): - parser = argparse.ArgumentParser() - - parser.add_argument( - '--label-file', - required=True, - help='Path to the label file which will be inserted the checker ' - 'documentation URLs.') - - parser.add_argument( - '--analyzer', - required=True, - choices=analyzer_doc_getter.keys(), - help='Analyzer name that defines the format of the URL.') - - parser.add_argument( - '--dry-run', - action='store_true', - help='Print the content of the resulting label file instead of ' - 'modifying it.') - - return parser.parse_args() - - -def main(): - args = cli_args() - - docs = analyzer_doc_getter[args.analyzer](args.label_file) - labels = get_labels_with_docs(args.label_file, docs) - - if args.dry_run: - print(json.dumps(labels, indent=2)) - else: - with open(args.label_file, 'w', encoding='utf-8') as f: - json.dump(labels, f, indent=2) - - -if __name__ == '__main__': - main() diff --git a/scripts/labels/exception.py b/scripts/labels/exception.py new file mode 100644 index 0000000000..94638cf742 --- /dev/null +++ 
b/scripts/labels/exception.py @@ -0,0 +1,12 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Miscellaneous `Exception` classes.""" + + +class EngineError(Exception): +    """Indicates a generic failure of a generator or verifier engine.""" diff --git a/scripts/labels/label_tool/http_.py b/scripts/labels/http_.py similarity index 94% rename from scripts/labels/label_tool/http_.py rename to scripts/labels/http_.py index ceaa93c23c..b91a77663e 100644 --- a/scripts/labels/label_tool/http_.py +++ b/scripts/labels/http_.py @@ -18,7 +18,7 @@ Response = urllib3.response.BaseHTTPResponse -Url = Union[str, urllib.parse.ParseResult] +URL = Union[str, urllib.parse.ParseResult] class HTMLAcquirer: @@ -43,7 +43,7 @@ def _get_url_raw(self, url: str) -> Response: trace("HTTP GET '%s'", url) return self._pool.request("GET", url) - def get_url(self, url: Url) -> Response: + def get_url(self, url: URL) -> Response: """ Downloads the content of `url` and returns the raw HTTP response. """ @@ -61,7 +61,7 @@ def _get_dom_raw(self, url: str) -> Optional[html.HtmlElement]: dom = html.fromstring(response.data) if response.data else None return dom - def get_dom(self, url: Url) -> Optional[html.HtmlElement]: + def get_dom(self, url: URL) -> Optional[html.HtmlElement]: """ Downloads the content of `url`.
If the download is successful, parses the obtained HTML and returns the @@ -71,7 +71,7 @@ def get_dom(self, url: Url) -> Optional[html.HtmlElement]: url = url.geturl() return self._get_dom_raw(url) - def split_anchor(self, url: Url) -> Tuple[str, str]: + def split_anchor(self, url: URL) -> Tuple[str, str]: if isinstance(url, str) and '#' not in url: return url, "" @@ -100,7 +100,7 @@ def __init__(self, cache_size: int = DefaultCacheSize): self._cache: Dict[str, CachingHTMLAcquirer.CacheType] = {} self._cache_lru: Dict[str, datetime.datetime] = {} - def get_url(self, url: Url) -> Response: + def get_url(self, url: URL) -> Response: """ Downloads the content of `url` after stripping the HTML anchor off of the request, and returns the raw HTTP response. @@ -115,7 +115,7 @@ def get_url(self, url: Url) -> Response: response, _ = cached return response - def get_dom(self, url: Url) -> Optional[html.HtmlElement]: + def get_dom(self, url: URL) -> Optional[html.HtmlElement]: """ Downloads the content of `url` after stripping the HTML anchor off of the request. diff --git a/scripts/labels/mdl.py b/scripts/labels/mdl.py deleted file mode 100644 index 4b93e683de..0000000000 --- a/scripts/labels/mdl.py +++ /dev/null @@ -1,55 +0,0 @@ -import argparse -import json -import re -import urllib3 - - -def cli_args(): - parser = argparse.ArgumentParser() - - parser.add_argument( - '--label-file', - required=True, - help='Path to the label file which will be inserted the checker ' - 'documentation URLs.') - - return parser.parse_args() - - -def main(): - """ Get CodeChecker labels for markdownlint analyzer. 
""" - args = cli_args() - - url = 'https://github.com/markdownlint/markdownlint/blob/v0.11.0/docs/RULES.md' - raw_url = url \ - .replace("github.com", "raw.githubusercontent.com") \ - .replace("/blob", "") - - http = urllib3.PoolManager() - r = http.request('GET', raw_url) - lines = r.data.decode().split('\n') - - labels = {} - rgx = re.compile(r"\s+\* \[(?PMD\d+)[^\]]+\]\((?P\S+)\)") - for line in lines: - m = rgx.match(line) - if m: - checker_name = m.group("name") - anchor = m.group("anchor") - if checker_name not in labels: - labels[checker_name] = [] - - labels[checker_name] = [ - f"doc_url:{url}{anchor}", - "severity:STYLE" - ] - - with open(args.label_file, 'w', encoding='utf-8') as f: - json.dump({ - "analyzer": "mdl", - "labels": dict(sorted(labels.items())) - }, f, indent=2) - - -if __name__ == "__main__": - main() diff --git a/scripts/labels/label_tool/output.py b/scripts/labels/output.py similarity index 100% rename from scripts/labels/label_tool/output.py rename to scripts/labels/output.py diff --git a/scripts/labels/projects/__init__.py b/scripts/labels/projects/__init__.py new file mode 100644 index 0000000000..a90a52b911 --- /dev/null +++ b/scripts/labels/projects/__init__.py @@ -0,0 +1,17 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Helper package for analyser-specific implementation of common tools.""" +from . 
import \ + llvm, \ + markdownlint + + +__all__ = [ + "llvm", + "markdownlint", +] diff --git a/scripts/labels/label_tool/doc_url/verifiers/llvm/__init__.py b/scripts/labels/projects/llvm/__init__.py similarity index 80% rename from scripts/labels/label_tool/doc_url/verifiers/llvm/__init__.py rename to scripts/labels/projects/llvm/__init__.py index ef221bfbc1..b44a00fb2a 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/llvm/__init__.py +++ b/scripts/labels/projects/llvm/__init__.py @@ -6,7 +6,12 @@ # # ------------------------------------------------------------------------- """Helper package to hoist common logic specific to the LLVM Project.""" +from . import \ + clang_diagnostic from .releases import fetch_llvm_release_versions -__all__ = ["fetch_llvm_release_versions"] +__all__ = [ + "clang_diagnostic", + "fetch_llvm_release_versions", +] diff --git a/scripts/labels/projects/llvm/clang_diagnostic.py b/scripts/labels/projects/llvm/clang_diagnostic.py new file mode 100644 index 0000000000..635357b417 --- /dev/null +++ b/scripts/labels/projects/llvm/clang_diagnostic.py @@ -0,0 +1,53 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Fetch the list of Clang compiler diagnostic sections from the documentation. +""" +from typing import Iterable, Tuple + +from lxml import html + +from ... import http_ as http + + +URL = "https://clang.llvm.org/docs/DiagnosticsReference.html" +DiagnosticPrefixes = ( + "-W", # Warnings. + "-R", # Remarks. +) + + +def get_clang_diagnostic_documentation(request: http.HTMLAcquirer) \ + -> Iterable[Tuple[str, str, html.HtmlElement]]: + """ + Returns the diagnostic ``
``s from the DOM of the documentation + `URL` page for Clang compiler warnings. + """ + dom = request.get_dom(URL) + if dom is None: + return iter(()) + + for section in dom.xpath( + "//section[descendant::a[@class=\"toc-backref\"]]"): + header = list(section.find(".//a[@class=\"toc-backref\"]") + .itertext()) + diagnostic_name = header[0] + if not diagnostic_name.startswith(DiagnosticPrefixes): + continue + + checker_name = diagnostic_name + for prefix in DiagnosticPrefixes: + if checker_name.startswith(prefix): + checker_name = checker_name.replace(prefix, '', 1) + if not checker_name: + continue + checker_name = f"clang-diagnostic-{checker_name.lower()}" + + yield checker_name, diagnostic_name, section + + return iter(()) diff --git a/scripts/labels/label_tool/doc_url/verifiers/llvm/releases.py b/scripts/labels/projects/llvm/releases.py similarity index 96% rename from scripts/labels/label_tool/doc_url/verifiers/llvm/releases.py rename to scripts/labels/projects/llvm/releases.py index 59f6253d8c..368dd30f20 100644 --- a/scripts/labels/label_tool/doc_url/verifiers/llvm/releases.py +++ b/scripts/labels/projects/llvm/releases.py @@ -12,8 +12,8 @@ from selenium import webdriver from selenium.webdriver.common.by import By as WebdriverBy -from ....output import error -from ....transformer import Version, Versions +from ...output import error +from ...transformer import Version, Versions def fetch_llvm_release_versions() -> Versions: diff --git a/scripts/labels/projects/markdownlint.py b/scripts/labels/projects/markdownlint.py new file mode 100644 index 0000000000..2ad4825567 --- /dev/null +++ b/scripts/labels/projects/markdownlint.py @@ -0,0 +1,67 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Markdownlint.""" +import json +import os +import re +import subprocess +from typing import Iterable, Tuple + +from urllib3.exceptions import HTTPError + +from .. import http_ as http + + +def get_markdownlint_latest_release(request: http.HTMLAcquirer) -> str: + """Get the version of the latest tag of ``markdownlint``.""" + api = "https://api.github.com/repos/markdownlint/markdownlint/tags" + response = request.get_url(api) + if response.status in (http.HTTPStatusCode.UNAUTHORIZED, + http.HTTPStatusCode.FORBIDDEN): + # GitHub API returns "403 Forbidden" for "Rate limit exceeded" cases. + try: + github_token = subprocess.check_output(["gh", "auth", "token"]) \ + .decode().strip() + except Exception: + try: + github_token = os.environ["GITHUB_TOKEN"] + except KeyError: + # pylint: disable=raise-missing-from + raise PermissionError("GitHub API rate limit exceeded, " + "specify 'GITHUB_TOKEN' enviromment " + "variable!") + + response = request._pool.request("GET", api, headers={ + "Authorization": f"Bearer {github_token}" + }) + + if response.status != http.HTTPStatusCode.OK: + raise HTTPError("Failed to get a valid response on second try, got " + f"{response.status} {response.reason} instead") + + data = json.loads(response.data) + return data[0]["name"] + + +RuleRe = re.compile(r"\s+\* \[(?PMD\d+)[^\]]+\]\(#(?P\S+)\)") + + +def get_markdownlint_rules(request: http.HTMLAcquirer, base_url: str) \ + -> Iterable[Tuple[str, str]]: + """Returns ``(rule, anchor)`` pairs of ``markdownlint`` rules.""" + raw_data_url = base_url \ + .replace("github.com", "raw.githubusercontent.com", 1) \ + .replace("/blob", '', 1) + + response = request.get_url(raw_data_url) + for line in response.data.decode().split('\n'): + match = RuleRe.match(line) + if not match: + continue + + yield match.group("name"), match.group("anchor") diff --git 
a/scripts/labels/pylint.py b/scripts/labels/pylint.py deleted file mode 100644 index 6c5995f27b..0000000000 --- a/scripts/labels/pylint.py +++ /dev/null @@ -1,58 +0,0 @@ -import json -import re -import subprocess - - -def get_severity_label_for_kind(kind: str) -> str: - """ - Get CodeChecker severity for a pylint kind. - - There are 5 kind of message types : - * (C) convention, for programming standard violation - * (R) refactor, for bad code smell - * (W) warning, for python specific problems - * (E) error, for probable bugs in the code - * (F) fatal, if an error occurred which prevented pylint from doing - further processing. - """ - severity = "UNSPECIFIED" - if kind == "F": - severity = "CRITICAL" - elif kind == "E": - severity = "HIGH" - elif kind == "W": - severity = "MEDIUM" - elif kind == "R": - severity = "STYLE" - elif kind == "C": - severity = "LOW" - - return f"severity:{severity}" - - -def main(): - """ Get CodeChecker labels for pylint analyzer. """ - out = subprocess.check_output( - ["pylint", "--list-msgs"], - stderr=subprocess.STDOUT, - universal_newlines=True, - encoding="utf-8", - errors="ignore") - - pattern = re.compile(r"^:(?P[^ ]+) \((?P\S)(?P\S+)\): .*") - labels = {} - for line in out.split('\n'): - m = pattern.match(line) - if m: - checker_name = m.group("name") - kind = m.group("kind") - labels[checker_name] = [get_severity_label_for_kind(kind)] - - print(json.dumps({ - "analyzer": "pylint", - "labels": labels - }, sort_keys=True, indent=2)) - - -if __name__ == "__main__": - main() diff --git a/scripts/labels/label_tool/requirements.txt b/scripts/labels/requirements.txt similarity index 100% rename from scripts/labels/label_tool/requirements.txt rename to scripts/labels/requirements.txt diff --git a/scripts/labels/label_tool/__init__.py b/scripts/labels/severity/__init__.py similarity index 50% rename from scripts/labels/label_tool/__init__.py rename to scripts/labels/severity/__init__.py index b734353a7f..35ad7fe87d 100644 --- 
a/scripts/labels/label_tool/__init__.py +++ b/scripts/labels/severity/__init__.py @@ -6,26 +6,14 @@ # # ------------------------------------------------------------------------- """ -This library ships reusable components and user-facing tools to verify, -generate, and adapt the checker labels in the CodeChecker configuration -structure. +Generates ``severity`` labels for checkers in the configuration. """ -# Load the interpreter injection first. -from . import codechecker - from . import \ - checker_labels, \ - http_, \ - output, \ - transformer, \ - util + generators, \ + output __all__ = [ - "checker_labels", - "codechecker", - "http_", + "generators", "output", - "transformer", - "util", ] diff --git a/scripts/labels/severity/generate_tool/__init__.py b/scripts/labels/severity/generate_tool/__init__.py new file mode 100644 index 0000000000..4d5b09d7c2 --- /dev/null +++ b/scripts/labels/severity/generate_tool/__init__.py @@ -0,0 +1,17 @@ +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +This subpackage implements logic that is primarily user-facing, as opposed to +reusable library-like components. +""" +from . import \ + tool + + +__all__ = [ + "tool", +] diff --git a/scripts/labels/severity/generate_tool/__main__.py b/scripts/labels/severity/generate_tool/__main__.py new file mode 100755 index 0000000000..a730d0cc35 --- /dev/null +++ b/scripts/labels/severity/generate_tool/__main__.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python3 +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Implementation of the user-facing entry point to the script.""" +import argparse +from functools import partial +import os +import pathlib +import sys +from typing import List, Optional, Set + +from tabulate import tabulate + +from ...checker_labels import SingleLabels, get_checker_labels, \ + update_checker_labels +from ...codechecker import default_checker_label_dir +from ...exception import EngineError +from ...output import Settings as GlobalOutputSettings, \ + error, log, trace, coloured, emoji +from ...util import merge_if_no_collision, plural +from ..generators import analyser_selection +from ..output import Settings as OutputSettings +from . import tool + + +short_help: str = """ +Auto-generate 'severity' labels for checkers based on analyser-specific +information and heuristics. +""" +description: str = ( + """ +Automatically generate the 'severity' categorisation labels from a known and +available, analyser-specific (this tool does not support a "generic" execution +pattern) heuristic. +This could be a "Table of Contents" (ToC) structure officially maintained by +the analyser, or an another form of similar classification, or an entirely +customised classifier heuristic implemented only by CodeChecker. + +The tool's output is primarily engineered to be human readable (with the added +sprinkle of colours and emojis). +If the output is not sent to an interactive terminal, the output switches to +the creation of a machine-readable output. + +The return code of this tool is indicative of errors encountered during +execution. +'0' is returned for no errors (success), '1' indicates general errors, +'2' indicates configuration errors. 
+In every other case, the return value is the OR of a bitmask: +""" + f""" +If there was a checker which already had a 'severity' but now the generator +generated a different value, the '{tool.ReturnFlags.HadUpdate}' bit will be +set. +If there were checkers without a 'severity' (or without any labels at all) but +the tool generated a valid 'severity' for them, the '{tool.ReturnFlags.HadNew}' +bit will be set. +If there are checkers with 'severity' labels that are no longer available in +the generated result, the '{tool.ReturnFlags.HadGone}' bit will be set. +In case after the analysis there are still checkers which do not have a +'severity' at all, the '{tool.ReturnFlags.RemainsMissing}' bit will be set. +""" +) +epilogue: str = "" + + +def arg_parser(parser: Optional[argparse.ArgumentParser]) \ + -> argparse.ArgumentParser: + if not parser: + parser = argparse.ArgumentParser( + prog=__package__, + description=description, + epilog=epilogue, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument( + "checker_label_dir", + metavar="LABEL_DIR", + nargs='?', + default=default_checker_label_dir(), + type=pathlib.PurePath, + help=""" +The configuration directory where the checker labels are available. +""") + + parser.add_argument( + "-f", "--fix", + dest="apply_fixes", + action="store_true", + help=""" +Apply the updated or generated 'severity' labels back into the input +configuration file. +""") + + filters = parser.add_argument_group("filter arguments") + + filters.add_argument( + "--analysers", "--analyzers", + metavar="ANALYSER", + nargs='*', + type=str, + help=""" +Filter for only the specified analysers before executing the verification. +Each analyser's configuration is present in exactly one JSON file, named +'.json'. +If 'None' is given, automatically run for every found configuration file. 
+""") + + output = parser.add_argument_group("output control arguments", """ +These optional arguments allow enabling additional verbosity for the output +of the program. +By default, the tool tries to be the most concise possible, and only report +meaningful findings and encountered errors. +""") + + output.add_argument( + "-v", "--verbose", + dest="verbose", + action="store_true", + help=""" +Shortcut to enable all verbosity options in this group that increase the +useful information presented on the output. +Does not enable any trace or debug information. +""") + + output.add_argument( + "--report-missing", + dest="report_missing", + action="store_true", + help=""" +If set, the output will contain an additional list that details which checkers +remain in the configuration file without an appropriate 'severity' label +("MISSING"). +""") + + output.add_argument( + "--report-ok", + dest="report_ok", + action="store_true", + help=""" +If set, the output will contain the "OK" reports for checkers which +severity classification is already the same as would be generated by this tool. +""") + + output.add_argument( + "-vd", "--verbose-debug", + dest="verbose_debug", + action="store_true", + help="Emit additional trace and debug output.") + + output.add_argument( + "-vv", "--very-verbose", + dest="very_verbose", + action="store_true", + help=""" +Shortcut to enable all verbosity options, including trace and debug +information. +""") + + return parser + + +def _handle_package_args(args: argparse.Namespace): + if not args.checker_label_dir: + log("%sFATAL: Failed to find the checker label configuration " + "directory, and it was not specified. 
" + "Please specify!", + emoji(":no_entry: ")) + raise argparse.ArgumentError(None, + "positional argument 'checker_label_dir'") + OutputSettings.set_report_missing(args.report_missing or + args.verbose or + args.very_verbose) + OutputSettings.set_report_ok(args.report_ok or + args.verbose or + args.very_verbose) + GlobalOutputSettings.set_trace(args.verbose_debug or args.very_verbose) + + +def _emit_collision_error(analyser: str, + checker: str, + existing_fix: str, + new_fix: str): + error("%s%s/%s: %s [%s] =/= [%s]", emoji(":collision: "), + analyser, checker, + coloured("FIX COLLISION", "red"), + existing_fix, new_fix) + + +def main(args: argparse.Namespace) -> Optional[int]: + try: + _handle_package_args(args) + except argparse.ArgumentError as arg_err: + # Simulate argparse's return code of parse_args. + raise SystemExit(2) from arg_err + + rc = 0 + statistics: List[tool.Statistics] = [] + trace("Checking checker labels from '%s'", args.checker_label_dir) + + args.checker_label_dir = pathlib.Path(args.checker_label_dir) + if not args.checker_label_dir.is_dir(): + error("'%s' is not a directory!", args.checker_label_dir) + return 1 + + # FIXME: pathlib.Path.walk() is only available Python >= 3.12. + for root, _, files in os.walk(args.checker_label_dir): + root = pathlib.Path(root) + + for file in sorted(files): + file = pathlib.Path(file) + if file.suffix != ".json": + continue + analyser = file.stem + if args.analysers and analyser not in args.analysers: + continue + + path = root / file + log("%sLoading '%s'... 
('%s')", + emoji(":magnifying_glass_tilted_left: "), + analyser, + path) + try: + labels = get_checker_labels(analyser, path, "severity") + except Exception: + import traceback + traceback.print_exc() + + error("Failed to obtain checker labels for '%s'!", analyser) + continue + + geners = list(analyser_selection.select_generator(analyser)) + if not geners: + log("%sSkipped '%s', no generator implementation!", + emoji(":no_littering: "), + analyser) + continue + + severities: SingleLabels = {} + conflicts: Set[str] = set() + for generator_class in geners: + log("%sGenerating '%s' as '%s' (%s)...", + emoji(":thought_balloon: "), + analyser, + generator_class.kind, + generator_class) + try: + status, generated_urls, statistic = tool.execute( + analyser, + generator_class, + labels, + ) + statistics.append(statistic) + rc = int(tool.ReturnFlags(rc) | status) + except EngineError: + import traceback + traceback.print_exc() + + error("Failed to execute generator '%s' (%s)", + generator_class.kind, generator_class) + rc = int(tool.ReturnFlags(rc) | + tool.ReturnFlags.GeneralError) + continue + + merge_if_no_collision(severities, generated_urls, conflicts, + partial(_emit_collision_error, analyser)) + + if args.apply_fixes and severities: + log("%sUpdating %s %s for '%s'... 
('%s')", + emoji(":writing_hand: "), + coloured(f"{len(severities)}", "green"), + plural(severities, "checker", "checkers"), + analyser, + path) + try: + update_checker_labels(analyser, path, "severity", + severities) + except Exception: + import traceback + traceback.print_exc() + + error("Failed to write checker labels for '%s'!", + analyser) + continue + + log(tabulate(tabular_data=statistics, + headers=tuple(map(lambda s: s.replace('_', ' '), + tool.Statistics._fields)), + tablefmt="fancy_outline" if sys.stderr.isatty() + else "outline"), + file=sys.stderr) + + log("%s", repr(tool.ReturnFlags(rc))) + return rc + + +if __name__ == "__main__": + def _main(): + args = arg_parser(None).parse_args() + sys.exit(main(args) or 0) + _main() diff --git a/scripts/labels/severity/generate_tool/tool.py b/scripts/labels/severity/generate_tool/tool.py new file mode 100644 index 0000000000..4ec8461bc5 --- /dev/null +++ b/scripts/labels/severity/generate_tool/tool.py @@ -0,0 +1,262 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Provides implementations for the tool's pipeline.""" +from collections import deque +from enum import IntFlag, auto as Enumerator +import sys +from typing import List, NamedTuple, Optional, Tuple, Type, cast + +from ...checker_labels import SingleLabels +from ...output import Settings as GlobalOutputSettings, log, coloured, emoji +from ...util import plural +from ..generators.base import Base +from ..output import Settings as OutputSettings + + +class Statistics(NamedTuple): + """ + The result of the execution of one generator. 
+ """ + + Analyser: str + Generator: str + Checkers: int + Skipped: Optional[int] + Missing: Optional[int] + OK: Optional[int] + Updated: Optional[int] + Gone: Optional[int] + New: Optional[int] + All_Changed: int + Not_Found: Optional[int] + + +class ReturnFlags(IntFlag): + """ + A bit flag structure indicating the return value of the tool's `execute` + function. + """ + # pylint: disable=invalid-name + + # Zero indicates an all-success, but `Enumerator()` starts from 1. + + # Reserved flags used for other purposes external to the tool. + GeneralError = Enumerator() + ConfigurationOrArgsError = Enumerator() + + HadUpdate = Enumerator() + HadNew = Enumerator() + HadGone = Enumerator() + RemainsMissing = Enumerator() + + +def run_generator(generator: Base, severities: SingleLabels) \ + -> Tuple[List[str], SingleLabels, SingleLabels, List[str]]: + analyser = generator.analyser + ok: List[str] = [] + updated: SingleLabels = {} + new: SingleLabels = {} + gone: List[str] = [] + + generation_result: SingleLabels = dict(generator.generate()) + for checker in sorted(severities.keys() | generation_result.keys()): + if generator.skip(checker): + if GlobalOutputSettings.trace(): + log("%s%s/%s: %s", + emoji(":screwdriver: "), + analyser, checker, + coloured("SKIP", "light_magenta"), + file=sys.stderr) + continue + + existing_severity, new_severity = \ + severities.get(checker), generation_result.get(checker) + + if not existing_severity: + if new_severity: + new[checker] = new_severity + log("%s%s/%s: %s [%s]", + emoji(":magic_wand: "), + analyser, checker, + coloured("NEW", "magenta"), + new_severity, + file=sys.stdout) + else: + if OutputSettings.report_missing(): + log("%s%s/%s: %s []", + emoji(":white_question_mark: "), + analyser, checker, + coloured("MISSING", "yellow"), + file=sys.stdout) + elif existing_severity == new_severity: + ok.append(checker) + if OutputSettings.report_ok(): + log("%s%s/%s: %s [%s]", + emoji(":check_box_with_check: "), + analyser, checker, + 
coloured("OK", "green"), + existing_severity, + file=sys.stdout) + elif new_severity: + updated[checker] = new_severity + log("%s%s/%s: %s [%s] -> [%s]", + emoji(":sparkles: "), + analyser, checker, + coloured("UPDATED", "yellow"), + existing_severity, new_severity, + file=sys.stdout) + else: + gone.append(checker) + log("%s%s/%s: %s [%s]", + emoji(":ghost: "), + analyser, checker, + coloured("GONE", "red"), + existing_severity, + file=sys.stdout) + + return ok, updated, new, gone + + +def print_generation(analyser: str, + original_severities: SingleLabels, + ok: List[str], + updated: SingleLabels, + new: SingleLabels): + if not updated and not new: + log("%s%s: Severity for all %s %s is OK.", + emoji(":magnifying_glass_tilted_left::check_mark_button: "), + analyser, + coloured(f"{len(ok)}", "green"), + plural(ok, "checker", "checkers"), + ) + else: + if updated: + log("%s%s: %s %s changed severity. (%s kept previous.)", + emoji(":magnifying_glass_tilted_left::warning: "), + analyser, + coloured(f"{len(updated)}", "yellow"), + plural(updated, "checker", "checkers"), + coloured(f"{len(ok)}", "green") + if ok else coloured("0", "red"), + ) + if new: + log("%s%s: %s new %s did not have a `severity` label previously!", + emoji(":magnifying_glass_tilted_left:" + ":magnifying_glass_tilted_right: "), + analyser, + coloured(f"{len(new)}", "magenta"), + plural(new, "checker", "checkers"), + ) + + for checker in sorted((ok if OutputSettings.report_ok() else []) + + list(updated.keys()) + + list(new.keys())): + is_ok = (checker in ok) if OutputSettings.report_ok() else False + is_updated = checker in updated + icon = ":check_mark_button: " if is_ok \ + else ":pencil: " if is_updated \ + else ":notebook: " + colour = "green" if is_ok \ + else "yellow" if is_updated \ + else "magenta" + severity = original_severities[checker] if is_ok \ + else updated[checker] if is_updated \ + else new[checker] + + log(" %s· %s [%s]", emoji(icon), coloured(checker, colour), + severity) + + 
+def print_gone(analyser: str, + gone: SingleLabels): + if not gone: + return + + log("%s%s: %s %s severity gone.", + emoji(":magnifying_glass_tilted_left::bar_chart: "), + analyser, + coloured(f"{len(gone)}", "red"), + plural(len(gone), "checker's", "checkers'"), + ) + deque((log(" %s· %s [%s]", + emoji(":skull_and_crossbones: "), + coloured(checker, "red"), + gone[checker]) + for checker in sorted(gone)), + maxlen=0) + + +def print_missing(analyser: str, + missing: List[str]): + if not OutputSettings.report_missing(): + log("%s%s: %s %s will not have a `severity` label!", + emoji(":magnifying_glass_tilted_left:" + ":magnifying_glass_tilted_right: "), + analyser, + coloured(f"{len(missing)}", "yellow"), + plural(missing, "checker", "checkers"), + ) + if OutputSettings.report_missing(): + deque((log(" %s· %s ", + emoji(":bookmark: "), + coloured(checker, "yellow")) + for checker in sorted(missing)), + maxlen=0) + + +def execute(analyser: str, generator_class: Type, labels: SingleLabels) \ + -> Tuple[ReturnFlags, SingleLabels, Statistics]: + """ + Runs one instance of the generation for a specific analyser. 
+ """ + status = cast(ReturnFlags, 0) + generator = generator_class(analyser) + missing = [checker for checker in labels if not labels[checker]] + stats = Statistics(Analyser=analyser, + Generator=generator_class.kind, + Checkers=len(labels), + Skipped=None, + Missing=len(missing) if missing else None, + OK=None, + Updated=None, + Gone=None, + New=None, + All_Changed=0, + Not_Found=len(missing) if missing else None, + ) + severities: SingleLabels = {} + ok, updated, new, gone = run_generator(generator_class(analyser), labels) + print_generation(analyser, labels, ok, updated, new) + severities.update(updated) + severities.update(new) + + ok = set(ok) + new = set(new) + gone = set(gone) + to_skip = {checker for checker + in (labels.keys() | ok | new | gone) + if generator.skip(checker)} + + print_gone(analyser, {checker: labels[checker] + for checker in gone - to_skip}) + remaining_missing = list(labels.keys() - ok - updated.keys() - to_skip) + print_missing(analyser, remaining_missing) + stats = stats._replace(Skipped=len(to_skip) if to_skip else None, + OK=len(ok) if ok else None, + Updated=len(updated) if updated else None, + Gone=len(gone) if gone else None, + New=len(new) if new else None, + All_Changed=len(severities), + Not_Found=len(remaining_missing), + ) + status |= (ReturnFlags.HadUpdate if updated else 0) \ + | (ReturnFlags.HadNew if new else 0) \ + | (ReturnFlags.HadGone if gone else 0) \ + | (ReturnFlags.RemainsMissing if remaining_missing else 0) + + return status, severities, stats diff --git a/scripts/labels/severity/generators/__init__.py b/scripts/labels/severity/generators/__init__.py new file mode 100644 index 0000000000..166e291d26 --- /dev/null +++ b/scripts/labels/severity/generators/__init__.py @@ -0,0 +1,18 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Implements the logic for analyser-specific generation of severities. +""" +from .analyser_selection import select_generator +from .base import Base + + +__all__ = [ + "select_generator", + "Base", +] diff --git a/scripts/labels/severity/generators/analyser_selection.py b/scripts/labels/severity/generators/analyser_selection.py new file mode 100644 index 0000000000..bf37a36f65 --- /dev/null +++ b/scripts/labels/severity/generators/analyser_selection.py @@ -0,0 +1,39 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Selects the appropriate generator engine for the analyser configuration. +""" +from typing import Dict, Iterable, Tuple, Type, Union + +from .clang_diagnostic import ClangDiagnosticGenerator +from .cppcheck import CppcheckGenerator +from .markdownlint import MarkdownlintGenerator +from .pylint import PylintGenerator + + +AnalyserGenerators: Dict[str, Union[Type, Tuple[Type, ...]]] = { + "clang-tidy": (ClangDiagnosticGenerator,), + "cppcheck": CppcheckGenerator, + "mdl": MarkdownlintGenerator, + "pylint": PylintGenerator, +} + + +def select_generator(analyser: str) -> Iterable[Type]: + """ + Dispatches the `analyser` to one of the generator classes and returns + which class(es) should be used for the label generation. 
+ """ + generators = AnalyserGenerators.get(analyser) + if not generators: + return iter(()) + if not isinstance(generators, tuple): + generators = (generators,) + AnalyserGenerators[analyser] = generators[0] + + return iter(generators) diff --git a/scripts/labels/severity/generators/base.py b/scripts/labels/severity/generators/base.py new file mode 100644 index 0000000000..7f8e387701 --- /dev/null +++ b/scripts/labels/severity/generators/base.py @@ -0,0 +1,36 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Describes the base interface for the generation of severity labels. +""" +from typing import Iterable, Optional, Tuple + + +class Base: + kind = "abstract" + + def __init__(self, analyser: str): + self.analyser = analyser + + def skip(self, _checker: str) -> bool: + """ + Returns ``True`` if the result for `checker` from the current generator + should be discarded. + """ + return False + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + """ + Returns a generator that can be consumed in order to obtain + ``(checker, severity)`` pairs, one for each encountered checker. + The exact details are analyser-specific! + + A ``None`` in the place of ``severity`` indicates that the ``checker`` + was encountered in the documentation, but no severity was generated. 
+ """ + return iter(()) diff --git a/scripts/labels/severity/generators/clang_diagnostic.py b/scripts/labels/severity/generators/clang_diagnostic.py new file mode 100644 index 0000000000..898f590bf9 --- /dev/null +++ b/scripts/labels/severity/generators/clang_diagnostic.py @@ -0,0 +1,51 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Clang compiler diagnostics (implemented through CodeChecker as Clang-Tidy +checks.) +""" +from typing import Iterable, Optional, Tuple + +from ... import http_ as http +from ...projects.llvm import clang_diagnostic +from .base import Base + + +class ClangDiagnosticGenerator(Base): + """ + Generates severities for Clang diagnostics from the Sphinx-based + documentation metastructure. 
+ """ + + kind = "clang-diagnostic" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._http = http.HTMLAcquirer() + + def skip(self, checker: str) -> bool: + return not checker.startswith("clang-diagnostic") + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + for checker, _, section in clang_diagnostic \ + .get_clang_diagnostic_documentation(self._http): + has_error_diagnostic = section.find(".//span[@class=\"error\"]") \ + is not None + has_warn_diagnostic = section.find(".//span[@class=\"warning\"]") \ + is not None + has_remark_diagnostic = section.find( + ".//span[@class=\"remark\"]") is not None + + if has_error_diagnostic: + severity = "HIGH" + elif not has_warn_diagnostic and has_remark_diagnostic: + severity = "LOW" + else: + severity = "MEDIUM" + + yield checker, severity diff --git a/scripts/labels/severity/generators/cppcheck.py b/scripts/labels/severity/generators/cppcheck.py new file mode 100644 index 0000000000..b76d8698c6 --- /dev/null +++ b/scripts/labels/severity/generators/cppcheck.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +# pylint: disable=c-extension-no-member +"""Cppcheck.""" +from collections import defaultdict +import subprocess +import sys +from typing import Iterable, Optional, Tuple + +import lxml.etree + +from ...exception import EngineError +from ...output import Settings as GlobalOutputSettings, trace +from .base import Base + + +class CppcheckGenerator(Base): + """ + Generates severities for Cppcheck checkers based on the classification + emitted by a ``cppcheck`` program. 
+ """ + + kind = "cppcheck" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._cppcheck_xml: Optional[lxml.etree.ElementTree] = None + + def fetch_cppcheck_errorlist(self) -> lxml.etree.ElementTree: + if self._cppcheck_xml is not None: + return self._cppcheck_xml + + try: + stdout = subprocess.check_output(["cppcheck", "--errorlist"]) + except OSError as e: + raise EngineError("Could not call Cppcheck, is it in 'PATH'?") \ + from e + + try: + self._cppcheck_xml = lxml.etree.fromstring(stdout) + except lxml.etree.LxmlError as e: + if GlobalOutputSettings.trace(): + print("------------------------------------------------------", + file=sys.stderr) + print(stdout, file=sys.stderr) + print("------------------------------------------------------", + file=sys.stderr) + raise EngineError("Could not understand the output of Cppcheck") \ + from e + + return self._cppcheck_xml + + SeverityMap = defaultdict( + lambda: "UNSPECIFIED", + { + # When code is executed there is either undefined behaviour, or + # other error, such as a memory leak, or a resource leak. + "error": "HIGH", + + # Configuration problems. + "information": "LOW", + + # Run-time performance suggestions based on common knowledge. + "performance": "LOW", + + # Portability warnings, implementation-defined behaviour. + "portability": "LOW", + + # Stylistic issues, such as unused functions, redundant code. + "style": "STYLE", + + # When code is executed there might be undefined behaviour. 
+ "warning": "MEDIUM", + } + ) + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + xml_results = self.fetch_cppcheck_errorlist() + version = xml_results.find("./cppcheck").get("version") + trace("Cppcheck version '%s'", version) + + for error_node in xml_results.findall("./errors/error"): + yield "cppcheck-" + error_node.get("id"), \ + self.SeverityMap[error_node.get("severity")] diff --git a/scripts/labels/severity/generators/markdownlint.py b/scripts/labels/severity/generators/markdownlint.py new file mode 100644 index 0000000000..167df1125f --- /dev/null +++ b/scripts/labels/severity/generators/markdownlint.py @@ -0,0 +1,39 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""Markdownlint.""" +from typing import Iterable, Optional, Tuple + +from ... import http_ as http +from ...exception import EngineError +from ...projects import markdownlint +from .base import Base + + +class MarkdownlintGenerator(Base): + """ + Generates severities for Markdownlint rules. 
+ """ + + kind = "markdownlint" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._http = http.HTMLAcquirer() + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + try: + version = markdownlint.get_markdownlint_latest_release(self._http) + except Exception as e: + raise EngineError( + "Failed to obtain the Markdownlint documentation") from e + + url = "https://github.com/markdownlint/markdownlint/blob/" \ + f"{version}" \ + "/docs/RULES.md" + for checker, _ in markdownlint.get_markdownlint_rules(self._http, url): + yield checker, "STYLE" diff --git a/scripts/labels/severity/generators/pylint.py b/scripts/labels/severity/generators/pylint.py new file mode 100644 index 0000000000..d121e0c734 --- /dev/null +++ b/scripts/labels/severity/generators/pylint.py @@ -0,0 +1,78 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +"""PyLint.""" +from collections import defaultdict +import re +import subprocess +from typing import Iterable, Optional, Tuple + +from ...exception import EngineError +from ...output import trace +from .base import Base + + +class PylintGenerator(Base): + """ + Generates severities for PyLint checkers based on the classification + emitted by a ``pylint`` program. 
+ """ + + kind = "pylint" + + def __init__(self, analyser: str): + super().__init__(analyser=analyser) + self._pylint_msgs: Optional[str] = None + + def fetch_pylint_msgs(self) -> str: + if self._pylint_msgs is not None: + return self._pylint_msgs + + try: + version = subprocess.check_output(["pylint", "--version"]) + trace("pylint version '%s'", + version.decode().split('\n', maxsplit=1)[0].split(' ')[1]) + self._pylint_msgs = subprocess.check_output( + ["pylint", "--list-msgs"]) \ + .decode() + except OSError as e: + raise EngineError("Could not call pylint, is it in 'PATH'?") \ + from e + + return self._pylint_msgs + + SeverityMap = defaultdict( + lambda: "UNSPECIFIED", + { + # Fatal: An error occurred which prevented pylint from doing + # further processing. + 'F': "CRITICAL", + + # Error: Probable bugs in the code. + 'E': "HIGH", + + # Warning: Python-specific problems. + 'W': "MEDIUM", + + # Refactor: Bad code smell. + 'R': "STYLE", + + # Convention: Programming standard violation. + 'C': "LOW", + } + ) + + pattern = re.compile(r"^:(?P[^ ]+) \((?P\S)(?P\S+)\): .*") + + def generate(self) -> Iterable[Tuple[str, Optional[str]]]: + msgs = self.fetch_pylint_msgs() + for line in msgs.split('\n'): + match = self.pattern.match(line) + if not match: + continue + + yield match.group("name"), self.SeverityMap[match.group("kind")] diff --git a/scripts/labels/severity/output.py b/scripts/labels/severity/output.py new file mode 100644 index 0000000000..4195fd63d6 --- /dev/null +++ b/scripts/labels/severity/output.py @@ -0,0 +1,42 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
class Settings(_Singleton):
    """Tool-level output settings."""

    def __init__(self):
        """Seeds the flags, unless the `_Singleton` already carries them."""
        # __init__ runs on every Settings() call even though the instance
        # is shared, so only set the attributes that do not exist yet.
        for attribute in ("_report_missing", "_report_ok"):
            if attribute not in self.__dict__:
                setattr(self, attribute, False)

    @staticmethod
    def factory():
        """Obtains the singleton instance."""
        return Settings()

    @staticmethod
    def report_missing() -> bool:
        return Settings.factory()._report_missing  # type: ignore

    @staticmethod
    def set_report_missing(v: bool):
        Settings.factory()._report_missing = v  # type: ignore

    @staticmethod
    def report_ok() -> bool:
        return Settings.factory()._report_ok  # type: ignore

    @staticmethod
    def set_report_ok(v: bool):
        Settings.factory()._report_ok = v  # type: ignore
""" diff --git a/scripts/labels/label_tool/util.py b/scripts/labels/util.py similarity index 70% rename from scripts/labels/label_tool/util.py rename to scripts/labels/util.py index 9fa1d3f482..67bd59028a 100644 --- a/scripts/labels/label_tool/util.py +++ b/scripts/labels/util.py @@ -7,7 +7,7 @@ # ------------------------------------------------------------------------- """Helper functions, mixin classes, and miscellaneous utilities.""" import bisect -from typing import Any, Callable, Collection, Dict, List, Optional, \ +from typing import Any, Callable, Collection, Dict, List, Optional, Set, \ Sequence, Type, TypeVar, Union @@ -78,3 +78,30 @@ def lower_bound(l_: List[_T], e: _T) -> Optional[_T]: if l_[idx] == e: return l_[idx] return l_[idx - 1] + + +def merge_if_no_collision(existing: Dict[Any, Any], + new: Dict[Any, Any], + conflicts: Set[Any], + conflict_cb: Optional[Callable[[Any, Any, Any], + None]] = None): + """ + Update the contents of `existing` with the contents of `new` piecewise + if and only if the `new` value for an element is not in conflict with + the `existing` one. + Conflicting keys are added to `conflicts`, and if set, `conflict_cb` is + called for them. + """ + for k in sorted(new.keys() - conflicts): + v = new[k] + try: + existing_v = existing[k] + if existing_v != v: + if conflict_cb: + conflict_cb(k, existing_v, v) + conflicts.add(k) + # There was a conflict, drop the element from the merged set. + del existing[k] + except KeyError: + # No conflicts for truly new elements. + existing[k] = v