Skip to content

Commit b295383

Browse files
authoredJun 4, 2024··
Add manual DCO check using Python script dco-check (#5528)
## Which problem is this PR solving?
- Several PRs are stuck with DCO check pending. Apparently there's an ongoing issue with the DCO app: dcoapp/app#211

## Description of the changes
- Add an alternative DCO check into the lint workflow using https://github.com/christophebedard/dco-check

## How was this change tested?
Ran the script manually:
```shell
$ python3 scripts/dco_check.py -b main
Detected: git (default)
Checking commits: 5863430..7a5e58b
All good!
```
With an unsigned commit:
```shell
$ gca -m bad
[manual-dco-check 7da95684] bad
 1 file changed, 1 insertion(+)
$ python3 scripts/dco_check.py -b main
Detected: git (default)
Checking commits: 5863430..7da95684408ebd3f32692e9444940bdf0e04348d
Missing sign-off(s):
7da95684408ebd3f32692e9444940bdf0e04348d no sign-off found
```
---------
Signed-off-by: Yuri Shkuro <github@ysh.us>
1 parent 5863430 commit b295383

File tree

2 files changed

+1202
-0
lines changed

2 files changed

+1202
-0
lines changed
 

‎.github/workflows/ci-lint-checks.yaml

+10
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,13 @@ jobs:
4141

4242
- name: Ensure PR is not on main branch
4343
uses: ./.github/actions/block-pr-not-on-main
44+
45+
- name: Set up Python 3.x for DCO check
46+
uses: actions/setup-python@v2
47+
with:
48+
python-version: '3.x'
49+
50+
- name: Run DCO check
51+
run: python3 scripts/dco_check.py -b main -v
52+
env:
53+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

‎scripts/dco_check.py

+1,192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,1192 @@
1+
# Script copied from https://github.com/christophebedard/dco-check/blob/master/dco_check/dco_check.py
2+
#
3+
# Copyright 2020 Christophe Bedard
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Check that all commits for a proposed change are signed off."""
18+
19+
import argparse
20+
from collections import defaultdict
21+
import json
22+
import os
23+
import re
24+
import subprocess
25+
import sys
26+
from typing import Any
27+
from typing import Dict
28+
from typing import List
29+
from typing import Optional
30+
from typing import Tuple
31+
from urllib import request
32+
33+
34+
# Version reported by the '--version' option.
__version__ = '0.4.0'

# Fallback branch and remote names used when none is given.
DEFAULT_BRANCH = 'master'
DEFAULT_REMOTE = 'origin'
# Names of the environment variables that can provide option defaults.
ENV_VAR_CHECK_MERGE_COMMITS = 'DCO_CHECK_CHECK_MERGE_COMMITS'
ENV_VAR_DEFAULT_BRANCH = 'DCO_CHECK_DEFAULT_BRANCH'
ENV_VAR_DEFAULT_BRANCH_FROM_REMOTE = 'DCO_CHECK_DEFAULT_BRANCH_FROM_REMOTE'
ENV_VAR_DEFAULT_REMOTE = 'DCO_CHECK_DEFAULT_REMOTE'
ENV_VAR_EXCLUDE_EMAILS = 'DCO_CHECK_EXCLUDE_EMAILS'
ENV_VAR_EXCLUDE_PATTERN = 'DCO_CHECK_EXCLUDE_PATTERN'
ENV_VAR_QUIET = 'DCO_CHECK_QUIET'
ENV_VAR_VERBOSE = 'DCO_CHECK_VERBOSE'
# Commit message trailer key for a sign-off.
TRAILER_KEY_SIGNED_OFF_BY = 'Signed-off-by:'
48+
49+
50+
class EnvDefaultOption(argparse.Action):
    """
    Action that uses an env var value as the default if it exists.

    Inspired by: https://stackoverflow.com/a/10551190/6476709
    """

    def __init__(
        self,
        env_var: str,
        default: Any,
        help: Optional[str] = None,  # noqa: A002
        **kwargs: Any,
    ) -> None:
        """
        Create an EnvDefaultOption.

        :param env_var: the name of the environment variable that can override the default
        :param default: the default value to use if the environment variable is not set
        :param help: the help text; the environment variable name is appended to it
        :param kwargs: remaining arguments, forwarded to `argparse.Action`
        """
        # Set default to env var value if it exists
        if env_var in os.environ:
            default = os.environ[env_var]
        if help:  # pragma: no cover
            help += f' [env: {env_var}]'
        super().__init__(
            default=default,
            help=help,
            **kwargs,
        )

    def __call__(  # noqa: D102
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Any,
        option_string: Optional[str] = None,
    ) -> None:
        # Store the value given on the command line as-is
        setattr(namespace, self.dest, values)
84+
85+
86+
class EnvDefaultStoreTrue(argparse.Action):
    """
    Action similar to 'store_true' that uses an env var value as the default if it exists.

    Partly copied from argparse.{_StoreConstAction,_StoreTrueAction}.
    """

    def __init__(
        self,
        option_strings: List[str],
        dest: str,
        env_var: str,
        default: bool = False,
        help: Optional[str] = None,  # noqa: A002
    ) -> None:
        """
        Create an EnvDefaultStoreTrue.

        :param option_strings: the option strings associated with this action
        :param dest: the name of the namespace attribute to set
        :param env_var: the environment variable which enables the flag when it exists
        :param default: the default to use if the environment variable does not exist
        :param help: the help text; the environment variable name is appended to it
        """
        # The mere presence of the env var enables the flag, whatever its value;
        # otherwise the default that was passed in is kept
        if env_var in os.environ:
            default = True
        if help:  # pragma: no cover
            help += f' [env: {env_var} (any value to enable)]'
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            const=True,
            default=default,
            required=False,
            help=help,
        )

    def __call__(  # noqa: D102
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Any,
        option_string: Optional[str] = None,
    ) -> None:
        # The flag takes no value: store the constant (True)
        setattr(namespace, self.dest, self.const)
124+
125+
126+
def get_parser() -> argparse.ArgumentParser:
    """
    Get argument parser.

    Except for '--version', every option uses one of the env-var-aware actions,
    so an environment variable can provide its default value.

    :return: the argument parser
    """
    parser = argparse.ArgumentParser(
        description='Check that all commits of a proposed change have a DCO, i.e. are signed-off.',
    )
    # Either give the default branch explicitly or ask the remote for it
    default_branch_group = parser.add_mutually_exclusive_group()
    default_branch_group.add_argument(
        '-b', '--default-branch', metavar='BRANCH',
        action=EnvDefaultOption, env_var=ENV_VAR_DEFAULT_BRANCH,
        default=DEFAULT_BRANCH,
        help=(
            'default branch to use, if necessary (default: %(default)s)'
        ),
    )
    default_branch_group.add_argument(
        '--default-branch-from-remote',
        action=EnvDefaultStoreTrue, env_var=ENV_VAR_DEFAULT_BRANCH_FROM_REMOTE,
        default=False,
        help=(
            'get the default branch value from the remote (default: %(default)s)'
        ),
    )
    parser.add_argument(
        '-m', '--check-merge-commits',
        action=EnvDefaultStoreTrue, env_var=ENV_VAR_CHECK_MERGE_COMMITS,
        default=False,
        help=(
            'check sign-offs on merge commits as well (default: %(default)s)'
        ),
    )
    parser.add_argument(
        '-r', '--default-remote', metavar='REMOTE',
        action=EnvDefaultOption, env_var=ENV_VAR_DEFAULT_REMOTE,
        default=DEFAULT_REMOTE,
        help=(
            'default remote to use, if necessary (default: %(default)s)'
        ),
    )
    parser.add_argument(
        '-e', '--exclude-emails', metavar='EMAIL[,EMAIL]',
        action=EnvDefaultOption, env_var=ENV_VAR_EXCLUDE_EMAILS,
        default=None,
        help=(
            'exclude a comma-separated list of author emails from checks '
            '(commits with an author email matching one of these emails will be ignored)'
        ),
    )
    parser.add_argument(
        '-p', '--exclude-pattern', metavar='REGEX',
        action=EnvDefaultOption, env_var=ENV_VAR_EXCLUDE_PATTERN,
        default=None,
        help=(
            'exclude regular expresssion matched author emails from checks '
            '(commits with an author email matching regular expression pattern will be ignored)'
        ),
    )
    # Quiet and verbose output modes exclude each other
    output_options_group = parser.add_mutually_exclusive_group()
    output_options_group.add_argument(
        '-q', '--quiet',
        action=EnvDefaultStoreTrue, env_var=ENV_VAR_QUIET,
        default=False,
        help=(
            'quiet mode (do not print anything; simply exit with 0 or non-zero) '
            '(default: %(default)s)'
        ),
    )
    output_options_group.add_argument(
        '-v', '--verbose',
        action=EnvDefaultStoreTrue, env_var=ENV_VAR_VERBOSE,
        default=False,
        help=(
            'verbose mode (print out more information) (default: %(default)s)'
        ),
    )
    parser.add_argument(
        '--version',
        action='version',
        help='show version number and exit',
        version=f'dco-check version {__version__}',
    )
    return parser
207+
208+
209+
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse arguments.

    A new parser is built with `get_parser()` on every call.

    :param argv: the arguments to use, or `None` for sys.argv
    :return: the parsed arguments
    """
    return get_parser().parse_args(argv)
217+
218+
219+
class Options:
    """Simple container and utilities for options."""

    def __init__(self, parser: argparse.ArgumentParser) -> None:
        """
        Create using default argument values.

        :param parser: the parser from which to read the default values
        """
        # get_default() looks values up by destination name,
        # not by option letter or option string
        self.check_merge_commits = parser.get_default('check_merge_commits')
        self.default_branch = parser.get_default('default_branch')
        self.default_branch_from_remote = parser.get_default('default_branch_from_remote')
        self.default_remote = parser.get_default('default_remote')
        self.exclude_emails = parser.get_default('exclude_emails')
        self.exclude_pattern = parser.get_default('exclude_pattern')
        self.quiet = parser.get_default('quiet')
        self.verbose = parser.get_default('verbose')

    def set_options(self, args: argparse.Namespace) -> None:
        """
        Set options using parsed arguments.

        Prints the usage and exits with code 1 if conflicting options are set.

        :param args: the parsed arguments
        """
        self.check_merge_commits = args.check_merge_commits
        self.default_branch = args.default_branch
        self.default_branch_from_remote = args.default_branch_from_remote
        self.default_remote = args.default_remote
        # Split into list and filter out empty elements
        self.exclude_emails = list(filter(None, (args.exclude_emails or '').split(',')))
        self.exclude_pattern = (
            None if not args.exclude_pattern else re.compile(args.exclude_pattern)
        )
        self.quiet = args.quiet
        self.verbose = args.verbose
        # Shouldn't happen with a mutually exclusive group,
        # but can happen if one is set with an env var
        # and the other is set with an arg
        if self.quiet and self.verbose:
            # Similar message to what is printed when using args for both
            get_parser().print_usage()
            print("options '--quiet' and '--verbose' cannot both be true")
            sys.exit(1)
        if self.default_branch != DEFAULT_BRANCH and self.default_branch_from_remote:
            # Similar message to what is printed when using args for both
            get_parser().print_usage()
            print(
                "options '--default-branch' and '--default-branch-from-remote' cannot both be set"
            )
            sys.exit(1)

    def get_options(self) -> Dict[str, Any]:
        """
        Get all options as a dict.

        :return: the instance attribute dict (not a copy)
        """
        return self.__dict__
265+
266+
267+
# Module-level options object, created from a freshly built argument parser.
options = Options(get_parser())
268+
269+
270+
class Logger:
    """Simple logger to stdout which can be quiet or verbose."""

    def __init__(self, parser: argparse.ArgumentParser) -> None:
        """
        Create using default argument values.

        :param parser: the parser from which to read the default values
        """
        # get_default() looks values up by destination name, not by option letter
        self.__quiet = parser.get_default('quiet')
        self.__verbose = parser.get_default('verbose')

    def set_options(self, options: 'Options') -> None:
        """
        Set options using options object.

        :param options: the object providing the `quiet` and `verbose` values
        """
        self.__quiet = options.quiet
        self.__verbose = options.verbose

    def print(self, msg: str = '', *args: Any, **kwargs: Any) -> None:  # noqa: A003
        """
        Print if not quiet.

        :param msg: the message
        :param args: additional positional arguments for the built-in `print()`
        :param kwargs: additional keyword arguments for the built-in `print()`
        """
        if not self.__quiet:
            print(msg, *args, **kwargs)

    def verbose_print(self, msg: str = '', *args: Any, **kwargs: Any) -> None:
        """
        Print if verbose.

        :param msg: the message
        :param args: additional positional arguments for the built-in `print()`
        :param kwargs: additional keyword arguments for the built-in `print()`
        """
        if self.__verbose:
            print(msg, *args, **kwargs)
292+
293+
294+
# Module-level logger, created from a freshly built argument parser.
logger = Logger(get_parser())
295+
296+
297+
def run(
    command: List[str],
) -> Optional[str]:
    """
    Run command.

    stderr is merged into stdout. The command runs with a copy of the
    environment from which 'LANG' and all 'LC_*' variables are removed.

    :param command: the command list
    :return: the stdout output if the return code is 0, otherwise `None`
        (including when the command could not be started at all)
    """
    # Copy the environment without the locale settings
    env = {
        key: value
        for key, value in os.environ.items()
        if key != 'LANG' and not key.startswith('LC_')
    }
    try:
        process = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=env,
            check=False,
        )
    except OSError as e:
        # E.g. the executable does not exist or is not executable
        logger.print(f'error: {e}')
        return None
    if process.returncode != 0:
        logger.print(f'error: {process.stdout.decode("utf8")}')
        return None
    return process.stdout.rstrip().decode('utf8').strip('\n')
328+
329+
330+
def is_valid_email(
    email: str,
) -> bool:
    """
    Check if email is valid.

    Simple regex checking for:
       <nonwhitespace string>@<nonwhitespace string>.<nonwhitespace string>

    The whole string must match, so anything containing whitespace is rejected.

    :param email: the email address to check
    :return: true if email is valid, false otherwise
    """
    return bool(re.fullmatch(r'\S+@\S+\.\S+', email))
343+
344+
345+
def get_head_commit_hash() -> Optional[str]:
    """
    Get the hash of the HEAD commit.

    See: git rev-parse --verify HEAD

    :return: the hash of the HEAD commit, or `None` if it failed
    """
    return run(['git', 'rev-parse', '--verify', 'HEAD'])
358+
359+
360+
def get_common_ancestor_commit_hash(
    base_ref: str,
) -> Optional[str]:
    """
    Get the common ancestor commit of the current commit and a given reference.

    See: git merge-base --fork-point

    :param base_ref: the other reference
    :return: the common ancestor commit, or `None` if it failed
    """
    return run(['git', 'merge-base', '--fork-point', base_ref])
378+
379+
380+
def fetch_branch(
    branch: str,
    remote: str = 'origin',
) -> int:
    """
    Fetch branch from remote.

    See: git fetch

    :param branch: the name of the branch
    :param remote: the name of the remote
    :return: zero for success, nonzero otherwise
    """
    # We don't want the output, only whether the command succeeded
    fetched = run(['git', 'fetch', remote, branch]) is not None
    return 0 if fetched else 1
401+
402+
403+
def get_default_branch_from_remote(
    remote: str,
) -> Optional[str]:
    """
    Get default branch from remote.

    Parses the 'HEAD branch: <name>' line of the `git remote show` output.

    :param remote: the remote name
    :return: the default branch, or None if it failed
    """
    # https://stackoverflow.com/questions/28666357/git-how-to-get-default-branch#comment92366240_50056710  # noqa: E501
    # $ git remote show origin
    result = run(['git', 'remote', 'show', remote])
    if not result:
        return None
    for result_line in result.split('\n'):
        # The line is indented; accept any amount of leading whitespace
        # instead of relying on an exact number of spaces
        match = re.match(r'\s*HEAD branch: (.*)', result_line)
        if match:
            return match[1]
    return None
427+
428+
429+
def get_commits_data(
    base: str,
    head: str,
    ignore_merge_commits: bool = True,
) -> Optional[str]:
    """
    Get data (full sha & commit body) for commits in a range.

    The range excludes the 'before' commit, e.g. ]base, head]
    The output data contains data for individual commits, separated by special characters:
       * 1st line: full commit sha
       * 2nd line: author name and email
       * 3rd line: commit title (subject)
       * subsequent lines: commit body (which excludes the commit title line)
       * record separator (0x1e)

    :param base: the sha of the commit just before the start of the range
    :param head: the sha of the last commit of the range
    :param ignore_merge_commits: whether to ignore merge commits
    :return: the data, or `None` if it failed
    """
    command = [
        'git',
        'log',
        f'{base}..{head}',
        '--pretty=%H%n%an <%ae>%n%s%n%-b%x1e',
    ]
    if ignore_merge_commits:
        command.append('--no-merges')
    return run(command)
459+
460+
461+
def split_commits_data(
    commits_data: str,
    commits_sep: str = '\x1e',
) -> List[str]:
    """
    Split data into individual commits using a separator.

    :param commits_data: the full data to be split
    :param commits_sep: the string which separates individual commits
    :return: the list of data for each individual commit, without empty elements
    """
    # Remove leading/trailing newlines, split in individual commits,
    # then remove leading/trailing newlines of each commit
    stripped_commits = (
        single_output.strip('\n')
        for single_output in commits_data.strip('\n').split(commits_sep)
    )
    # Filter out empty elements
    return [commit for commit in stripped_commits if commit]
481+
482+
483+
def extract_name_and_email(
    name_and_email: str,
) -> Optional[Tuple[str, str]]:
    """
    Extract a name and an email from a 'name <email>' string.

    The pattern is searched anywhere in the string, and the name is
    everything that precedes ' <email>'.

    :param name_and_email: the name and email string
    :return: the extracted (name, email) tuple, or `None` if it failed
    """
    match = re.search(r'(.*) <(.*)>', name_and_email)
    if match is None:
        return None
    return match.group(1), match.group(2)
496+
497+
498+
def format_name_and_email(
    name: Optional[str],
    email: Optional[str],
) -> str:
    """
    Format a name and an email into a 'name <email>' string.

    A missing (`None` or empty) name or email is shown as 'N/A'.

    :param name: the name, or `None` if N/A
    :param email: the email, or `None` if N/A
    :return: the formatted string
    """
    return f"{name or 'N/A'} <{email or 'N/A'}>"
510+
511+
512+
def get_env_var(
    env_var: str,
    print_if_not_found: bool = True,
    default: Optional[str] = None,
) -> Optional[str]:
    """
    Get the value of an environment variable.

    A variable that is set to an empty string counts as found.

    :param env_var: the environment variable name/key
    :param print_if_not_found: whether to print if the environment variable could not be found
    :param default: the value to use if the environment variable could not be found
    :return: the environment variable value, or `None` if not found and no default value was given
    """
    value = os.environ.get(env_var)
    if value is not None:
        return value
    if print_if_not_found:
        if default is not None:
            logger.print(
                f"could not get environment variable: '{env_var}'; "
                f"using value default value: '{default}'"
            )
        else:
            logger.print(f"could not get environment variable: '{env_var}'")
    # Either the given default value or `None`
    return default
537+
538+
539+
class CommitInfo:
    """Container for all necessary commit information."""

    def __init__(
        self,
        commit_hash: str,
        title: str,
        body: List[str],
        author_name: Optional[str],
        author_email: Optional[str],
        is_merge_commit: bool = False,
    ) -> None:
        """
        Create a CommitInfo object.

        :param commit_hash: the commit hash, stored as `hash`
        :param title: the commit title
        :param body: the lines of the commit body
        :param author_name: the author name, or `None` if unknown
        :param author_email: the author email, or `None` if unknown
        :param is_merge_commit: whether the commit is a merge commit
        """
        self.hash = commit_hash
        self.title = title
        self.body = body
        self.author_name = author_name
        self.author_email = author_email
        self.is_merge_commit = is_merge_commit
558+
559+
560+
class CommitDataRetriever:
    """
    Abstract commit data retriever.

    It first provides a method to check whether it applies to the current setup or not.
    It also provides other methods to get commits to be checked.
    These should not be called if it doesn't apply.
    """

    def name(self) -> str:
        """Get a name that represents this retriever."""
        raise NotImplementedError  # pragma: no cover

    def applies(self) -> bool:
        """Check if this retriever applies, i.e. can provide commit data."""
        raise NotImplementedError  # pragma: no cover

    def get_commit_range(self) -> Optional[Tuple[str, str]]:
        """
        Get the range of commits to be checked: (last commit that was checked, latest commit).

        The range excludes the first commit, e.g. ]first commit, second commit]

        :return: the (last commit that was checked, latest commit) tuple, or `None` if it failed
        """
        raise NotImplementedError  # pragma: no cover

    def get_commits(self, base: str, head: str, **kwargs: Any) -> Optional[List['CommitInfo']]:
        """
        Get commit data.

        :param base: the commit just before the start of the range
        :param head: the last commit of the range
        :param kwargs: implementation-specific options
        :return: the list of commits
        """
        raise NotImplementedError  # pragma: no cover
590+
591+
592+
class GitRetriever(CommitDataRetriever):
    """Implementation for any git repository."""

    def name(self) -> str:
        """Get the name of this retriever."""
        return 'git (default)'

    def applies(self) -> bool:
        """Report that this retriever applies (always)."""
        # Unless we only have access to a partial commit history
        return True

    def get_commit_range(self) -> Optional[Tuple[str, str]]:
        """
        Get the range from the common ancestor with the default branch up to HEAD.

        :return: the (base, head) commit hashes, or `None` if either could not be determined
        """
        default_branch = options.default_branch
        logger.verbose_print(f"\tusing default branch '{default_branch}'")
        commit_hash_base = get_common_ancestor_commit_hash(default_branch)
        if not commit_hash_base:
            return None
        commit_hash_head = get_head_commit_hash()
        if not commit_hash_head:
            return None
        return commit_hash_base, commit_hash_head

    def get_commits(
        self,
        base: str,
        head: str,
        check_merge_commits: bool = False,
        **kwargs: Any,
    ) -> Optional[List[CommitInfo]]:
        """
        Get commit data from `git log` for the ]base, head] range.

        :param base: the commit just before the start of the range
        :param head: the last commit of the range
        :param check_merge_commits: whether merge commits should be included
        :param kwargs: unused
        :return: the list of commits; empty if the commit data could not be retrieved
        """
        commits_data = get_commits_data(
            base,
            head,
            ignore_merge_commits=not check_merge_commits,
        )
        commits: List[CommitInfo] = []
        # NOTE(review): a failure to get the data is reported as "no commits"
        # rather than `None`; confirm that callers expect this
        if commits_data is None:
            return commits
        for commit_data in split_commits_data(commits_data):
            commit_lines = commit_data.split('\n')
            # A commit with an empty message leaves fewer than three lines
            # once the surrounding newlines have been stripped; pad with
            # empty strings so that the indexing below cannot fail
            commit_lines += [''] * (3 - len(commit_lines))
            commit_hash = commit_lines[0]
            commit_author_data = commit_lines[1]
            commit_title = commit_lines[2]
            commit_body = commit_lines[3:]
            author_name, author_email = None, None
            author_result = extract_name_and_email(commit_author_data)
            if author_result:
                author_name, author_email = author_result
            # Commits are never flagged as merge commits here
            is_merge_commit = False
            commits.append(
                CommitInfo(
                    commit_hash,
                    commit_title,
                    commit_body,
                    author_name,
                    author_email,
                    is_merge_commit,
                )
            )
        return commits
649+
650+
651+
class GitLabRetriever(GitRetriever):
    """Implementation for GitLab CI."""

    def name(self) -> str:
        """Get the name of this retriever."""
        return 'GitLab'

    def applies(self) -> bool:
        """Check if the 'GITLAB_CI' environment variable exists."""
        return get_env_var('GITLAB_CI', print_if_not_found=False) is not None

    def get_commit_range(self) -> Optional[Tuple[str, str]]:
        """
        Get the commit range from the GitLab CI environment variables.

        Handles, in order: scheduled pipelines, the default branch, merge requests,
        external pull requests, and any other branch.

        :return: the (base, head) commit hashes, or `None` if it failed
        """
        # See: https://docs.gitlab.com/ee/ci/variables/predefined_variables.html
        default_branch = get_env_var('CI_DEFAULT_BRANCH', default=options.default_branch)

        commit_hash_head = get_env_var('CI_COMMIT_SHA')
        if not commit_hash_head:
            return None

        current_branch = get_env_var('CI_COMMIT_BRANCH')
        if get_env_var('CI_PIPELINE_SOURCE') == 'schedule':
            # Do not check scheduled pipelines
            logger.verbose_print("\ton scheduled pipeline: won't check commits")
            return commit_hash_head, commit_hash_head
        elif current_branch == default_branch:
            # If we're on the default branch, just test new commits
            logger.verbose_print(
                f"\ton default branch '{current_branch}': "
                'will check new commits'
            )
            commit_hash_base = get_env_var('CI_COMMIT_BEFORE_SHA')
            # An all-zero hash is treated as "no new commits"
            if commit_hash_base == '0000000000000000000000000000000000000000':
                logger.verbose_print('\tfound no new commits')
                return commit_hash_head, commit_hash_head
            if not commit_hash_base:
                return None
            return commit_hash_base, commit_hash_head
        elif get_env_var('CI_MERGE_REQUEST_ID', print_if_not_found=False):
            # Get merge request target branch
            target_branch = get_env_var('CI_MERGE_REQUEST_TARGET_BRANCH_NAME')
            if not target_branch:
                return None
            logger.verbose_print(
                f"\ton merge request branch '{current_branch}': "
                f"will check new commits off of target branch '{target_branch}'"
            )
            target_branch_sha = get_env_var('CI_MERGE_REQUEST_TARGET_BRANCH_SHA')
            if not target_branch_sha:
                return None
            return target_branch_sha, commit_hash_head
        elif get_env_var('CI_EXTERNAL_PULL_REQUEST_IID', print_if_not_found=False):
            # Get external merge request target branch
            target_branch = get_env_var('CI_EXTERNAL_PULL_REQUEST_TARGET_BRANCH_NAME')
            if not target_branch:
                return None
            logger.verbose_print(
                f"\ton merge request branch '{current_branch}': "
                f"will check new commits off of target branch '{target_branch}'"
            )
            target_branch_sha = get_env_var('CI_EXTERNAL_PULL_REQUEST_TARGET_BRANCH_SHA')
            if not target_branch_sha:
                return None
            return target_branch_sha, commit_hash_head
        else:
            if not default_branch:
                return None
            # Otherwise test all commits off of the default branch
            logger.verbose_print(
                f"\ton branch '{current_branch}': "
                f"will check forked commits off of default branch '{default_branch}'"
            )
            # Fetch default branch
            remote = options.default_remote
            if 0 != fetch_branch(default_branch, remote):
                logger.print(f"failed to fetch '{default_branch}' from remote '{remote}'")
                return None
            # Use remote default branch ref
            remote_branch_ref = remote + '/' + default_branch
            commit_hash_base = get_common_ancestor_commit_hash(remote_branch_ref)
            if not commit_hash_base:
                return None
            return commit_hash_base, commit_hash_head
731+
732+
733+
class CircleCiRetriever(GitRetriever):
    """Implementation for CircleCI."""

    def name(self) -> str:
        """Get the name of this retriever."""
        return 'CircleCI'

    def applies(self) -> bool:
        """Check if the 'CIRCLECI' environment variable exists."""
        return get_env_var('CIRCLECI', print_if_not_found=False) is not None

    def get_commit_range(self) -> Optional[Tuple[str, str]]:
        """
        Get the commit range from the CircleCI environment variables.

        Uses 'CIRCLE_BASE_REVISION' as the base if it is provided; otherwise fetches
        the default branch and uses the common ancestor with it.

        :return: the (base, head) commit hashes, or `None` if it failed
        """
        # See: https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
        default_branch = options.default_branch

        commit_hash_head = get_env_var('CIRCLE_SHA1')
        if not commit_hash_head:
            return None

        # Check if base revision is provided to the environment, e.g.
        #   environment:
        #     CIRCLE_BASE_REVISION: << pipeline.git.base_revision >>
        # See:
        #   https://circleci.com/docs/2.0/pipeline-variables/
        #   https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
        base_revision = get_env_var('CIRCLE_BASE_REVISION', print_if_not_found=False)
        if base_revision:
            # For PRs, this is the commit of the base branch,
            # and, for pushes to a branch, this is the commit before the new commits
            logger.verbose_print(
                f"\tchecking commits off of base revision '{base_revision}'"
            )
            return base_revision, commit_hash_head

        current_branch = get_env_var('CIRCLE_BRANCH')
        if not current_branch:
            return None
        # Test all commits off of the default branch
        logger.verbose_print(
            f"\ton branch '{current_branch}': "
            f"will check forked commits off of default branch '{default_branch}'"
        )
        # Fetch default branch
        remote = options.default_remote
        if 0 != fetch_branch(default_branch, remote):
            logger.print(f"failed to fetch '{default_branch}' from remote '{remote}'")
            return None
        # Use remote default branch ref
        remote_branch_ref = remote + '/' + default_branch
        commit_hash_base = get_common_ancestor_commit_hash(remote_branch_ref)
        if not commit_hash_base:
            return None
        return commit_hash_base, commit_hash_head
784+
785+
786+
class AzurePipelinesRetriever(GitRetriever):
    """Implementation for Azure Pipelines."""

    def name(self) -> str:
        """Get the name of this retriever."""
        return 'Azure Pipelines'

    def applies(self) -> bool:
        """Check if the 'TF_BUILD' environment variable exists."""
        return get_env_var('TF_BUILD', print_if_not_found=False) is not None

    def get_commit_range(self) -> Optional[Tuple[str, str]]:
        """
        Get the commit range from the Azure Pipelines environment variables.

        The base is the common ancestor with the pull request target branch,
        or with the default branch when not on a pull request.

        :return: the (base, head) commit hashes, or `None` if it failed
        """
        # See: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml#build-variables  # noqa: E501
        commit_hash_head = get_env_var('BUILD_SOURCEVERSION')
        if not commit_hash_head:
            return None
        current_branch = get_env_var('BUILD_SOURCEBRANCHNAME')
        if not current_branch:
            return None

        base_branch = None
        # Check if pull request
        is_pull_request = get_env_var(
            'SYSTEM_PULLREQUEST_PULLREQUESTID',
            print_if_not_found=False,
        )
        if is_pull_request:
            # Test all commits off of the target branch
            target_branch = get_env_var('SYSTEM_PULLREQUEST_TARGETBRANCH')
            if not target_branch:
                return None
            logger.verbose_print(
                f"\ton pull request branch '{current_branch}': "
                f"will check forked commits off of target branch '{target_branch}'"
            )
            base_branch = target_branch
        else:
            # Test all commits off of the default branch
            default_branch = options.default_branch
            logger.verbose_print(
                f"\ton branch '{current_branch}': "
                f"will check forked commits off of default branch '{default_branch}'"
            )
            base_branch = default_branch
        # Without a base branch there is nothing to fetch or compare against;
        # this is an explicit check because an assert is stripped under -O
        if not base_branch:
            return None
        # Fetch base branch
        remote = options.default_remote
        if 0 != fetch_branch(base_branch, remote):
            logger.print(f"failed to fetch '{base_branch}' from remote '{remote}'")
            return None
        # Use remote base branch ref
        remote_branch_ref = remote + '/' + base_branch
        commit_hash_base = get_common_ancestor_commit_hash(remote_branch_ref)
        if not commit_hash_base:
            return None
        return commit_hash_base, commit_hash_head
840+
841+
842+
class AppVeyorRetriever(GitRetriever):
    """Implementation for AppVeyor."""

    def name(self) -> str:
        """Get the name of this retriever."""
        return 'AppVeyor'

    def applies(self) -> bool:
        """Check if the 'APPVEYOR' environment variable exists."""
        return get_env_var('APPVEYOR', print_if_not_found=False) is not None

    def get_commit_range(self) -> Optional[Tuple[str, str]]:
        """
        Get the commit range from the AppVeyor environment variables.

        The base is the common ancestor with the pull request target branch,
        or with the default branch when not on a pull request.

        :return: the (base, head) commit hashes, or `None` if it failed
        """
        # See: https://www.appveyor.com/docs/environment-variables/
        default_branch = options.default_branch

        # Fall back to the local HEAD if the env var is not available
        commit_hash_head = get_env_var('APPVEYOR_REPO_COMMIT')
        if not commit_hash_head:
            commit_hash_head = get_head_commit_hash()
            if not commit_hash_head:
                return None

        branch = get_env_var('APPVEYOR_REPO_BRANCH')
        if not branch:
            return None

        # Check if pull request
        if get_env_var('APPVEYOR_PULL_REQUEST_NUMBER', print_if_not_found=False):
            current_branch = get_env_var('APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH')
            if not current_branch:
                return None
            # On a pull request, the repo branch is used as the target branch
            target_branch = branch
            logger.verbose_print(
                f"\ton pull request branch '{current_branch}': "
                f"will check commits off of target branch '{target_branch}'"
            )
            # Prefer the pull request head commit if it is provided
            commit_hash_head = get_env_var('APPVEYOR_PULL_REQUEST_HEAD_COMMIT') or commit_hash_head
            if not commit_hash_head:
                return None
            commit_hash_base = get_common_ancestor_commit_hash(target_branch)
            if not commit_hash_base:
                return None
            return commit_hash_base, commit_hash_head
        else:
            # Otherwise test all commits off of the default branch
            current_branch = branch
            logger.verbose_print(
                f"\ton branch '{current_branch}': "
                f"will check forked commits off of default branch '{default_branch}'"
            )
            commit_hash_base = get_common_ancestor_commit_hash(default_branch)
            if not commit_hash_base:
                return None
            return commit_hash_base, commit_hash_head
893+
894+
895+
class GitHubRetriever(CommitDataRetriever):
    """
    Implementation for GitHub CI.

    The commit range is read from the workflow event payload file, and the
    commit data itself is fetched from the repository's compare URL using the
    token found in the 'GITHUB_TOKEN' environment variable.
    """

    def name(self) -> str:  # noqa: D102
        return 'GitHub CI'

    def applies(self) -> bool:  # noqa: D102
        return get_env_var('GITHUB_ACTIONS', print_if_not_found=False) == 'true'

    def get_commit_range(self) -> Optional[Tuple[str, str]]:  # noqa: D102
        """
        Compute the (base, head) commit range from the workflow event payload.

        Stores the token and the parsed payload on the instance
        (`github_token`, `event_payload`) since `get_commits()` reads both.

        :return: the (base, head) tuple, or `None` if the token, the payload
            path or the event name is missing, or if the event is not handled
        """
        self.github_token = get_env_var('GITHUB_TOKEN')
        if not self.github_token:
            logger.print('Did you forget to include this in your workflow config?')
            logger.print('\n\tenv:\n\t\tGITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}')
            return None

        # See: https://help.github.com/en/actions/configuring-and-managing-workflows/using-environment-variables # noqa: E501
        event_payload_path = get_env_var('GITHUB_EVENT_PATH')
        if not event_payload_path:
            return None
        # Context manager so the file is closed even if the JSON is malformed;
        # explicit encoding so parsing does not depend on the platform default
        with open(event_payload_path, encoding='utf-8') as payload_file:
            self.event_payload = json.load(payload_file)

        # Get base & head commits depending on the workflow event type
        event_name = get_env_var('GITHUB_EVENT_NAME')
        if not event_name:
            return None
        commit_hash_base = None
        commit_hash_head = None
        if event_name in ('pull_request', 'pull_request_target'):
            # See: https://developer.github.com/v3/activity/events/types/#pullrequestevent
            pull_request = self.event_payload['pull_request']
            commit_hash_base = pull_request['base']['sha']
            commit_hash_head = pull_request['head']['sha']
            commit_branch_base = pull_request['base']['ref']
            commit_branch_head = pull_request['head']['ref']
            logger.verbose_print(
                f"\ton pull request branch '{commit_branch_head}': "
                f"will check commits off of base branch '{commit_branch_base}'"
            )
        elif event_name == 'push':
            # See: https://developer.github.com/v3/activity/events/types/#pushevent
            commit_hash_head = self.event_payload['head_commit']['id']
            if self.event_payload['created']:
                # If the branch was just created, there won't be a 'before' commit,
                # therefore just get the first commit in the new branch and append '^'
                # to get the commit before that one
                commits = self.event_payload['commits']
                if commits:
                    commit_hash_base = commits[0]['id'] + '^'
                else:
                    # The new branch carries no new commits (e.g. it points at an
                    # existing commit): use an empty range instead of failing on
                    # commits[0]; main() treats base == head as nothing to check
                    commit_hash_base = commit_hash_head
            else:
                commit_hash_base = self.event_payload['before']
        else:  # pragma: no cover
            logger.print('Unknown workflow event:', event_name)
            return None
        return commit_hash_base, commit_hash_head

    def get_commits(  # noqa: D102
        self,
        base: str,
        head: str,
        **kwargs: Any,
    ) -> Optional[List[CommitInfo]]:
        """
        Fetch the commits between two revisions through the compare URL.

        Must be called after `get_commit_range()`, which sets the
        `event_payload` and `github_token` attributes read here.

        :param base: the base revision of the range
        :param head: the head revision of the range
        :param kwargs: unused by this implementation
        :return: the commit info list, or `None` if the request did not succeed
        """
        # Request commit data
        compare_url_template = self.event_payload['repository']['compare_url']
        compare_url = compare_url_template.format(base=base, head=head)
        req = request.Request(compare_url, headers={
            'User-Agent': 'dco_check',
            'Authorization': 'token ' + (self.github_token or ''),
        })
        # Context manager so the connection is released on every path
        with request.urlopen(req) as response:
            if 200 != response.getcode():  # pragma: no cover
                from pprint import pformat
                logger.print('Request failed: compare_url')
                logger.print('reponse:', pformat(response.read().decode()))
                return None
            response_json = json.load(response)

        # Extract data
        # NOTE(review): only the commits present in this single response are
        # processed; confirm whether large ranges need additional requests
        commits = []
        for commit in response_json['commits']:
            commit_hash = commit['sha']
            # Drop empty lines; what remains is the title followed by the body
            message_lines = [
                line for line in commit['commit']['message'].split('\n') if line
            ]
            # A commit may have an empty message: fall back to an empty title
            # instead of failing on message_lines[0]
            commit_title = message_lines[0] if message_lines else ''
            commit_body = message_lines[1:]
            author_name = commit['commit']['author']['name']
            author_email = commit['commit']['author']['email']
            is_merge_commit = len(commit['parents']) > 1
            commits.append(
                CommitInfo(
                    commit_hash,
                    commit_title,
                    commit_body,
                    author_name,
                    author_email,
                    is_merge_commit,
                )
            )
        return commits
998+
999+
1000+
def process_commits(
    commits: List[CommitInfo],
    check_merge_commits: bool,
) -> Dict[str, List[str]]:
    """
    Process commit information to detect DCO infractions.

    A commit passes when at least one of its sign-off trailers carries a valid
    email and is equal to the commit author's (name, email) pair. Merge commits
    are skipped unless `check_merge_commits` is set, and so are commits whose
    author email is listed in, or matched by, the exclusion options.

    :param commits: the list of commit info
    :param check_merge_commits: true to check merge commits, false otherwise
    :return: the infractions as a dict {commit sha: list of infraction explanations}
    """
    infractions: Dict[str, List[str]] = defaultdict(list)
    for commit in commits:
        # Skip this commit if it is a merge commit and the
        # option for checking merge commits is not enabled
        if commit.is_merge_commit and not check_merge_commits:
            logger.verbose_print('\t' + 'ignoring merge commit:', commit.hash)
            logger.verbose_print()
            continue

        logger.verbose_print(
            '\t' + commit.hash + (' (merge commit)' if commit.is_merge_commit else '')
        )
        logger.verbose_print('\t' + format_name_and_email(commit.author_name, commit.author_email))
        logger.verbose_print('\t' + commit.title)
        logger.verbose_print('\t' + '\n\t'.join(commit.body))

        # Check author name and email
        if not commit.author_name or not commit.author_email:
            infractions[commit.hash].append(
                f'could not extract author data for commit: {commit.hash}'
            )
            continue

        # Check if the commit should be ignored because of the commit author email
        if options.exclude_emails and commit.author_email in options.exclude_emails:
            logger.verbose_print('\t\texcluding commit since author email is in exclude list')
            logger.verbose_print()
            continue

        # Check if the commit should be ignored because of the commit author email pattern
        if options.exclude_pattern and options.exclude_pattern.search(commit.author_email):
            logger.verbose_print('\t\texcluding commit since author email is matched by')
            logger.verbose_print('\t\tpattern')
            logger.verbose_print()
            continue

        # Extract sign-off data: strip only the leading trailer key, so that a
        # later occurrence of the key inside the value is not silently removed
        sign_offs = [
            body_line[len(TRAILER_KEY_SIGNED_OFF_BY):].strip(' ')
            for body_line in commit.body
            if body_line.startswith(TRAILER_KEY_SIGNED_OFF_BY)
        ]

        # Check that there is at least one sign-off right away
        if not sign_offs:
            infractions[commit.hash].append('no sign-off found')
            continue

        # Extract sign off information
        sign_offs_name_email: List[Tuple[str, str]] = []
        for sign_off in sign_offs:
            sign_off_result = extract_name_and_email(sign_off)
            if not sign_off_result:
                # Unparsable sign-offs cannot match the author; they are
                # still listed in the final message through `sign_offs`
                continue
            name, email = sign_off_result
            logger.verbose_print(f'\t\tfound sign-off: {format_name_and_email(name, email)}')
            if not is_valid_email(email):
                infractions[commit.hash].append(f'invalid email: {email}')
            else:
                sign_offs_name_email.append((name, email))

        # Check that author is in the sign-offs
        if (commit.author_name, commit.author_email) not in sign_offs_name_email:
            infractions[commit.hash].append(
                'sign-off not found for commit author: '
                f'{commit.author_name} {commit.author_email}; found: {sign_offs}'
            )

        # Separator between commits
        logger.verbose_print()

    return infractions
1084+
1085+
1086+
def check_infractions(
    infractions: Dict[str, List[str]],
) -> int:
    """
    Report the infractions and turn them into an exit status.

    :param infractions: the infractions dict {commit sha, infraction explanation}
    :return: 0 if no infractions, non-zero otherwise
    """
    # Nothing to report: success
    if not len(infractions):
        logger.print('All good!')
        return 0

    # One section per commit, one indented line per explanation
    logger.print('Missing sign-off(s):')
    logger.print()
    for sha, explanations in infractions.items():
        logger.print('\t' + sha)
        for explanation in explanations:
            logger.print('\t\t' + explanation)
    return 1
1105+
1106+
1107+
def main(argv: Optional[List[str]] = None) -> int:
    """
    Run the whole check: parse options, detect the CI, fetch and check commits.

    :param argv: the arguments to use, or `None` for sys.argv
    :return: 0 if successful, non-zero otherwise
    """
    options.set_options(parse_args(argv))
    logger.set_options(options)

    # Print options
    if options.verbose:
        logger.verbose_print('Options:')
        for option_name, option_value in options.get_options().items():
            logger.verbose_print(f'\t{option_name}: {str(option_value)}')
        logger.verbose_print()

    # Detect CI
    # Candidates are instantiated lazily, in order, and the first one
    # that applies is used
    candidate_classes = (
        GitLabRetriever,
        GitHubRetriever,
        AzurePipelinesRetriever,
        AppVeyorRetriever,
        CircleCiRetriever,
        GitRetriever,
    )
    candidates = (candidate_cls() for candidate_cls in candidate_classes)
    commit_retriever = next(
        (candidate for candidate in candidates if candidate.applies()),
        None,
    )
    if not commit_retriever:
        logger.print('Could not find an applicable GitRetriever')
        return 1
    logger.print('Detected:', commit_retriever.name())

    # Get default branch from remote if enabled
    if options.default_branch_from_remote:
        branch_from_remote = get_default_branch_from_remote(options.default_remote)
        if not branch_from_remote:
            logger.print('Could not get default branch from remote')
            return 1
        options.default_branch = branch_from_remote
        logger.print(f"\tgot default branch '{branch_from_remote}' from remote")

    # Get range of commits
    commit_range = commit_retriever.get_commit_range()
    if not commit_range:
        return 1
    base_hash, head_hash = commit_range

    logger.print()
    # Return success now if base == head
    if base_hash == head_hash:
        logger.print('No commits to check')
        return 0

    logger.print(f'Checking commits: {base_hash}..{head_hash}')
    logger.print()

    # Get commits
    commits = commit_retriever.get_commits(
        base_hash,
        head_hash,
        check_merge_commits=options.check_merge_commits,
    )
    if commits is None:
        return 1

    # Process them, then report any infraction
    exit_code = check_infractions(
        process_commits(commits, options.check_merge_commits)
    )

    # An empty list passes the check, but that is worth pointing out
    if len(commits) == 0:
        logger.print('Warning: no commits were actually checked')

    return exit_code
1189+
1190+
1191+
# Script entry point: the value returned by main() becomes the process exit status
if __name__ == '__main__':  # pragma: no cover
    exit_status = main()
    sys.exit(exit_status)

0 commit comments

Comments
 (0)
Please sign in to comment.