|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +# Copyright (c) 2018,2020 Intel Corporation |
| 5 | +# Copyright (c) 2022 Nordic Semiconductor ASA |
| 6 | + |
| 7 | +import argparse |
| 8 | +import collections |
| 9 | +import logging |
| 10 | +import os |
| 11 | +from pathlib import Path |
| 12 | +import re |
| 13 | +import subprocess |
| 14 | +import sys |
| 15 | +import shlex |
| 16 | + |
# Root logger; stays None until init_logs() assigns it.
logger = None

# Number of failure() calls so far; main() derives the exit status from it.
failures = 0
| 20 | + |
def err(msg):
    # Aborts the program with a fatal error message by handing it to
    # sys.exit(), which raises SystemExit. The message is prefixed with the
    # script name (sys.argv[0]) when that is not empty.

    prefix = f"{sys.argv[0]}: " if sys.argv[0] else ""
    sys.exit(f"{prefix}fatal error: {msg}")
| 26 | + |
| 27 | + |
def cmd2str(cmd):
    # Renders the command-line words in the iterable 'cmd' as one string,
    # each word shell-quoted and separated by a single space. Meant for error
    # messages and the like.

    return " ".join(map(shlex.quote, cmd))
| 33 | + |
| 34 | + |
def annotate(severity, file, title, message, line=None, col=None):
    """
    Print a GitHub Actions workflow command that creates an annotation.

    https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#about-workflow-commands

    severity: name of the workflow command, e.g. 'error'
    file, title: values of the 'file' and 'title' properties
    message: annotation text; may span several lines
    line, col: optional position, left out of the command when falsy
    """
    def escape_data(value):
        # A workflow command must fit on a single output line, so line
        # breaks are percent-encoded; '%' is encoded first so the result
        # can be decoded unambiguously.
        return (str(value)
                .replace('%', '%25')
                .replace('\r', '%0D')
                .replace('\n', '%0A'))

    def escape_property(value):
        # ':' and ',' delimit the property list and must not appear raw
        # inside a property value.
        return escape_data(value).replace(':', '%3A').replace(',', '%2C')

    properties = [f'file={escape_property(file)}']
    if line:
        properties.append(f'line={line}')
    if col:
        properties.append(f'col={col}')
    properties.append(f'title={escape_property(title)}')

    print(f"::{severity} {','.join(properties)}::{escape_data(message)}")
| 44 | + |
| 45 | + |
def failure(msg, file='CODEOWNERS', line=None):
    # Records one more failure in the module-level counter and reports 'msg'
    # as an 'error' annotation titled "CODEOWNERS", attached to 'file' and,
    # when given, to 'line'.

    global failures
    failures = failures + 1
    annotate(severity='error', file=file, title="CODEOWNERS", message=msg,
             line=line)
| 51 | + |
| 52 | + |
def git(*args, cwd=None):
    # Runs 'git' with the given arguments and returns its stdout decoded as
    # UTF-8 and rstrip()ed. Called like git("diff"). 'cwd' is the working
    # directory to use (default: current directory).
    #
    # Every problem is fatal and goes through err(), which exits by raising
    # SystemExit: Git could not be started, it returned a non-zero status,
    # or it wrote anything at all to stderr.

    git_cmd = ("git", *args)
    try:
        cp = subprocess.run(git_cmd, capture_output=True, cwd=cwd)
    except OSError as e:
        err(f"failed to run '{cmd2str(git_cmd)}': {e}")

    out = cp.stdout.decode("utf-8")

    failed = cp.returncode != 0 or bool(cp.stderr)
    if failed:
        report = "".join((
            f"'{cmd2str(git_cmd)}' exited with status {cp.returncode} and/or ",
            "wrote to stderr.\n",
            "==stdout==\n",
            f"{out}\n",
            "==stderr==\n",
            f"{cp.stderr.decode('utf-8')}\n",
        ))
        err(report)

    return out.rstrip()
| 74 | + |
| 75 | + |
def get_files(filter=None, paths=None):
    # Returns the output of 'git diff --name-only' over COMMIT_RANGE as one
    # string, as produced by git(). 'filter' is an optional --diff-filter
    # value such as "ARC"; 'paths' is an optional iterable of paths that
    # restricts the diff.

    diff_cmd = ['diff', '--name-only']
    if filter:
        diff_cmd.append(f'--diff-filter={filter}')
    diff_cmd.append(COMMIT_RANGE)
    if paths:
        # '--' separates the revision range from the path arguments
        diff_cmd.append('--')
        diff_cmd.extend(paths)
    return git(*diff_cmd)
| 80 | + |
| 81 | + |
def ls_owned_files(codeowners):
    """Returns an OrderedDict mapping git patterns from the CODEOWNERS file
    to the corresponding list of files found on the filesystem. It
    unfortunately does not seem possible to invoke git and re-use
    how 'git ignore' and/or 'git attributes' already implement this,
    we must re-invent it.

    'codeowners' is the path of the CODEOWNERS file to read. Lines that are
    blank or start with '#' are skipped. A line that does not look like
    "<pattern> <owner>..." and a pattern that matches nothing under GIT_TOP
    are both reported through failure() with the line number they were found
    on. A malformed line gets no entry in the result; a pattern without
    matches maps to an empty list.
    """

    # TODO: filter out files not in "git ls-files" (e.g.,
    # twister-out) _if_ the overhead isn't too high for a clean tree.
    #
    # pathlib.match() doesn't support **, so it looks like we can't
    # recursively glob the output of ls-files directly, only real
    # files :-(

    pattern2files = collections.OrderedDict()
    top_path = Path(GIT_TOP)

    with open(codeowners, "r") as codeo:
        for lineno, line in enumerate(codeo, start=1):

            if line.startswith("#") or not line.strip():
                continue

            # A valid entry is a pattern followed by at least one owner
            match = re.match(r"^([^\s,]+)\s+[^\s]+", line)
            if not match:
                failure(f"Invalid CODEOWNERS line {lineno}\n\t{line}",
                        file='CODEOWNERS', line=lineno)
                continue

            git_patrn = match.group(1)
            glob = git_pattern_to_glob(git_patrn)

            # Paths are stored as strings relative to the top of the tree:
            # comparing strings is much faster later
            files = [str(abs_path.relative_to(top_path))
                     for abs_path in top_path.glob(glob)]

            if not files:
                # Adjacent literals are concatenated as-is, so the space
                # before "but" has to be part of the first one.
                failure(f"Path '{git_patrn}' not found in the tree "
                        f"but is listed in CODEOWNERS", line=lineno)

            pattern2files[git_patrn] = files

    return pattern2files
| 126 | + |
| 127 | + |
def git_pattern_to_glob(git_pattern):
    """Turns a CODEOWNERS git pattern into a glob relative to GIT_TOP.

    A pattern starting with '/' loses that slash; any other pattern gets
    '**/' prepended. A pattern ending in '/' gets '**/*' appended. A
    pattern without trailing '/' that names an existing directory under
    GIT_TOP is reported through failure() and returned without the suffix.
    """

    anchored = git_pattern.startswith("/")
    ret = git_pattern[1:] if anchored else "**/" + git_pattern

    if git_pattern.endswith("/"):
        return ret + "**/*"

    if os.path.isdir(os.path.join(GIT_TOP, ret)):
        failure(f"Expected '/' after directory '{ret}' in CODEOWNERS")

    return ret
| 145 | + |
| 146 | + |
def codeowners():
    # Checks that every file added, renamed or copied in COMMIT_RANGE is
    # matched by some pattern of the CODEOWNERS file at the top of the
    # tree. Problems are reported through failure(); a missing CODEOWNERS
    # file is fatal (err()).

    owners_path = os.path.join(GIT_TOP, "CODEOWNERS")
    if not os.path.exists(owners_path):
        err("CODEOWNERS not available in this repo")

    name_changes = get_files(filter="ARCD")
    owners_changes = get_files(paths=(owners_path,))

    if not (name_changes or owners_changes):
        # TODO: 1. decouple basic and cheap CODEOWNERS syntax
        # validation from the expensive ls_owned_files() scanning of
        # the entire tree. 2. run the former always.
        return

    logger.info("If this takes too long then cleanup and try again")
    patrn2files = ls_owned_files(owners_path)

    # The way git finds Renames and Copies is not "exact science",
    # however if one is missed then it will always be reported as an
    # Addition instead.
    added = get_files(filter="ARC").splitlines()
    logger.debug(f"New files {added}")

    # Convert to pathlib.Path string representation (e.g.,
    # backslashes 'dir1\dir2\' on Windows) to be consistent
    # with ls_owned_files()
    added = [str(Path(name)) for name in added]

    new_not_owned = []
    for newf in added:
        for git_pat, owned in patrn2files.items():
            logger.debug(f"Scanning {git_pat} for {newf}")

            if newf in owned:
                logger.info(f"{git_pat} matches new file {newf}")
                # Unlike github, we don't care about finding any
                # more specific owner.
                break
        else:
            # No pattern claimed this file
            new_not_owned.append(newf)

    if new_not_owned:
        listing = "\n".join(new_not_owned)
        failure("New files added that are not covered in CODEOWNERS:\n\n"
                + listing +
                "\n\nPlease add one or more entries in the CODEOWNERS file "
                "to cover those files")
| 197 | + |
| 198 | + |
def init_logs(cli_arg):
    # Sets up logging: attaches a stream handler to the root logger, stores
    # that logger in the module-level 'logger', and sets its level to
    # 'cli_arg' when given, otherwise to the LOG_LEVEL environment variable
    # (default "WARN").

    global logger

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(levelname)-8s: %(message)s'))

    logger = logging.getLogger('')
    logger.addHandler(handler)

    if cli_arg:
        logger.setLevel(cli_arg)
    else:
        logger.setLevel(os.environ.get('LOG_LEVEL', "WARN"))

    effective = logging.getLevelName(logger.getEffectiveLevel())
    logger.info("Log init completed, level=%s", effective)
| 215 | + |
| 216 | + |
def parse_args():
    # Parses the command line (sys.argv) and returns the argparse namespace
    # with 'commits' (commit range, default HEAD~1..HEAD) and 'loglevel'
    # (one of the Python logging level names, or None when not given).

    default_range = 'HEAD~1..HEAD'
    level_names = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']

    parser = argparse.ArgumentParser(
        description="Check for CODEOWNERS file ownership.",
        allow_abbrev=False)
    parser.add_argument(
        '-c', '--commits',
        default=default_range,
        help='Commit range in the form: a..[b], default is\n'
             f' {default_range}')
    parser.add_argument(
        "-v", "--loglevel",
        choices=level_names,
        help="python logging level")

    return parser.parse_args()
| 230 | + |
| 231 | + |
def main():
    # Script entry point: parses the command line, initializes the
    # GIT_TOP and COMMIT_RANGE globals and logging, runs the CODEOWNERS
    # check, then exits with the number of failures (capped at 255) as the
    # exit status.

    global GIT_TOP, COMMIT_RANGE

    args = parse_args()

    # The absolute path of the top-level git directory. Initialize it here so
    # that issues running Git can be reported to GitHub.
    GIT_TOP = git("rev-parse", "--show-toplevel")

    # The commit range passed in --commits, e.g. "HEAD~3..HEAD"
    COMMIT_RANGE = args.commits

    init_logs(args.loglevel)
    logger.info(f'Running tests on commit range {COMMIT_RANGE}')

    codeowners()

    # Only the low 8 bits of an exit status reach the parent process, so a
    # failure count that is a multiple of 256 would otherwise be seen as
    # success (0). Clamp instead of wrapping.
    sys.exit(min(failures, 255))
| 250 | + |
| 251 | + |
# Run the check only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
0 commit comments