From a1bba4ad9610df7eefaf66d1e9232613069a5f30 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Mon, 4 Sep 2023 15:38:42 +0200 Subject: [PATCH 01/20] [GR-47806] Start work on integrating code-owners into mx --- mx.py | 1 + mx_codeowners.py | 178 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 mx_codeowners.py diff --git a/mx.py b/mx.py index dc58272a..95fb014a 100755 --- a/mx.py +++ b/mx.py @@ -3809,6 +3809,7 @@ def _register_visit(s): import mx_logcompilation # pylint: disable=unused-import import mx_downstream import mx_subst +import mx_codeowners # pylint: disable=unused-import import mx_ideconfig # pylint: disable=unused-import import mx_ide_eclipse import mx_compdb diff --git a/mx_codeowners.py b/mx_codeowners.py new file mode 100644 index 00000000..0a57ed63 --- /dev/null +++ b/mx_codeowners.py @@ -0,0 +1,178 @@ +# +# ---------------------------------------------------------------------------------------------------- +# +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# ---------------------------------------------------------------------------------------------------- +# + +import argparse +import fnmatch +import logging +import os + +import mx + +class TomlParsingException(Exception): + pass + +def load_toml_from_fd(fd): + try: + import tomllib + try: + return tomllib.load(fd) + except tomllib.TOMLDecodeError: + raise TomlParsingException() + except ImportError: + # Try another library + pass + + try: + import toml + try: + return toml.load(fd) + except toml.TomlDecodeError: + raise TomlParsingException() + except ImportError: + # Try another library + pass + + # No other libraries to try, aborting + mx.abort("Could not find any suitable TOML library.") + + +def whitespace_split_(inp): + if isinstance(inp, str): + return inp.split() + else: + return inp + +def is_some_item_in_set(items, the_set): + for i in items: + if i in the_set: + return True + return False + +class FileOwners: + def __init__(self, src_root): + self.src = os.path.abspath(src_root) + + def get_path_components(self, filepath): + res = [] + while filepath != '': + (dirs, filename) = os.path.split(filepath) + res.append(filepath) + filepath = dirs + return reversed(res) + + def parse_ownership(self, fd, name): + try: + tree = load_toml_from_fd(fd) + logging.debug("Tree is %s", tree) + for rule in tree.get('rule', []): + if not 'files' in rule: + logging.warning("Ignoring rule %s in %s as it contains no files", rule, name) + continue + if (not 'any' in rule) and (not 'all' in rule): + logging.warning("Ignoring rule %s in %s as it contains no owner specification", rule, name) + continue + + rule['files'] = whitespace_split_(rule['files']) + optional_owners = whitespace_split_(rule.get('any', [])) + if optional_owners: + for pat in rule['files']: + yield pat, optional_owners, "any" + mandatory_owners = whitespace_split_(rule.get('all', [])) + if mandatory_owners: + for pat in rule['files']: + yield pat, mandatory_owners, "all" + + except TomlParsingException: + logging.warning("Ignoring invalid input from %s", name) + + def parse_ownership_from_files(self, files): + for fo in files: + try: + full_path = os.path.join(self.src, fo) + with open(full_path, 'rb') as f: + for i in self.parse_ownership(f, full_path): + yield i + except IOError: + pass + + def get_owners_of(self, filepath): + components = ['.'] + list(self.get_path_components(filepath)) + filename = os.path.split(filepath)[1] + owners_files = [ + os.path.join(i, 'OWNERS.toml') + for i in components[:-1] + ] + result = {} + ownership = self.parse_ownership_from_files(owners_files) + for pat, owners, modifiers in ownership: + if fnmatch.fnmatch(filename, pat): + if "all" in modifiers: + result["all"] = sorted(owners) + if "any" in modifiers: + result["any"] = sorted(owners) + return result + +def summarize_owners(all_owners): + must_review = set() + anyof_reviewers = [] + + for owners in all_owners: + for owner in owners.get('all', []): + must_review.add(owner) + + for owners in all_owners: + if owners.get('any', []): + # One reviewer is already present? Skip this completely + if not is_some_item_in_set(owners['any'], must_review): + anyof_reviewers.append(owners['any']) + + return { + "all": sorted(must_review), + "any": list(set(map(tuple, anyof_reviewers))), + } + + +@mx.command('mx', 'codeowners') +def codeowners(args): + """Code owners check""" + parser = argparse.ArgumentParser(prog='mx codeowners') + parser.add_argument('files', metavar='FILENAME', nargs='*', help='Filenames to list owners of') + args = parser.parse_args(args) + + owners = FileOwners('.') + file_owners = [owners.get_owners_of(f) for f in args.files] + reviewers = summarize_owners(file_owners) + + if reviewers['all']: + print("Mandatory reviewers (all of these must review):") + for i in reviewers['all']: + print(" o", i) + if reviewers['any']: + print("Any-of reviewers (at least one from each line):") + for i in reviewers['any']: + print(" o", ' or '.join(i)) + From a66f0dd49582d0028c72bc0bab96ddc7592b8a42 Mon Sep 17 00:00:00 2001 From: Aleksandar Prokopec Date: Mon, 4 Sep 2023 17:32:07 +0200 Subject: [PATCH 02/20] Add initial version of the SimpleTOML parser. --- stoml_parser.py | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 stoml_parser.py diff --git a/stoml_parser.py b/stoml_parser.py new file mode 100644 index 00000000..32858fc8 --- /dev/null +++ b/stoml_parser.py @@ -0,0 +1,146 @@ + +from argparse import ArgumentParser + +def parse_file(path): + with open(path, "r") as f: + content = f.read() + return parse_string(content, path=path) + +def parse_string(content, path=""): + parser = StomlParser() + return parser.parse(path, content) + +class Streamer: + def __init__(self, path, content): + self.path = path + self.content = content + self.lines = [] + self.line = "" + self.row = 0 + self.column = 0 + self.pos = 0 + + def terminate(self, message): + row = self.row + column = self.column + self.slurp(len(self.content)) + raise RuntimeError( + self.path + ":" + str(row + 1) + ":" + str(column) + ": " + message + "\n" + + self.lines[row] + "\n" + + (" " * column) + "^" + "\n") + + def peek(self, ahead=0): + if self.pos + ahead < len(self.content): + return self.content[self.pos + ahead] + return "" + + def pull(self, expected=None): + if expected == None: + self.slurp(1) + return + for i in range(0, len(expected)): + if self.peek(i) != expected[i]: + self.terminate("Unexpected string, expected '" + expected + "'") + self.slurp(len(expected)) + + def pullSpaces(self): + while self.peek().isspace(): + self.pull() + + def slurp(self, count): + for i in range(0, count): + character = self.peek() + if character == "\n": + self.lines.append(self.line) + self.line = "" + self.row = self.row + 1 + self.column = 0 + else: + self.line = self.line + character + self.pos = self.pos + 1 + +class StomlParser: + def parse(self, path, content): + rules = [] + streamer = Streamer(path, content); + self.root(streamer, rules) + return rules + + def root(self, streamer, rules): + while True: + while streamer.peek().isspace(): + streamer.pull() + if streamer.peek() == "": + return + streamer.pull("[[rule]]") + rule = self.rule(streamer) + rules.append(rule) + + def rule(self, streamer): + rule = {} + while True: + while streamer.peek().isspace(): + streamer.pull() + if streamer.peek().isalpha(): + self.keyvalue(streamer, rule) + else: + return rule + + def keyvalue(self, streamer, rule): + key = self.identifier(streamer) + streamer.pullSpaces() + streamer.pull("=") + streamer.pullSpaces() + if streamer.peek() == "\"": + # string + value = self.string(streamer) + elif streamer.peek() == "[": + # list of strings + value = self.list(streamer) + else: + value = None + streamer.terminate("Expected either a string or a list of strings.") + rule[key] = value + + def identifier(self, streamer): + ident = "" + while streamer.peek().isalpha(): + ident = ident + streamer.peek() + streamer.pull() + return ident + + def string(self, streamer): + streamer.pull("\"") + content = "" + while streamer.peek() != "\"": + content = content + streamer.peek() + streamer.pull() + streamer.pull() + return content + + def list(self, streamer): + streamer.pull("[") + values = [] + streamer.pullSpaces() + while streamer.peek() != "]": + streamer.pullSpaces() + value = self.string(streamer) + values.append(value) + streamer.pullSpaces() + if streamer.peek() == ",": + streamer.pull() + streamer.pullSpaces() + streamer.pull() + return values + + +if __name__ == "__main__": + parser = ArgumentParser( + prog="SimpleTOML parser.", + description="Parses a simplified version of TOML.") + parser.add_argument("filename") + args = parser.parse_args() + + rules = parse_file(args.filename) + print(rules) + From 2cfa94a9e6318df85fabedcf7c09f73e44d076a4 Mon Sep 17 00:00:00 2001 From: Aleksandar Prokopec Date: Mon, 4 Sep 2023 17:36:39 +0200 Subject: [PATCH 03/20] Add missing column update. --- stoml_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stoml_parser.py b/stoml_parser.py index 32858fc8..c7e217d7 100644 --- a/stoml_parser.py +++ b/stoml_parser.py @@ -57,6 +57,7 @@ def slurp(self, count): self.column = 0 else: self.line = self.line + character + self.column = self.column + 1 self.pos = self.pos + 1 class StomlParser: From 55d67f4db144f70992ffebe867ca5b5a7444954f Mon Sep 17 00:00:00 2001 From: Aleksandar Prokopec Date: Tue, 5 Sep 2023 14:33:08 +0200 Subject: [PATCH 04/20] Minor fixes in error messages. --- stoml_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stoml_parser.py b/stoml_parser.py index c7e217d7..e9e821e6 100644 --- a/stoml_parser.py +++ b/stoml_parser.py @@ -26,7 +26,7 @@ def terminate(self, message): self.slurp(len(self.content)) raise RuntimeError( self.path + ":" + str(row + 1) + ":" + str(column) + ": " + message + "\n" + - self.lines[row] + "\n" + + (self.lines[row] if row < len(self.lines) else ("") + "\n" + (" " * column) + "^" + "\n") def peek(self, ahead=0): @@ -50,7 +50,7 @@ def pullSpaces(self): def slurp(self, count): for i in range(0, count): character = self.peek() - if character == "\n": + if character == "\n" or character == "": self.lines.append(self.line) self.line = "" self.row = self.row + 1 From 5b2cda33ec800b0c2e842cb495f0ca4d6d2ea65c Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Wed, 6 Sep 2023 22:38:11 +0200 Subject: [PATCH 05/20] Rename STOML file --- stoml_parser.py => mx_stoml.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename stoml_parser.py => mx_stoml.py (100%) diff --git a/stoml_parser.py b/mx_stoml.py similarity index 100% rename from stoml_parser.py rename to mx_stoml.py From bd3db234a6b5370caab06c98320c8e81c6fdcb3f Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Wed, 6 Sep 2023 22:43:54 +0200 Subject: [PATCH 06/20] Code owners: fallback to our TOML parser --- mx_codeowners.py | 11 +++++++++-- mx_stoml.py | 7 +++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index 0a57ed63..c3ff5b35 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -31,6 +31,7 @@ import os import mx +import mx_stoml class TomlParsingException(Exception): pass @@ -56,8 +57,14 @@ def load_toml_from_fd(fd): # Try another library pass - # No other libraries to try, aborting - mx.abort("Could not find any suitable TOML library.") + # No other libraries to try, falling back to our simplified parser + try: + tree = mx_stoml.parse_fd(fd) + return { + 'rule': tree, + } + except RuntimeError: + raise TomlParsingException() def whitespace_split_(inp): diff --git a/mx_stoml.py b/mx_stoml.py index e9e821e6..c1c25fc8 100644 --- a/mx_stoml.py +++ b/mx_stoml.py @@ -1,10 +1,13 @@ from argparse import ArgumentParser +def parse_fd(fd, path=""): + content = fd.read().decode('utf-8') + return parse_string(content, path=path) + def parse_file(path): with open(path, "r") as f: - content = f.read() - return parse_string(content, path=path) + return parse_fd(f, path) def parse_string(content, path=""): parser = StomlParser() From 572ea4812b2bea575d5a0294e3718ed368f73376 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Mon, 11 Sep 2023 15:52:40 +0200 Subject: [PATCH 07/20] Code owners: use git when no file list is given --- mx_codeowners.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mx_codeowners.py b/mx_codeowners.py index c3ff5b35..9befae26 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -162,15 +162,47 @@ def summarize_owners(all_owners): "any": list(set(map(tuple, anyof_reviewers))), } +def run_capture(args, must_succeed=True): + cmd_stdout = mx.OutputCapture() + cmd_stderr = mx.OutputCapture() + cmd_rc = mx.run(args, must_succeed, cmd_stdout, cmd_stderr) + return (cmd_rc, cmd_stdout.data, cmd_stderr.data) + +def git_diff_name_only(extra_args=None): + args = ['git', 'diff', '--name-only', '-z'] + if extra_args: + args.extend(extra_args) + rc, out, errout = run_capture(args) + assert rc == 0 + return list(filter(lambda x: x != '', out.split('\0'))) @mx.command('mx', 'codeowners') def codeowners(args): """Code owners check""" parser = argparse.ArgumentParser(prog='mx codeowners') parser.add_argument('files', metavar='FILENAME', nargs='*', help='Filenames to list owners of') + parser.add_argument('-a', dest='all_changes', action='store_true', default=False) + parser.add_argument('-b', dest='upstream_branch', default=None) args = parser.parse_args(args) + if args.upstream_branch: + args.all_changes = True + else: + args.upstream_branch = 'master' + + if args.all_changes and args.files: + mx.abort("Do not specify list of files with -b or -a") + + # TODO: what is the right starting directory? owners = FileOwners('.') + + if args.all_changes: + # Current modifications and all changes up to the upstream branch + args.files = git_diff_name_only([args.upstream_branch]) + git_diff_name_only() + elif not args.files: + # No arguments, query list of currently modified files + args.files = git_diff_name_only() + file_owners = [owners.get_owners_of(f) for f in args.files] reviewers = summarize_owners(file_owners) From 7168cbbc0aed86a34d87818774f5f65d1cab146f Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Mon, 11 Sep 2023 16:01:44 +0200 Subject: [PATCH 08/20] Update mx version --- mx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx.py b/mx.py index 95fb014a..cf97efa1 100755 --- a/mx.py +++ b/mx.py @@ -18784,7 +18784,7 @@ def alarm_handler(signum, frame): abort(1, killsig=signal.SIGINT) # The version must be updated for every PR (checked in CI) and the comment should reflect the PR's issue -version = VersionSpec("6.45.0") # multi-arch layout dirs +version = VersionSpec("6.46.0") # code owners _mx_start_datetime = datetime.utcnow() _last_timestamp = _mx_start_datetime From 801b5109acae15a0d4b927f0964aa589fa38f569 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 08:41:16 +0200 Subject: [PATCH 09/20] codeowners: argparse documentation --- mx_codeowners.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index 9befae26..a2900c75 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -176,13 +176,34 @@ def git_diff_name_only(extra_args=None): assert rc == 0 return list(filter(lambda x: x != '', out.split('\0'))) +MX_CODEOWNERS_HELP = """Find code owners from OWNERS.toml files. + + +Can be executed in three modes. + +* Without any options but with list of files: print owners of the + provided files. Example: + + mx codeowners -- substratevm/LICENSE substratevm/ci/ci.jsonnet + +* Without any arguments at all it prints owners of currently modified + but unstaged files (for Git). In other words, it prints possible + reviewers for changed but uncomitted files. Internally uses + git diff --name-only to query list of files. + +* When -a or -b BRANCH is provided, it looks also for all files + modified with comparison to given BRANCH (or to master with -a + only). In other words, it prints possible reviewers for the whole + pull request. +""" + @mx.command('mx', 'codeowners') def codeowners(args): - """Code owners check""" - parser = argparse.ArgumentParser(prog='mx codeowners') + """Find code owners from OWNERS.toml files.""" + parser = argparse.ArgumentParser(prog='mx codeowners', formatter_class=argparse.RawTextHelpFormatter, description=MX_CODEOWNERS_HELP) parser.add_argument('files', metavar='FILENAME', nargs='*', help='Filenames to list owners of') - parser.add_argument('-a', dest='all_changes', action='store_true', default=False) - parser.add_argument('-b', dest='upstream_branch', default=None) + parser.add_argument('-a', dest='all_changes', action='store_true', default=False, help='Print reviewers for this branch against master.') + parser.add_argument('-b', dest='upstream_branch', metavar='BRANCH', default=None, help='Print reviewers for this branch against BRANCH.') args = parser.parse_args(args) if args.upstream_branch: From 4091d25924bcdde7f0911f6bc1327612be12d1ed Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 08:55:34 +0200 Subject: [PATCH 10/20] codeowners: document OWNERS.toml too --- mx_codeowners.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index a2900c75..d4933585 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -197,11 +197,37 @@ def git_diff_name_only(extra_args=None): pull request. """ +MX_CODEOWNERS_HELP2 = """The ownership is read from OWNERS.toml files that can be added to any +directory. As an example, let us have a look at the following snippet. + + [[rule]] + files = "*.jsonnet *.libsonnet" + any = [ + "ci.master@oracle.com", + "another.ci.master@oracle.com", + ] + [[rule]] + files = "*.md" + any = "doc.owner@oracle.com release.manager@oracle.com" + +This says that files matching *.jsonnet and *.libsonnet are owned +by ci.master@oracle.com and another.ci.master@oracle.com. +Similarly, *.md files are owned by doc.owner@oracle.com and +release.manager@oracle.com. + +Note that we allow both explicit TOML arrays as well as implicit +separator of whitespace when specifying list of owners or list +of file patterns. + +When no rule matches, the tool searches in parent directories too. + +""" + @mx.command('mx', 'codeowners') def codeowners(args): """Find code owners from OWNERS.toml files.""" - parser = argparse.ArgumentParser(prog='mx codeowners', formatter_class=argparse.RawTextHelpFormatter, description=MX_CODEOWNERS_HELP) - parser.add_argument('files', metavar='FILENAME', nargs='*', help='Filenames to list owners of') + parser = argparse.ArgumentParser(prog='mx codeowners', formatter_class=argparse.RawTextHelpFormatter, description=MX_CODEOWNERS_HELP, epilog=MX_CODEOWNERS_HELP2) + parser.add_argument('files', metavar='FILENAME', nargs='*', help='File names to list owners of (relative to current work dir).') parser.add_argument('-a', dest='all_changes', action='store_true', default=False, help='Print reviewers for this branch against master.') parser.add_argument('-b', dest='upstream_branch', metavar='BRANCH', default=None, help='Print reviewers for this branch against BRANCH.') args = parser.parse_args(args) From 5bf1d0b9b2cb8038519d37fa54e05aa1d6e4db6b Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 08:56:30 +0200 Subject: [PATCH 11/20] Add missing license header --- mx_stoml.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mx_stoml.py b/mx_stoml.py index c1c25fc8..4ab691dc 100644 --- a/mx_stoml.py +++ b/mx_stoml.py @@ -1,3 +1,29 @@ +# +# ---------------------------------------------------------------------------------------------------- +# +# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# ---------------------------------------------------------------------------------------------------- +# from argparse import ArgumentParser From d3d35aa7ad8276dccf052e7f8d6582759334e496 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 09:03:56 +0200 Subject: [PATCH 12/20] codeowners: proper names of private methods --- mx_codeowners.py | 59 ++++++++++++++++++++++++------------------------ mx_stoml.py | 12 +++++----- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index d4933585..3029d482 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -33,16 +33,16 @@ import mx import mx_stoml -class TomlParsingException(Exception): +class _TomlParsingException(Exception): pass -def load_toml_from_fd(fd): +def _load_toml_from_fd(fd): try: import tomllib try: return tomllib.load(fd) except tomllib.TOMLDecodeError: - raise TomlParsingException() + raise _TomlParsingException() except ImportError: # Try another library pass @@ -52,7 +52,7 @@ def load_toml_from_fd(fd): try: return toml.load(fd) except toml.TomlDecodeError: - raise TomlParsingException() + raise _TomlParsingException() except ImportError: # Try another library pass @@ -64,16 +64,16 @@ def load_toml_from_fd(fd): 'rule': tree, } except RuntimeError: - raise TomlParsingException() + raise _TomlParsingException() -def whitespace_split_(inp): +def _whitespace_split(inp): if isinstance(inp, str): return inp.split() else: return inp -def is_some_item_in_set(items, the_set): +def _is_some_item_in_set(items, the_set): for i in items: if i in the_set: return True @@ -83,7 +83,7 @@ class FileOwners: def __init__(self, src_root): self.src = os.path.abspath(src_root) - def get_path_components(self, filepath): + def _get_path_components(self, filepath): res = [] while filepath != '': (dirs, filename) = os.path.split(filepath) @@ -91,9 +91,9 @@ def get_path_components(self, filepath): filepath = dirs return reversed(res) - def parse_ownership(self, fd, name): + def _parse_ownership(self, fd, name): try: - tree = load_toml_from_fd(fd) + tree = _load_toml_from_fd(fd) logging.debug("Tree is %s", tree) for rule in tree.get('rule', []): if not 'files' in rule: @@ -103,38 +103,38 @@ def parse_ownership(self, fd, name): logging.warning("Ignoring rule %s in %s as it contains no owner specification", rule, name) continue - rule['files'] = whitespace_split_(rule['files']) - optional_owners = whitespace_split_(rule.get('any', [])) + rule['files'] = _whitespace_split(rule['files']) + optional_owners = _whitespace_split(rule.get('any', [])) if optional_owners: for pat in rule['files']: yield pat, optional_owners, "any" - mandatory_owners = whitespace_split_(rule.get('all', [])) + mandatory_owners = _whitespace_split(rule.get('all', [])) if mandatory_owners: for pat in rule['files']: yield pat, mandatory_owners, "all" - except TomlParsingException: + except _TomlParsingException: logging.warning("Ignoring invalid input from %s", name) - def parse_ownership_from_files(self, files): + def _parse_ownership_from_files(self, files): for fo in files: try: full_path = os.path.join(self.src, fo) with open(full_path, 'rb') as f: - for i in self.parse_ownership(f, full_path): + for i in self._parse_ownership(f, full_path): yield i except IOError: pass def get_owners_of(self, filepath): - components = ['.'] + list(self.get_path_components(filepath)) + components = ['.'] + list(self._get_path_components(filepath)) filename = os.path.split(filepath)[1] owners_files = [ os.path.join(i, 'OWNERS.toml') for i in components[:-1] ] result = {} - ownership = self.parse_ownership_from_files(owners_files) + ownership = self._parse_ownership_from_files(owners_files) for pat, owners, modifiers in ownership: if fnmatch.fnmatch(filename, pat): if "all" in modifiers: @@ -143,7 +143,7 @@ def get_owners_of(self, filepath): result["any"] = sorted(owners) return result -def summarize_owners(all_owners): +def _summarize_owners(all_owners): must_review = set() anyof_reviewers = [] @@ -154,7 +154,7 @@ def summarize_owners(all_owners): for owners in all_owners: if owners.get('any', []): # One reviewer is already present? Skip this completely - if not is_some_item_in_set(owners['any'], must_review): + if not _is_some_item_in_set(owners['any'], must_review): anyof_reviewers.append(owners['any']) return { @@ -162,21 +162,21 @@ def summarize_owners(all_owners): "any": list(set(map(tuple, anyof_reviewers))), } -def run_capture(args, must_succeed=True): +def _run_capture(args, must_succeed=True): cmd_stdout = mx.OutputCapture() cmd_stderr = mx.OutputCapture() cmd_rc = mx.run(args, must_succeed, cmd_stdout, cmd_stderr) return (cmd_rc, cmd_stdout.data, cmd_stderr.data) -def git_diff_name_only(extra_args=None): +def _git_diff_name_only(extra_args=None): args = ['git', 'diff', '--name-only', '-z'] if extra_args: args.extend(extra_args) - rc, out, errout = run_capture(args) + rc, out, errout = _run_capture(args) assert rc == 0 return list(filter(lambda x: x != '', out.split('\0'))) -MX_CODEOWNERS_HELP = """Find code owners from OWNERS.toml files. +_MX_CODEOWNERS_HELP = """Find code owners from OWNERS.toml files. Can be executed in three modes. @@ -197,7 +197,7 @@ def git_diff_name_only(extra_args=None): pull request. """ -MX_CODEOWNERS_HELP2 = """The ownership is read from OWNERS.toml files that can be added to any +_MX_CODEOWNERS_HELP2 = """The ownership is read from OWNERS.toml files that can be added to any directory. As an example, let us have a look at the following snippet. [[rule]] @@ -226,7 +226,7 @@ def git_diff_name_only(extra_args=None): @mx.command('mx', 'codeowners') def codeowners(args): """Find code owners from OWNERS.toml files.""" - parser = argparse.ArgumentParser(prog='mx codeowners', formatter_class=argparse.RawTextHelpFormatter, description=MX_CODEOWNERS_HELP, epilog=MX_CODEOWNERS_HELP2) + parser = argparse.ArgumentParser(prog='mx codeowners', formatter_class=argparse.RawTextHelpFormatter, description=_MX_CODEOWNERS_HELP, epilog=_MX_CODEOWNERS_HELP2) parser.add_argument('files', metavar='FILENAME', nargs='*', help='File names to list owners of (relative to current work dir).') parser.add_argument('-a', dest='all_changes', action='store_true', default=False, help='Print reviewers for this branch against master.') parser.add_argument('-b', dest='upstream_branch', metavar='BRANCH', default=None, help='Print reviewers for this branch against BRANCH.') @@ -245,13 +245,13 @@ def codeowners(args): if args.all_changes: # Current modifications and all changes up to the upstream branch - args.files = git_diff_name_only([args.upstream_branch]) + git_diff_name_only() + args.files = _git_diff_name_only([args.upstream_branch]) + _git_diff_name_only() elif not args.files: # No arguments, query list of currently modified files - args.files = git_diff_name_only() + args.files = _git_diff_name_only() file_owners = [owners.get_owners_of(f) for f in args.files] - reviewers = summarize_owners(file_owners) + reviewers = _summarize_owners(file_owners) if reviewers['all']: print("Mandatory reviewers (all of these must review):") @@ -261,4 +261,3 @@ def codeowners(args): print("Any-of reviewers (at least one from each line):") for i in reviewers['any']: print(" o", ' or '.join(i)) - diff --git a/mx_stoml.py b/mx_stoml.py index 4ab691dc..9bc777ef 100644 --- a/mx_stoml.py +++ b/mx_stoml.py @@ -28,18 +28,18 @@ from argparse import ArgumentParser def parse_fd(fd, path=""): - content = fd.read().decode('utf-8') - return parse_string(content, path=path) + content = fd.read().decode('utf-8') + return parse_string(content, path=path) def parse_file(path): with open(path, "r") as f: return parse_fd(f, path) def parse_string(content, path=""): - parser = StomlParser() + parser = _StomlParser() return parser.parse(path, content) -class Streamer: +class _Streamer: def __init__(self, path, content): self.path = path self.content = content @@ -89,10 +89,10 @@ def slurp(self, count): self.column = self.column + 1 self.pos = self.pos + 1 -class StomlParser: +class _StomlParser: def parse(self, path, content): rules = [] - streamer = Streamer(path, content); + streamer = _Streamer(path, content); self.root(streamer, rules) return rules From 4350a9518d74cff36987effb3a9dca08da09cd7c Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 09:07:13 +0200 Subject: [PATCH 13/20] codeowners: improve error reporting --- mx_codeowners.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index 3029d482..98425ce7 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -34,15 +34,19 @@ import mx_stoml class _TomlParsingException(Exception): - pass + def __init__(self, cause): + self.cause = cause + + def __str__(self): + return str(self.cause) def _load_toml_from_fd(fd): try: import tomllib try: return tomllib.load(fd) - except tomllib.TOMLDecodeError: - raise _TomlParsingException() + except tomllib.TOMLDecodeError as e: + raise _TomlParsingException(str(e)) except ImportError: # Try another library pass @@ -51,8 +55,8 @@ def _load_toml_from_fd(fd): import toml try: return toml.load(fd) - except toml.TomlDecodeError: - raise _TomlParsingException() + except toml.TomlDecodeError as e: + raise _TomlParsingException(e) except ImportError: # Try another library pass @@ -63,8 +67,8 @@ def _load_toml_from_fd(fd): return { 'rule': tree, } - except RuntimeError: - raise _TomlParsingException() + except RuntimeError as e: + raise _TomlParsingException(e) def _whitespace_split(inp): @@ -113,8 +117,8 @@ def _parse_ownership(self, fd, name): for pat in rule['files']: yield pat, mandatory_owners, "all" - except _TomlParsingException: - logging.warning("Ignoring invalid input from %s", name) + except _TomlParsingException as e: + logging.warning("Ignoring invalid input from %s: %s", name, e) def _parse_ownership_from_files(self, files): for fo in files: From 04a14ee4c2303b66a234abcf19a74af92ed44d4b Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 09:37:33 +0200 Subject: [PATCH 14/20] codeowners: better handling of absolute paths --- mx_codeowners.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index 98425ce7..c5a1eb49 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -92,6 +92,9 @@ def _get_path_components(self, filepath): while filepath != '': (dirs, filename) = os.path.split(filepath) res.append(filepath) + # For absolute path on Unix, we end with '/' + if filepath == dirs: + break filepath = dirs return reversed(res) @@ -123,7 +126,10 @@ def _parse_ownership(self, fd, name): def _parse_ownership_from_files(self, files): for fo in files: try: - full_path = os.path.join(self.src, fo) + if os.path.isabs(fo): + full_path = fo + else: + full_path = os.path.join(self.src, fo) with open(full_path, 'rb') as f: for i in self._parse_ownership(f, full_path): yield i @@ -131,12 +137,13 @@ def _parse_ownership_from_files(self, files): pass def get_owners_of(self, filepath): - components = ['.'] + list(self._get_path_components(filepath)) + components = ([] if os.path.isabs(filepath) else ['.']) + list(self._get_path_components(filepath)) filename = os.path.split(filepath)[1] owners_files = [ os.path.join(i, 'OWNERS.toml') for i in components[:-1] ] + owners_files = [i for i in owners_files if os.path.commonprefix([i, self.src]) == self.src] result = {} ownership = self._parse_ownership_from_files(owners_files) for pat, owners, modifiers in ownership: @@ -180,6 +187,13 @@ def _git_diff_name_only(extra_args=None): assert rc == 0 return list(filter(lambda x: x != '', out.split('\0'))) +def _git_get_repo_root_or_cwd(): + rc, out, errout = _run_capture(['git', 'rev-parse', '--show-toplevel']) + if rc != 0: + return '.' + else: + return out.rstrip('\n') + _MX_CODEOWNERS_HELP = """Find code owners from OWNERS.toml files. @@ -223,7 +237,8 @@ def _git_diff_name_only(extra_args=None): separator of whitespace when specifying list of owners or list of file patterns. -When no rule matches, the tool searches in parent directories too. +When no rule matches, the tool searches in parent directories too +(up to nearest Git repository root). """ @@ -244,8 +259,7 @@ def codeowners(args): if args.all_changes and args.files: mx.abort("Do not specify list of files with -b or -a") - # TODO: what is the right starting directory? - owners = FileOwners('.') + owners = FileOwners(_git_get_repo_root_or_cwd()) if args.all_changes: # Current modifications and all changes up to the upstream branch @@ -254,7 +268,7 @@ def codeowners(args): # No arguments, query list of currently modified files args.files = _git_diff_name_only() - file_owners = [owners.get_owners_of(f) for f in args.files] + file_owners = [owners.get_owners_of(os.path.abspath(f)) for f in args.files] reviewers = _summarize_owners(file_owners) if reviewers['all']: From 218cb0b7cc9f00be24bd03ae4fa179eb591487f6 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 09:38:38 +0200 Subject: [PATCH 15/20] Typo fix --- mx_codeowners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index c5a1eb49..1e341382 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -206,7 +206,7 @@ def _git_get_repo_root_or_cwd(): * Without any arguments at all it prints owners of currently modified but unstaged files (for Git). In other words, it prints possible - reviewers for changed but uncomitted files. Internally uses + reviewers for changed but uncommitted files. Internally uses git diff --name-only to query list of files. * When -a or -b BRANCH is provided, it looks also for all files From c16c85c9efbf139027526e2b30d4c6fa47af3f9e Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 09:51:54 +0200 Subject: [PATCH 16/20] mx_stoml: reformat to use 4 space --- mx_stoml.py | 257 ++++++++++++++++++++++++++-------------------------- 1 file changed, 127 insertions(+), 130 deletions(-) diff --git a/mx_stoml.py b/mx_stoml.py index 9bc777ef..961a3d89 100644 --- a/mx_stoml.py +++ b/mx_stoml.py @@ -28,149 +28,146 @@ from argparse import ArgumentParser def parse_fd(fd, path=""): - content = fd.read().decode('utf-8') - return parse_string(content, path=path) + content = fd.read().decode('utf-8') + return parse_string(content, path=path) def parse_file(path): - with open(path, "r") as f: - return parse_fd(f, path) + with open(path, "r") as f: + return parse_fd(f, path) def parse_string(content, path=""): - parser = _StomlParser() - return parser.parse(path, content) + parser = _StomlParser() + return parser.parse(path, content) class _Streamer: - def __init__(self, path, content): - self.path = path - self.content = content - self.lines = [] - self.line = "" - self.row = 0 - self.column = 0 - self.pos = 0 - - def terminate(self, message): - row = self.row - column = self.column - self.slurp(len(self.content)) - raise RuntimeError( - self.path + ":" + str(row + 1) + ":" + str(column) + ": " + message + "\n" + - (self.lines[row] if row < len(self.lines) else ("") + "\n" + - (" " * column) + "^" + "\n") - - def peek(self, ahead=0): - if self.pos + ahead < len(self.content): - return self.content[self.pos + ahead] - return "" - - def pull(self, expected=None): - if expected == None: - self.slurp(1) - return - for i in range(0, len(expected)): - if self.peek(i) != expected[i]: - self.terminate("Unexpected string, expected '" + expected + "'") - self.slurp(len(expected)) - - def pullSpaces(self): - while self.peek().isspace(): - self.pull() - - def slurp(self, count): - for i in range(0, count): - character = self.peek() - if character == "\n" or character == "": - self.lines.append(self.line) + def __init__(self, path, content): + self.path = path + self.content = content + self.lines = [] self.line = "" - self.row = self.row + 1 + self.row = 0 self.column = 0 - else: - self.line = self.line + character - self.column = self.column + 1 - self.pos = self.pos + 1 + self.pos = 0 + + def terminate(self, message): + row = self.row + column = self.column + self.slurp(len(self.content)) + raise RuntimeError( + self.path + ":" + str(row + 1) + ":" + str(column) + ": " + message + "\n" + + (self.lines[row] if row < len(self.lines) else ("") + "\n" + + (" " * column) + "^" + "\n") + + def peek(self, ahead=0): + if self.pos + ahead < len(self.content): + return self.content[self.pos + ahead] + return "" + + def pull(self, expected=None): + if expected == None: + self.slurp(1) + return + for i in range(0, len(expected)): + if self.peek(i) != expected[i]: + self.terminate("Unexpected string, expected '" + expected + "'") + self.slurp(len(expected)) + + def pullSpaces(self): + while self.peek().isspace(): + self.pull() + + def slurp(self, count): + for i in range(0, count): + character = self.peek() + if character == "\n" or character == "": + self.lines.append(self.line) + self.line = "" + self.row = self.row + 1 + self.column = 0 + else: + self.line = self.line + character + self.column = self.column + 1 + self.pos = self.pos + 1 class _StomlParser: - def parse(self, path, content): - rules = [] - streamer = _Streamer(path, content); - self.root(streamer, rules) - return rules - - def root(self, streamer, rules): - while True: - while streamer.peek().isspace(): - streamer.pull() - if streamer.peek() == "": - return - streamer.pull("[[rule]]") - rule = self.rule(streamer) - rules.append(rule) - - def rule(self, streamer): - rule = {} - while True: - while streamer.peek().isspace(): - streamer.pull() - if streamer.peek().isalpha(): - self.keyvalue(streamer, rule) - else: - return rule - - def keyvalue(self, streamer, rule): - key = self.identifier(streamer) - streamer.pullSpaces() - streamer.pull("=") - streamer.pullSpaces() - if streamer.peek() == "\"": - # string - value = self.string(streamer) - elif streamer.peek() == "[": - # list of strings - value = self.list(streamer) - else: - value = None - streamer.terminate("Expected either a string or a list of strings.") - rule[key] = value - - def identifier(self, streamer): - ident = "" - while streamer.peek().isalpha(): - ident = ident + streamer.peek() - streamer.pull() - return ident - - def string(self, streamer): - streamer.pull("\"") - content = "" - while streamer.peek() != "\"": - content = content + streamer.peek() - streamer.pull() - streamer.pull() - return content - - def list(self, streamer): - streamer.pull("[") - values = [] - streamer.pullSpaces() - while streamer.peek() != "]": - streamer.pullSpaces() - value = self.string(streamer) - values.append(value) - streamer.pullSpaces() - if streamer.peek() == ",": + def parse(self, path, content): + rules = [] + streamer = _Streamer(path, content); + self.root(streamer, rules) + return rules + + def root(self, streamer, rules): + while True: + while streamer.peek().isspace(): + streamer.pull() + if streamer.peek() == "": + return + streamer.pull("[[rule]]") + rule = self.rule(streamer) + rules.append(rule) + + def rule(self, streamer): + rule = {} + while True: + while streamer.peek().isspace(): + streamer.pull() + if streamer.peek().isalpha(): + self.keyvalue(streamer, rule) + else: + return rule + + def keyvalue(self, streamer, rule): + key = self.identifier(streamer) + streamer.pullSpaces() + streamer.pull("=") + streamer.pullSpaces() + if streamer.peek() == "\"": + # string + value = self.string(streamer) + elif streamer.peek() == "[": + # list of strings + value = self.list(streamer) + else: + value = None + streamer.terminate("Expected either a string or a list of strings.") + rule[key] = value + + def identifier(self, streamer): + ident = "" + while streamer.peek().isalpha(): + ident = ident + streamer.peek() + streamer.pull() + return ident + + def string(self, streamer): + streamer.pull("\"") + content = "" + while streamer.peek() != "\"": + content = content + streamer.peek() + streamer.pull() streamer.pull() + return content + + def list(self, streamer): + streamer.pull("[") + values = [] streamer.pullSpaces() - streamer.pull() - return values + while streamer.peek() != "]": + streamer.pullSpaces() + value = self.string(streamer) + values.append(value) + streamer.pullSpaces() + if streamer.peek() == ",": + streamer.pull() + streamer.pullSpaces() + streamer.pull() + return values if __name__ == "__main__": - parser = ArgumentParser( - prog="SimpleTOML parser.", - description="Parses a simplified version of TOML.") - parser.add_argument("filename") - args = parser.parse_args() - - rules = parse_file(args.filename) - print(rules) + parser = ArgumentParser(prog="SimpleTOML parser.", description="Parses a simplified version of TOML.") + parser.add_argument("filename") + args = parser.parse_args() + rules = parse_file(args.filename) + print(rules) From cce2740c1d156173c92d444a0392b2bd85f119ef Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 10:01:02 +0200 Subject: [PATCH 17/20] codeowners: fix Pylint warnings --- mx_codeowners.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mx_codeowners.py b/mx_codeowners.py index 1e341382..f5748972 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -35,6 +35,7 @@ class _TomlParsingException(Exception): def __init__(self, cause): + Exception.__init__() self.cause = cause def __str__(self): @@ -90,7 +91,7 @@ def __init__(self, src_root): def _get_path_components(self, filepath): res = [] while filepath != '': - (dirs, filename) = os.path.split(filepath) + (dirs, _) = os.path.split(filepath) res.append(filepath) # For absolute path on Unix, we end with '/' if filepath == dirs: @@ -183,12 +184,12 @@ def _git_diff_name_only(extra_args=None): args = ['git', 'diff', '--name-only', '-z'] if extra_args: args.extend(extra_args) - rc, out, errout = _run_capture(args) + rc, out, _ = _run_capture(args) assert rc == 0 return list(filter(lambda x: x != '', out.split('\0'))) def _git_get_repo_root_or_cwd(): - rc, out, errout = _run_capture(['git', 'rev-parse', '--show-toplevel']) + rc, out, _ = _run_capture(['git', 'rev-parse', '--show-toplevel']) if rc != 0: return '.' else: From 6c74516f2cffd9f895124922c44e9202ce7a940c Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 10:30:07 +0200 Subject: [PATCH 18/20] mx_stoml: pylint fixes --- mx_stoml.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mx_stoml.py b/mx_stoml.py index 961a3d89..0c660070 100644 --- a/mx_stoml.py +++ b/mx_stoml.py @@ -64,7 +64,7 @@ def peek(self, ahead=0): return "" def pull(self, expected=None): - if expected == None: + if expected is None: self.slurp(1) return for i in range(0, len(expected)): @@ -73,11 +73,11 @@ def pull(self, expected=None): self.slurp(len(expected)) def pullSpaces(self): - while self.peek().isspace(): - self.pull() + while self.peek().isspace(): + self.pull() def slurp(self, count): - for i in range(0, count): + for _ in range(0, count): character = self.peek() if character == "\n" or character == "": self.lines.append(self.line) @@ -92,7 +92,7 @@ def slurp(self, count): class _StomlParser: def parse(self, path, content): rules = [] - streamer = _Streamer(path, content); + streamer = _Streamer(path, content) self.root(streamer, rules) return rules From 5804c7511170446a09d58bace5a616c49f94667b Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 10:37:44 +0200 Subject: [PATCH 19/20] pylint --- mx_stoml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx_stoml.py b/mx_stoml.py index 0c660070..00abf888 100644 --- a/mx_stoml.py +++ b/mx_stoml.py @@ -79,7 +79,7 @@ def pullSpaces(self): def slurp(self, count): for _ in range(0, count): character = self.peek() - if character == "\n" or character == "": + if character in ("\n", ""): self.lines.append(self.line) self.line = "" self.row = self.row + 1 From 17345ddffc1f112fe24d37a03ec46774b507ea07 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Thu, 14 Sep 2023 14:12:35 +0200 Subject: [PATCH 20/20] codeowners: extend documentation --- mx_codeowners.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mx_codeowners.py b/mx_codeowners.py index f5748972..e4cfafde 100644 --- a/mx_codeowners.py +++ b/mx_codeowners.py @@ -234,6 +234,15 @@ def _git_get_repo_root_or_cwd(): Similarly, *.md files are owned by doc.owner@oracle.com and release.manager@oracle.com. +These rules are applied to files in the same directory (i.e. same +as the one where this OWNERS.toml is stored) as well as to files +matching the pattern in subdirectories. The pattern can be +overridden by another OWNERS.toml in a subdirectory. In other words, +ownership tries to find first matching rule, starting with file +OWNERS.toml in current directory and traversing to parent ones. +Directories without OWNERS.toml are skipped and search continues +in their parent. + Note that we allow both explicit TOML arrays as well as implicit separator of whitespace when specifying list of owners or list of file patterns.