From 521375956dec30a1f41750273c1855ae182b9d72 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Mon, 19 Feb 2024 11:01:09 +0100 Subject: [PATCH 1/7] gc-dist: outsource generic functionality --- src/mx/_impl/mx_gc.py | 65 ++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/src/mx/_impl/mx_gc.py b/src/mx/_impl/mx_gc.py index 306ee09b..0f5ee605 100644 --- a/src/mx/_impl/mx_gc.py +++ b/src/mx/_impl/mx_gc.py @@ -129,47 +129,41 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, td) -@mx.command('mx', 'gc-dists') -def gc_dists(args): - """ Garbage collect mx distributions.""" - - parser = argparse.ArgumentParser(prog='mx gc-dists', description='''Garbage collect layout distributions. - By default, it collects all found layout distributions that are *not* part of the current configuration (see `--keep-current`). - This command respects mx level suite filtering (e.g., `mx --suite my-suite gc-dists`). - ''', epilog='''If the environment variable `MX_GC_AFTER_BUILD` is set, %(prog)s will be executed after `mx build` - using the content of the environment variable as parameters.''') +def _gc_collect_generic(args, parser, _gc_collect_candidates): # mutually exclusive groups do not support title and description - wrapping in another group as a workaround - action_group_desc = parser.add_argument_group('actions', 'What to do with the result. One of the following arguments is required.') + action_group_desc = parser.add_argument_group('actions', + 'What to do with the result. One of the following arguments is required.') action_group = action_group_desc.add_mutually_exclusive_group(required=True) - action_group.add_argument('-f', '--force', action='store_true', help='remove layout distributions without further questions') - action_group.add_argument('-n', '--dry-run', action='store_true', help='show what would be removed without actually doing anything') - action_group.add_argument('-i', '--interactive', action='store_true', help='ask for every layout distributions whether it should be removed') - keep_current_group_desc = parser.add_argument_group('current configuration handling', description='How to deal with the current configuration, i.e., what `mx build` would rebuild.') + action_group.add_argument('-f', '--force', action='store_true', + help='remove candidates without further questions') + action_group.add_argument('-n', '--dry-run', action='store_true', + help='show what would be removed without actually doing anything') + action_group.add_argument('-i', '--interactive', action='store_true', + help='ask for every candidate whether it should be removed') + keep_current_group_desc = parser.add_argument_group('current configuration handling', + description='How to deal with the current configuration, i.e., what `mx build` would rebuild.') keep_current_group = keep_current_group_desc.add_mutually_exclusive_group() - keep_current_group.add_argument('--keep-current', action='store_true', default=True, help='keep layout distributions of the current configuration (default)') - keep_current_group.add_argument('--no-keep-current', action='store_false', dest='keep_current', help='remove layout distributions of the current configuration') + keep_current_group.add_argument('--keep-current', action='store_true', default=True, + help='keep candidate referenced by current configuration (default)') + keep_current_group.add_argument('--no-keep-current', action='store_false', dest='keep_current', + help='remove candidate referenced by the current configuration') filter_group = parser.add_argument_group('result filters', description='Filter can be combined.') filter_group.add_argument('--reverse', action='store_true', help='reverse the result') - filter_group.add_argument('--older-than', action=TimeAction, help=f"only show results older than the specified point in time (format: {TimeAction.fmt.replace('%', '%%')})") + filter_group.add_argument('--older-than', action=TimeAction, + help=f"only show results older than the specified point in time (format: {TimeAction.fmt.replace('%', '%%')})") try: parsed_args = parser.parse_args(args) except ValueError as ve: parser.error(str(ve)) - suites = mx.suites(opt_limit_to_suite=True, includeBinary=False, include_mx=False) - c = [] - - for s in suites: - c += _gc_layout_dists(s, parsed_args) - + return + c = _gc_collect_candidates(parsed_args) if not c: mx.log("Nothing to do!") return - if parsed_args.older_than: c = [x for x in c if x[1] < parsed_args.older_than] # sort by mod date c = sorted(c, key=lambda x: x[1], reverse=parsed_args.reverse) - # calculate max sizes max_path = 0 max_mod_time = 0 @@ -192,13 +186,32 @@ def gc_dists(args): mx.log('rm ' + path) mx.rmtree(path) size_sum += size - if parsed_args.dry_run: mx.log('Would free ' + _format_bytes(size_sum)) else: mx.log('Freed ' + _format_bytes(size_sum)) +@mx.command('mx', 'gc-dists') +def gc_dists(args): + """ Garbage collect mx distributions.""" + + parser = argparse.ArgumentParser(prog='mx gc-dists', description='''Garbage collect layout distributions. + By default, it collects all found layout distributions that are *not* part of the current configuration (see `--keep-current`). + This command respects mx level suite filtering (e.g., `mx --suite my-suite gc-dists`). + ''', epilog='''If the environment variable `MX_GC_AFTER_BUILD` is set, %(prog)s will be executed after `mx build` + using the content of the environment variable as parameters.''') + + def _gc_collect_candidates(parsed_args): + suites = mx.suites(opt_limit_to_suite=True, includeBinary=False, include_mx=False) + c = [] + for s in suites: + c += _gc_layout_dists(s, parsed_args) + return c + + _gc_collect_generic(args, parser, _gc_collect_candidates) + + def _gc_layout_dists(suite, parsed_args): """Returns a list of collected layout distributions as a tuples of form (path, modification time, size in bytes).""" mx.logv("GC layout distributions of suite " + suite.name) From 3429b68adf34e2d7762243edc73236bfc41e1d7a Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Mon, 19 Feb 2024 11:41:03 +0100 Subject: [PATCH 2/7] handle fetch-jdk --list outside of _parse_args --- src/mx/_impl/mx_fetchjdk.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mx/_impl/mx_fetchjdk.py b/src/mx/_impl/mx_fetchjdk.py index 7f14359e..ee8c85e1 100644 --- a/src/mx/_impl/mx_fetchjdk.py +++ b/src/mx/_impl/mx_fetchjdk.py @@ -65,6 +65,11 @@ def fetch_jdk(args): :return str: the JAVA_HOME for the JDK at the installation location denoted by `args` """ settings = _parse_args(args) + if settings["list"]: + for jdk in sorted(settings["jdk-defs"].keys()): + mx.log(jdk) + # cannot use mx.abort as it always adds a newline + raise SystemExit(0) jdk_binary = settings["jdk-binary"] jdks_dir = settings["jdks-dir"] @@ -400,11 +405,12 @@ def _parse_args(args): jdk_defs = _parse_jdk_defs(jdk_defs_location) jdk_binaries = _parse_jdk_binaries(jdk_binaries_locations, jdk_defs, args.arch) + settings["list"] = args.list + if args.list: - for jdk in sorted(jdk_defs.keys()): - mx.log(jdk) - # cannot use mx.abort as it always adds a newline - raise SystemExit(0) + settings["jdk-defs"] = jdk_defs + settings["jdk-binaries"] = jdk_binaries + return settings # use positional or option argument jdk_id = args.jdk_id_pos or args.jdk_id From b8e9978aa057b241629a3e36f08c5edc955f64e6 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Mon, 19 Feb 2024 12:05:21 +0100 Subject: [PATCH 3/7] add gc-jdks --- src/mx/_impl/mx_gc.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/mx/_impl/mx_gc.py b/src/mx/_impl/mx_gc.py index 0f5ee605..1277d071 100644 --- a/src/mx/_impl/mx_gc.py +++ b/src/mx/_impl/mx_gc.py @@ -28,7 +28,7 @@ import os import re -from . import mx +from . import mx, mx_fetchjdk from datetime import datetime, date, timedelta @@ -247,3 +247,31 @@ def _to_archive_name(d): archive_dir = os.path.join(dist_dir, "dists") candidates.update({os.path.join(archive_dir, x): unknown_archives.get(x) for x in _listdir(archive_dir) if x in unknown_archives.keys()}) return [(full_path, unknown_dists.get(dist), _get_size_in_bytes(full_path)) for full_path, dist in candidates.items()] + + +@mx.command('mx', 'gc-jdks') +def gc_dists(args): + """ Garbage collect mx distributions.""" + + parser = argparse.ArgumentParser(prog='mx gc-jdks', description='''Garbage collect JDKs downloaded by mx fetch-jdk. + By default, it collects all JDKs not referenced in common.json (see `--keep-current`). + ''') + + def _gc_collect_candidates(parsed_args): + """Returns a list of collected layout distributions as a tuples of form (path, modification time, size in bytes).""" + settings = mx_fetchjdk._parse_args(["--list"]) + jdks_dir = settings["jdks-dir"] + jdk_binaries = settings["jdk-binaries"] + current_jdks = [jdk_binary.get_final_path(jdks_dir) for jdk_binary in jdk_binaries.values()] + + result = [] + for entry in os.listdir(jdks_dir): + full_path = os.path.join(jdks_dir, entry) + if parsed_args.keep_current and os.path.realpath(full_path) in current_jdks: + continue + modtime = datetime.fromtimestamp(os.path.getmtime(full_path)) + size = _get_size_in_bytes(full_path) + result.append((full_path, modtime, size)) + return result + + _gc_collect_generic(args, parser, _gc_collect_candidates) From 15366250df82b045bd620288f405f48a8a2d5677 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Mon, 19 Feb 2024 12:20:32 +0100 Subject: [PATCH 4/7] Bump version --- src/mx/_impl/mx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mx/_impl/mx.py b/src/mx/_impl/mx.py index 06900e3c..0455ec55 100755 --- a/src/mx/_impl/mx.py +++ b/src/mx/_impl/mx.py @@ -19294,7 +19294,7 @@ def alarm_handler(signum, frame): _CACHE_DIR = get_env('MX_CACHE_DIR', join(dot_mx_dir(), 'cache')) # The version must be updated for every PR (checked in CI) and the comment should reflect the PR's issue -version = VersionSpec("7.12.1") # Do not fail in the proftool test gate with multiple files. +version = VersionSpec("7.13.0") # mx gc-jdks _mx_start_datetime = datetime.utcnow() From 841ae9c547f1102de749204c57523c9ce437b0e3 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Tue, 20 Feb 2024 14:21:48 +0100 Subject: [PATCH 5/7] gc-jdks: address review comments --- src/mx/_impl/mx_gc.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/mx/_impl/mx_gc.py b/src/mx/_impl/mx_gc.py index 1277d071..fb433212 100644 --- a/src/mx/_impl/mx_gc.py +++ b/src/mx/_impl/mx_gc.py @@ -129,7 +129,14 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, td) -def _gc_collect_generic(args, parser, _gc_collect_candidates): +class CollectionCandidate(object): + def __init__(self, path, modification_time, size_in_bytes): + self.path = path + self.modification_time = modification_time + self.size_in_bytes = size_in_bytes + + +def _gc_collect_generic(args, parser, collect_candidates): # mutually exclusive groups do not support title and description - wrapping in another group as a workaround action_group_desc = parser.add_argument_group('actions', 'What to do with the result. One of the following arguments is required.') @@ -156,19 +163,22 @@ def _gc_collect_generic(args, parser, _gc_collect_candidates): except ValueError as ve: parser.error(str(ve)) return - c = _gc_collect_candidates(parsed_args) - if not c: + candidates = collect_candidates(parsed_args) + if not candidates: mx.log("Nothing to do!") return if parsed_args.older_than: - c = [x for x in c if x[1] < parsed_args.older_than] + candidates = [x for x in candidates if x.modification_time < parsed_args.older_than] # sort by mod date - c = sorted(c, key=lambda x: x[1], reverse=parsed_args.reverse) + candidates = sorted(candidates, key=lambda x: x.modification_time, reverse=parsed_args.reverse) # calculate max sizes max_path = 0 max_mod_time = 0 max_size = 0 - for path, mod_time, size in c: + for candidate in candidates: + path = candidate.path + mod_time = candidate.modification_time + size = candidate.size_in_bytes max_path = max(len(path), max_path) max_mod_time = max(len(_format_datetime(mod_time)), max_mod_time) max_size = max(len(_format_bytes(size)), max_size) @@ -176,7 +186,10 @@ def _gc_collect_generic(args, parser, _gc_collect_candidates): msg_fmt = '{0:<' + str(max_path) + '} modified {1:<' + str(max_mod_time + len(' ago')) +'} {2:<' + str(max_size) + '}' size_sum = 0 - for path, mod_time, size in c: + for candidate in candidates: + path = candidate.path + mod_time = candidate.modification_time + size = candidate.size_in_bytes if parsed_args.dry_run: mx.log(msg_fmt.format(path, _format_datetime(mod_time) + ' ago', _format_bytes(size))) size_sum += size @@ -246,11 +259,11 @@ def _to_archive_name(d): unknown_archives = {_to_archive_name(d) + ext: d for d in unknown_dists.keys()} archive_dir = os.path.join(dist_dir, "dists") candidates.update({os.path.join(archive_dir, x): unknown_archives.get(x) for x in _listdir(archive_dir) if x in unknown_archives.keys()}) - return [(full_path, unknown_dists.get(dist), _get_size_in_bytes(full_path)) for full_path, dist in candidates.items()] + return [CollectionCandidate(full_path, unknown_dists.get(dist), _get_size_in_bytes(full_path)) for full_path, dist in candidates.items()] @mx.command('mx', 'gc-jdks') -def gc_dists(args): +def gc_jdks(args): """ Garbage collect mx distributions.""" parser = argparse.ArgumentParser(prog='mx gc-jdks', description='''Garbage collect JDKs downloaded by mx fetch-jdk. @@ -271,7 +284,7 @@ def _gc_collect_candidates(parsed_args): continue modtime = datetime.fromtimestamp(os.path.getmtime(full_path)) size = _get_size_in_bytes(full_path) - result.append((full_path, modtime, size)) + result.append(CollectionCandidate(full_path, modtime, size)) return result _gc_collect_generic(args, parser, _gc_collect_candidates) From 76f9533d2f48a7e54c481a369886092f22897c4c Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Tue, 20 Feb 2024 15:15:25 +0100 Subject: [PATCH 6/7] gc-jdks: never remove JAVA_HOME and EXTRA_JAVA_HOMES --- src/mx/_impl/mx_gc.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/mx/_impl/mx_gc.py b/src/mx/_impl/mx_gc.py index fb433212..53f2504c 100644 --- a/src/mx/_impl/mx_gc.py +++ b/src/mx/_impl/mx_gc.py @@ -275,12 +275,29 @@ def _gc_collect_candidates(parsed_args): settings = mx_fetchjdk._parse_args(["--list"]) jdks_dir = settings["jdks-dir"] jdk_binaries = settings["jdk-binaries"] - current_jdks = [jdk_binary.get_final_path(jdks_dir) for jdk_binary in jdk_binaries.values()] + + # JDKs that should be kept (realpaths) + keep_jdks = set() + + def _keep_jdk(jdk): + keep_jdks.add(os.path.realpath(jdk)) + + if parsed_args.keep_current: + # remove JDKs specified in common.json + for jdk_binary in jdk_binaries.values(): + _keep_jdk(jdk_binary.get_final_path(jdks_dir)) + + # always keep the current JAVA_HOME and EXTRA_JAVA_HOMES entries + if mx._java_home(): + _keep_jdk(mx._java_home()) + + for jdk in mx._extra_java_homes(): + _keep_jdk(jdk) result = [] for entry in os.listdir(jdks_dir): full_path = os.path.join(jdks_dir, entry) - if parsed_args.keep_current and os.path.realpath(full_path) in current_jdks: + if os.path.realpath(full_path) in keep_jdks: continue modtime = datetime.fromtimestamp(os.path.getmtime(full_path)) size = _get_size_in_bytes(full_path) From 11ec3efd06a4f870442f64782190d45bebbfc8ff Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Tue, 20 Feb 2024 15:52:34 +0100 Subject: [PATCH 7/7] fix select_jdk.py --- src/mx/_impl/select_jdk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mx/_impl/select_jdk.py b/src/mx/_impl/select_jdk.py index ab7388bc..a1758dc6 100755 --- a/src/mx/_impl/select_jdk.py +++ b/src/mx/_impl/select_jdk.py @@ -296,7 +296,6 @@ def choose_jdks(jdk_cache_path=default_jdk_cache_path, only_list=False): if only_list: os.unlink(tmp_cache_path) else: - os.unlink(jdk_cache_path) os.rename(tmp_cache_path, jdk_cache_path) choices = {str(index):jdk for index, jdk in choices} jdks = [choices[n] for n in input('Select JDK(s) (separate multiple choices by whitespace)> ').split() if n in choices]