From d4900305d0117af4fc47825372f06d4899741c53 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 2 Jul 2024 17:45:48 +0000 Subject: [PATCH] chore: delete obsolete porting scripts (#14346) This is a more conservative version of #14314, deleting just the scripts that we should have deleted a year ago when the port finished. --- scripts/align-import.py | 61 ---------- scripts/align.py | 64 ----------- scripts/benchmark.sh | 45 -------- scripts/make_port_status.py | 218 ------------------------------------ scripts/start_port.sh | 139 ----------------------- 5 files changed, 527 deletions(-) delete mode 100755 scripts/align-import.py delete mode 100755 scripts/align.py delete mode 100755 scripts/benchmark.sh delete mode 100755 scripts/make_port_status.py delete mode 100755 scripts/start_port.sh diff --git a/scripts/align-import.py b/scripts/align-import.py deleted file mode 100755 index c86ce1e159e14..0000000000000 --- a/scripts/align-import.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 - -# This script was written by ChatGPT. -# https://chat.openai.com/share/e0363ebf-ed6f-4fd8-9b76-ebf422ed9f62 - -import re -import sys - -def update_file_header(file_path): - with open(file_path, 'r') as f: - lines = f.readlines() - - # Initialize variables - end_of_header_index = 0 - source_module = "" - repo_ref = "" - commit_id = "" - - # Lines to delete - delete_indices = [] - - for i, line in enumerate(lines): - # Check for the end of the "import" lines - if line.startswith('import'): - end_of_header_index = i - elif end_of_header_index != 0 and not line.startswith('import'): - break - - # Extract the necessary info for the align import line and mark lines for deletion - if line.startswith('! This file was ported from'): - source_module = line.split()[-1] - delete_indices.append(i) - elif line.startswith('!') and 'commit' in line and commit_id == "": - split_line = line.split() - repo_ref = split_line[1] - commit_id = split_line[-1] - delete_indices.append(i) - elif line.startswith('!'): - delete_indices.append(i) - elif line == "\n" and lines[i+1].startswith("!"): - delete_indices.append(i) - - # Only proceed if we have found the necessary info for the align import line - if source_module and repo_ref and commit_id: - # Generate the new line - new_line = f'\n#align_import {source_module} from "{repo_ref}"@"{commit_id}"\n' - - # Delete the marked lines - for index in sorted(delete_indices, reverse=True): - del lines[index] - - # Insert the new line after the "import" lines - lines.insert(end_of_header_index - len(delete_indices) + 1, new_line) - - # Write the updated lines back to the file - with open(file_path, 'w') as f: - f.writelines(lines) - -# The first command line argument is the file path -file_path = sys.argv[1] -update_file_header(file_path) diff --git a/scripts/align.py b/scripts/align.py deleted file mode 100755 index 2c8f9a4efaac0..0000000000000 --- a/scripts/align.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 - -# Tool to add source headers to ported theory files, -# archived for historical purposes. - -from pathlib import Path -import re -import yaml - -excepts = { - 'categorytheory.category.rel': 'categorytheory.category.relcat', - 'categorytheory.isomorphism': 'categorytheory.iso', - 'categorytheory.naturalisomorphism': 'categorytheory.natiso', - 'categorytheory.naturaltransformation': 'categorytheory.nattrans', - 'leancore.data.vector': 'data.vector', - 'order.monovary': 'order.monotone.monovary' - } - -def condense(s): - if s.startswith('Mathlib/'): - s = s[len('Mathlib/'):] - if s.endswith('.lean'): - s = s[:-5] - s = s.lower() - s = s.replace('/', '.') - s = s.replace('_', '') - if s in excepts: - s = excepts[s] - return s - -port_status = yaml.load(open("mathlib4-port-status.yaml").read()) - -# map from condensed names to mathlib4 paths -map = {} -for path in Path('Mathlib').glob('**/*.lean'): - path = str(path) - map[condense(path)] = path - -count = 0 -for key, val in port_status.items(): - if val.startswith('Yes'): - sha = val.split()[2] - mathlib3 = key - mathlib4 = map[condense(key)] - - place = '(\n-/\n\n?import )' - blob = "\n\n! This file was ported from Lean 3 source module " + mathlib3 + "\n" + \ - "! leanprover-community/mathlib commit " + sha + "\n" + \ - "! Please do not edit these lines, except to modify the commit id\n" + \ - "! if you have ported upstream changes." - old = open(mathlib4).read() - - if blob[1:] in old: # match even without leading newline - print(f'{mathlib4} already has header') - elif "! leanprover-community/mathlib commit " in old: - m = re.search("^! leanprover-community/mathlib commit (.*)$", old, flags=re.MULTILINE) - print(f'file says {m.groups()[0]} but we want {sha}') - assert(False) - else: - new = re.sub(place, blob + '\\1', old, flags=re.MULTILINE) - open(mathlib4, 'w').write(new) - count += 1 - -print(count) diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh deleted file mode 100755 index fe526df07ef23..0000000000000 --- a/scripts/benchmark.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash -# This script should be run from a copy of `mathlib4`, with a parallel copy of `mathlib` available. -set -euo pipefail - -lake exe cache get > /dev/null -lake build > /dev/null - -cd ../mathlib -leanproject get-cache 2> /dev/null -leanproject build > /dev/null 2>&1 - -cd ../mathlib4 - -targets=$(cat Mathlib.lean | grep -v Mathlib.Tactic | grep -v Mathlib.Lean | grep -v Mathlib.Util \ - | grep -v Mathlib.Mathport | grep -v Mathlib.Init | grep -v Mathlib.Testing \ - | sed -e 's/import Mathlib\.//' | sed -e 's|\.|/|g') -mathlib_targets=() - -for t in $targets; do - s=$(grep "! This file was ported from Lean 3 source module" < Mathlib/$t.lean | awk '{ print $NF }' | sed -e 's|\.|/|g') || continue - mathlib_targets+=(src/$s.lean) - rm -f build/ir/Mathlib/$t.c - rm -f build/ir/Mathlib/$t.c.trace - rm -f build/lib/Mathlib/$t.olean - rm -f build/lib/Mathlib/$t.ilean - rm -f build/lib/Mathlib/$t.trace - rm -f ../mathlib/src/$s.olean -done - -echo "mathlib4 theory files:" -/usr/bin/time lake build > /dev/null - -cd ../mathlib -echo "corresponding files in mathlib3:" -/usr/bin/time lean --make ${mathlib_targets[@]} > /dev/null - -#cd ../mathlib4 -#for t in $targets; do -# s=$(grep "! This file was ported from Lean 3 source module" < Mathlib/$t.lean | awk '{ print $NF }' | sed -e 's|\.|/|g') -# echo $t -# lake env /usr/bin/time lean Mathlib/$t.lean > /dev/null -# cd ../mathlib -# /usr/bin/time lean -j1 src/$s.lean -# cd ../mathlib4 -#done diff --git a/scripts/make_port_status.py b/scripts/make_port_status.py deleted file mode 100755 index 1fbb5f15d1fc8..0000000000000 --- a/scripts/make_port_status.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env python3 - -import pytz -import datetime -import github -import os -import re -import requests -import subprocess -import sys -import yaml -import networkx as nx -from collections import defaultdict -from pathlib import Path - -# Must run from root of mathlib4 directory. - -if not os.path.exists('port-repos/mathlib'): - print("Make sure you are in the root of the mathlib4 directory") - print("and have checked out mathlib under port-repos/mathlib.") - sys.exit(1) - -GITHUB_TOKEN_FILE = 'port-repos/github-token' -github_token = open(GITHUB_TOKEN_FILE).read().strip() - -mathlib3_root = 'port-repos/mathlib' -mathlib4_root = './' - -source_module_re = re.compile(r"^! .*source module (.*)$") -commit_re = re.compile(r"^! (leanprover-community/[a-z]*) commit ([0-9a-f]*)") -import_re = re.compile(r"^import ([^ ]*)") - -align_import_re = re.compile( - r'^#align_import ([^ ]*) from "(leanprover-community/[a-z]*)" ?@ ?"([0-9a-f]*)"') - -def mk_label(path: Path) -> str: - rel = path.relative_to(Path(mathlib3_root)) - rel = Path(*rel.parts[1:]) - return str(rel.with_suffix('')).replace(os.sep, '.') - -paths = [] -for path in Path(mathlib3_root).glob('**/*.lean'): - if path.relative_to(mathlib3_root).parts[0] not in ['src', 'archive', 'counterexamples']: - continue - if path.relative_to(mathlib3_root).parts[1] in ['tactic', 'meta']: - continue - paths.append(path) - -graph = nx.DiGraph() -for path in paths: - graph.add_node(mk_label(path)) - -for path in paths: - label = mk_label(path) - for line in path.read_text().split('\n'): - m = import_re.match(line) - if m: - imported = m.group(1) - if imported.startswith('tactic.') or imported.startswith('meta.') or imported.startswith('.'): - continue - if imported not in graph.nodes: - if imported + '.default' in graph.nodes: - imported = imported + '.default' - else: - imported = imported - graph.add_edge(imported, label) - -def get_mathlib4_module_commit_info(contents): - module = repo = commit = None - for line in contents.split('\n'): - m = align_import_re.match(line) - if m: - module = m.group(1) - repo = m.group(2) - commit = m.group(3) - break - m = source_module_re.match(line) - if m: - module = m.group(1) - m = commit_re.match(line) - if m: - repo = m.group(1) - commit = m.group(2) - return module, repo, commit - -# contains ported files -# lean 3 module name -> { mathlib4_file, mathlib3_hash } -data = dict() -for path4 in Path(mathlib4_root).glob('**/*.lean'): - # we definitely do not want to look in `port-repos` here! - if path4.relative_to(mathlib4_root).parts[0] not in ('Mathlib', 'Archive', 'Counterexamples'): - continue - module, repo, commit = get_mathlib4_module_commit_info(path4.read_text()) - if module is None: - continue - - if commit is None: - print(f"Commit is None for module: {module}") - continue - - log = subprocess.run( - ['git', 'log', '--oneline', str(path4)], - capture_output=True) - pr_matches = re.search(r'#([0-9]+)\)$', log.stdout.decode().splitlines()[-1]) - if pr_matches: - mathlib4_pr = int(pr_matches.groups()[0]) - else: - mathlib4_pr = None - - data[module] = { - 'mathlib4_file': str(path4.relative_to(mathlib4_root)), - 'mathlib4_pr': mathlib4_pr, - 'source': dict(repo=repo, commit=commit) - } - - graph.add_node(module) - -prs = {} -fetch_args = ['git', 'fetch', 'origin'] -nums = [] -sync_prs = defaultdict(set) -mathlib4repo = github.Github(github_token).get_repo("leanprover-community/mathlib4") -for pr in mathlib4repo.get_pulls(state='open'): - if pr.created_at < datetime.datetime(2022, 12, 1, 0, 0, 0, tzinfo=pytz.UTC): - continue - if 'no-source-header' in (l.name for l in pr.labels): - continue - if 'mathlib3-pair' in (l.name for l in pr.labels): - for file in (f.filename for f in pr.get_files()): - sync_prs[file].add(pr.number) - num = pr.number - nums.append(num) - prs[num] = pr - fetch_args.append(f'pull/{num}/head:port-status-pull/{num}') - -os.system("git branch -D $(git branch --list 'port-status-pull/*')") -subprocess.run(fetch_args) - -prs_of_import = {} -for num in nums: - p = subprocess.run( - ['git', 'diff', '--name-only', '--diff-filter=A', - f'origin/master...port-status-pull/{num}'], - capture_output=True) - for l in p.stdout.decode().splitlines(): - f = subprocess.run( - ['git', 'cat-file', 'blob', f'port-status-pull/{num}:{l}'], - capture_output=True) - import_, repo, commit = get_mathlib4_module_commit_info(f.stdout.decode(encoding='utf8', errors='replace')) - prs_of_import.setdefault(import_, []).append({'pr': num, 'repo': repo, 'commit': commit, 'fname': l}) - -COMMENTS_URL = "https://raw.githubusercontent.com/wiki/leanprover-community/mathlib4/port-comments.md" -comments_dict = yaml.safe_load(requests.get(COMMENTS_URL).content.replace(b"```", b"")) - -yaml_dict = {} -new_yaml_dict = {} -for node in sorted(graph.nodes): - if node in data: - new_status = dict( - ported=True, - mathlib4_file=data[node]['mathlib4_file'], - mathlib4_pr=data[node]['mathlib4_pr'], - source=data[node]['source'] - ) - _sync_prs = [ - dict( - num=sync_pr_num, - labels=[dict(name=l.name, color=l.color) for l in prs[sync_pr_num].labels] - ) - for sync_pr_num in sync_prs[data[node]['mathlib4_file']] - ] - if _sync_prs: - new_status.update(mathlib4_sync_prs=_sync_prs) - pr_status = f"mathlib4#{data[node]['mathlib4_pr']}" if data[node]['mathlib4_pr'] is not None else "_" - sha = data[node]['source']['commit'] if data[node]['source']['repo'] == 'leanprover-community/mathlib' else "_" - - status = f"Yes {pr_status} {sha}" - else: - new_status = dict(ported=False) - status = f'No' - if node in prs_of_import: - pr_info = prs_of_import[node][0] - if pr_info['commit'] is None: - print('PR seems to be missing a source header', node, pr_info) - assert(False) - new_status.update( - mathlib4_pr=pr_info['pr'], - mathlib4_file=pr_info['fname'], - source=dict(repo=pr_info['repo'], commit=pr_info['commit'])) - labels = [{'name': l.name, 'color': l.color} for l in prs[pr_info['pr']].labels] - if labels: - new_status.update(labels=labels) - sha = pr_info['commit'] if pr_info['repo'] == 'leanprover-community/mathlib' else "_" - status += f" mathlib4#{pr_info['pr']} {sha}" - try: - comment_data = comments_dict[node] - except KeyError: - pass - else: - if isinstance(comment_data, str): - # old comment format - comment_data = dict(message=comment_data) - # new comment format - status += ' ' + comment_data['message'] - new_status.update(comment=comment_data) - yaml_dict[node] = status - new_yaml_dict[node] = new_status - -DO_NOT_EDIT_MESSAGE = """ -# Do not edit this file. -# If you want to add free-form comments about files that don't have PRs yet, -# edit https://github.com/leanprover-community/mathlib4/wiki/port-comments/_edit instead. -""" + ("\n" * 37) - -with open('port_status.yaml', 'w') as f: - f.write(DO_NOT_EDIT_MESSAGE + "```\n" + yaml.dump(yaml_dict) + "```\n") -with open('port_status_new.yaml', 'w') as f: - f.write(DO_NOT_EDIT_MESSAGE + "```\n" + yaml.dump(new_yaml_dict) + "```\n") diff --git a/scripts/start_port.sh b/scripts/start_port.sh deleted file mode 100755 index 26f36dd50f172..0000000000000 --- a/scripts/start_port.sh +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env bash - -set -e -set -o pipefail - -if [ ! -d Mathlib ] ; then - echo "No Mathlib/ directory; are you at the root of the mathlib4 repo?" - exit 1 -fi - -if [ ! $1 ] ; then - echo "usage: ./scripts/start_port.sh Mathlib/Foo/Bar/Baz.lean" - exit 1 -fi - -# arguments -root_path="$(pwd)" -mathlib4_path="$1" - -case $mathlib4_path in - Mathlib/*) true ;; - Archive/*) true ;; - Counterexamples/*) true ;; - *) echo "argument must begin with Mathlib/, Archive/, or Counterexamples/" - exit 1 ;; -esac - -MATHLIB3PORT_BASE_URL=https://raw.githubusercontent.com/leanprover-community/mathlib3port/master -LEAN3PORT_BASE_URL=https://raw.githubusercontent.com/leanprover-community/lean3port/master -PORT_STATUS_YAML=https://raw.githubusercontent.com/wiki/leanprover-community/mathlib/mathlib4-port-status.md - -# process path name -mathlib4_mod=$(basename $(echo "$mathlib4_path" | tr / .) .lean) -mathlib4_mod_tail=${mathlib4_mod#Mathlib.} -mathlib3port_url=$MATHLIB3PORT_BASE_URL/${1/#Mathlib/Mathbin} -lean3port_url=$LEAN3PORT_BASE_URL/${1/#Mathlib/Leanbin} - -# start the port from the latest master -git fetch -BASE_BRANCH=${BASE_BRANCH=origin/master} -BASE_COMMIT=$(git rev-parse $BASE_BRANCH) - -TMP_DIR=$(mktemp -d) -trap 'rm -fr "$TMP_DIR"' 0 1 2 13 15 - -export GIT_WORK_TREE=$TMP_DIR/working_tree -export GIT_INDEX_FILE=$TMP_DIR/index -mkdir -p "$GIT_WORK_TREE" - -echo "Checking out a new branch in a temporary working tree" -git ls-tree -z -r --full-name "$BASE_COMMIT" | git update-index -z --index-info -git checkout-index -a - -echo "Downloading latest version from mathlib3port" -( - cd $GIT_WORK_TREE; - # download the file, but don't commit it yet - mkdir -p "$(dirname "$mathlib4_path")" - curl --silent --show-error --fail -o "$mathlib4_path" "$lean3port_url" || curl --silent --show-error --fail -o "$mathlib4_path" "$mathlib3port_url" -) - -# Empty commit with nice title. Used by gh and hub to suggest PR title. -BASE_COMMIT="$(echo "feat: port $mathlib4_mod_tail" | git commit-tree "$(git write-tree)" -p "$BASE_COMMIT")" - -# Add the mathport file -git update-index --add "$mathlib4_path" -BASE_COMMIT="$(echo "Initial file copy from mathport" | git commit-tree "$(git write-tree)" -p "$BASE_COMMIT")" - -echo "Applying automated fixes" -# Apply automated fixes - -pushd $GIT_WORK_TREE; -sed -i 's/Mathbin\./Mathlib\./g' "$mathlib4_path" -sed -i 's/Leanbin\./Mathlib\./g' "$mathlib4_path" -sed -i '/^import/{s/[.]Gcd/.GCD/g; s/[.]Modeq/.ModEq/g; s/[.]Nary/.NAry/g; s/[.]Peq/.PEq/g; s/[.]Pfun/.PFun/g; s/[.]Pnat/.PNat/g; s/[.]Smul/.SMul/g; s/[.]Zmod/.ZMod/g; s/[.]Nnreal/.NNReal/g; s/[.]Ennreal/.ENNReal/g}' "$mathlib4_path" - -python3 "$root_path/scripts/fix-line-breaks.py" "$mathlib4_path" "$mathlib4_path.tmp" -mv "$mathlib4_path.tmp" "$mathlib4_path" - -if [[ "$mathlib4_mod" =~ ^Mathlib. ]]; then - which_all=Mathlib -elif [[ "$mathlib4_mod" =~ ^Counterexamples. ]]; then - which_all=Counterexamples -elif [[ "$mathlib4_mod" =~ ^Archive. ]]; then - which_all=Archive -fi -(echo "import $mathlib4_mod" ; cat $which_all.lean) | LC_ALL=C sort | uniq > $which_all.lean.tmp -mv -f $which_all.lean.tmp $which_all.lean - -popd - -# Commit them -git update-index --add $which_all.lean -git update-index --add "$mathlib4_path" -BASE_COMMIT="$(git commit-tree "$(git write-tree)" -p "$BASE_COMMIT" << EOF -automated fixes - -Mathbin -> Mathlib -fix certain import statements -move "by" to end of line -add import to $which_all.lean -EOF -)" - -echo "Successfully created initial commits:" -git log -n3 $BASE_COMMIT --graph --oneline -echo "" - -mathlib3_module=$(grep '^! .*source module ' <"$GIT_WORK_TREE/$mathlib4_path" | sed 's/.*source module \(.*\)$/\1/') - -# stop using the temporary working tree -unset GIT_WORK_TREE -unset GIT_INDEX_FILE - -if git cat-file -e origin/master:$mathlib4_path 2> /dev/null; then - echo "WARNING: this file has already been ported!" - echo "To continue anyway with a fresh port, you can run" - echo "" - echo " git checkout $BASE_COMMIT -b port/${mathlib4_mod_tail}-again" - echo "" - exit 1 -fi - -if PR=$(curl --silent --show-error --fail "$PORT_STATUS_YAML" | grep -F "$mathlib3_module: " | grep -o -E "mathlib4#[0-9]+"); then - set +x - echo "WARNING: The file is already in the process of being ported in $PR." - echo "To continue anyway with a fresh port, you can run" - echo "" - echo " git checkout $BASE_COMMIT -b port/${mathlib4_mod_tail}-" - echo "" - exit 1 -fi - -branch_name=port/${mathlib4_mod_tail} -echo "Checking out a new $branch_name branch from $BASE_COMMIT" -git checkout --no-track -b "$branch_name" "$BASE_COMMIT" - -echo "After pushing, you can open a PR at:" -echo "https://github.com/leanprover-community/mathlib4/compare/$branch_name?expand=1&title=feat:+port+$mathlib4_mod_tail&labels=mathlib-port"