forked from Uberspace/lab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spelling_tools.py
executable file
·121 lines (100 loc) · 3.95 KB
/
spelling_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!.venv/bin/python
"""
Creates `new_words.txt` with words that failed a spellcheck.
If the `--merge` flag is given, the content of that file is merged in the
spelling dictionary instead.
Run `make spelling` to run the actual spellcheck.
"""
import argparse
import sys
from collections import Counter
from pathlib import Path
from typing import List
from typing import Optional
*_, EPILOG = __doc__.strip().split("\n")
DESCRIPTION = "\n".join(_)
BASE_PATH = Path(__file__).parent.absolute()
NEW_WORDS_FILE = BASE_PATH / "new_words.txt"
SPELLCHECK_DIR = BASE_PATH / "build/spelling"
WORDLIST_FILE = BASE_PATH / "source/dict.txt"
def get_misspelled_words(error_dir: Path) -> Counter:
"""Return counter for misspelled words form files in *error_dir*."""
words = Counter()
for error_file in (e for e in error_dir.glob("*.spelling")):
lines = error_file.read_text().split("\n")
for line in (line for l in lines if (line := l.strip())):
try:
word = line.split("(")[1].split(")")[0]
except IndexError:
print("failed to parse line: {line}", file=sys.stderr)
continue
words[word] += 1
return words
def merge_wordfiles(*files: List[Path], out_file: Optional[Path] = None) -> List:
"""Combine words form *files*, return sorted results and write to *out_file* if set."""
words = set()
for wordfile in (f for f in files if f.exists()):
with wordfile.open() as f:
for line in (line for l in f if (line := l.strip())):
words.add(line)
words = sorted(words, key=lambda word: (word.lower(), word))
if out_file is not None:
out_file.write_text("\n".join(words) + "\n")
return words
def write_errors_to_newlist(error_dir: Path, outfile: Path) -> Counter:
"""Write errors found in *error_dir* to *outfile*, sorted by hit count."""
if not (error_dir.exists() and error_dir.is_dir()):
raise ValueError(f"'{error_dir}' not found")
error_counter = get_misspelled_words(error_dir)
outfile.write_text("\n".join((t[0] for t in error_counter.most_common())))
return error_counter
def merge_newlist_to_wordlist(newlist: Path, wordlist: Path) -> List:
"""Merge *newlist* to *wordlist* and return the results."""
if not (newlist.exists() and newlist.is_file()):
raise ValueError(f"'{newlist}' not found")
return merge_wordfiles(newlist, wordlist, out_file=wordlist)
def get_args(argv=None):
ap = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG)
ap.add_argument(
"--merge", action="store_true", help="merge new words to dictionary"
)
return ap.parse_args(argv)
def main(
merge=False,
newlist: Path = NEW_WORDS_FILE,
wordlist: Path = WORDLIST_FILE,
error_dir: Path = SPELLCHECK_DIR,
):
"""
Write errors found in *error_dir* to *newlist*, sorted by hit count.
Or merge *newlist* to *wordlist* instead, if *merge* is set.
"""
if merge:
try:
words = merge_newlist_to_wordlist(newlist, wordlist)
print(f"Wrote {len(words)} words to dictionary '{wordlist}'.")
except ValueError:
print(
f"[ERROR] no wordlist found in '{newlist}'.\n",
"You can run without the `--merge` flag, to create it.",
file=sys.stderr,
)
return 4
else:
try:
error_counter = write_errors_to_newlist(error_dir, newlist)
print(
f"Found {len(error_counter)} unique words in a total of "
f"{sum(error_counter.values())} misspelled words."
)
except ValueError:
print(
f"[ERROR] no spellcheck results found in '{error_dir}'.\n",
"You can run `make spelling` to create them.",
file=sys.stderr,
)
return 3
return 0
if __name__ == "__main__":
args = get_args()
sys.exit(main(**vars(args)))