diff --git a/lib/dic_chunk.py b/lib/dic_chunk.py index cf7eb96..b8f7616 100644 --- a/lib/dic_chunk.py +++ b/lib/dic_chunk.py @@ -44,6 +44,7 @@ def from_hunspell_dic(cls, dic_path: str, chunk_size: int, target_dir: str, samp Returns: A list of DicChunk objects, each representing a chunk of the dictionary file """ + LOGGER.debug(f"Splitting dictionary file \"{dic_path}\" into chunks...") compounds = (True if 'compounds' in dic_path else False) with open(dic_path, 'r', encoding=LATIN_1_ENCODING) as dic_file: lines = dic_file.readlines()[1:] # Skip the first line @@ -65,6 +66,7 @@ def from_hunspell_dic(cls, dic_path: str, chunk_size: int, target_dir: str, samp chunk_file.write(f"{len(chunk)}\n") chunk_file.writelines(chunk) chunks.append(cls(chunk_path, compounds)) + LOGGER.debug(f"Split into {len(chunks)} chunks.") return chunks def unmunch(self, aff_path: str, delete_tmp: bool = False) -> NamedTemporaryFile: diff --git a/lib/languagetool_utils.py b/lib/languagetool_utils.py index 1d86ab3..b64b38e 100644 --- a/lib/languagetool_utils.py +++ b/lib/languagetool_utils.py @@ -46,6 +46,7 @@ def tokenise(self, unmunched_file: NamedTemporaryFile) -> NamedTemporaryFile: tokenisation_result = ShellCommand(tokenise_cmd).run_with_input(unmunched_str) tokenised_tmp.write(tokenisation_result) tokenised_tmp.flush() + LOGGER.debug(f"Done tokenising {unmunched_file.name}!") return tokenised_tmp def build_spelling_binary(self, tokenised_temps: List[NamedTemporaryFile]) -> None: @@ -62,7 +63,7 @@ def build_spelling_binary(self, tokenised_temps: List[NamedTemporaryFile]) -> No Returns: None """ - LOGGER.info(f"Building binary for {self.variant}...") + LOGGER.info(f"Building spelling binary for {self.variant}...") megatemp = NamedTemporaryFile(delete=self.delete_tmp, mode='w', encoding='utf-8') # Open the file with UTF-8 encoding lines = set() @@ -80,11 +81,12 @@ def build_spelling_binary(self, tokenised_temps: List[NamedTemporaryFile]) -> No f"-o {self.variant.dict()}" ) 
def pretty_time_delta(time_delta: timedelta) -> str:
    """Format a timedelta as a compact human-readable duration string.

    Adapted from https://gist.github.com/thatalextaylor/7408395.

    The duration is truncated to whole seconds and rendered using the largest
    non-zero unit as the leading component, e.g. ``'2d3h4min5s'``,
    ``'1h0min1s'``, ``'1min5s'`` or ``'5s'``. Negative durations are rendered
    as the magnitude prefixed with ``'-'`` (e.g. ``'-1min5s'``).

    Args:
        time_delta: The duration to format.

    Returns:
        The formatted duration string.
    """
    total_seconds = int(time_delta.total_seconds())
    # divmod() floors towards negative infinity, so splitting a negative value
    # directly would yield e.g. "23h59min55s" for -5 seconds. Work on the
    # magnitude and re-attach the sign instead.
    sign = '-' if total_seconds < 0 else ''
    remaining = abs(total_seconds)
    days, remaining = divmod(remaining, 86400)
    hours, remaining = divmod(remaining, 3600)
    minutes, seconds = divmod(remaining, 60)
    if days > 0:
        return f'{sign}{days}d{hours}h{minutes}min{seconds}s'
    if hours > 0:
        return f'{sign}{hours}h{minutes}min{seconds}s'
    if minutes > 0:
        return f'{sign}{minutes}min{seconds}s'
    return f'{sign}{seconds}s'