From 77a9e6f0ead139e24488448da1e6871843c6a75a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 8 Apr 2024 20:51:31 +0200 Subject: [PATCH] Updated add-keyword script Updated add-keyword script to handle empty tags. --- scripts/python/src/fodt/add_keyword.py | 48 ++++++++++++------- scripts/python/src/fodt/constants.py | 1 + scripts/python/src/fodt/create_subdocument.py | 4 +- scripts/python/src/fodt/helpers.py | 35 +++++++++++--- scripts/python/src/fodt/remove_subsections.py | 18 ++++++- scripts/python/src/fodt/split_subdocument.py | 4 +- scripts/python/tests/test_helpers.py | 10 ++++ 7 files changed, 91 insertions(+), 29 deletions(-) diff --git a/scripts/python/src/fodt/add_keyword.py b/scripts/python/src/fodt/add_keyword.py index f7b88cb4..ebce5ae1 100644 --- a/scripts/python/src/fodt/add_keyword.py +++ b/scripts/python/src/fodt/add_keyword.py @@ -46,17 +46,17 @@ def __init__(self, keyword: str, status: KeywordStatus, title: str) -> None: self.start_tag_open = False # Flag for empty tags, close with /> def characters(self, content: str): - if self.start_tag_open: - # NOTE: characters() is only called if there is content between the start - # tag and the end tag. If there is no content, characters() is not called. - self.content.write(">") - self.start_tag_open = False if self.in_styles: + self.maybe_close_start_tag(self.content) self.content.write(XMLHelper.escape(content)) elif self.in_appendix_table: if self.in_table_row: + self.maybe_close_start_tag(self.current_row) self.current_row.write(XMLHelper.escape(content)) else: + if self.start_tag_open: + self.between_rows += ">" + self.start_tag_open = False self.between_rows += content # Capture stuff between the rows, such that we # can add it back. There can be tags like @@ -70,6 +70,7 @@ def characters(self, content: str): self.current_table_number += 1 if self.current_table_number == self.keyword_table_number: self.found_appendix_table = True + self.maybe_close_start_tag(self.content) self.content.write(XMLHelper.escape(content)) def endElement(self, name: str): @@ -92,7 +93,6 @@ def endElement(self, name: str): elif self.in_table_row: self.write_end_tag(self.current_row, name) else: - if self.start_tag_open: self.between_rows += "/>" self.start_tag_open = False @@ -107,7 +107,7 @@ def endElement(self, name: str): def extract_keyword_name(self, href: str) -> str: # Assume href starts with "#xxx.yyy.zzz.KEYWORD_NAME" - if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)\s+", href): + if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)(?:\s+|$)", href): return m.group(1) else: return '' @@ -135,13 +135,18 @@ def get_new_appendix_row(self) -> str: new_row = re.sub(r'###COLOR###', color, new_row) return new_row + def maybe_close_start_tag(self, buffer: io.StringIO) -> None: + if self.start_tag_open: + # NOTE: characters() is only called if there is content between the start + # tag and the end tag. If there is no content, characters() is not called. + buffer.write(">") + self.start_tag_open = False + + def startDocument(self): self.content.write(XMLHelper.header) def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl): - if self.start_tag_open: - self.content.write(">") # Close the start tag - self.start_tag_open = False if self.in_styles: if name == "style:style": if "style:name" in attrs.getNames(): @@ -152,7 +157,7 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl): elif name == "office:automatic-styles": self.in_styles = True if self.in_styles: - self.content.write(XMLHelper.starttag(name, attrs)) + self.write_start_tag(self.content, name, attrs) else: if name == "table:table-row": self.in_table_row = True @@ -171,11 +176,12 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl): ) if self.in_appendix_table: if self.in_table_row: - self.current_row.write(XMLHelper.starttag(name, attrs)) + self.write_start_tag(self.current_row, name, attrs) else: - self.between_rows += XMLHelper.starttag(name, attrs) + self.start_tag_open = True + self.between_rows += XMLHelper.starttag(name, attrs, close_tag=False) else: - self.content.write(XMLHelper.starttag(name, attrs)) + self.write_start_tag(self.content, name, attrs) def write_appendix_table(self) -> None: idx_found = False @@ -201,6 +207,14 @@ def write_missing_styles(self): self.content.write(self.style_templates[style_name]) self.content.write("\n") + def write_start_tag( + self, buffer: io.StringIO, name: str, attrs: xml.sax.xmlreader.AttributesImpl + ) -> None: + if self.start_tag_open: + buffer.write(">") # Close the start tag + self.start_tag_open = True + buffer.write(XMLHelper.starttag(name, attrs, close_tag=False)) + class AddKeyword(): def __init__( @@ -213,8 +227,8 @@ def __init__( title: str, status: KeywordStatus ) -> None: - self.maindir = maindir - self.keyword_dir = Helpers.get_keyword_dir(keyword_dir) + self.maindir = Helpers.get_maindir(maindir) + self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir) self.keyword = keyword self.chapter = chapter self.section = section @@ -242,7 +256,7 @@ def add_keyword(self) -> None: def update_appendixA(self) -> None: logging.info(f"Updating appendix A.") - self.filename = Path(self.maindir) / Directories.appendices / f"A.{FileExtensions.fodt}" + self.filename = self.maindir / Directories.appendices / f"A.{FileExtensions.fodt}" if not self.filename.is_file(): raise FileNotFoundError(f"File {self.filename} not found.") # parse the xml file diff --git a/scripts/python/src/fodt/constants.py b/scripts/python/src/fodt/constants.py index 11dee58a..5867498f 100644 --- a/scripts/python/src/fodt/constants.py +++ b/scripts/python/src/fodt/constants.py @@ -36,6 +36,7 @@ class Directories(): backup = "backup" info = "info" keywords = "keywords" + keyword_names = "keyword-names" meta = "meta" meta_sections = "sections" parts = "parts" diff --git a/scripts/python/src/fodt/create_subdocument.py b/scripts/python/src/fodt/create_subdocument.py index 8f1fe9c0..e974f580 100644 --- a/scripts/python/src/fodt/create_subdocument.py +++ b/scripts/python/src/fodt/create_subdocument.py @@ -175,14 +175,14 @@ def get_parts(self) -> list[str]: class CreateSubDocument3(CreateSubDocument): def __init__( self, - maindir: str, + maindir: Path, keyword_dir: str, chapter: str, section: str, keyword: str, title: str, ) -> None: - self.maindir = Path(maindir) + self.maindir = maindir self.keyword_dir = keyword_dir self.chapter = chapter self.section = section diff --git a/scripts/python/src/fodt/helpers.py b/scripts/python/src/fodt/helpers.py index 5e903140..628dbd2e 100644 --- a/scripts/python/src/fodt/helpers.py +++ b/scripts/python/src/fodt/helpers.py @@ -1,6 +1,5 @@ import importlib.resources # access non-code resources import shutil -import xml.sax.saxutils from pathlib import Path from fodt.constants import Directories, FileExtensions, FileNames @@ -55,12 +54,14 @@ def derive_maindir_from_filename(filename: str) -> Path: # This should never be reached @staticmethod - def get_keyword_dir(keyword_dir: str) -> str: + def get_keyword_dir(keyword_dir: str, maindir: Path) -> str: if keyword_dir is None: - try_path = Path('../keyword-names') - if try_path.exists(): - keyword_dir = try_path - else: + # Default value for keyword_dir is a relative path like "../../keyword-names" + keyword_dir = Path(f'../../{Directories.keyword_names}') + if not keyword_dir.exists(): + main_dir = Helpers.locate_maindir_from_current_dir() + keyword_dir = main_dir.parent / Directories.keyword_names + if not keyword_dir.exists(): raise FileNotFoundError(f"Keyword names directory not found.") return keyword_dir @@ -160,6 +161,28 @@ def locate_maindir_and_filename( f"called '{maindir.name}'.") + @staticmethod + def locate_maindir_from_current_dir() -> Path: + cwd = Path.cwd() + # We cannot use derive_maindir_from_filename() here because cwd does not + # have to be inside maindir in this case + while True: + # Check if we have reached the root directory + # cwd.parent == cwd is True if filename is the root directory + if cwd.parent == cwd: + raise FileNotFoundError(f"Could not derive maindir from cwd: " + f"Could not find '{FileNames.main_document}' in a directory " + f"called '{Directories.parts}' by searching the parent " + f"directories of cwd." + ) + # Check if there is a sibling directory called "parts" with a file main.fodt + dir_ = cwd / Directories.parts + if dir_.is_dir(): + if (dir_ / FileNames.main_document).exists(): + return dir_ + cwd = cwd.parent + # This line should never be reached + @staticmethod def locate_maindir_from_current_dir() -> Path: cwd = Path.cwd() diff --git a/scripts/python/src/fodt/remove_subsections.py b/scripts/python/src/fodt/remove_subsections.py index 1cdef5f7..dae3b151 100644 --- a/scripts/python/src/fodt/remove_subsections.py +++ b/scripts/python/src/fodt/remove_subsections.py @@ -36,8 +36,14 @@ def __init__( self.done = False self.remove_section = False self.in_main_section = False + self.start_tag_open = False # Handle empty tags def characters(self, content: str): + if self.start_tag_open: + # NOTE: characters() is only called if there is content between the start + # tag and the end tag. If there is no content, characters() is not called. + self.content.write(">") + self.start_tag_open = False # if (not self.in_subsection) and (not self.remove_section): if not self.in_main_section: self.content.write(XMLHelper.escape(content)) @@ -60,7 +66,11 @@ def endElement(self, name: str): self.done = True self.in_main_section = False if (not self.in_subsection) and (not self.remove_section): - self.content.write(XMLHelper.endtag(name)) + if self.start_tag_open: + self.content.write("/>") + self.start_tag_open = False + else: + self.content.write(XMLHelper.endtag(name)) if name == "text:section": if self.remove_section: self.remove_section = False @@ -104,6 +114,9 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl): if self.check_included_section(name, attrs): self.remove_section = True self.in_main_section = True + if self.start_tag_open: + self.content.write(">") # Close the start tag + self.start_tag_open = False if write_include: self.in_main_section = True part = f"{self.chapter}.{self.section}.{self.current_subsection}" @@ -111,7 +124,8 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl): callback = self.replace_callback self.content.write(callback(part, keyword)) if (not self.in_subsection) and (not self.remove_section): - self.content.write(XMLHelper.starttag(name, attrs)) + self.start_tag_open = True + self.content.write(XMLHelper.starttag(name, attrs, close_tag=False)) def write_file(self): filename = Path(self.outputfn) diff --git a/scripts/python/src/fodt/split_subdocument.py b/scripts/python/src/fodt/split_subdocument.py index 9ec36192..e0ad5720 100644 --- a/scripts/python/src/fodt/split_subdocument.py +++ b/scripts/python/src/fodt/split_subdocument.py @@ -14,8 +14,8 @@ class Splitter(): def __init__(self, maindir: str, keyword_dir: str, chapter: int, section: int) -> None: self.chapter = chapter self.section = section - self.maindir = Path(maindir) - self.keyword_dir = Helpers.get_keyword_dir(keyword_dir) + self.maindir = Helpers.get_maindir(maindir) + self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir) self.metadata_dir = self.maindir / Directories.meta assert self.maindir.is_dir() diff --git a/scripts/python/tests/test_helpers.py b/scripts/python/tests/test_helpers.py index 0109a3a0..3eeca967 100644 --- a/scripts/python/tests/test_helpers.py +++ b/scripts/python/tests/test_helpers.py @@ -23,6 +23,8 @@ def test_locate_with_absolute_path_exists(self, tmp_path: Path) -> None: assert result_filename == filename def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None: + """Test locating maindir and filename when the maindir is given as an absolute path + and the main file does not exist. This should raise an error.""" maindir = tmp_path / Directories.parts maindir.mkdir() mainfile = maindir / FileNames.main_document @@ -39,6 +41,8 @@ def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None: f"called '{Directories.parts}'" in str(excinfo.value)) def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> None: + """Test locating maindir and filename when the maindir is absolute and the + filename is a relative path.""" maindir = tmp_path / Directories.parts maindir.mkdir() mainfile = maindir / FileNames.main_document @@ -60,6 +64,9 @@ def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> No def test_locate_with_relative_path_not_in_maindir_but_in_cwd( self, tmp_path: Path ): + """Test locating maindir and filename when the maindir is absolute and the + filename is a relative path. The filename is not found in the maindir but + is found in the current working directory.""" cwd = tmp_path / "cwd" cwd.mkdir() os.chdir(str(cwd)) @@ -78,6 +85,9 @@ def test_locate_with_relative_path_not_in_maindir_but_in_cwd( ) def test_locate_with_absolute_path_not_exists(self, tmp_path: Path): + """Test locating maindir and filename when the maindir is absolute and the + filename is a relative path. The filename is not found in the maindir and + is not found in the current working directory. This should raise an error.""" maindir = tmp_path / Directories.parts maindir.mkdir() filename = tmp_path / "nonexistent.fodt"