From 77a9e6f0ead139e24488448da1e6871843c6a75a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= <hakon.hagland@gmail.com>
Date: Mon, 8 Apr 2024 20:51:31 +0200
Subject: [PATCH] Updated add-keyword script

Updated add-keyword script to handle empty tags.
---
 scripts/python/src/fodt/add_keyword.py        | 48 ++++++++++++-------
 scripts/python/src/fodt/constants.py          |  1 +
 scripts/python/src/fodt/create_subdocument.py |  4 +-
 scripts/python/src/fodt/helpers.py            | 35 +++++++++++---
 scripts/python/src/fodt/remove_subsections.py | 18 ++++++-
 scripts/python/src/fodt/split_subdocument.py  |  4 +-
 scripts/python/tests/test_helpers.py          | 10 ++++
 7 files changed, 91 insertions(+), 29 deletions(-)
diff --git a/scripts/python/src/fodt/add_keyword.py b/scripts/python/src/fodt/add_keyword.py
index f7b88cb4..ebce5ae1 100644
--- a/scripts/python/src/fodt/add_keyword.py
+++ b/scripts/python/src/fodt/add_keyword.py
@@ -46,17 +46,17 @@ def __init__(self, keyword: str, status: KeywordStatus, title: str) -> None:
         self.start_tag_open = False  # Flag for empty tags, close with />
 
     def characters(self, content: str):
-        if self.start_tag_open:
-            # NOTE: characters() is only called if there is content between the start
-            # tag and the end tag. If there is no content, characters() is not called.
-            self.content.write(">")
-            self.start_tag_open = False
         if self.in_styles:
+            self.maybe_close_start_tag(self.content)
             self.content.write(XMLHelper.escape(content))
         elif self.in_appendix_table:
             if self.in_table_row:
+                self.maybe_close_start_tag(self.current_row)
                 self.current_row.write(XMLHelper.escape(content))
             else:
+                if self.start_tag_open:
+                    self.between_rows += ">"
+                    self.start_tag_open = False
                 self.between_rows += content
                 # Capture stuff between the rows, such that we
                 # can add it back. There can be tags like
@@ -70,6 +70,7 @@ def characters(self, content: str):
                     self.current_table_number += 1
                     if self.current_table_number == self.keyword_table_number:
                         self.found_appendix_table = True
+            self.maybe_close_start_tag(self.content)
             self.content.write(XMLHelper.escape(content))
 
     def endElement(self, name: str):
@@ -92,7 +93,6 @@ def endElement(self, name: str):
             elif self.in_table_row:
                 self.write_end_tag(self.current_row, name)
             else:
-                
                 if self.start_tag_open:
                     self.between_rows += "/>"
                     self.start_tag_open = False
@@ -107,7 +107,7 @@ def endElement(self, name: str):
 
     def extract_keyword_name(self, href: str) -> str:
         # Assume href starts with "#xxx.yyy.zzz.KEYWORD_NAME<space>"
-        if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)\s+", href):
+        if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)(?:\s+|$)", href):
             return m.group(1)
         else:
             return '<NOT FOUND>'
@@ -135,13 +135,18 @@ def get_new_appendix_row(self) -> str:
         new_row = re.sub(r'###COLOR###', color, new_row)
         return new_row
 
+    def maybe_close_start_tag(self, buffer: io.StringIO) -> None:
+        if self.start_tag_open:
+            # NOTE: characters() is only called if there is content between the start
+            # tag and the end tag. If there is no content, characters() is not called.
+            buffer.write(">")
+            self.start_tag_open = False
+
+
     def startDocument(self):
         self.content.write(XMLHelper.header)
 
     def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
-        if self.start_tag_open:
-            self.content.write(">")  # Close the start tag
-            self.start_tag_open = False
         if self.in_styles:
             if name == "style:style":
                 if "style:name" in attrs.getNames():
@@ -152,7 +157,7 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
         elif name == "office:automatic-styles":
             self.in_styles = True
         if self.in_styles:
-            self.content.write(XMLHelper.starttag(name, attrs))
+            self.write_start_tag(self.content, name, attrs)
         else:
             if name == "table:table-row":
                 self.in_table_row = True
@@ -171,11 +176,12 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
                     )
             if self.in_appendix_table:
                 if self.in_table_row:
-                    self.current_row.write(XMLHelper.starttag(name, attrs))
+                    self.write_start_tag(self.current_row, name, attrs)
                 else:
-                    self.between_rows += XMLHelper.starttag(name, attrs)
+                    self.start_tag_open = True
+                    self.between_rows += XMLHelper.starttag(name, attrs, close_tag=False)
             else:
-                self.content.write(XMLHelper.starttag(name, attrs))
+                self.write_start_tag(self.content, name, attrs)
 
     def write_appendix_table(self) -> None:
         idx_found = False
@@ -201,6 +207,14 @@ def write_missing_styles(self):
             self.content.write(self.style_templates[style_name])
             self.content.write("\n")
 
+    def write_start_tag(
+        self, buffer: io.StringIO, name: str, attrs: xml.sax.xmlreader.AttributesImpl
+    ) -> None:
+        if self.start_tag_open:
+            buffer.write(">")  # Close the start tag
+        self.start_tag_open = True
+        buffer.write(XMLHelper.starttag(name, attrs, close_tag=False))
+
 
 class AddKeyword():
     def __init__(
@@ -213,8 +227,8 @@ def __init__(
         title: str,
         status: KeywordStatus
     ) -> None:
-        self.maindir = maindir
-        self.keyword_dir = Helpers.get_keyword_dir(keyword_dir)
+        self.maindir = Helpers.get_maindir(maindir)
+        self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
         self.keyword = keyword
         self.chapter = chapter
         self.section = section
@@ -242,7 +256,7 @@ def add_keyword(self) -> None:
 
     def update_appendixA(self) -> None:
         logging.info(f"Updating appendix A.")
-        self.filename = Path(self.maindir) / Directories.appendices / f"A.{FileExtensions.fodt}"
+        self.filename = self.maindir / Directories.appendices / f"A.{FileExtensions.fodt}"
         if not self.filename.is_file():
             raise FileNotFoundError(f"File {self.filename} not found.")
         # parse the xml file
diff --git a/scripts/python/src/fodt/constants.py b/scripts/python/src/fodt/constants.py
index 11dee58a..5867498f 100644
--- a/scripts/python/src/fodt/constants.py
+++ b/scripts/python/src/fodt/constants.py
@@ -36,6 +36,7 @@ class Directories():
     backup = "backup"
     info = "info"
     keywords = "keywords"
+    keyword_names = "keyword-names"
     meta = "meta"
     meta_sections = "sections"
     parts = "parts"
diff --git a/scripts/python/src/fodt/create_subdocument.py b/scripts/python/src/fodt/create_subdocument.py
index 8f1fe9c0..e974f580 100644
--- a/scripts/python/src/fodt/create_subdocument.py
+++ b/scripts/python/src/fodt/create_subdocument.py
@@ -175,14 +175,14 @@ def get_parts(self) -> list[str]:
 class CreateSubDocument3(CreateSubDocument):
     def __init__(
         self,
-        maindir: str,
+        maindir: Path,
         keyword_dir: str,
         chapter: str,
         section: str,
         keyword: str,
         title: str,
     ) -> None:
-        self.maindir = Path(maindir)
+        self.maindir = maindir
         self.keyword_dir = keyword_dir
         self.chapter = chapter
         self.section = section
diff --git a/scripts/python/src/fodt/helpers.py b/scripts/python/src/fodt/helpers.py
index 5e903140..628dbd2e 100644
--- a/scripts/python/src/fodt/helpers.py
+++ b/scripts/python/src/fodt/helpers.py
@@ -1,6 +1,5 @@
 import importlib.resources  # access non-code resources
 import shutil
-import xml.sax.saxutils
 
 from pathlib import Path
 from fodt.constants import Directories, FileExtensions, FileNames
@@ -55,12 +54,14 @@ def derive_maindir_from_filename(filename: str) -> Path:
         # This should never be reached
 
     @staticmethod
-    def get_keyword_dir(keyword_dir: str) -> str:
+    def get_keyword_dir(keyword_dir: str, maindir: Path) -> str:
         if keyword_dir is None:
-            try_path = Path('../keyword-names')
-            if try_path.exists():
-                keyword_dir = try_path
-            else:
+            # No directory given: default to the relative path "../../keyword-names"
+            keyword_dir = Path(f'../../{Directories.keyword_names}')
+        if not Path(keyword_dir).exists():  # keyword_dir may be a plain str, not a Path
+            main_dir = Helpers.locate_maindir_from_current_dir()
+            keyword_dir = main_dir.parent / Directories.keyword_names
+            if not keyword_dir.exists():
                 raise FileNotFoundError(f"Keyword names directory not found.")
         return keyword_dir
 
@@ -160,6 +161,28 @@ def locate_maindir_and_filename(
                                 f"called '{maindir.name}'.")
 
 
+    @staticmethod
+    def locate_maindir_from_current_dir() -> Path:
+        cwd = Path.cwd()
+        # We cannot use derive_maindir_from_filename() here because cwd does not
+        # have to be inside maindir in this case
+        while True:
+            # Check if we have reached the root directory
+            #  cwd.parent == cwd is True if cwd is the root directory
+            if cwd.parent == cwd:
+                raise FileNotFoundError(f"Could not derive maindir from cwd: "
+                      f"Could not find '{FileNames.main_document}' in a directory "
+                      f"called '{Directories.parts}' by searching the parent "
+                      f"directories of cwd."
+                )
+            # Check if this directory has a subdirectory called "parts" with a file main.fodt
+            dir_ = cwd / Directories.parts
+            if dir_.is_dir():
+                if (dir_ / FileNames.main_document).exists():
+                    return dir_
+            cwd = cwd.parent
+        # This line should never be reached
+
     @staticmethod
     def locate_maindir_from_current_dir() -> Path:
         cwd = Path.cwd()
diff --git a/scripts/python/src/fodt/remove_subsections.py b/scripts/python/src/fodt/remove_subsections.py
index 1cdef5f7..dae3b151 100644
--- a/scripts/python/src/fodt/remove_subsections.py
+++ b/scripts/python/src/fodt/remove_subsections.py
@@ -36,8 +36,14 @@ def __init__(
         self.done = False
         self.remove_section = False
         self.in_main_section = False
+        self.start_tag_open = False  # Handle empty tags
 
     def characters(self, content: str):
+        if self.start_tag_open:
+            # NOTE: characters() is only called if there is content between the start
+            # tag and the end tag. If there is no content, characters() is not called.
+            self.content.write(">")
+            self.start_tag_open = False
         # if (not self.in_subsection) and (not self.remove_section):
         if not self.in_main_section:
             self.content.write(XMLHelper.escape(content))
@@ -60,7 +66,11 @@ def endElement(self, name: str):
                 self.done = True
                 self.in_main_section = False
         if (not self.in_subsection) and (not self.remove_section):
-            self.content.write(XMLHelper.endtag(name))
+            if self.start_tag_open:
+                self.content.write("/>")
+                self.start_tag_open = False
+            else:
+                self.content.write(XMLHelper.endtag(name))
         if name == "text:section":
             if self.remove_section:
                 self.remove_section = False
@@ -104,6 +114,9 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
                 if self.check_included_section(name, attrs):
                     self.remove_section = True
                     self.in_main_section = True
+        if self.start_tag_open:
+            self.content.write(">")  # Close the start tag
+            self.start_tag_open = False
         if write_include:
             self.in_main_section = True
             part = f"{self.chapter}.{self.section}.{self.current_subsection}"
@@ -111,7 +124,8 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
             callback = self.replace_callback
             self.content.write(callback(part, keyword))
         if (not self.in_subsection) and (not self.remove_section):
-            self.content.write(XMLHelper.starttag(name, attrs))
+            self.start_tag_open = True
+            self.content.write(XMLHelper.starttag(name, attrs, close_tag=False))
 
     def write_file(self):
         filename = Path(self.outputfn)
diff --git a/scripts/python/src/fodt/split_subdocument.py b/scripts/python/src/fodt/split_subdocument.py
index 9ec36192..e0ad5720 100644
--- a/scripts/python/src/fodt/split_subdocument.py
+++ b/scripts/python/src/fodt/split_subdocument.py
@@ -14,8 +14,8 @@ class Splitter():
     def __init__(self, maindir: str, keyword_dir: str, chapter: int, section: int) -> None:
         self.chapter = chapter
         self.section = section
-        self.maindir = Path(maindir)
-        self.keyword_dir = Helpers.get_keyword_dir(keyword_dir)
+        self.maindir = Helpers.get_maindir(maindir)
+        self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
         self.metadata_dir = self.maindir / Directories.meta
         assert self.maindir.is_dir()
 
diff --git a/scripts/python/tests/test_helpers.py b/scripts/python/tests/test_helpers.py
index 0109a3a0..3eeca967 100644
--- a/scripts/python/tests/test_helpers.py
+++ b/scripts/python/tests/test_helpers.py
@@ -23,6 +23,8 @@ def test_locate_with_absolute_path_exists(self, tmp_path: Path) -> None:
         assert result_filename == filename
 
     def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None:
+        """Test locating maindir and filename when the maindir is given as an absolute path
+        and the main file does not exist. This should raise an error."""
         maindir = tmp_path / Directories.parts
         maindir.mkdir()
         mainfile = maindir / FileNames.main_document
@@ -39,6 +41,8 @@ def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None:
                 f"called '{Directories.parts}'" in str(excinfo.value))
 
     def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> None:
+        """Test locating maindir and filename when the maindir is absolute and the
+        filename is a relative path."""
         maindir = tmp_path / Directories.parts
         maindir.mkdir()
         mainfile = maindir / FileNames.main_document
@@ -60,6 +64,9 @@ def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> No
     def test_locate_with_relative_path_not_in_maindir_but_in_cwd(
             self, tmp_path: Path
     ):
+        """Test locating maindir and filename when the maindir is absolute and the
+        filename is a relative path. The filename is not found in the maindir but
+        is found in the current working directory."""
         cwd = tmp_path / "cwd"
         cwd.mkdir()
         os.chdir(str(cwd))
@@ -78,6 +85,9 @@ def test_locate_with_relative_path_not_in_maindir_but_in_cwd(
         )
 
     def test_locate_with_absolute_path_not_exists(self, tmp_path: Path):
+        """Test locating maindir and filename when the maindir is absolute and the
+        filename is an absolute path to a file that does not exist. This should
+        raise an error."""
         maindir = tmp_path / Directories.parts
         maindir.mkdir()
         filename = tmp_path / "nonexistent.fodt"