Skip to content

Commit

Permalink
Updated add-keyword script
Browse files Browse the repository at this point in the history
Updated add-keyword script to handle empty tags.
  • Loading branch information
hakonhagland committed Apr 16, 2024
1 parent cbed9d7 commit 77a9e6f
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 29 deletions.
48 changes: 31 additions & 17 deletions scripts/python/src/fodt/add_keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ def __init__(self, keyword: str, status: KeywordStatus, title: str) -> None:
self.start_tag_open = False # Flag for empty tags, close with />

def characters(self, content: str):
if self.start_tag_open:
# NOTE: characters() is only called if there is content between the start
# tag and the end tag. If there is no content, characters() is not called.
self.content.write(">")
self.start_tag_open = False
if self.in_styles:
self.maybe_close_start_tag(self.content)
self.content.write(XMLHelper.escape(content))
elif self.in_appendix_table:
if self.in_table_row:
self.maybe_close_start_tag(self.current_row)
self.current_row.write(XMLHelper.escape(content))
else:
if self.start_tag_open:
self.between_rows += ">"
self.start_tag_open = False
self.between_rows += content
# Capture stuff between the rows, such that we
# can add it back. There can be tags like
Expand All @@ -70,6 +70,7 @@ def characters(self, content: str):
self.current_table_number += 1
if self.current_table_number == self.keyword_table_number:
self.found_appendix_table = True
self.maybe_close_start_tag(self.content)
self.content.write(XMLHelper.escape(content))

def endElement(self, name: str):
Expand All @@ -92,7 +93,6 @@ def endElement(self, name: str):
elif self.in_table_row:
self.write_end_tag(self.current_row, name)
else:

if self.start_tag_open:
self.between_rows += "/>"
self.start_tag_open = False
Expand All @@ -107,7 +107,7 @@ def endElement(self, name: str):

def extract_keyword_name(self, href: str) -> str:
# Assume href starts with "#xxx.yyy.zzz.KEYWORD_NAME<space>"
if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)\s+", href):
if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+)(?:\s+|$)", href):
return m.group(1)
else:
return '<NOT FOUND>'
Expand Down Expand Up @@ -135,13 +135,18 @@ def get_new_appendix_row(self) -> str:
new_row = re.sub(r'###COLOR###', color, new_row)
return new_row

def maybe_close_start_tag(self, buffer: io.StringIO) -> None:
    """Terminate a pending start tag by writing ``>`` to *buffer*.

    Does nothing unless ``self.start_tag_open`` is set. When it is set, the
    closing ``>`` is written and the flag is cleared.
    """
    if not self.start_tag_open:
        return
    # NOTE: characters() is only called if there is content between the start
    # tag and the end tag, so reaching this point with the flag set means the
    # element is not empty and must be closed with ">" rather than "/>".
    buffer.write(">")
    self.start_tag_open = False


def startDocument(self):
self.content.write(XMLHelper.header)

def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
if self.start_tag_open:
self.content.write(">") # Close the start tag
self.start_tag_open = False
if self.in_styles:
if name == "style:style":
if "style:name" in attrs.getNames():
Expand All @@ -152,7 +157,7 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
elif name == "office:automatic-styles":
self.in_styles = True
if self.in_styles:
self.content.write(XMLHelper.starttag(name, attrs))
self.write_start_tag(self.content, name, attrs)
else:
if name == "table:table-row":
self.in_table_row = True
Expand All @@ -171,11 +176,12 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
)
if self.in_appendix_table:
if self.in_table_row:
self.current_row.write(XMLHelper.starttag(name, attrs))
self.write_start_tag(self.current_row, name, attrs)
else:
self.between_rows += XMLHelper.starttag(name, attrs)
self.start_tag_open = True
self.between_rows += XMLHelper.starttag(name, attrs, close_tag=False)
else:
self.content.write(XMLHelper.starttag(name, attrs))
self.write_start_tag(self.content, name, attrs)

def write_appendix_table(self) -> None:
idx_found = False
Expand All @@ -201,6 +207,14 @@ def write_missing_styles(self):
self.content.write(self.style_templates[style_name])
self.content.write("\n")

def write_start_tag(
    self, buffer: io.StringIO, name: str, attrs: xml.sax.xmlreader.AttributesImpl
) -> None:
    """Write the start tag for element *name* to *buffer*, leaving it unterminated.

    If a previously written start tag is still open, its closing ``>`` is
    written first. The new tag is emitted via ``XMLHelper.starttag`` with
    ``close_tag=False`` and ``self.start_tag_open`` is set, so later code
    decides how the tag is terminated.
    """
    previous_tag_pending = self.start_tag_open
    if previous_tag_pending:
        # NOTE(review): the ">" goes to the buffer passed in here; presumably
        # that is the same buffer that received the still-open tag - confirm
        # against the callers.
        buffer.write(">")  # Close the start tag
    self.start_tag_open = True
    opening = XMLHelper.starttag(name, attrs, close_tag=False)
    buffer.write(opening)


class AddKeyword():
def __init__(
Expand All @@ -213,8 +227,8 @@ def __init__(
title: str,
status: KeywordStatus
) -> None:
self.maindir = maindir
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir)
self.maindir = Helpers.get_maindir(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
self.keyword = keyword
self.chapter = chapter
self.section = section
Expand Down Expand Up @@ -242,7 +256,7 @@ def add_keyword(self) -> None:

def update_appendixA(self) -> None:
logging.info(f"Updating appendix A.")
self.filename = Path(self.maindir) / Directories.appendices / f"A.{FileExtensions.fodt}"
self.filename = self.maindir / Directories.appendices / f"A.{FileExtensions.fodt}"
if not self.filename.is_file():
raise FileNotFoundError(f"File {self.filename} not found.")
# parse the xml file
Expand Down
1 change: 1 addition & 0 deletions scripts/python/src/fodt/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Directories():
backup = "backup"
info = "info"
keywords = "keywords"
keyword_names = "keyword-names"
meta = "meta"
meta_sections = "sections"
parts = "parts"
Expand Down
4 changes: 2 additions & 2 deletions scripts/python/src/fodt/create_subdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,14 @@ def get_parts(self) -> list[str]:
class CreateSubDocument3(CreateSubDocument):
def __init__(
self,
maindir: str,
maindir: Path,
keyword_dir: str,
chapter: str,
section: str,
keyword: str,
title: str,
) -> None:
self.maindir = Path(maindir)
self.maindir = maindir
self.keyword_dir = keyword_dir
self.chapter = chapter
self.section = section
Expand Down
35 changes: 29 additions & 6 deletions scripts/python/src/fodt/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import importlib.resources # access non-code resources
import shutil
import xml.sax.saxutils

from pathlib import Path
from fodt.constants import Directories, FileExtensions, FileNames
Expand Down Expand Up @@ -55,12 +54,14 @@ def derive_maindir_from_filename(filename: str) -> Path:
# This should never be reached

@staticmethod
def get_keyword_dir(keyword_dir: str) -> str:
def get_keyword_dir(keyword_dir: str, maindir: Path) -> str:
if keyword_dir is None:
try_path = Path('../keyword-names')
if try_path.exists():
keyword_dir = try_path
else:
# Default value for keyword_dir is a relative path like "../../keyword-names"
keyword_dir = Path(f'../../{Directories.keyword_names}')
if not keyword_dir.exists():
main_dir = Helpers.locate_maindir_from_current_dir()
keyword_dir = main_dir.parent / Directories.keyword_names
if not keyword_dir.exists():
raise FileNotFoundError(f"Keyword names directory not found.")
return keyword_dir

Expand Down Expand Up @@ -160,6 +161,28 @@ def locate_maindir_and_filename(
f"called '{maindir.name}'.")


@staticmethod
def locate_maindir_from_current_dir() -> Path:
    """Locate maindir by searching upwards from the current working directory.

    Every directory from cwd up to and including the filesystem root is
    probed for a child directory named ``Directories.parts`` that contains
    ``FileNames.main_document``.

    Returns:
        The first matching ``Directories.parts`` directory found.

    Raises:
        FileNotFoundError: If no directory between cwd and the root
            (inclusive) contains such a child directory.
    """
    cwd = Path.cwd()
    # We cannot use derive_maindir_from_filename() here because cwd does not
    # have to be inside maindir in this case
    # Path.cwd() is absolute, so cwd.parents ends at the filesystem root;
    # including it means the root directory itself is probed as well.
    for candidate in (cwd, *cwd.parents):
        # Check if this directory has a child directory called "parts"
        # that holds the main document
        dir_ = candidate / Directories.parts
        if dir_.is_dir() and (dir_ / FileNames.main_document).exists():
            return dir_
    raise FileNotFoundError(f"Could not derive maindir from cwd: "
        f"Could not find '{FileNames.main_document}' in a directory "
        f"called '{Directories.parts}' by searching the parent "
        f"directories of cwd."
    )

@staticmethod
def locate_maindir_from_current_dir() -> Path:
cwd = Path.cwd()
Expand Down
18 changes: 16 additions & 2 deletions scripts/python/src/fodt/remove_subsections.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ def __init__(
self.done = False
self.remove_section = False
self.in_main_section = False
self.start_tag_open = False # Handle empty tags

def characters(self, content: str):
if self.start_tag_open:
# NOTE: characters() is only called if there is content between the start
# tag and the end tag. If there is no content, characters() is not called.
self.content.write(">")
self.start_tag_open = False
# if (not self.in_subsection) and (not self.remove_section):
if not self.in_main_section:
self.content.write(XMLHelper.escape(content))
Expand All @@ -60,7 +66,11 @@ def endElement(self, name: str):
self.done = True
self.in_main_section = False
if (not self.in_subsection) and (not self.remove_section):
self.content.write(XMLHelper.endtag(name))
if self.start_tag_open:
self.content.write("/>")
self.start_tag_open = False
else:
self.content.write(XMLHelper.endtag(name))
if name == "text:section":
if self.remove_section:
self.remove_section = False
Expand Down Expand Up @@ -104,14 +114,18 @@ def startElement(self, name:str, attrs: xml.sax.xmlreader.AttributesImpl):
if self.check_included_section(name, attrs):
self.remove_section = True
self.in_main_section = True
if self.start_tag_open:
self.content.write(">") # Close the start tag
self.start_tag_open = False
if write_include:
self.in_main_section = True
part = f"{self.chapter}.{self.section}.{self.current_subsection}"
keyword = self.keywords[self.current_subsection - 1]
callback = self.replace_callback
self.content.write(callback(part, keyword))
if (not self.in_subsection) and (not self.remove_section):
self.content.write(XMLHelper.starttag(name, attrs))
self.start_tag_open = True
self.content.write(XMLHelper.starttag(name, attrs, close_tag=False))

def write_file(self):
filename = Path(self.outputfn)
Expand Down
4 changes: 2 additions & 2 deletions scripts/python/src/fodt/split_subdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class Splitter():
def __init__(self, maindir: str, keyword_dir: str, chapter: int, section: int) -> None:
self.chapter = chapter
self.section = section
self.maindir = Path(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir)
self.maindir = Helpers.get_maindir(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
self.metadata_dir = self.maindir / Directories.meta
assert self.maindir.is_dir()

Expand Down
10 changes: 10 additions & 0 deletions scripts/python/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def test_locate_with_absolute_path_exists(self, tmp_path: Path) -> None:
assert result_filename == filename

def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None:
"""Test locating maindir and filename when the maindir is given as an absolute path
and the main file does not exist. This should raise an error."""
maindir = tmp_path / Directories.parts
maindir.mkdir()
mainfile = maindir / FileNames.main_document
Expand All @@ -39,6 +41,8 @@ def test_locate_with_absolute_path_exists_no_main(self, tmp_path: Path) -> None:
f"called '{Directories.parts}'" in str(excinfo.value))

def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> None:
"""Test locating maindir and filename when the maindir is absolute and the
filename is a relative path."""
maindir = tmp_path / Directories.parts
maindir.mkdir()
mainfile = maindir / FileNames.main_document
Expand All @@ -60,6 +64,9 @@ def test_locate_with_relative_path_in_maindir_exists(self, tmp_path: Path) -> No
def test_locate_with_relative_path_not_in_maindir_but_in_cwd(
self, tmp_path: Path
):
"""Test locating maindir and filename when the maindir is absolute and the
filename is a relative path. The filename is not found in the maindir but
is found in the current working directory."""
cwd = tmp_path / "cwd"
cwd.mkdir()
os.chdir(str(cwd))
Expand All @@ -78,6 +85,9 @@ def test_locate_with_relative_path_not_in_maindir_but_in_cwd(
)

def test_locate_with_absolute_path_not_exists(self, tmp_path: Path):
"""Test locating maindir and filename when the maindir is absolute and the
filename is a relative path. The filename is not found in the maindir and
is not found in the current working directory. This should raise an error."""
maindir = tmp_path / Directories.parts
maindir.mkdir()
filename = tmp_path / "nonexistent.fodt"
Expand Down

0 comments on commit 77a9e6f

Please sign in to comment.