Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added script to fix style in footer #188

Merged
merged 1 commit into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ fodt-extract-section = "fodt.extract_section:extract_section"
fodt-extract-style-info = "fodt.extract_style_info:extract_style_info"
fodt-extract-xml-tag = "fodt.splitter:extract_xml_tag"
fodt-fix-ignored-keywords = "fodt.fix_ignored:fix_ignored"
fodt-fix-footer-style = "fodt.fix_footer_style:fix_footer_style"
fodt-remove-bookmarks-from-master-styles = "fodt.remove_bookmarks:remove_bookmarks_from_master_styles"
fodt-remove-chapters = "fodt.splitter:remove_chapters"
fodt-remove-elements = "fodt.splitter:remove_elements"
Expand Down
143 changes: 143 additions & 0 deletions scripts/python/src/fodt/fix_footer_style.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import click
import io
import logging
import xml.sax
import xml.sax.handler
import xml.sax.xmlreader
import xml.sax.saxutils
from pathlib import Path

from fodt.constants import ClickOptions
from fodt.xml_helpers import XMLHelper

class ContentHandler(xml.sax.handler.ContentHandler):
    """SAX handler that re-serializes a document while fixing the footer style.

    Every SAX event is written back out to an in-memory buffer. While doing
    so the handler tracks whether it is inside
    ``style:master-page[@style:name="_40_DocumentKeywordPageStyle"]`` ->
    ``style:footer`` -> ``table:table-cell`` -> ``text:p``. Inside such a
    paragraph, a character chunk equal to ``"I"`` that has not been preceded
    by a ``text:span`` with style ``Internet_20_link`` is wrapped in a new
    span with that style.

    Start tags are written without their terminating ``>`` so that an
    element with no children and no text can be emitted as ``<tag/>``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.content = io.StringIO()
        self.in_master_page = False  # Inside the desired style:master-page element
        self.in_footer = False  # Inside the footer element in style:master-page
        self.in_table_cell = False  # Inside the table:table-cell element in the footer
        self.in_p_tag = False  # Inside the text:p element in the table:table-cell
        self.found_span = False  # If we found the span with the desired link style
        self.start_tag_open = False  # Last start tag still lacks its ">" or "/>"
        self.fixed_style = False  # If we fixed the style

    def characters(self, content: str) -> None:
        """Write character data, wrapping a bare footer "I" in a link-style span."""
        if self.start_tag_open:
            # NOTE: characters() is only called if there is content between the start
            # tag and the end tag. If there is no content, characters() is not called.
            self.content.write(">")
            self.start_tag_open = False
        if self.in_p_tag:
            if content == "I":
                if not self.found_span:
                    # Insert the desired span tag with internet link style
                    self.content.write(
                        """<text:span text:style-name="Internet_20_link">""" +
                        content + "</text:span>"  # content is "I"
                    )
                    self.fixed_style = True
                    return
        self.content.write(XMLHelper.escape(content))

    def endElement(self, name: str) -> None:
        """Leave any tracked scope that ``name`` closes and write the end tag."""
        if self.in_p_tag and name == "text:p":
            self.in_p_tag = False
            # The span check is per paragraph, so forget it when the paragraph ends.
            self.found_span = False
        if self.in_table_cell and name == "table:table-cell":
            self.in_table_cell = False
        if self.in_footer and name == "style:footer":
            self.in_footer = False
        if self.in_master_page and name == "style:master-page":
            self.in_master_page = False
        if self.start_tag_open:
            # Nothing was written since the start tag: emit an empty element.
            self.content.write("/>")
            self.start_tag_open = False
        else:
            self.content.write(XMLHelper.endtag(name))

    def fixed_footer_style(self) -> bool:
        """Return True if at least one span was inserted during parsing."""
        return self.fixed_style

    def get_content(self) -> str:
        """Return everything serialized to the buffer so far."""
        return self.content.getvalue()

    def startDocument(self) -> None:
        """Write the document header provided by ``XMLHelper.header``."""
        self.content.write(XMLHelper.header)

    def startElement(self, name: str, attrs: xml.sax.xmlreader.AttributesImpl) -> None:
        """Enter any tracked scope that ``name`` opens and write the start tag.

        Attribute lookups use ``attrs.get`` so that elements lacking
        ``text:style-name`` / ``style:name`` are treated as non-matching
        instead of raising ``KeyError``.
        """
        if self.start_tag_open:
            self.content.write(">")  # Close the start tag
            self.start_tag_open = False
        if self.in_p_tag and name == "text:span":
            if attrs.get("text:style-name") == "Internet_20_link":
                self.found_span = True
        if self.in_table_cell and name == "text:p":
            self.in_p_tag = True
        if self.in_footer and name == "table:table-cell":
            self.in_table_cell = True
        if self.in_master_page and name == "style:footer":
            self.in_footer = True
        if name == "style:master-page":
            if attrs.get("style:name") == "_40_DocumentKeywordPageStyle":
                self.in_master_page = True
        # Leave the tag open; characters()/startElement()/endElement() decide
        # whether it is finished with ">" or "/>".
        self.start_tag_open = True
        self.content.write(XMLHelper.starttag(name, attrs, close_tag=False))


class FixFooterStyle:
def __init__(self, maindir: str, filename: Path|None) -> None:
self.maindir = maindir
self.filename = filename

def fixall(self) -> None:
# Scan all .fodt documents in the maindir, and fix the footer style in each
for file in Path(self.maindir).rglob("*.fodt"):
self.fix_file(file)

def fixup(self) -> None:
if self.filename:
self.fix_file(Path(self.maindir) / self.filename)
else:
self.fixall()

def fix_file(self, filename: Path) -> None:
parser = xml.sax.make_parser()
handler = ContentHandler()
parser.setContentHandler(handler)
parser.parse(filename)
if handler.fixed_footer_style():
# Write the content back to file
with open(filename, "w", encoding='utf_8') as f:
f.write(handler.get_content())
logging.info(f"Fixing footer style in {filename}.")

# USAGE:
#
# fodt-fix-footer-style --maindir </path/to/maindir> --filename <filename>
#
# DESCRIPTION:
#
# Scan .fodt documents for a master-page style with style:name "_40_DocumentKeywordPageStyle".
# - If found, find the footer element within it, and search for a table:table-cell element
#   within the footer element. If the table:table-cell element contains a text:p element,
#   check that the text:p element contains a span with the style "Internet_20_link". If not,
#   insert a span with the style "Internet_20_link" in the text:p element, surrounding the
#   text content "I".
#
# - If <filename> is given, only process that file, else process all .fodt files in <maindir>.
# - <filename> must be relative to <maindir>.
#
# Command-line entry point. `maindir` comes from the shared ClickOptions.maindir
# option; `filename`, if given, is passed through to FixFooterStyle, which joins
# it onto `maindir`. Without `filename`, every .fodt file below `maindir` is
# processed.
@click.command()
@ClickOptions.maindir(required=False)
@click.option(
    "--filename",
    required=False,
    help="Filename to fix.",
)
def fix_footer_style(maindir: str, filename: str | None) -> None:
    """Fix the link style of the footer text in the master page of .fodt documents."""
    logging.basicConfig(level=logging.INFO)
    FixFooterStyle(maindir, filename).fixup()

# Run the click command when this module is executed directly as a script.
if __name__ == "__main__":
    fix_footer_style()
Loading