From 633ad636ef5fdec08b60e154aa84c0f15f42e759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Thu, 6 Jun 2024 17:34:32 +0200 Subject: [PATCH] Use more precise regex Use more precise regex in set-keyword-status script --- scripts/python/src/fodt/add_keyword.py | 4 ++-- scripts/python/src/fodt/add_keyword_status.py | 4 ++-- scripts/python/src/fodt/constants.py | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/python/src/fodt/add_keyword.py b/scripts/python/src/fodt/add_keyword.py index 69847889..708a7908 100644 --- a/scripts/python/src/fodt/add_keyword.py +++ b/scripts/python/src/fodt/add_keyword.py @@ -11,7 +11,7 @@ import click -from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus +from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus, Regex from fodt.create_subdocument import CreateSubDocument3 from fodt.helpers import Helpers from fodt.remove_subsections import RemoveSubSections @@ -118,7 +118,7 @@ def extract_keyword_name(self, href: str) -> str: # Assume href starts with "#xxx.yyy.zzz.KEYWORD_NAME" # or "#xxx.yyy.zzz.KEYWORD_NAME|outline" # KEYWORD_NAME can contain letters, numbers, and optionally a trailing hyphen or en-dash - if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+[\-–]?)(?:\s+|$|\|outline$)", href): + if m:= re.match(Regex.href_keyword_name, href): return m.group(1) else: return '' diff --git a/scripts/python/src/fodt/add_keyword_status.py b/scripts/python/src/fodt/add_keyword_status.py index ab477056..39002032 100644 --- a/scripts/python/src/fodt/add_keyword_status.py +++ b/scripts/python/src/fodt/add_keyword_status.py @@ -17,7 +17,7 @@ import click -from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus +from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus, Regex from fodt.xml_helpers import XMLHelper class AppendixKeywordHandler(xml.sax.handler.ContentHandler): @@ -132,7 +132,7 @@ def handle_table_row( href = attrs.getValue("xlink:href") # the href value is on the form "#1.2.1.ACTDIMS – ACTION Keyword Dimensions" # we want to extract the keyword name from this string - if match := re.match(r"#\d+.\d+.\d+.(\w+)\s+", href): + if match := re.match(Regex.href_keyword_name, href): self.current_keyword = match.group(1) elif self.in_table_cell and name == 'text:p': if self.found_table_cell: diff --git a/scripts/python/src/fodt/constants.py b/scripts/python/src/fodt/constants.py index 5867498f..6b966ea6 100644 --- a/scripts/python/src/fodt/constants.py +++ b/scripts/python/src/fodt/constants.py @@ -76,6 +76,9 @@ class MetaSections(): 'office:master-styles', ] +class Regex: + href_keyword_name = r"#\d+\.\d+\.\d+\.(\w+[\-–]?)(?:\s+|$|\|outline$)" + class TagEvent(): NONE = 0 START = 1