Skip to content

Commit

Permalink
Merge pull request #287 from hakonhagland/setkw_status
Browse files Browse the repository at this point in the history
Use more precise regex
  • Loading branch information
gdfldm authored Jun 6, 2024
2 parents 3d40a6f + 633ad63 commit c902de6
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
4 changes: 2 additions & 2 deletions scripts/python/src/fodt/add_keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import click

from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus
from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus, Regex
from fodt.create_subdocument import CreateSubDocument3
from fodt.helpers import Helpers
from fodt.remove_subsections import RemoveSubSections
Expand Down Expand Up @@ -118,7 +118,7 @@ def extract_keyword_name(self, href: str) -> str:
# Assume href starts with "#xxx.yyy.zzz.KEYWORD_NAME<space>"
# or "#xxx.yyy.zzz.KEYWORD_NAME|outline"
# KEYWORD_NAME consists of word characters (letters, digits, underscores), optionally followed by a single trailing hyphen or en-dash
if m:= re.match(r"#\d+\.\d+\.\d+\.(\w+[\-–]?)(?:\s+|$|\|outline$)", href):
if m:= re.match(Regex.href_keyword_name, href):
return m.group(1)
else:
return '<NOT FOUND>'
Expand Down
4 changes: 2 additions & 2 deletions scripts/python/src/fodt/add_keyword_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import click

from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus
from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus, Regex
from fodt.xml_helpers import XMLHelper

class AppendixKeywordHandler(xml.sax.handler.ContentHandler):
Expand Down Expand Up @@ -132,7 +132,7 @@ def handle_table_row(
href = attrs.getValue("xlink:href")
# the href value is of the form "#1.2.1.ACTDIMS – ACTION Keyword Dimensions"
# we want to extract the keyword name from this string
if match := re.match(r"#\d+.\d+.\d+.(\w+)\s+", href):
if match := re.match(Regex.href_keyword_name, href):
self.current_keyword = match.group(1)
elif self.in_table_cell and name == 'text:p':
if self.found_table_cell:
Expand Down
3 changes: 3 additions & 0 deletions scripts/python/src/fodt/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class MetaSections():
'office:master-styles',
]

class Regex:
    """Shared regular-expression patterns (plain pattern strings, not compiled)."""

    # Extracts the keyword name from an href such as
    #   "#1.2.1.ACTDIMS – ACTION Keyword Dimensions"  or  "#4.3.2.WELSPECS|outline".
    # Layout of the pattern:
    #   #\d+\.\d+\.\d+\.          "#" followed by three dot-separated section numbers
    #   (\w+[\-–]?)               group 1: the keyword name -- word characters plus an
    #                             optional single trailing hyphen or en-dash
    #   (?:\s+|$|\|outline$)      the name must be followed by whitespace, the end of
    #                             the string, or a terminating "|outline" suffix
    # The pattern has no leading "^", so it is anchored at the start of the href
    # only when applied with re.match().
    href_keyword_name = r"#\d+\.\d+\.\d+\.(\w+[\-–]?)(?:\s+|$|\|outline$)"

class TagEvent():
NONE = 0
START = 1
Expand Down

0 comments on commit c902de6

Please sign in to comment.