diff --git a/src/pygef/broxml/parse_bore.py b/src/pygef/broxml/parse_bore.py index 3a95b15e..320daef3 100644 --- a/src/pygef/broxml/parse_bore.py +++ b/src/pygef/broxml/parse_bore.py @@ -9,7 +9,7 @@ from pygef.bore import BoreData from pygef.broxml import resolvers -from pygef.broxml.xml_parser import read_xml +from pygef.broxml.xml_parser import BaseParser, read_xml # maps keyword argument to: # xpath: query passed to elementree.find @@ -136,9 +136,9 @@ def read_bore(file: io.BytesIO | Path | str) -> list[BoreData]: if isinstance(file, str) and not os.path.exists(file): - root = etree.fromstring(file).getroot() + root = etree.fromstring(file, parser=BaseParser) else: - root = etree.parse(file).getroot() + root = etree.parse(file, parser=BaseParser).getroot() match = re.compile(r"xsd/.*/(\d\.\d)") matched = match.search(root.nsmap["bhrgtcom"]) diff --git a/src/pygef/broxml/parse_cpt.py b/src/pygef/broxml/parse_cpt.py index 0ccaaee6..3a674360 100644 --- a/src/pygef/broxml/parse_cpt.py +++ b/src/pygef/broxml/parse_cpt.py @@ -7,7 +7,7 @@ from lxml import etree from pygef.broxml import resolvers -from pygef.broxml.xml_parser import read_xml +from pygef.broxml.xml_parser import BaseParser, read_xml from pygef.cpt import CPTData # maps keyword argument to: @@ -196,7 +196,7 @@ def read_cpt(file: io.BytesIO | Path | str) -> list[CPTData]: if isinstance(file, str) and not os.path.exists(file): - root = etree.fromstring(file).getroot() + root = etree.fromstring(file, parser=BaseParser) else: - root = etree.parse(file).getroot() + root = etree.parse(file, parser=BaseParser).getroot() return read_xml(root, CPTData, CPT_ATTRIBS, "dispatchDocument") diff --git a/src/pygef/broxml/xml_parser.py b/src/pygef/broxml/xml_parser.py index b06a7dff..07cc1448 100644 --- a/src/pygef/broxml/xml_parser.py +++ b/src/pygef/broxml/xml_parser.py @@ -9,6 +9,8 @@ T = TypeVar("T", CPTData, BoreData) +BaseParser = etree.XMLParser(resolve_entities=False, dtd_validation=False) + 
def read_xml( root: etree.Element,