diff --git a/pyxform/survey.py b/pyxform/survey.py index 301ee74a..a45659b1 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -42,8 +42,6 @@ r"(instance\(.*\)\/root\/item\[.*?(\$\{.*\})\]\/.*?)\s" ) RE_PULLDATA = re.compile(r"(pulldata\s*\(\s*)(.*?),") -RE_XML_OUTPUT = re.compile(r"\n.*()\n(\s\s)*") -RE_XML_TEXT = re.compile(r"(>)\n\s*(\s[^<>\s].*?)\n\s*(\s' + self.xml().toxml() def _to_pretty_xml(self): - """ - I want the to_xml method to by default validate the xml we are - producing. - """ - # Hacky way of pretty printing xml without adding extra white - # space to text - # TODO: check out pyxml - # http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/ - xml_with_linebreaks = self.xml().toprettyxml(indent=" ") - pretty_xml = RE_XML_TEXT.sub( - lambda m: "".join(m.group(1, 2, 3)), xml_with_linebreaks - ) - inline_output = RE_XML_OUTPUT.sub(r"\g<1>", pretty_xml) - return '\n' + inline_output + """Get the XForm with human readable formatting.""" + return '\n' + self.xml().toprettyxml(indent=" ") def __repr__(self): return self.__unicode__() diff --git a/pyxform/utils.py b/pyxform/utils.py index 19e14c30..70bb194d 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -11,7 +11,8 @@ from collections import namedtuple from json.decoder import JSONDecodeError from typing import Dict, List, Tuple -from xml.dom.minidom import Element, Text, parseString +from xml.dom import Node +from xml.dom.minidom import Element, Text, _write_data, parseString import openpyxl import xlrd @@ -26,6 +27,7 @@ BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}") LAST_SAVED_REGEX = re.compile(r"\${last-saved#(.*?)}") PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}") +NODE_TYPE_TEXT = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE) NSMAP = { @@ -54,6 +56,39 @@ def __init__(self, *args, **kwargs): Element.__init__(self, *args, **kwargs) self.ownerDocument = None + def writexml(self, writer, indent="", addindent="", newl=""): + # indent = current indentation + # addindent = indentation to add to higher levels + # newl = newline string + writer.write(indent + "<" + self.tagName) + + attrs = self._get_attributes() + + for a_name in attrs.keys(): + writer.write(' %s="' % a_name) + _write_data(writer, attrs[a_name].value) + writer.write('"') + if self.childNodes: + writer.write(">") + # For text or mixed content, write without adding indents or newlines. + if 0 < len([c for c in self.childNodes if c.nodeType in NODE_TYPE_TEXT]): + # Conditions to match old Survey.py regex for remaining whitespace. + child_nodes = len(self.childNodes) + for idx, cnode in enumerate(self.childNodes): + if 1 < child_nodes and idx == 0 and cnode.nodeType in NODE_TYPE_TEXT: + writer.write(" ") + cnode.writexml(writer, "", "", "") + if 1 < child_nodes and (idx + 1) == child_nodes: + writer.write(" ") + else: + writer.write(newl) + for cnode in self.childNodes: + cnode.writexml(writer, indent + addindent, addindent, newl) + writer.write(indent) + writer.write("%s" % (self.tagName, newl)) + else: + writer.write("/>%s" % (newl)) + class PatchedText(Text): def writexml(self, writer, indent="", addindent="", newl=""): diff --git a/tests/test_repeat.py b/tests/test_repeat.py index 94e4b403..013ba2ff 100644 --- a/tests/test_repeat.py +++ b/tests/test_repeat.py @@ -94,7 +94,7 @@ def test_repeat_relative_reference(self): """""", """""", """""", + """ """, ], ) diff --git a/tests/test_whitespace.py b/tests/test_whitespace.py index 4fcfb68c..2f19615a 100644 --- a/tests/test_whitespace.py +++ b/tests/test_whitespace.py @@ -18,6 +18,80 @@ def test_over_trim(self): xml__contains=[''], ) + def test_whitespace_output_permutations(self): + """Should find expected whitespace before/after/between output variables.""" + md = """ + | survey | | | + | | type | name | label | + | | text | A | None | + | | text | B1 | Before {0} | + | | text | C1 | {0} After | + | | text | D1 | Before x2 {0} {0} | + | | text | E1 | {0} {0} After x2 | + | | text | F1 | {0} Between {0} | + | | text | G1 | Wrap {0} in text | + | | text | H1 | Wrap {0} in {0} text | + | | text | I1 | Wrap {0} in {0} | + """ + xp = "/h:html/h:body/x:input[@ref='/test_name/{}']/x:label" + test_cases = ("A", "B1") + for case in test_cases: + with self.subTest(msg=case): + self.assertPyxformXform( + md=md.format(f"${{{case}}}"), + xml__xpath_exact=[ + (xp.format("A"), {""}), + ( + xp.format("B1"), + { + f"""""" + }, + ), + ( + xp.format("C1"), + { + f"""""" + }, + ), + ( + xp.format("D1"), + { + f"""""" + }, + ), + ( + xp.format("E1"), + { + f"""""" + }, + ), + ( + xp.format("F1"), + { + f"""""" + }, + ), + ( + xp.format("G1"), + { + f"""""" + }, + ), + ( + xp.format("H1"), + { + f"""""" + }, + ), + ( + xp.format("I1"), + { + f"""""" + }, + ), + ], + ) + def test_values_without_whitespaces_are_processed_successfully(self): md = """ | survey | | | |