diff --git a/pyxform/builder.py b/pyxform/builder.py index 4092028c..d7edad2c 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -83,12 +83,13 @@ def __init__(self, **kwargs): # I don't know why we would need an explicit none option for # select alls self._add_none_option = False + self._sections: Optional[Dict[str, Dict]] = None self.set_sections(kwargs.get("sections", {})) # dictionary of setvalue target and value tuple indexed by triggering element self.setvalues_by_triggering_ref = {} # For tracking survey-level choices while recursing through the survey. - self.choices: Dict[str, Any] = {} + self._choices: Dict[str, Any] = {} def set_sections(self, sections): """ @@ -112,13 +113,13 @@ def create_survey_element_from_dict( if d[const.TYPE] in self.SECTION_CLASSES: if d[const.TYPE] == const.SURVEY: - self.choices = copy.deepcopy(d.get(const.CHOICES, {})) + self._choices = copy.deepcopy(d.get(const.CHOICES, {})) section = self._create_section_from_dict(d) if d[const.TYPE] == const.SURVEY: section.setvalues_by_triggering_ref = self.setvalues_by_triggering_ref - section.choices = self.choices + section.choices = self._choices return section elif d[const.TYPE] == const.LOOP: @@ -263,7 +264,7 @@ def _create_section_from_dict(self, d): survey_element = self.create_survey_element_from_dict(copy.deepcopy(child)) if child[const.TYPE].endswith(const.SELECT_OR_OTHER_SUFFIX): select_question = survey_element[0] - itemset_choices = self.choices.get(select_question[const.ITEMSET], None) + itemset_choices = self._choices.get(select_question[const.ITEMSET], None) if ( itemset_choices is not None and isinstance(itemset_choices, list) @@ -307,7 +308,8 @@ def _create_loop_from_dict(self, d): # TODO: Verify that nothing breaks if this returns a list return result.children - def _name_and_label_substitutions(self, question_template, column_headers): + @staticmethod + def _name_and_label_substitutions(question_template, column_headers): # if the label in column_headers has multiple languages setup a # dictionary by language to do substitutions. info_by_lang = {} diff --git a/pyxform/constants.py b/pyxform/constants.py index 78f312e8..792072c1 100644 --- a/pyxform/constants.py +++ b/pyxform/constants.py @@ -65,6 +65,8 @@ SELECT_ALL_THAT_APPLY = "select all that apply" SELECT_OR_OTHER_SUFFIX = " or specify other" RANK = "rank" +QUESTION = "question" +CHOICE = "choice" CHOICES = "choices" # XLS Specific constants diff --git a/pyxform/entities/entities_parsing.py b/pyxform/entities/entities_parsing.py index 0bff765c..85a75844 100644 --- a/pyxform/entities/entities_parsing.py +++ b/pyxform/entities/entities_parsing.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Any, Dict, List from pyxform import constants as const from pyxform.errors import PyXFormError @@ -8,8 +8,8 @@ def get_entity_declaration( - entities_sheet: Dict, workbook_dict: Dict, warnings: List -) -> Dict: + entities_sheet: List[Dict], workbook_dict: Dict[str, List[Dict]], warnings: List[str] +) -> Dict[str, Any]: if len(entities_sheet) == 0: similar = find_sheet_misspellings(key=const.ENTITIES, keys=workbook_dict.keys()) if similar is not None: @@ -82,7 +82,7 @@ def get_validated_dataset_name(entity): def validate_entity_saveto( - row: Dict, row_number: int, entity_declaration: Dict, in_repeat: bool + row: Dict, row_number: int, entity_declaration: Dict[str, Any], in_repeat: bool ): save_to = row.get(const.BIND, {}).get("entities:saveto", "") if not save_to: diff --git a/pyxform/instance_info.py b/pyxform/instance_info.py deleted file mode 100644 index 049d8974..00000000 --- a/pyxform/instance_info.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- -""" -InstanceInfo class module. -""" - - -class InstanceInfo: - """Standardise Instance details relevant during XML generation.""" - - def __init__(self, type, context, name, src, instance): - self.type = type - self.context = context - self.name = name - self.src = src - self.instance = instance diff --git a/pyxform/question.py b/pyxform/question.py index 1b53a0e2..4421e630 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -14,15 +14,19 @@ from pyxform.errors import PyXFormError from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.survey_element import SurveyElement -from pyxform.utils import ( - PYXFORM_REFERENCE_REGEX, - default_is_dynamic, - has_dynamic_label, - node, -) +from pyxform.utils import PYXFORM_REFERENCE_REGEX, default_is_dynamic, node class Question(SurveyElement): + FIELDS = SurveyElement.FIELDS.copy() + FIELDS.update( + { + "_itemset_multi_language": bool, + "_itemset_has_media": bool, + "_itemset_dyn_label": bool, + } + ) + def validate(self): SurveyElement.validate(self) @@ -202,12 +206,6 @@ def build_xml(self): for element in self.xml_label_and_hint(): result.appendChild(element) - choices = survey.get("choices") - multi_language = False - if choices is not None and len(choices) > 0: - first_choices = next(iter(choices.values())) - multi_language = isinstance(first_choices[0].get("label"), dict) - # itemset are only supposed to be strings, # check to prevent the rare dicts that show up if self["itemset"] and isinstance(self["itemset"], str): @@ -227,16 +225,13 @@ def build_xml(self): else EXTERNAL_CHOICES_ITEMSET_REF_LABEL, ) - has_media = False - has_dyn_label = False + multi_language = self.get("_itemset_multi_language", False) + has_media = self.get("_itemset_has_media", False) + has_dyn_label = self.get("_itemset_dyn_label", False) is_previous_question = bool( PYXFORM_REFERENCE_REGEX.search(self.get("itemset")) ) - if choices.get(itemset): - has_media = bool(choices[itemset][0].get("media")) - has_dyn_label = has_dynamic_label(choices[itemset], multi_language) - if file_extension in EXTERNAL_INSTANCE_EXTENSIONS: itemset = itemset else: diff --git a/pyxform/survey.py b/pyxform/survey.py index e0061c17..cf363f02 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -10,14 +10,13 @@ from collections import defaultdict from datetime import datetime from functools import lru_cache -from typing import Iterator, List, Optional, Tuple +from typing import Generator, Iterator, List, Optional, Tuple from pyxform import aliases, constants from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS from pyxform.errors import PyXFormError, ValidationError from pyxform.external_instance import ExternalInstance from pyxform.instance import SurveyInstance -from pyxform.instance_info import InstanceInfo from pyxform.parsing import instance_expression from pyxform.question import Option, Question from pyxform.section import Section @@ -47,6 +46,26 @@ RE_XML_TEXT = re.compile(r"(>)\n\s*(\s[^<>\s].*?)\n\s*(\s bool: + if ( + label is not None + and isinstance(label, str) + and re.search(BRACKETED_TAG_REGEX, label) is not None + ): + return True + else: + return False + + +def recursive_dict(): + return defaultdict(recursive_dict) + + class Survey(Section): """ Survey class - represents the full XForm XML. @@ -174,7 +209,7 @@ class Survey(Section): constants.COMPACT_DELIMITER: str, "file_name": str, "default_language": str, - "_translations": dict, + "_translations": recursive_dict, "submission_url": str, "auto_send": str, "auto_delete": str, @@ -267,27 +302,30 @@ def xml(self): "h:html", node("h:head", node("h:title", self.title), self.xml_model()), node("h:body", *self.xml_control(), **body_kwargs), - **nsmap + **nsmap, ) def get_setvalues_for_question_name(self, question_name): return self.setvalues_by_triggering_ref.get("${%s}" % question_name) - @staticmethod - def _generate_static_instances(list_name, choice_list) -> InstanceInfo: + def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: """ - Generates elements for static data - (e.g. choices for select type questions) - - Note that per commit message 0578242 and in xls2json.py R539, an - instance is only output for select items defined in the choices sheet - when the item has a choice_filter, and it is that way for backwards - compatibility. + Generate elements for static data (e.g. choices for selects) """ instance_element_list = [] - multi_language = isinstance(choice_list[0].get("label"), dict) has_media = bool(choice_list[0].get("media")) - has_dyn_label = has_dynamic_label(choice_list, multi_language) + has_dyn_label = has_dynamic_label(choice_list) + multi_language = False + if isinstance(self._translations, dict): + choices = tuple( + k + for items in self._translations.values() + for k, v in items.items() + if v.get(constants.TYPE, "") == constants.CHOICE + and k.split("-")[0] == list_name + ) + if 0 < len(choices): + multi_language = True for idx, choice in enumerate(choice_list): choice_element_list = [] @@ -302,7 +340,7 @@ def _generate_static_instances(list_name, choice_list) -> InstanceInfo: if ( not multi_language and not has_media - and not has_dynamic_label(choice_list, multi_language) + and not has_dyn_label and isinstance(value, str) and name == "label" ): @@ -391,9 +429,9 @@ def get_instance_info(element, file_id): uri = "jr://file-csv/{}.csv".format(file_id) return InstanceInfo( - type=u"pulldata", + type="pulldata", context="[type: {t}, name: {n}]".format( - t=element[u"parent"][u"type"], n=element[u"parent"][u"name"] + t=element["parent"]["type"], n=element["parent"]["name"] ), name=file_id, src=uri, @@ -626,7 +664,11 @@ def xml_instance(self, **kwargs): def _add_to_nested_dict(self, dicty, path, value): if len(path) == 1: - dicty[path[0]] = value + key = path[0] + if key in dicty and isinstance(dicty[key], dict) and isinstance(value, dict): + dicty[key].update(value) + else: + dicty[key] = value return if path[0] not in dicty: dicty[path[0]] = {} @@ -638,32 +680,69 @@ def _setup_translations(self): setup media and itext functions """ - def _setup_choice_translations(name, choice_value, itext_id): - for media_type_or_language, value in choice_value.items(): # noqa + def _setup_choice_translations( + name, choice_value, itext_id + ) -> Generator[Tuple[List[str], str], None, None]: + for media_or_lang, value in choice_value.items(): # noqa if isinstance(value, dict): for language, val in value.items(): - self._add_to_nested_dict( - self._translations, - [language, itext_id, media_type_or_language], - val, - ) + yield ([language, itext_id, media_or_lang], val) else: - if name == "media": - self._add_to_nested_dict( - self._translations, - [self.default_language, itext_id, media_type_or_language], - value, - ) + if name == constants.MEDIA: + yield ([self.default_language, itext_id, media_or_lang], value) else: - self._add_to_nested_dict( - self._translations, - [media_type_or_language, itext_id, "long"], - value, + yield ([media_or_lang, itext_id, "long"], value) + + itemsets_multi_language = set() + itemsets_has_media = set() + itemsets_has_dyn_label = set() + + def get_choices(): + for list_name, choice_list in self.choices.items(): + multi_language = False + has_media = False + dyn_label = False + choices = [] + for idx, choice in enumerate(choice_list): + for col_name, choice_value in choice.items(): + lang_choice = None + if col_name == constants.MEDIA: + has_media = True + if isinstance(choice_value, dict): + lang_choice = choice_value + multi_language = True + elif col_name == constants.LABEL: + lang_choice = {self.default_language: choice_value} + if is_label_dynamic(choice_value): + dyn_label = True + if lang_choice is not None: + # e.g. (label, {"default": "Yes"}, "consent", 0) + choices.append((col_name, lang_choice, list_name, idx)) + if multi_language or has_media or dyn_label: + if multi_language: + itemsets_multi_language.add(list_name) + if has_media: + itemsets_has_media.add(list_name) + if dyn_label: + itemsets_has_dyn_label.add(list_name) + for c in choices: + yield from _setup_choice_translations( + c[0], c[1], f"{c[2]}-{c[3]}" ) - self._translations = defaultdict(dict) # pylint: disable=W0201 + for path, value in get_choices(): + last_path = path.pop() + leaf_value = {last_path: value, constants.TYPE: constants.CHOICE} + self._add_to_nested_dict(self._translations, path, leaf_value) + select_types = set(aliases.select.keys()) for element in self.iter_descendants(): + itemset = element.get("itemset") + if itemset is not None: + element._itemset_multi_language = itemset in itemsets_multi_language + element._itemset_has_media = itemset in itemsets_has_media + element._itemset_dyn_label = itemset in itemsets_has_dyn_label + # Skip creation of translations for choices in selects. The creation of these # translations is done futher below in this function. parent = element.get("parent") @@ -685,37 +764,15 @@ def _setup_choice_translations(name, choice_value, itext_id): form: { "text": d["text"], "output_context": d["output_context"], - } + }, + constants.TYPE: constants.QUESTION, } ) - # This code sets up translations for choices in selects. - for list_name, choice_list in self.choices.items(): - multi_language = isinstance(choice_list[0].get("label"), dict) - has_media = bool(choice_list[0].get("media")) - if ( - not multi_language - and not has_media - and not has_dynamic_label(choice_list, multi_language) - ): - continue - for idx, choice in enumerate(choice_list): - for name, choice_value in choice.items(): - itext_id = "-".join([list_name, str(idx)]) - if isinstance(choice_value, dict): - _setup_choice_translations(name, choice_value, itext_id) - elif name == "label": - self._add_to_nested_dict( - self._translations, - [self.default_language, itext_id, "long"], - choice_value, - ) - def _add_empty_translations(self): """ - Adds translations so that every itext element has the same elements \ - accross every language. - When translations are not provided "-" will be used. + Adds translations so that every itext element has the same elements across every + language. When translations are not provided "-" will be used. This disables any of the default_language fallback functionality. """ paths = {} @@ -767,7 +824,6 @@ def _set_up_media_translations(media_dict, translation_key): # Create the required dictionaries in _translations, # then add media as a leaf value: - if language not in self._translations: self._translations[language] = {} @@ -783,9 +839,6 @@ def _set_up_media_translations(media_dict, translation_key): translations_trans_key[media_type] = media - if not self._translations: - self._translations = defaultdict(dict) # pylint: disable=W0201 - for survey_element in self.iter_descendants(): # Skip set up of media for choices in selects. Translations for their media # content should have been set up in _setup_translations, with one copy of @@ -818,6 +871,9 @@ def itext(self): raise Exception() for media_type, media_value in content.items(): + # Ignore key indicating Question or Choice translation type. + if media_type == constants.TYPE: + continue if isinstance(media_value, dict): value, output_inserted = self.insert_output_values( media_value["text"], context=media_value["output_context"] diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index 2831a82d..937b26ac 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -4,6 +4,7 @@ """ import json import re +from collections import deque from functools import lru_cache from typing import TYPE_CHECKING @@ -188,16 +189,14 @@ def get_lineage(self): """ Return a the list [root, ..., self._parent, self] """ - result = [self] + result = deque((self,)) current_element = self while current_element.parent: current_element = current_element.parent - result = [current_element] + result + result.appendleft(current_element) # For some reason the root element has a True flat property... - output = [result[0]] - for item in result[1:]: - if not item.get("flat"): - output.append(item) + output = [result.popleft()] + output.extend([i for i in result if not i.get("flat")]) return output def get_root(self): diff --git a/pyxform/utils.py b/pyxform/utils.py index a5d8c926..19e14c30 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -10,7 +10,7 @@ import re from collections import namedtuple from json.decoder import JSONDecodeError -from typing import List, Tuple +from typing import Dict, List, Tuple from xml.dom.minidom import Element, Text, parseString import openpyxl @@ -275,17 +275,20 @@ def default_is_dynamic(element_default, element_type=None): return False -# If the first or second choice label includes a reference, we must use itext. -# Check the first two choices in case first is something like "Other". -def has_dynamic_label(choice_list, multi_language): - if not multi_language: - for i in range(0, min(2, len(choice_list))): - if ( - choice_list[i].get("label") is not None - and re.search(BRACKETED_TAG_REGEX, choice_list[i].get("label")) - is not None - ): - return True +def has_dynamic_label(choice_list: "List[Dict[str, str]]") -> bool: + """ + If the first or second choice label includes a reference, we must use itext. + + Check the first two choices in case first is something like "Other". + """ + for c in choice_list[:2]: + choice_label = c.get("label") + if ( + choice_label is not None + and isinstance(choice_label, str) + and re.search(BRACKETED_TAG_REGEX, choice_label) is not None + ): + return True return False diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py index 722017ba..01121d76 100644 --- a/pyxform/validators/pyxform/translations_checks.py +++ b/pyxform/validators/pyxform/translations_checks.py @@ -6,9 +6,9 @@ from pyxform.errors import PyXFormError if TYPE_CHECKING: - from typing import Dict, List, Optional, Sequence, Set, Union + from typing import Dict, List, Optional, Sequence, Set, Tuple - SheetData = List[Dict[str, Union[str, Dict]]] + SheetData = Tuple[Tuple[str, ...]] Warnings = List[str] @@ -93,31 +93,20 @@ def __init__( def _find_translations( self, sheet_data: "SheetData", translatable_columns: "Dict[str, str]" ): - def process_cell(typ, cell): - if cell is not None: - if typ in translatable_columns.keys(): - name = translatable_columns[typ] - if isinstance(cell, str): - self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name) - self.columns_seen.add(name) - elif isinstance(cell, dict): - for lng in cell: - self.seen[lng].append(name) - self.columns_seen.add(name) - - if 0 < len(sheet_data): - # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"}) - for column_type, cell_content in sheet_data[0].items(): - if column_type == const.MEDIA and isinstance(cell_content, dict): - # e.g. ("audio", {"eng": "my.mp3"}) - for media_type, media_cell in cell_content.items(): - process_cell(typ=media_type, cell=media_cell) - if column_type == const.BIND: - # e.g. ("jr:constraintMsg", "Try again") - for bind_type, bind_cell in cell_content.items(): - process_cell(typ=bind_type, cell=bind_cell) - else: - process_cell(typ=column_type, cell=cell_content) + def process_header(head): + if head[0] in translatable_columns.keys(): + name = translatable_columns[head[0]] + if len(head) == 1: + self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name) + elif len(head) == 2: + self.seen[head[1]].append(name) + self.columns_seen.add(name) + + for header in sheet_data: + if 1 < len(header) and header[0] in (const.MEDIA, const.BIND): + process_header(head=header[1:]) + else: + process_header(head=header) def seen_default_only(self) -> bool: return 0 == len(self.seen) or ( diff --git a/pyxform/xform2json.py b/pyxform/xform2json.py index c8497733..4cf0ebcc 100644 --- a/pyxform/xform2json.py +++ b/pyxform/xform2json.py @@ -668,7 +668,8 @@ def _get_choices(self) -> Dict[str, Any]: choices[instance["id"]] = items return choices - def _get_name_from_ref(self, ref): + @staticmethod + def _get_name_from_ref(ref): """given /xlsform_spec_test/launch, return the string after the last occurance of the character '/' """ @@ -678,10 +679,12 @@ def _get_name_from_ref(self, ref): else: return ref[pos + 1 :].strip() - def _expand_child(self, obj_list): + @staticmethod + def _expand_child(obj_list): return obj_list - def _shorten_xpaths_in_string(self, text): + @staticmethod + def _shorten_xpaths_in_string(text): def get_last_item(xpath_str): last_item = xpath_str.split("/") return last_item[len(last_item) - 1].strip() diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index b0acf3df..6d987b61 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -8,7 +8,7 @@ import re import sys from collections import Counter -from typing import IO, Any, Dict, List, Optional +from typing import IO, Any, Dict, List, Optional, Tuple from pyxform import aliases, constants from pyxform.constants import ( @@ -101,13 +101,25 @@ def replace_smart_quotes_in_dict(_d): _d[key] = value +class DealiasAndGroupHeadersResult: + __slots__ = ("headers", "data") + + def __init__(self, headers: Tuple[Tuple[str, ...], ...], data: List[Dict]): + """ + :param headers: Distinct headers seen in the sheet, parsed / split if applicable. + :param data: Sheet data rows, in grouped dict format. + """ + self.headers: Tuple[Tuple[str, ...], ...] = headers + self.data: List[Dict] = data + + def dealias_and_group_headers( - dict_array: "List[Dict]", - header_aliases: "Dict", + dict_array: List[Dict], + header_aliases: Dict[str, str], use_double_colons: bool, default_language: str = constants.DEFAULT_LANGUAGE_VALUE, ignore_case: bool = False, -): +) -> DealiasAndGroupHeadersResult: """ For each row in the worksheet, group all keys that contain a double colon. So @@ -121,10 +133,10 @@ def dealias_and_group_headers( """ group_delimiter = "::" out_dict_array = list() + seen_headers = {} for row in dict_array: out_row = dict() for header, val in row.items(): - if ignore_case: header = header.lower() @@ -154,9 +166,12 @@ def dealias_and_group_headers( new_key = tokens[0] new_value = list_to_nested_dict(tokens[1:] + [val]) out_row = merge_dicts(out_row, {new_key: new_value}, default_language) + seen_headers[tuple(tokens)] = None out_dict_array.append(out_row) - return out_dict_array + return DealiasAndGroupHeadersResult( + headers=tuple(seen_headers.keys()), data=out_dict_array + ) def dealias_types(dict_array): @@ -478,9 +493,11 @@ def workbook_to_json( settings_sheet_headers = [] settings_sheet = dealias_and_group_headers( - settings_sheet_headers, aliases.settings_header, use_double_colons + dict_array=settings_sheet_headers, + header_aliases=aliases.settings_header, + use_double_colons=use_double_colons, ) - settings = settings_sheet[0] if len(settings_sheet) > 0 else {} + settings = settings_sheet.data[0] if len(settings_sheet.data) > 0 else {} replace_smart_quotes_in_dict(settings) default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language) @@ -518,10 +535,13 @@ def workbook_to_json( replace_smart_quotes_in_dict(choice_item) external_choices_sheet = dealias_and_group_headers( - external_choices_sheet, aliases.list_header, use_double_colons, default_language + dict_array=external_choices_sheet, + header_aliases=aliases.list_header, + use_double_colons=use_double_colons, + default_language=default_language, ) external_choices = group_dictionaries_by_key( - external_choices_sheet, constants.LIST_NAME + list_of_dicts=external_choices_sheet.data, key=constants.LIST_NAME ) # ########## Choices sheet ########## @@ -529,9 +549,14 @@ def workbook_to_json( for choice_item in choices_sheet: replace_smart_quotes_in_dict(choice_item) choices_sheet = dealias_and_group_headers( - choices_sheet, aliases.list_header, use_double_colons, default_language + dict_array=choices_sheet, + header_aliases=aliases.list_header, + use_double_colons=use_double_colons, + default_language=default_language, + ) + combined_lists = group_dictionaries_by_key( + list_of_dicts=choices_sheet.data, key=constants.LIST_NAME ) - combined_lists = group_dictionaries_by_key(choices_sheet, constants.LIST_NAME) # To combine the warning into one message, the check for missing choices translation # columns is run with Survey sheet below. @@ -605,9 +630,13 @@ def workbook_to_json( # ########## Entities sheet ########### entities_sheet = workbook_dict.get(constants.ENTITIES, []) entities_sheet = dealias_and_group_headers( - entities_sheet, aliases.entities_header, False + dict_array=entities_sheet, + header_aliases=aliases.entities_header, + use_double_colons=False, + ) + entity_declaration = get_entity_declaration( + entities_sheet=entities_sheet.data, workbook_dict=workbook_dict, warnings=warnings ) - entity_declaration = get_entity_declaration(entities_sheet, workbook_dict, warnings) # ########## Survey sheet ########### survey_sheet = workbook_dict[constants.SURVEY] @@ -618,23 +647,30 @@ def workbook_to_json( if clean_text_values_enabled: survey_sheet = clean_text_values(survey_sheet) survey_sheet = dealias_and_group_headers( - survey_sheet, aliases.survey_header, use_double_colons, default_language + dict_array=survey_sheet, + header_aliases=aliases.survey_header, + use_double_colons=use_double_colons, + default_language=default_language, ) - survey_sheet = dealias_types(survey_sheet) + survey_sheet.data = dealias_types(dict_array=survey_sheet.data) # Check for missing translations. The choices sheet is checked here so that the # warning can be combined into one message. sheet_translations = SheetTranslations( - survey_sheet=survey_sheet, - choices_sheet=choices_sheet, + survey_sheet=survey_sheet.headers, + choices_sheet=choices_sheet.headers, ) sheet_translations.missing_check(warnings=warnings) # No spell check for OSM sheet (infrequently used, many spurious matches). osm_sheet = dealias_and_group_headers( - workbook_dict.get(constants.OSM, []), aliases.list_header, True + dict_array=workbook_dict.get(constants.OSM, []), + header_aliases=aliases.list_header, + use_double_colons=True, + ) + osm_tags = group_dictionaries_by_key( + list_of_dicts=osm_sheet.data, key=constants.LIST_NAME ) - osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME) # ################################# # Parse the survey sheet while generating a survey in our json format: @@ -677,7 +713,7 @@ def workbook_to_json( # row by row, validate questions, throwing errors and adding warnings # where needed. - for row in survey_sheet: + for row in survey_sheet.data: row_number += 1 if stack[-1] is not None: prev_control_type = stack[-1]["control_type"] @@ -1683,7 +1719,7 @@ def _setup_question_types_dictionary(self): header_aliases={}, use_double_colons=use_double_colons, default_language=constants.DEFAULT_LANGUAGE_VALUE, - ) + ).data self._dict = organize_by_values(self._dict, "name") diff --git a/tests/test_translations.py b/tests/test_translations.py index 8116a0bb..b6a85ba0 100644 --- a/tests/test_translations.py +++ b/tests/test_translations.py @@ -416,6 +416,146 @@ def run(name): ): run(name=f"questions={count}, without check (seconds):") + def test_translation_detection__survey_and_choices_columns_present(self): + """Should identify that the survey is multi-language when first row(s) empty.""" + md = """ + | survey | | | | | + | | type | name | label | label::en | + | | select_one c0 | f | f | | + | | select_one c1 | q1 | Question 1 | Question A | + | choices | | | | | | + | | list name | name | label | label::en | label::fr | + | | c0 | n | l | | | + | | c1 | na | la | | | + | | c1 | nb | lb | lb-e | lb-f | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpq.body_select1_itemset("f"), + xpq.body_label_inline("select1", "f", "f"), + xpq.body_select1_itemset("q1"), + xpq.body_label_itext("select1", "q1"), + xpq.model_itext_label("q1", DEFAULT_LANG, "Question 1"), + xpq.model_itext_label("q1", "en", "Question A"), + xpq.model_itext_label("q1", "fr", "-"), + xpc.model_instance_choices_label("c0", (("n", "l"),)), + xpc.model_instance_choices_itext("c1", ("na", "nb")), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb") + ), + xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")), + ], + ) + + def test_translation_detection__survey_columns_not_present(self): + """Should identify that the survey is multi-language when only choices translated.""" + md = """ + | survey | | | | + | | type | name | label | + | | select_one c0 | f | f | + | | select_one c1 | q1 | Question 1 | + | choices | | | | | | + | | list name | name | label | label::en | label::fr | + | | c0 | n | l | | | + | | c1 | na | la | | | + | | c1 | nb | lb | lb-e | lb-f | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpq.body_select1_itemset("f"), + xpq.body_label_inline("select1", "f", "f"), + xpq.body_select1_itemset("q1"), + xpq.body_label_inline("select1", "q1", "Question 1"), + xpc.model_instance_choices_label("c0", (("n", "l"),)), + xpc.model_instance_choices_itext("c1", ("na", "nb")), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb") + ), + xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")), + ], + ) + + def test_translation_detection__only_survey_columns_present(self): + """Should identify that the survey is multi-language when first row(s) empty.""" + md = """ + | survey | | | | | + | | type | name | label | label::en | + | | select_one c0 | f | f | | + | | select_one c1 | q1 | Question 1 | Question A | + | choices | | | | + | | list name | name | label | + | | c0 | n | l | + | | c1 | na | la | + | | c1 | nb | lb | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpq.body_select1_itemset("f"), + xpq.body_label_inline("select1", "f", "f"), + xpq.body_select1_itemset("q1"), + xpq.body_label_itext("select1", "q1"), + xpq.model_itext_label("q1", DEFAULT_LANG, "Question 1"), + xpq.model_itext_label("q1", "en", "Question A"), + xpc.model_instance_choices_label("c0", (("n", "l"),)), + xpc.model_instance_choices_label("c1", (("na", "la"), ("nb", "lb"))), + ], + ) + + def test_translation_detection__survey_columns_present_with_media(self): + """Should identify that the survey is multi-language when first row(s) empty.""" + md = """ + | survey | | | | | | + | | type | name | label | label::en | image::en | + | | select_one c0 | f | f | | | + | | select_one c1 | q1 | Question 1 | Question A | c1.png | + | choices | | | | | | + | | list name | name | label | label::en | label::fr | audio::de | + | | c0 | n | l | | | | + | | c1 | na | la | | | | + | | c1 | nb | lb | lb-e | | c1_nb.mp3 | + | | c1 | nc | lc | lc-e | lc-f | c1_nc.mp3 | + """ + self.assertPyxformXform( + md=md, + debug=True, + xml__xpath_match=[ + xpq.body_select1_itemset("f"), + xpq.body_label_inline("select1", "f", "f"), + xpq.body_select1_itemset("q1"), + xpq.body_label_itext("select1", "q1"), + xpq.model_itext_label("q1", DEFAULT_LANG, "Question 1"), + xpq.model_itext_label( + "q1", + "en", + "Question A", + ), + xpq.model_itext_form("q1", "en", "image", "c1.png"), + xpc.model_instance_choices_label("c0", (("n", "l"),)), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb", "lc") + ), + xpc.model_itext_choice_text_label_by_pos( + "en", "c1", ("-", "lb-e", "lc-e") + ), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "-", "lc-f")), + xpc.model_itext_choice_text_label_by_pos("de", "c1", ("-", "-", "-")), + xpc.model_itext_choice_media_by_pos( + "de", + "c1", + ( + ((None, None),), + (("audio", "c1_nb.mp3"),), + (("audio", "c1_nc.mp3"),), + ), + ), + ], + ) + class TestTranslationsSurvey(PyxformTestCase): """Translations behaviour of columns in the Survey sheet.""" @@ -1090,9 +1230,9 @@ def test_missing_translation__one_lang_simple__warn__default(self): | | type | name | label | | | select_one c1 | q1 | Question 1 | | choices | | | | - | | list name | name | label | label::eng | media::audio | - | | c1 | na | la-d | la-e | la-d.mp3 | - | | c1 | nb | lb-d | lb-e | lb-d.mp3 | + | | list name | name | label::eng | media::audio | + | | c1 | na | la-e | la-d.mp3 | + | | c1 | nb | lb-e | lb-d.mp3 | """ cols = {CHOICES: {"default": ("label",), "eng": ("media::audio",)}} warning = format_missing_translations_msg(_in=cols) diff --git a/tests/test_xform2json.py b/tests/test_xform2json.py index ba9a56ce..7d283fb0 100644 --- a/tests/test_xform2json.py +++ b/tests/test_xform2json.py @@ -35,9 +35,10 @@ def test_convert_toJSON_multi_language(self): kwargs={"id_string": "id", "name": "multi-language", "title": "some-title"}, autoname=False, ) - expected_xml = survey.to_xml() + expected = survey.to_xml() generated_json = survey.to_json() - survey = create_survey_element_from_dict(json.loads(generated_json)) + survey_from_builder = create_survey_element_from_dict(json.loads(generated_json)) + observed = survey_from_builder.to_xml() - self.assertEqual(expected_xml, survey.to_xml()) + self.assertEqual(expected, observed) diff --git a/tests/xpath_helpers/choices.py b/tests/xpath_helpers/choices.py index dddcdb49..d546221b 100644 --- a/tests/xpath_helpers/choices.py +++ b/tests/xpath_helpers/choices.py @@ -106,6 +106,7 @@ def model_itext_choice_media_by_pos( """ for idx, choice in enumerate(choices) for form, media in choice + if form is not None ) ) return f""" @@ -134,6 +135,7 @@ def model_no_itext_choice_media_by_pos( """ for idx, choice in enumerate(choices) for form, media in choice + if form is not None ) ) return f""" diff --git a/tests/xpath_helpers/questions.py b/tests/xpath_helpers/questions.py index c6af3b65..bf865ab3 100644 --- a/tests/xpath_helpers/questions.py +++ b/tests/xpath_helpers/questions.py @@ -1,6 +1,6 @@ class XPathHelper: """ - XPath expressions for choices assertions. + XPath expressions for questions assertions. """ @staticmethod @@ -20,6 +20,47 @@ def model_instance_bind(q_name: str, _type: str): ] """ + @staticmethod + def model_itext_label(q_name: str, lang: str, q_label: str): + """Model itext contains the question label.""" + return f""" + /h:html/h:head/x:model/x:itext/x:translation[@lang='{lang}'] + /x:text[@id='/test_name/{q_name}:label'] + /x:value[not(@form) and text()='{q_label}'] + """ + + @staticmethod + def model_itext_form(q_name: str, lang: str, form: str, fname: str): + """Model itext contains an alternate form itext for the label or hint.""" + prefix = { + "audio": ("label", "jr://audio/"), + "image": ("label", "jr://images/"), + "big-image": ("label", "jr://images/"), + "video": ("label", "jr://video/"), + "guidance": ("hint", ""), + } + return f""" + /h:html/h:head/x:model/x:itext/x:translation[@lang='{lang}'] + /x:text[@id='/test_name/{q_name}:{prefix[form][0]}'] + /x:value[@form='{form}' and text()='{prefix[form][1]}{fname}'] + """ + + @staticmethod + def body_label_inline(q_type: str, q_name: str, q_label: str): + """Body element contains the question label.""" + return f""" + /h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}'] + /x:label[not(@ref) and text()='{q_label}'] + """ + + @staticmethod + def body_label_itext(q_type: str, q_name: str): + """Body element references itext for the question label.""" + return f""" + /h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}'] + /x:label[@ref="jr:itext('/test_name/{q_name}:label')" and not(text())] + """ + @staticmethod def body_select1_itemset(q_name: str): """Body has a select1 with an itemset, and no inline items."""