Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add international edition to ODK #1006

Merged
merged 10 commits into from
Feb 29, 2024
61 changes: 61 additions & 0 deletions odk/odk.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,38 @@ class SSSOMMappingSetProduct(Product):
sssom_tool_options: Optional[str] = ""
"""SSSOM toolkit options passed to the sssom command used to generate this product command"""

# Describes one Babelon translation table. The Makefile template renders a
# group of rules for every product in the translation group. The template also
# reads `id` and `maintenance` from each product; they are not declared here
# and are presumably inherited from Product (TODO confirm).
@dataclass_json
@dataclass
class BabelonTranslationProduct(Product):
"""
Represents a Babelon Translation
"""
# Fetched with wget into <id>.babelon.tsv when maintenance == "mirror".
mirror_babelon_from: Optional[Url] = None
"""if specified this URL is used to mirror the translation."""

# Fetched with wget into <id>.synonyms.tsv when maintenance == "mirror" and
# include_robot_template_synonyms is true.
mirror_synonyms_from: Optional[Url] = None
"""if specified this URL is used to mirror the synonym template from."""

# Also controls whether <id>.synonyms.owl is added to TRANSLATIONS_OWL.
include_robot_template_synonyms: bool = False
"""if include_robot_template_synonyms is true, a ROBOT template synonym table is added in addition to the babelon translation table."""

babelon_tool_options: Optional[str] = ""
"""Babelon toolkit options passed to the command used to generate this product command"""

# Rendered as `--language-code` for `babelon prepare-translation` and as the
# language tag of the synonym column in the seeded ROBOT template.
language: str = "en"
"""Language tag (IANA/ISO), e.g 'en', 'fr'."""

# A string, not a bool: the Makefile template passes the value verbatim to
# `--include-not-translated`.
include_not_translated: str = "false"
"""if include_not_translated is 'false' NOT_TRANSLATED values are removed during preprocessing."""

# A string, not a bool: the Makefile template passes the value verbatim to
# `--update-translation-status`.
update_translation_status: str = "false"
"""if update_translation_status is 'true', translations where the source_value has changed are relegated to CANDIDATE status."""

# When true the generated recipe aborts unless OPENAI_API_KEY is set, then
# runs `babelon translate` on the not-translated table and merges the result.
auto_translate: bool = False
"""if auto_translate is true, missing values are being translated using the babelon toolkit during preprocessing. By default, the toolkit employs LLM-mediated translations using the OpenAI API. This default may change at any time."""



@dataclass_json
@dataclass
class ExportProduct(Product):
Expand Down Expand Up @@ -438,6 +469,30 @@ class SSSOMMappingSetGroup(JsonSchemaMixin):

products : Optional[List[SSSOMMappingSetProduct]] = None

# Project-level configuration for Babelon translations; referenced from
# OntologyProject.babelon_translation_group and consumed by the Makefile and
# dynamic-files templates.
@dataclass_json
@dataclass
class BabelonTranslationSetGroup(JsonSchemaMixin):
"""
A configuration section that consists of a list of `BabelonTranslationProduct` descriptions
"""

# Rendered into the Makefile as TRANSLATIONSDIR.
directory : Directory = "../translations"

# When true, $(ONT)-all.babelon.tsv and $(ONT)-all.babelon.json are added to
# TRANSLATION_FILES (and thereby to the release ASSETS).
release_merged_translations : bool = False
"""If true, a big table and JSON file is created which contains all translations."""

# Rendered as TRANSLATE_PREDICATES; each entry becomes one `--field` option
# of `babelon prepare-translation`.
predicates : Optional[List[str]] = field(default_factory=lambda: ['IAO:0000115', 'rdfs:label'])
"""The list of predicates that are considered during translation preparation."""

# Rendered as TRANSLATIONS_ADAPTER and passed as `--oak-adapter`.
oak_adapter: str = "pronto:$(ONT).obo"
"""The oak adapter that should be used to process the translation tables. Should match the 'translate_ontology' field."""

# Rendered as TRANSLATIONS_ONTOLOGY; it is a prerequisite of every
# <id>-preprocessed.babelon.tsv target.
translate_ontology : str = "$(ONT).obo"
"""The name of the ontology that should be translated. Should match the 'oak_adapter' field."""

# One entry per translation table; stays None when no products are configured.
products : Optional[List[BabelonTranslationProduct]] = None


@dataclass_json
@dataclass
class ExportGroup(ProductGroup):
Expand Down Expand Up @@ -558,6 +613,9 @@ class OntologyProject(JsonSchemaMixin):

use_mappings : bool = False
"""if true use SSSOM mapping files."""

use_translations : bool = False
"""if true enable babelon multilingual support."""

use_env_file_docker : bool = False
"""if true environment variables are collected by the docker wrapper and passed into the container."""
Expand Down Expand Up @@ -687,6 +745,9 @@ class OntologyProject(JsonSchemaMixin):

sssom_mappingset_group : Optional[SSSOMMappingSetGroup] = None
"""Block that includes information on all SSSOM mapping tables used"""

babelon_translation_group : Optional[BabelonTranslationSetGroup] = None
"""Block that includes information on all babelon tables used"""

release_diff : bool = False
"""When enabled, a diff is generated between the current release and the new one"""
Expand Down
16 changes: 16 additions & 0 deletions template/_dynamic_files.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,22 @@ subject_id predicate_id object_id mapping_justification
{%- endfor %}
{%- endif %}

{%- endif %}
{#- Seed files for src/translations: a README, plus for every configured
    product an empty Babelon table and, optionally, an empty ROBOT synonym
    template. `products` may be None when the group lists no products, so the
    loop iterates over an empty list in that case. #}
{%- if project.use_translations %}
^^^ src/translations/README.md
# Directory for managing translation files

{%- if project.babelon_translation_group is not none %}
{%- for translation in project.babelon_translation_group.products or [] %}
^^^ src/translations/{{ translation.id }}.babelon.tsv
source_language translation_language subject_id predicate_id source_value translation_value translation_status
{%- if translation.include_robot_template_synonyms %}
^^^ src/translations/{{ translation.id }}.synonyms.tsv
subject_id translation_value comment
ID AL oboInOwl:hasExactSynonym@{{ translation.language }}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- endif %}
{#-
Example pattern implementation TSV
Expand Down
96 changes: 95 additions & 1 deletion template/src/ontology/Makefile.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ MAPPINGS= {% if project.sssom_mappingset_group is not none %}{
MAPPING_RELEASE_FILES= $(foreach n,$(MAPPINGS), $(MAPPINGDIR)/$(n).sssom.tsv)
{% endif %}

{#- Make variables for Babelon translation support.
    Every access to babelon_translation_group is guarded, because
    use_translations can be true while the group is unset, and `products`
    may be None. TRANSLATIONSDIR falls back to ../translations so that the
    pattern rules built on it never render with an empty directory prefix. #}
{%- if project.use_translations %}
TRANSLATIONSDIR= {% if project.babelon_translation_group is not none %}{{project.babelon_translation_group.directory|default("../translations")}}{% else %}../translations{% endif %}
BABELONPY= babelon -q
TRANSLATIONS_OWL= {%- if project.babelon_translation_group is not none %}{% for translation in project.babelon_translation_group.products or [] %}$(TRANSLATIONSDIR)/{{ translation.id }}.babelon.owl {% if translation.include_robot_template_synonyms %} $(TRANSLATIONSDIR)/{{ translation.id }}.synonyms.owl {% endif %}{% endfor %}{% endif %}
TRANSLATIONS_TSV= {%- if project.babelon_translation_group is not none %}{% for translation in project.babelon_translation_group.products or [] %}$(TRANSLATIONSDIR)/{{ translation.id }}-preprocessed.babelon.tsv {% endfor %}{% endif %}
TRANSLATION_FILES= {%- if project.babelon_translation_group is not none %}{% if project.babelon_translation_group.release_merged_translations %}$(TRANSLATIONSDIR)/$(ONT)-all.babelon.tsv $(TRANSLATIONSDIR)/$(ONT)-all.babelon.json{% endif %}{% endif %}
{% endif %}

FORMATS = $(sort {% for format in project.export_formats %} {{ format }}{% endfor %} owl)
FORMATS_INCL_TSV = $(sort $(FORMATS) tsv)
RELEASE_ARTEFACTS = $(sort {% for release in project.release_artefacts %}{% if release.startswith('custom-') %}{{ release | replace("custom-","")}}{% else %}$(ONT)-{{ release }}{% endif %} {% endfor %})
Expand Down Expand Up @@ -322,7 +330,8 @@ check_for_robot_updates:
ASSETS = \
$(IMPORT_FILES) \
$(MAIN_FILES) \{% if project.use_dosdps %}
$(PATTERN_RELEASE_FILES) \{% endif %}
$(PATTERN_RELEASE_FILES) \{% endif %}{% if project.use_translations %}
$(TRANSLATION_FILES) \{% endif %}
$(REPORT_FILES) \
$(SUBSET_FILES) \
$(MAPPING_FILES)
Expand Down Expand Up @@ -913,6 +922,83 @@ validate_mappings:

{% endif %}

{%- if project.use_translations %}
# ----------------------------------------
# Babelon Translation Files
# ----------------------------------------

{#- Per-product Babelon rules.
    For every configured product this emits:
      * a rule for <id>.babelon.tsv (and optionally <id>.synonyms.tsv) that
        either downloads the file (maintenance == "mirror") or only checks
        that it exists;
      * a rule for <id>-preprocessed.babelon.tsv that runs
        `babelon prepare-translation` and, when auto_translate is set,
        translates the not-translated rows and merges them back in.
    `products` may be None, hence the `or []` on the loop. #}
{%- if project.babelon_translation_group is not none %}

TRANSLATIONS_ADAPTER={{ project.babelon_translation_group.oak_adapter|default('pronto:$(ONT).obo') }}
TRANSLATIONS_ONTOLOGY={{ project.babelon_translation_group.translate_ontology|default('$(ONT).obo') }}
TRANSLATE_PREDICATES={% for predicate_id in project.babelon_translation_group.predicates|default(['IAO:0000115', 'rdfs:label'], true) %}{{ predicate_id }} {% endfor %}

{% for translation in project.babelon_translation_group.products or [] %}
{% if translation.maintenance == "mirror" %}
$(TRANSLATIONSDIR)/{{ translation.id }}.babelon.tsv:
	wget "{{ translation.mirror_babelon_from }}" -O $@
{% if translation.include_robot_template_synonyms %}
$(TRANSLATIONSDIR)/{{ translation.id }}.synonyms.tsv:
	wget "{{ translation.mirror_synonyms_from }}" -O $@
{% endif %}
{% else %}
# This translation table is manually curated, so we only check that the file actually exists.
$(TRANSLATIONSDIR)/{{ translation.id }}.babelon.tsv:
	test -f $@
{% if translation.include_robot_template_synonyms %}
$(TRANSLATIONSDIR)/{{ translation.id }}.synonyms.tsv:
	test -f $@
{% endif %}
{% endif %}

$(TRANSLATIONSDIR)/{{ translation.id }}-preprocessed.babelon.tsv: $(TRANSLATIONS_ONTOLOGY) $(TRANSLATIONSDIR)/{{ translation.id }}.babelon.tsv{% if translation.auto_translate %}
	@if [ -z "$(OPENAI_API_KEY)" ]; then echo "OPENAI_API_KEY must be set as part of the make command, e.g. sh run.sh make OPENAI_API_KEY=\"sk-123\" my_command" && exit 1; fi{% endif %}
	$(BABELONPY) prepare-translation $(TRANSLATIONSDIR)/{{ translation.id }}.babelon.tsv \
		--oak-adapter $(TRANSLATIONS_ADAPTER) \
		--language-code {{ translation.language|default('en') }} \
		$(foreach n,$(TRANSLATE_PREDICATES), --field $(n)) \
		--output-source-changed $(TRANSLATIONSDIR)/{{ translation.id }}-changed.babelon.tsv \
		--output-not-translated $(TRANSLATIONSDIR)/{{ translation.id }}-not-translated.babelon.tsv \
		--include-not-translated {{ translation.include_not_translated|default('false') }} \
		--update-translation-status {{ translation.update_translation_status|default('false') }} \
		-o $@{% if translation.auto_translate %}
	echo "Warning: By default, the toolkit employs LLM-mediated translations using the OpenAI API. This default may change at any time"
	echo "Warning: Never store API keys or other secrets in Makefiles or scripts you have in version control."
	export OPENAI_API_KEY="$(OPENAI_API_KEY)" &&\
	$(BABELONPY) translate $(TRANSLATIONSDIR)/{{ translation.id }}-not-translated.babelon.tsv -o $(TRANSLATIONSDIR)/{{ translation.id }}-translated.babelon.tsv
	$(BABELONPY) merge $(TRANSLATIONSDIR)/{{ translation.id }}-preprocessed.babelon.tsv $(TRANSLATIONSDIR)/{{ translation.id }}-translated.babelon.tsv -o $@
{%- endif %}

{% endfor %}
{%- endif %}

# Compile a ROBOT synonym template (<id>.synonyms.tsv) into an OWL file and
# annotate it with an ontology IRI, a versioned IRI (-V) and owl:versionInfo,
# all derived from $(ONTBASE), $(VERSION) and the pattern stem ($*).
$(TRANSLATIONSDIR)/%.synonyms.owl: $(TRANSLATIONSDIR)/%.synonyms.tsv
$(ROBOT) template --template $< \
annotate \
--ontology-iri $(ONTBASE)/translations/$*.synonyms.owl \
-V $(ONTBASE)/releases/$(VERSION)/translations/$*.synonyms.owl \
--annotation owl:versionInfo $(VERSION) \
convert -f owl --output $@
# Keep the generated OWL file even when make treats it as an intermediate target.
.PRECIOUS: $(TRANSLATIONSDIR)/%.synonyms.owl

# Convert a preprocessed Babelon table into OWL and annotate it with an
# ontology IRI, a versioned IRI (-V) and owl:versionInfo.
# The conversion reads the rule's own prerequisite ($<), i.e.
# <id>-preprocessed.babelon.tsv, so the file that make has just (re)built is
# the one that gets converted. The intermediate OWL goes to a per-target
# temporary file ($@.tmp), which is removed once the annotated output exists.
$(TRANSLATIONSDIR)/%.babelon.owl: $(TRANSLATIONSDIR)/%-preprocessed.babelon.tsv
	$(BABELONPY) convert $< --output-format owl -o $@.tmp
	$(ROBOT) merge -i $@.tmp \
		annotate \
		--ontology-iri $(ONTBASE)/translations/$*.babelon.owl \
		-V $(ONTBASE)/releases/$(VERSION)/translations/$*.babelon.owl \
		--annotation owl:versionInfo $(VERSION) \
		convert -f owl --output $@
	@rm $@.tmp
# Keep the generated OWL file even when make treats it as an intermediate target.
.PRECIOUS: $(TRANSLATIONSDIR)/%.babelon.owl

# Merge every preprocessed per-product table (TRANSLATIONS_TSV) into a single
# Babelon table.
$(TRANSLATIONSDIR)/$(ONT)-all.babelon.tsv: $(TRANSLATIONS_TSV)
$(BABELONPY) merge $^ -o $@

# Generic conversion of any Babelon TSV in the translations directory to JSON.
$(TRANSLATIONSDIR)/%.babelon.json: $(TRANSLATIONSDIR)/%.babelon.tsv
$(BABELONPY) convert $< --output-format json -o $@
{% endif %}

# ----------------------------------------
# Release artefacts: export formats
# ----------------------------------------
Expand Down Expand Up @@ -1063,6 +1149,14 @@ $(ONT)-simple-non-classified.owl: $(EDIT_PREPROCESSED) $(OTHER_SRC) $(SIMPLESEED
$(SHARED_ROBOT_COMMANDS) annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) {% if project.release_date -%}--annotation oboInOwl:date "$(OBODATE)" {% endif -%}--output [email protected] && mv [email protected] $@
{% endif -%}

{% if 'international' in project.release_artefacts or project.primary_release == 'international' -%}
# international: A variant of the primary_release, but with multi-language support.
# Merges $(ONT).owl with every file listed in TRANSLATIONS_OWL and annotates
# the result. Output is written to a per-target temporary file first and only
# moved onto the target once ROBOT has succeeded, so a failed run never leaves
# a truncated release file behind.
$(ONT)-international.owl: $(ONT).owl $(TRANSLATIONS_OWL)
	$(ROBOT) merge $(patsubst %, -i %, $^) \
		$(SHARED_ROBOT_COMMANDS) annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \
		{% if project.release_date -%} --annotation oboInOwl:date "$(OBODATE)" {% endif -%}--output $@.tmp.owl && mv $@.tmp.owl $@
{% endif -%}

{% if 'basic' in project.release_artefacts or project.primary_release == 'basic' %}
# foo-basic: A version of -simple containing only relationships using relations on a configurable whitelist (default = BFO:0000050 (?)).
# See above (David comment) for explanation.
Expand Down
Loading