From 2b67a6d63bb84623e76e47ee26bb36321121f2d5 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Thu, 8 Feb 2024 18:24:53 -0500 Subject: [PATCH] ICD11 Ingest - Update: mondo-ingest.Makefile - Add: $(COMPONENTSDIR)/icd11.owl - Add: config/icd11foundation-property-map.sssom.tsv - Update: ICD11 docs - Update: config/properties.txt - Update: config/context.json - Update: metadata/mondo.sssom.config.yml: added icd11.foundation to subject_prefixes - Update: lexmatch-sssom-compare.py: entry for icd11 - Update: add-new-source.md: Instructions for additional configuration necessities. General - Bugfix: Slurp files were sometimes getting removed because they were considered intermediates and not .PRECIOUS. - Bugfix: reports/*_exclusion_reasons.robot.template.tsv files were getting removed by the build for same reason as above. --- docs/developer/add-new-source.md | 6 ++++ docs/sources/icd11foundation.md | 23 +++++++++---- src/ontology/Makefile | 2 +- src/ontology/config/context.json | 2 ++ .../icd11foundation-property-map.sssom.tsv | 5 +++ src/ontology/config/properties.txt | 2 ++ src/ontology/metadata/mondo.sssom.config.yml | 1 + src/ontology/mondo-ingest.Makefile | 13 ++++++++ src/scripts/lexmatch-sssom-compare.py | 5 ++- .../icd11foundation-relevant-diseases.sparql | 32 ------------------- 10 files changed, 50 insertions(+), 41 deletions(-) create mode 100644 src/ontology/config/icd11foundation-property-map.sssom.tsv delete mode 100644 src/sparql/icd11foundation-relevant-diseases.sparql diff --git a/docs/developer/add-new-source.md b/docs/developer/add-new-source.md index b00c6faa..1b663ad2 100644 --- a/docs/developer/add-new-source.md +++ b/docs/developer/add-new-source.md @@ -21,6 +21,7 @@ Add a new metadata file to [src/ontology/metadata](https://github.com/monarch-in Prefixes need to be entered in the following places in the yml: - `curie_map` - `extended_prefix_map` +- `subject_prefixes` ### 2.3. `config/prefixes.csv` Add prefixes. @@ -28,6 +29,11 @@ Add prefixes. 
### 2.4. `config/context.json` Add prefixes. +### 2.5. `lexmatch-sssom-compare.py` +There is a section of branching logic with a comment "Map ontology filenames to prefixes". Add an entry there if either +(a) there is 1 prefix you care about, and it is spelled differently than the component filename (e.g. the prefix is +`myontology`, but the filename is `components/my-ontology.owl`), or (b) there is more than 1 prefix. + ## 3. Docs ### 3.1. `mkdocs.yaml` Update the Website Table of Contents in [mkdocs.yaml](https://github.com/monarch-initiative/mondo-ingest/blob/main/mkdocs.yaml) diff --git a/docs/sources/icd11foundation.md b/docs/sources/icd11foundation.md index 8ab32c53..289b6f50 100644 --- a/docs/sources/icd11foundation.md +++ b/docs/sources/icd11foundation.md @@ -2,20 +2,29 @@ **Source name:** International Classification of Diseases 11th Revision -**Source description:** The International Classification of Diseases (ICD) provides a common language that allows health professionals to share standardized information across the world. The eleventh revision contains around 17 000 unique codes, more than 120 000 codable terms and is now entirely digital.Feb 11, 2022 +**Source description:** The International Classification of Diseases (ICD) provides a common language that allows health +professionals to share standardized information across the world. The eleventh revision contains around 17 000 unique +codes, more than 120 000 codable terms and is now entirely digital.Feb 11, 2022 This data source in particular is the ICD11 foundation, not one of its linearizations. - **Homepage:** https://icd.who.int/ -**Comments about this source:** -Because the existing logical equivalence class axioms led to equivalence cliques (groups of distinct disease identifiers -that inferred to he semantically identical) we decided to strip out all equivalence class axiom from the foundation -prior to processing it in the ingest. 
- +**Comments about this source:** +_Data source_ +_Original source URL_: https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz +_Preprocessing_ +In the [monarch-initiative/icd11](https://github.com/monarch-initiative/icd11) repo, we remove unicode characters and +then remove equivalent class statements as discussed below. +_Equivalent classes_ +We remove all equivalent class statements as they are not unique and result in unintended node merges. For example +`icd11.foundation:2000662282` (_Occupant of pick-up truck or van injured in collision with car, pick-up truck or van: +person on outside of vehicle injured in traffic accident_) has exactly the same equivalent concept expression as +`icd11.foundation:1279712844` (_Occupant of pick-up truck or van injured in collision with two- or three- wheeled motor +vehicle: person on outside of vehicle injured in traffic accident_). +--- The data pipeline that generates the source is implemented in `make`, in this source file: [src/ontology/mondo-ingest.Makefile](https://github.com/monarch-initiative/mondo-ingest/blob/main/src/ontology/mondo-ingest.Makefile). 
diff --git a/src/ontology/Makefile b/src/ontology/Makefile index ed84b4ff..baa9ada9 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -54,7 +54,7 @@ OBODATE ?= $(shell date +'%d:%m:%Y %H:%M') VERSION= $(TODAY) ANNOTATE_ONTOLOGY_VERSION = annotate -V $(ONTBASE)/releases/$(VERSION)/$@ --annotation owl:versionInfo $(VERSION) ANNOTATE_CONVERT_FILE = annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) convert -f ofn --output $@.tmp.owl && mv $@.tmp.owl $@ -OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl +OTHER_SRC = $(COMPONENTSDIR)/doid.owl $(COMPONENTSDIR)/gard.owl $(COMPONENTSDIR)/icd10cm.owl $(COMPONENTSDIR)/icd10who.owl $(COMPONENTSDIR)/icd11foundation.owl $(COMPONENTSDIR)/ncit.owl $(COMPONENTSDIR)/omim.owl $(COMPONENTSDIR)/ordo.owl ONTOLOGYTERMS = $(TMPDIR)/ontologyterms.txt EDIT_PREPROCESSED = $(TMPDIR)/$(ONT)-preprocess.owl diff --git a/src/ontology/config/context.json b/src/ontology/config/context.json index 85abe98b..7d086419 100644 --- a/src/ontology/config/context.json +++ b/src/ontology/config/context.json @@ -31,6 +31,8 @@ "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_", "ICD10CM": "http://purl.bioontology.org/ontology/ICD10CM/", "ICD10WHO": "http://apps.who.int/classifications/icd10/browse/2010/en#/", + "icd11.foundation": "http://id.who.int/icd/entity/", + "icd11.z": "http://who.int/icd#Z_", "OMIMPS": "https://www.omim.org/phenotypicSeries/PS", "MONDOREL": "http://purl.obolibrary.org/obo/mondo#" } diff --git a/src/ontology/config/icd11foundation-property-map.sssom.tsv b/src/ontology/config/icd11foundation-property-map.sssom.tsv new file mode 100644 index 00000000..2cfd32be --- /dev/null +++ b/src/ontology/config/icd11foundation-property-map.sssom.tsv @@ -0,0 +1,5 @@ +subject_id object_id +http://id.who.int/icd/schema/isObsolote 
owl:deprecated +http://id.who.int/icd/schema/longDefinition http://purl.org/dc/terms/description +http://id.who.int/icd/schema/note rdfs:comment +skos:definition IAO:0000115 diff --git a/src/ontology/config/properties.txt b/src/ontology/config/properties.txt index 33de4e8c..79b56639 100644 --- a/src/ontology/config/properties.txt +++ b/src/ontology/config/properties.txt @@ -20,6 +20,8 @@ http://www.w3.org/2004/02/skos/core#narrowMatch http://www.w3.org/2004/02/skos/core#relatedMatch http://www.w3.org/2004/02/skos/core#exactMatch http://www.w3.org/2004/02/skos/core#closeMatch +rdfs:comment rdfs:label rdfs:seeAlso owl:deprecated +http://purl.org/dc/terms/description diff --git a/src/ontology/metadata/mondo.sssom.config.yml b/src/ontology/metadata/mondo.sssom.config.yml index 2aefad03..0d95ed78 100644 --- a/src/ontology/metadata/mondo.sssom.config.yml +++ b/src/ontology/metadata/mondo.sssom.config.yml @@ -367,6 +367,7 @@ subject_prefixes: - EFO - ICD10CM - ICD10WHO + - icd11.foundation - OMIMPS - NCIT - DOID diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 099dc9e0..0b84a20e 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -159,6 +159,17 @@ $(COMPONENTSDIR)/icd10who.owl: $(TMPDIR)/icd10who_relevant_signature.txt | compo remove -T config/properties.txt --select complement --select properties --trim true \ annotate --ontology-iri $(URIBASE)/mondo/sources/icd10who.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd10who.owl -o $@; fi +$(COMPONENTSDIR)/icd11foundation.owl: $(TMPDIR)/icd11foundation_relevant_signature.txt | component-download-icd11foundation.owl + if [ $(COMP) = true ] ; then $(ROBOT) remove -i $(TMPDIR)/component-download-icd11foundation.owl.owl --select imports \ + rename --mappings config/property-map-1.sssom.tsv --allow-missing-entities true \ + rename --mappings config/icd11foundation-property-map.sssom.tsv \ + remove -T 
$(TMPDIR)/icd11foundation_relevant_signature.txt --select complement --select "classes individuals" --trim false \ + remove -T $(TMPDIR)/icd11foundation_relevant_signature.txt --select individuals \ + query \ + --update ../sparql/fix-labels-with-brackets.ru \ + remove -T config/properties.txt --select complement --select properties --trim true \ + annotate --ontology-iri $(URIBASE)/mondo/sources/icd11foundation.owl --version-iri $(URIBASE)/mondo/sources/$(TODAY)/icd11foundation.owl -o $@; fi + $(COMPONENTSDIR)/gard.owl: $(TMPDIR)/gard_relevant_signature.txt | component-download-gard.owl if [ $(COMP) = true ]; then $(ROBOT) remove -i $(TMPDIR)/component-download-gard.owl.owl --select imports \ remove -T $(TMPDIR)/gard_relevant_signature.txt --select complement --select "classes individuals" --trim false \ @@ -246,6 +257,7 @@ $(REPORTDIR)/%_term_exclusions.txt $(REPORTDIR)/%_exclusion_reasons.robot.templa --config-path metadata/$*.yml \ --outpath-txt $(REPORTDIR)/$*_term_exclusions.txt \ --outpath-robot-template-tsv $(REPORTDIR)/$*_exclusion_reasons.robot.template.tsv +.PRECIOUS: $(REPORTDIR)/%_exclusion_reasons.robot.template.tsv $(REPORTDIR)/%_exclusion_reasons.ttl: component-download-%.owl $(REPORTDIR)/%_exclusion_reasons.robot.template.tsv $(ROBOT) template --input $(TMPDIR)/component-download-$*.owl.owl --add-prefixes config/context.json --template $(REPORTDIR)/$*_exclusion_reasons.robot.template.tsv --output $(REPORTDIR)/$*_exclusion_reasons.ttl @@ -476,6 +488,7 @@ slurp/%.tsv: $(COMPONENTSDIR)/%.owl $(TMPDIR)/mondo.sssom.tsv $(REPORTDIR)/%_map --mondo-terms-path $(REPORTDIR)/mirror_signature-mondo.tsv \ --slurp-dir-path slurp/ \ --outpath $@ +.PRECIOUS: slurp/%.tsv .PHONY: slurp-% slurp-%: slurp/%.tsv diff --git a/src/scripts/lexmatch-sssom-compare.py b/src/scripts/lexmatch-sssom-compare.py index a5914b31..e6b0da03 100644 --- a/src/scripts/lexmatch-sssom-compare.py +++ b/src/scripts/lexmatch-sssom-compare.py @@ -173,11 +173,14 @@ def 
extract_unmapped_matches(input: str, matches: TextIO, output_dir: str, summa ont_df_list = [] for _, ont in enumerate(input): + # Map ontology filenames to prefixes ont2 = ont.upper() if ont == "omim": ont2 = "|".join((["OMIM", "OMIMPS"])) elif ont == "ordo": ont2 = "|".join((["ORDO", "Orphanet"])) + elif ont == "icd11foundation": + ont2 = 'icd11.foundation' mondo_ont_df = msdf_mondo.df[condition_mondo_sssom_subj & msdf_mondo.df['object_id'].str.contains(ont2)] mondo_ont_lex_df = lex_df[(condition_lex_df_mondo_subj & lex_df['object_id'].str.contains(ont2))] @@ -201,7 +204,7 @@ def extract_unmapped_matches(input: str, matches: TextIO, output_dir: str, summa ont_df_list.append(unmapped_ont_df) - combined_df = pd.concat(ont_df_list) + combined_df = pd.concat(ont_df_list) if ont_df_list else pd.DataFrame() combined_msdf = MappingSetDataFrame( df=combined_df, converter=msdf_lex.converter, metadata=msdf_lex.metadata diff --git a/src/sparql/icd11foundation-relevant-diseases.sparql b/src/sparql/icd11foundation-relevant-diseases.sparql deleted file mode 100644 index e767f914..00000000 --- a/src/sparql/icd11foundation-relevant-diseases.sparql +++ /dev/null @@ -1,32 +0,0 @@ -PREFIX rdfs: -PREFIX owl: - - -### All diseases -SELECT DISTINCT ?term ?label ?deprecated -WHERE { - { - { - ?s1 ?p1 ?term . - ?term rdfs:subClassOf* . - OPTIONAL { - ?term rdfs:label ?label - } - OPTIONAL { - ?term owl:deprecated ?deprecated - } - } - UNION - { - ?term ?p2 ?o2 . - ?term rdfs:subClassOf* . - OPTIONAL { - ?term rdfs:label ?label - } - OPTIONAL { - ?term owl:deprecated ?deprecated - } - } - } - FILTER(isIRI(?term)) -}