From 8c26e8fd7b9333190f64eec3afd5f05de8e9fcee Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Fri, 21 Jul 2023 07:59:08 -0700 Subject: [PATCH] Tools to analyze logical definitions (#610) * Adding a notebook for diff-via-mappings. * Tools to analyze logical definitions * Adding matrix output for logical definitions * lint * reran * normalize ordering * Extending signature of logical_definitions method to allow additional filtering. Adding new docstrings and tests. --- docs/cli.rst | 2 +- docs/glossary.rst | 4 + docs/guide/logical-definitions.rst | 190 ++ docs/search-syntax.rst | 2 +- notebooks/Commands/DiffViaMappings.ipynb | 2121 +++++++++++++++++ notebooks/Commands/LogicalDefinitions.ipynb | 1002 ++++++++ src/oaklib/cli.py | 367 ++- src/oaklib/conf/obograph-style.json | 3 + src/oaklib/datamodels/search.py | 2 +- src/oaklib/datamodels/vocabulary.py | 3 + .../aggregator/aggregator_implementation.py | 19 +- .../obograph/obograph_implementation.py | 22 +- .../pronto/pronto_implementation.py | 39 +- .../simpleobo/simple_obo_implementation.py | 43 +- .../simpleobo/simple_obo_parser.py | 24 +- .../sqldb/sql_implementation.py | 31 +- .../ubergraph/ubergraph_implementation.py | 18 +- .../interfaces/basic_ontology_interface.py | 4 + src/oaklib/interfaces/obograph_interface.py | 35 +- src/oaklib/interfaces/patcher_interface.py | 8 +- src/oaklib/interfaces/subsetter_interface.py | 2 +- src/oaklib/io/streaming_obo_writer.py | 15 +- src/oaklib/io/streaming_writer.py | 14 +- src/oaklib/mappers/base_mapper.py | 8 +- .../mappers/ontology_metadata_mapper.py | 4 + src/oaklib/selector.py | 13 +- src/oaklib/utilities/axioms/__init__.py | 0 .../axioms/logical_definition_analyzer.py | 111 + .../axioms/logical_definition_summarizer.py | 224 ++ .../axioms/logical_definition_utilities.py | 59 + src/oaklib/utilities/lexical/patternizer.py | 284 +++ .../utilities/mapping/cross_ontology_diffs.py | 3 + src/oaklib/utilities/obograph_utils.py | 31 +- tests/test_implementations/__init__.py | 
21 +- tests/test_implementations/test_aggregator.py | 12 + tests/test_implementations/test_pronto.py | 5 +- tests/test_implementations/test_simple_obo.py | 7 + .../test_logical_definition_summarizer.py | 104 + tests/test_utilities/test_obograph_utils.py | 19 + tests/test_utilities/test_patternizer.py | 95 + 40 files changed, 4823 insertions(+), 147 deletions(-) create mode 100644 docs/guide/logical-definitions.rst create mode 100644 notebooks/Commands/DiffViaMappings.ipynb create mode 100644 notebooks/Commands/LogicalDefinitions.ipynb create mode 100644 src/oaklib/utilities/axioms/__init__.py create mode 100644 src/oaklib/utilities/axioms/logical_definition_analyzer.py create mode 100644 src/oaklib/utilities/axioms/logical_definition_summarizer.py create mode 100644 src/oaklib/utilities/axioms/logical_definition_utilities.py create mode 100644 src/oaklib/utilities/lexical/patternizer.py create mode 100644 tests/test_utilities/test_logical_definition_summarizer.py create mode 100644 tests/test_utilities/test_patternizer.py diff --git a/docs/cli.rst b/docs/cli.rst index d49fe59ac..1adf0422d 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -52,7 +52,7 @@ Term Lists Many commands take a *term* or a *list of terms* as their primary argument. These are typically one of: - a :ref:`CURIE` such as :code:`UBERON:0000955` -- a :ref:`search-syntax` term, which is either: +- a :ref:`search_syntax` term, which is either: - an exact match to a label; for example "limb" or "plasma membrane" - a compound search term such as :code:`t~limb` which finds terms with partial matches to limb diff --git a/docs/glossary.rst b/docs/glossary.rst index 22d1d4e17..5721e9329 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -230,6 +230,10 @@ For a deeper dive into some of these concepts, see the :ref:`guide`. map to *multiple* triples when following the OWL RDF serialization. 
An example is the relationship "finger part-of hand", which in OWL is represented using a :term:`Existential Restriction` that maps to 4 triples. + Logical Definition + A :term:`Logical Definition` is a particular kind of :term:`Axiom` that is used to provide a + definition of a term that is *computable*. See :ref:`logical_definitions`. + Subset An :term:`Ontology Element` that represents a named collection of elements, typically grouped for some purpose. Subsets are commonly used in ontologies like the :term:`Gene Ontology`. diff --git a/docs/guide/logical-definitions.rst b/docs/guide/logical-definitions.rst new file mode 100644 index 000000000..7078d19cc --- /dev/null +++ b/docs/guide/logical-definitions.rst @@ -0,0 +1,190 @@ +.. _logical_definitions: + +Logical Definitions +=================== + +:term:`Logical Definitions` are special kinds of :term:`Axioms` that +define a term in terms of other terms, in a way that is *computable*. OAK provides ways of +operating structurally on logical definitions. Logical definitions may also be used by +:term:`Reasoners` to infer :term:`Entailed` axioms. + +Typically, logical definitions are not directly used by consumers of ontologies, so +this section can be skipped by many OAK users. However, if you are interested in +generating, analyzing, summarizing, or other kinds of operations on logical definitions, +this section is intended to provide an overview of the basic concepts. + +Logical Definitions in OWL +-------------------------- + +In OWL, logical definitions are represented using the +`EquivalentClasses axiom <https://www.w3.org/TR/owl2-syntax/#Equivalent_Classes>`_. +But note that not every equivalence axiom is a logical definition, according to how +we use the term in OAK. For example, an equivalence axiom that connects two *named classes* +doesn't really function to define either class, as this would be circular.
+ +For purposes here, we consider any equivalence axiom between a *named class* on the left +hand side and an *anonymous class expression* on the right hand side to be a logical definition. + +These equivalence axioms can be viewed in ontology browsers such as OLS. For example, +the Uberon `fingernail <http://purl.obolibrary.org/obo/UBERON_0009565>`_ class has an +equivalence axiom to the expression: + +.. code-block:: + + Equivalent to + (nail and part of (RO) some manual digit) + + +Genus-differentia form logical definitions +------------------------------------------ + +OAK includes dedicated functionality for logical definitions that follow *genus-differentia* +form. These are sometimes known as *Aristotelian definitions*. A genus-differentia definition +takes the form: + +``a C is a G that D`` + +where ``C`` is the defined class, ``G`` is the genus, and ``D`` is the differentia or *differentiating +characteristics*. The differentiating characteristics are typically a list of :term:`Predicate` and "Filler" +class pairs. + +For example: + +``a fingernail is a nail that is part of a finger`` + +Here we are defining a fingernail (defined class) in terms of a specialization of a parent class (nail, the genus) +based on a differentiating characteristic (differentia) based on parthood (the predicate) and +a specific "filler" (finger). + +This seems trivial but in fact these kinds of definitions -- when provided in computable +form -- can be used to automate a large amount of ontology development. And they can +be useful for queries over an ontology too. + +It is generally considered good practice for textual definitions to be consistent with +the computable genus-differentia form. + +The OBO Graphs Model +-------------------- + +The :term:`OBO Graphs` data model includes a data structure / class for representing +logical definitions in genus-differentia form. It limits the differentiae to be a set +of existential restrictions, with no nesting.
+ +Currently most operations in OAK that deal with logical definitions expect them to be +in this form. This can sometimes be limiting if you wish to operate over more complicated +OWL axioms. We may provide support for this in the future, but for now the simple form +provided in the OBO Graphs data model works for a large number of ontologies. The simple +form is often recommended because mistakes are far more common when more complicated +structures are used. + +Querying for logical definitions in OAK +---------------------------------------- + +.. code-block:: bash + + $ runoak -i sqlite:obo:uberon logical-definitions .desc//p=i nail + +By default, results are returned in YAML: + +.. code-block:: yaml + + definedClassId: UBERON:0009565 + genusIds: + - UBERON:0001705 + restrictions: + - fillerId: UBERON:0002389 + propertyId: BFO:0000050 + ... + +You can specify `-O csv` to get it in tabular form: + +.. csv-table:: uberon nail logical definitions + :header: definedClassId,definedClassId_label,genusIds,genusIds_label,restrictions,restrictionsPropertyIds,restrictionsPropertyIds_label,restrictionsFillerIds,restrictionsFillerIds_label + + UBERON:0009565,nail of manual digit,UBERON:0001705,nail,BFO:0000050=UBERON:0002389,BFO:0000050,part of,UBERON:0002389,manual digit + UBERON:0009567,nail of pedal digit,UBERON:0001705,nail,BFO:0000050=UBERON:0001466,BFO:0000050,part of,UBERON:0001466,pedal digit + UBERON:0011273,nail of manual digit 1,UBERON:0001705,nail,BFO:0000050=UBERON:0001463,BFO:0000050,part of,UBERON:0001463,manual digit 1 + UBERON:0011274,nail of manual digit 2,UBERON:0001705,nail,BFO:0000050=UBERON:0003622,BFO:0000050,part of,UBERON:0003622,manual digit 2 + UBERON:0011275,nail of manual digit 3,UBERON:0001705,nail,BFO:0000050=UBERON:0003623,BFO:0000050,part of,UBERON:0003623,manual digit 3 + UBERON:0011276,nail of manual digit 4,UBERON:0001705,nail,BFO:0000050=UBERON:0003624,BFO:0000050,part of,UBERON:0003624,manual digit 4 + UBERON:0011277,nail of manual 
digit 5,UBERON:0001705,nail,BFO:0000050=UBERON:0003625,BFO:0000050,part of,UBERON:0003625,manual digit 5 + UBERON:0011278,nail of pedal digit 1,UBERON:0001705,nail,BFO:0000050=UBERON:0003631,BFO:0000050,part of,UBERON:0003631,pedal digit 1 + UBERON:0011279,nail of pedal digit 2,UBERON:0001705,nail,BFO:0000050=UBERON:0003632,BFO:0000050,part of,UBERON:0003632,pedal digit 2 + UBERON:0011280,nail of pedal digit 3,UBERON:0001705,nail,BFO:0000050=UBERON:0003633,BFO:0000050,part of,UBERON:0003633,pedal digit 3 + UBERON:0011281,nail of pedal digit 4,UBERON:0001705,nail,BFO:0000050=UBERON:0003634,BFO:0000050,part of,UBERON:0003634,pedal digit 4 + UBERON:0011282,nail of pedal digit 5,UBERON:0001705,nail,BFO:0000050=UBERON:0003635,BFO:0000050,part of,UBERON:0003635,pedal digit 5 + +The command has a number of options for transforming this to a matrix, customizing +whether rows, columns, and values will be defined classes, genus terms, predicates, or fillers. + +:term:`OBO Format` also provides a compact way of showing logical definitions; these can be +seen in OAK using the ``-O obo`` option: + +.. code-block:: bash + + $ runoak -i sqlite:obo:uberon logical-definitions .desc//p=i nail -O obo + +.. code-block:: + + [Term] + id: UBERON:0009565 ! nail of manual digit + intersection_of: UBERON:0001705 ! nail + intersection_of: BFO:0000050 UBERON:0002389 ! manual digit + + [Term] + id: UBERON:0009567 ! nail of pedal digit + intersection_of: UBERON:0001705 ! nail + intersection_of: BFO:0000050 UBERON:0001466 ! pedal digit + + +Reasoning using logical definitions +----------------------------------- + +Currently OAK does not support classification-style reasoning. If you need this, +we recommend doing this ahead of time using a tool like ROBOT. + +Logical definitions and design patterns +--------------------------------------- + +Creating logical definitions by hand can be time consuming and error prone.
Many groups +choose to do this via a templating system such as: + +- ROBOT templates +- DOSDPs +- OTTR templates +- LinkML-OWL + +Currently OAK does not support generating logical definition axioms or any other OWL +axioms from templates, but it might in the future. + +Generating logical definitions +------------------------------ + +Sometimes it can be useful to generate logical definitions using heuristic methods such +as lexical pattern matching. In general these generated definitions should be reviewed +by experienced ontology developers before being added, but they can be useful both to +get a sense of missing definitions and as an aid to manual definition creation. + +Some OAK commands have a ``generate`` counterpart; above we saw the ``logical-definitions`` +command which is for looking up existing logical definitions. The ``generate-logical-definitions`` +counterpart can be used for generating logical definitions based on specified lexical patterns. + +At this time, this command is experimental, and the flags may change. + +Analyzing logical definitions +------------------------------ + +OAK will soon provide commands for analyzing logical definitions, in particular to determine +consistency of lattice population. + +Validating logical definitions +------------------------------ + +The ``validate-definitions`` command is used for validating text definitions - one aspect +of this is checking for consistency between text and logical definitions. + +Further reading +--------------- + +* `Use of OWL within the Gene Ontology `_ +* `Cross-Product Extensions of the Gene Ontology `_ \ No newline at end of file diff --git a/docs/search-syntax.rst b/docs/search-syntax.rst index ae9a75689..b5fd04527 100644 --- a/docs/search-syntax.rst +++ b/docs/search-syntax.rst @@ -3,7 +3,7 @@ Search Syntax ================= -The search syntax is used by the :ref:`search` interface +The search syntax is mapped to the :ref:`search` interface ..
warning:: diff --git a/notebooks/Commands/DiffViaMappings.ipynb b/notebooks/Commands/DiffViaMappings.ipynb new file mode 100644 index 000000000..8f281883a --- /dev/null +++ b/notebooks/Commands/DiffViaMappings.ipynb @@ -0,0 +1,2121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "681a0856", + "metadata": {}, + "source": [ + "# OAK diff-via-mappings command\n", + "\n", + "This notebook is intended as a supplement to the [main OAK CLI docs](https://incatools.github.io/ontology-access-kit/cli.html).\n", + "\n", + "This notebook provides examples for the `diff-via-mappings` command, which can be used to **find differences between two ontologies based on mappings between them**\n", + "\n", + "For more on mappings, see [Mappings](https://incatools.github.io/ontology-access-kit/guide/mappings.html) in the OAK guide.\n", + "\n", + "\n", + "## Help Option\n", + "\n", + "You can get help on any OAK command using `--help`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "460639a7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Usage: runoak diff-via-mappings [OPTIONS] [TERMS]...\r\n", + "\r\n", + " Calculates cross-ontology diff using mappings\r\n", + "\r\n", + " Given a pair of ontologies, and mappings that connect terms in both\r\n", + " ontologies, this command will perform a structural comparison of all mapped\r\n", + " pairs of terms\r\n", + "\r\n", + " Example:\r\n", + "\r\n", + " runoak -i sqlite:obo:uberon diff-via-mappings --other-input\r\n", + " sqlite:obo:zfa --source UBERON --source ZFA -O csv\r\n", + "\r\n", + " Note the above command does not have any mapping file specified; the\r\n", + " mappings that are distributed within each ontology is used (in this case,\r\n", + " Uberon contains mappings to ZFA)\r\n", + "\r\n", + " If the mappings are provided externally:\r\n", + "\r\n", + " runoak -i ont1.obo diff-via-mappings --other-input ont2.obo --mapping-\r\n", + " input 
mappings.sssom.tsv\r\n", + "\r\n", + " (in the above example, --source is not passed, so all mappings are tested)\r\n", + "\r\n", + " If there are no existing mappings, you can use the lexmatch command to\r\n", + " generate them:\r\n", + "\r\n", + " runoak -i ont1.obo diff-via-mappings -a ont2.obo lexmatch -o\r\n", + " mappings.sssom.tsv\r\n", + "\r\n", + " runoak -i ont1.obo diff-via-mappings --other-input ont2.obo --mapping-\r\n", + " input mappings.sssom.tsv\r\n", + "\r\n", + " The output from this command follows the cross-ontology-diff data model\r\n", + " (https://incatools.github.io/ontology-access-kit/datamodels/cross-ontology-\r\n", + " diff/index.html)\r\n", + "\r\n", + " This can be serialized in YAML or TSV form\r\n", + "\r\n", + "Options:\r\n", + " -S, --source TEXT ontology prefixes e.g. HP, MP\r\n", + " --mapping-input TEXT File of mappings in SSSOM format. If not\r\n", + " provided then mappings in ontology(ies) are\r\n", + " used\r\n", + " -X, --other-input TEXT Additional input file\r\n", + " --other-input-type TEXT Type of additional input file\r\n", + " --intra / --no-intra If true, then all sources are in the main\r\n", + " input ontology [default: no-intra]\r\n", + " --autolabel / --no-autolabel If set, results will automatically have\r\n", + " labels assigned [default: autolabel]\r\n", + " --include-identity-mappings / --no-include-identity-mappings\r\n", + " Use identity relation as mapping; use this\r\n", + " for two versions of the same ontology\r\n", + " [default: no-include-identity-mappings]\r\n", + " --filter-category-identical / --no-filter-category-identical\r\n", + " Do not report cases where a relationship has\r\n", + " not changed [default: no-filter-category-\r\n", + " identical]\r\n", + " --bidirectional / --no-bidirectional\r\n", + " Show diff from both left and right\r\n", + " perspectives [default: bidirectional]\r\n", + " -p, --predicates TEXT A comma-separated list of predicates. 
This\r\n", + " may be a shorthand (i, p) or CURIE\r\n", + " -o, --output FILENAME Output file, e.g. obo file\r\n", + " -O, --output-type TEXT Desired output type\r\n", + " --help Show this message and exit.\r\n" + ] + } + ], + "source": [ + "!runoak diff-via-mappings --help" + ] + }, + { + "cell_type": "markdown", + "id": "672f0f62", + "metadata": {}, + "source": [ + "## Example: Diff between two anatomy ontologies\n", + "\n", + "To illustrate usage, we will calculate the diff between [UBERON](https://obofoundry.org/ontology/uberon) (a multi-species anatomy ontology) and [ZFA](https://obofoundry.org/ontology/zfa) (an anatomy ontology for zebrafish).\n", + "\n", + "Note that rather than provide a set of external mappings, we will use the mappings present in both ontologies (in this case, UBERON has xrefs to ZFA).\n", + "\n", + "To simplify the comparison, we will only consider [is_a](https://incatools.github.io/ontology-access-kit/glossary.html#term-IS_A) and [part_of](https://incatools.github.io/ontology-access-kit/glossary.html#term-Part-Of) relationships. This is specified using the `--predicates` (`-p`) option" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ef4a124b", + "metadata": {}, + "outputs": [], + "source": [ + "!runoak -i sqlite:obo:uberon diff-via-mappings --other-input \\\n", + " sqlite:obo:zfa --source UBERON --source ZFA -p i,p -O csv -o output/uberon-zfa-diff.csv" + ] + }, + { + "cell_type": "markdown", + "id": "cb6e5491", + "metadata": {}, + "source": [ + "## Analyzing the results using Pandas\n", + "\n", + "Note that we asked for the output as a tabular file (`-O csv`), rather than native YAML. You could take the tabular output, analyze it in a spreadsheet, etc. Here we will use the Python pandas library." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "26c079cc", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df=pd.read_csv(\"output/uberon-zfa-diff.csv\", sep=\"\\t\").fillna(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1ad732b4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
left_subject_idleft_object_idleft_predicate_idcategoryleft_subject_labelleft_object_labelleft_predicate_labelright_subject_idright_object_idright_predicate_idsright_subject_labelright_object_labelright_predicate_labelsleft_subject_is_functionalleft_object_is_functionalsubject_mapping_predicateobject_mapping_predicateright_intermediate_idssubject_mapping_cardinalityobject_mapping_cardinality
0UBERON:0000002UBERON:0000995BFO:0000050MissingMappinguterine cervixuterus1:0
1UBERON:0000002UBERON:0001560rdfs:subClassOfMissingMappinguterine cervixneck of organ1:0
2UBERON:0000002UBERON:0005156rdfs:subClassOfMissingMappinguterine cervixreproductive structure1:0
3UBERON:0000003UBERON:0000033BFO:0000050MissingMappingnarishead1:0
4UBERON:0000003UBERON:0005725BFO:0000050MissingMappingnarisolfactory system1:0
...............................................................
36336ZFA:0009401ZFA:0009000rdfs:subClassOfMissingMappinglens fiber cellcell1:0
36337ZFA:0009402ZFA:0005065BFO:0000050MissingMappingheart valve cellheart valve1:0
36338ZFA:0009402ZFA:0009000rdfs:subClassOfMissingMappingheart valve cellcell1:0
36339ZFA:0009403ZFA:0009402rdfs:subClassOfMissingMappingheart valve interstitial cellheart valve cell1:0
36340ZFA:0009404ZFA:0009402rdfs:subClassOfMissingMappingheart valve endothelial cellheart valve cell1:0
\n", + "

36341 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " left_subject_id left_object_id left_predicate_id category \\\n", + "0 UBERON:0000002 UBERON:0000995 BFO:0000050 MissingMapping \n", + "1 UBERON:0000002 UBERON:0001560 rdfs:subClassOf MissingMapping \n", + "2 UBERON:0000002 UBERON:0005156 rdfs:subClassOf MissingMapping \n", + "3 UBERON:0000003 UBERON:0000033 BFO:0000050 MissingMapping \n", + "4 UBERON:0000003 UBERON:0005725 BFO:0000050 MissingMapping \n", + "... ... ... ... ... \n", + "36336 ZFA:0009401 ZFA:0009000 rdfs:subClassOf MissingMapping \n", + "36337 ZFA:0009402 ZFA:0005065 BFO:0000050 MissingMapping \n", + "36338 ZFA:0009402 ZFA:0009000 rdfs:subClassOf MissingMapping \n", + "36339 ZFA:0009403 ZFA:0009402 rdfs:subClassOf MissingMapping \n", + "36340 ZFA:0009404 ZFA:0009402 rdfs:subClassOf MissingMapping \n", + "\n", + " left_subject_label left_object_label \\\n", + "0 uterine cervix uterus \n", + "1 uterine cervix neck of organ \n", + "2 uterine cervix reproductive structure \n", + "3 naris head \n", + "4 naris olfactory system \n", + "... ... ... \n", + "36336 lens fiber cell cell \n", + "36337 heart valve cell heart valve \n", + "36338 heart valve cell cell \n", + "36339 heart valve interstitial cell heart valve cell \n", + "36340 heart valve endothelial cell heart valve cell \n", + "\n", + " left_predicate_label right_subject_id right_object_id \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... ... \n", + "36336 \n", + "36337 \n", + "36338 \n", + "36339 \n", + "36340 \n", + "\n", + " right_predicate_ids right_subject_label right_object_label \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... ... \n", + "36336 \n", + "36337 \n", + "36338 \n", + "36339 \n", + "36340 \n", + "\n", + " right_predicate_labels left_subject_is_functional \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... 
\n", + "36336 \n", + "36337 \n", + "36338 \n", + "36339 \n", + "36340 \n", + "\n", + " left_object_is_functional subject_mapping_predicate \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... \n", + "36336 \n", + "36337 \n", + "36338 \n", + "36339 \n", + "36340 \n", + "\n", + " object_mapping_predicate right_intermediate_ids \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... \n", + "36336 \n", + "36337 \n", + "36338 \n", + "36339 \n", + "36340 \n", + "\n", + " subject_mapping_cardinality object_mapping_cardinality \n", + "0 1:0 \n", + "1 1:0 \n", + "2 1:0 \n", + "3 1:0 \n", + "4 1:0 \n", + "... ... ... \n", + "36336 1:0 \n", + "36337 1:0 \n", + "36338 1:0 \n", + "36339 1:0 \n", + "36340 1:0 \n", + "\n", + "[36341 rows x 20 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6b959761", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"left_source\"] = df.apply(lambda x: x.left_subject_id.split(\":\")[0], axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "1b0f2f50", + "metadata": {}, + "source": [ + "## Plotting mapping diff categories\n", + "\n", + "The diff tool works by taking each relationship/edge in the \"left\" ontology (here all UBERON and all ZFA edges\n", + "are considered) and trying to map it to a relationship in the \"right\" ontology.\n", + "\n", + "The mapping is assigned a category from the [cross-ontology-diff:DiffCategory](https://w3id.org/oak/cross-ontology-diff/DiffCategory) enumeration.\n", + "\n", + "(note that like many OAK operations, the output conforms to a data model that makes its semantics explicit)\n", + "\n", + "We will use seaborn/matplotlib to plot the category counts" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "20be04b9", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# set plot style: grey grid in the background:\n", + "sns.set(style=\"darkgrid\")\n", + "\n", + "# Set the figure size\n", + "plt.figure(figsize=(10, 7))\n", + "\n", + "sns.countplot(data=df, x='category')\n", + "plt.title('Count of each category')\n", + "plt.xlabel('Category')\n", + "plt.ylabel('Count')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d619fecb", + "metadata": {}, + "source": [ + "the dominant category is MissingMapping. This means that either/both subject and object of the left edge could not be matched in the right ontology. It is not surprising there are so many, because Uberon covers more species." + ] + }, + { + "cell_type": "markdown", + "id": "26fb6e19", + "metadata": {}, + "source": [ + "### Breaking down by predicate, and using log scale\n", + "\n", + "We can get a more detailed view by breaking down the counts by the predicate of the left edge.\n", + "\n", + "We will also use a log scale" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "0ab65e2f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=df, x='category', hue=\"left_predicate_id\")\n", + "plt.title('Count of each category')\n", + "plt.xlabel('Category')\n", + "plt.ylabel('Count')\n", + "plt.yscale('log')\n", + "plt.xticks(rotation=45) # Make x-axis labels diagonal\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "ed630003", + "metadata": {}, + "source": [ + "## Exploring categories\n", + "\n", + "### Non-entailed relationships\n", + "\n", + "i.e. this is an edge in UBERON that corresonds to a different kind of edge in ZFA (or vice versa), where the former\n", + "doesn't subsume the latter.\n", + "\n", + "For example, in UBERON PNS *part_of* NS; in ZFA PNS *is_a* NS" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "866cd714", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
left_subject_idleft_object_idleft_predicate_idcategoryleft_subject_labelleft_object_labelleft_predicate_labelright_subject_idright_object_idright_predicate_idsright_subject_labelright_object_labelright_predicate_labelsleft_subject_is_functionalleft_object_is_functionalsubject_mapping_predicateobject_mapping_predicateright_intermediate_idssubject_mapping_cardinalityobject_mapping_cardinality
22UBERON:0000010UBERON:0001016BFO:0000050NonEntailedRelationshipperipheral nervous systemnervous systemZFA:0000142ZFA:0000396rdfs:subClassOfperipheral nervous systemnervous system
24UBERON:0000011UBERON:0002410BFO:0000050NonEntailedRelationshipparasympathetic nervous systemautonomic nervous systemZFA:0001575ZFA:0001574rdfs:subClassOfparasympathetic nervous systemautonomic nervous system
29UBERON:0000013UBERON:0002410BFO:0000050NonEntailedRelationshipsympathetic nervous systemautonomic nervous systemZFA:0001576ZFA:0001574rdfs:subClassOfsympathetic nervous systemautonomic nervous system
196UBERON:0000095UBERON:0002342BFO:0000050NonEntailedRelationshipcardiac neural crestneural crestZFA:0001648ZFA:0000045rdfs:subClassOfcardiac neural crestneural crest
725UBERON:0000936UBERON:0003931BFO:0000050NonEntailedRelationshipposterior commissurediencephalic white matterZFA:0000320ZFA:0000338rdfs:subClassOfcaudal commissurediencephalic white matter
...............................................................
35155ZFA:0005661ZFA:0001488rdfs:subClassOfNonEntailedRelationshipolfactory bulb glomerulusmulti-tissue structureUBERON:0005387UBERON:0000481BFO:0000050|RO:0002131|RO:0002323olfactory glomerulusmulti-tissue structure
35458ZFA:0005829ZFA:0005249rdfs:subClassOfNonEntailedRelationshipSchlemm's canalvasculatureUBERON:0004029UBERON:0002049BFO:0000050|RO:0002131|RO:0002323canal of Schlemmvasculature
35680ZFA:0007017ZFA:0001477rdfs:subClassOfNonEntailedRelationshipposterior neural plateportion of tissueUBERON:0003057UBERON:0000479BFO:0000050|RO:0002131|RO:0002323chordal neural platetissue
35701ZFA:0007037ZFA:0001477rdfs:subClassOfNonEntailedRelationshipposterior neural tubeportion of tissueUBERON:0003076UBERON:0000479BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R...posterior neural tubetissue
35702ZFA:0007038ZFA:0001477rdfs:subClassOfNonEntailedRelationshipanterior neural tubeportion of tissueUBERON:0003080UBERON:0000479BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R...anterior neural tubetissue
\n", + "

205 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " left_subject_id left_object_id left_predicate_id \\\n", + "22 UBERON:0000010 UBERON:0001016 BFO:0000050 \n", + "24 UBERON:0000011 UBERON:0002410 BFO:0000050 \n", + "29 UBERON:0000013 UBERON:0002410 BFO:0000050 \n", + "196 UBERON:0000095 UBERON:0002342 BFO:0000050 \n", + "725 UBERON:0000936 UBERON:0003931 BFO:0000050 \n", + "... ... ... ... \n", + "35155 ZFA:0005661 ZFA:0001488 rdfs:subClassOf \n", + "35458 ZFA:0005829 ZFA:0005249 rdfs:subClassOf \n", + "35680 ZFA:0007017 ZFA:0001477 rdfs:subClassOf \n", + "35701 ZFA:0007037 ZFA:0001477 rdfs:subClassOf \n", + "35702 ZFA:0007038 ZFA:0001477 rdfs:subClassOf \n", + "\n", + " category left_subject_label \\\n", + "22 NonEntailedRelationship peripheral nervous system \n", + "24 NonEntailedRelationship parasympathetic nervous system \n", + "29 NonEntailedRelationship sympathetic nervous system \n", + "196 NonEntailedRelationship cardiac neural crest \n", + "725 NonEntailedRelationship posterior commissure \n", + "... ... ... \n", + "35155 NonEntailedRelationship olfactory bulb glomerulus \n", + "35458 NonEntailedRelationship Schlemm's canal \n", + "35680 NonEntailedRelationship posterior neural plate \n", + "35701 NonEntailedRelationship posterior neural tube \n", + "35702 NonEntailedRelationship anterior neural tube \n", + "\n", + " left_object_label left_predicate_label right_subject_id \\\n", + "22 nervous system ZFA:0000142 \n", + "24 autonomic nervous system ZFA:0001575 \n", + "29 autonomic nervous system ZFA:0001576 \n", + "196 neural crest ZFA:0001648 \n", + "725 diencephalic white matter ZFA:0000320 \n", + "... ... ... ... 
\n", + "35155 multi-tissue structure UBERON:0005387 \n", + "35458 vasculature UBERON:0004029 \n", + "35680 portion of tissue UBERON:0003057 \n", + "35701 portion of tissue UBERON:0003076 \n", + "35702 portion of tissue UBERON:0003080 \n", + "\n", + " right_object_id right_predicate_ids \\\n", + "22 ZFA:0000396 rdfs:subClassOf \n", + "24 ZFA:0001574 rdfs:subClassOf \n", + "29 ZFA:0001574 rdfs:subClassOf \n", + "196 ZFA:0000045 rdfs:subClassOf \n", + "725 ZFA:0000338 rdfs:subClassOf \n", + "... ... ... \n", + "35155 UBERON:0000481 BFO:0000050|RO:0002131|RO:0002323 \n", + "35458 UBERON:0002049 BFO:0000050|RO:0002131|RO:0002323 \n", + "35680 UBERON:0000479 BFO:0000050|RO:0002131|RO:0002323 \n", + "35701 UBERON:0000479 BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R... \n", + "35702 UBERON:0000479 BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R... \n", + "\n", + " right_subject_label right_object_label \\\n", + "22 peripheral nervous system nervous system \n", + "24 parasympathetic nervous system autonomic nervous system \n", + "29 sympathetic nervous system autonomic nervous system \n", + "196 cardiac neural crest neural crest \n", + "725 caudal commissure diencephalic white matter \n", + "... ... ... \n", + "35155 olfactory glomerulus multi-tissue structure \n", + "35458 canal of Schlemm vasculature \n", + "35680 chordal neural plate tissue \n", + "35701 posterior neural tube tissue \n", + "35702 anterior neural tube tissue \n", + "\n", + " right_predicate_labels left_subject_is_functional \\\n", + "22 \n", + "24 \n", + "29 \n", + "196 \n", + "725 \n", + "... ... ... \n", + "35155 \n", + "35458 \n", + "35680 \n", + "35701 \n", + "35702 \n", + "\n", + " left_object_is_functional subject_mapping_predicate \\\n", + "22 \n", + "24 \n", + "29 \n", + "196 \n", + "725 \n", + "... ... ... 
\n", + "35155 \n", + "35458 \n", + "35680 \n", + "35701 \n", + "35702 \n", + "\n", + " object_mapping_predicate right_intermediate_ids \\\n", + "22 \n", + "24 \n", + "29 \n", + "196 \n", + "725 \n", + "... ... ... \n", + "35155 \n", + "35458 \n", + "35680 \n", + "35701 \n", + "35702 \n", + "\n", + " subject_mapping_cardinality object_mapping_cardinality \n", + "22 \n", + "24 \n", + "29 \n", + "196 \n", + "725 \n", + "... ... ... \n", + "35155 \n", + "35458 \n", + "35680 \n", + "35701 \n", + "35702 \n", + "\n", + "[205 rows x 20 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query(\"category=='NonEntailedRelationship'\")" + ] + }, + { + "cell_type": "markdown", + "id": "c88189b1", + "metadata": {}, + "source": [ + "### No relationship\n", + "\n", + "In this case, the edge in UBERON has no corresponding edge in ZFA (or vice versa), entailed or otherwise" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a6807bcf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
left_subject_idleft_object_idleft_predicate_idcategoryleft_subject_labelleft_object_labelleft_predicate_labelright_subject_idright_object_idright_predicate_idsright_subject_labelright_object_labelright_predicate_labelsleft_subject_is_functionalleft_object_is_functionalsubject_mapping_predicateobject_mapping_predicateright_intermediate_idssubject_mapping_cardinalityobject_mapping_cardinality
7UBERON:0000004UBERON:0000475rdfs:subClassOfNoRelationshipnoseorganism subdivisionZFA:0000047ZFA:0001308peripheral olfactory organorganism subdivision
188UBERON:0000089UBERON:0004750BFO:0000050NoRelationshiphypoblast (generic)blastodermZFA:0000117ZFA:0001176hypoblastblastoderm
647UBERON:0000471UBERON:0003103BFO:0000050NoRelationshipcompound organ componentcompound organZFA:0001489ZFA:0000496compound organ componentcompound organ
661UBERON:0000479UBERON:0000468BFO:0000050NoRelationshiptissuemulticellular organismZFA:0001477ZFA:0001094portion of tissuewhole organism
663UBERON:0000480UBERON:0000468BFO:0000050NoRelationshipanatomical groupmulticellular organismZFA:0001512ZFA:0001094anatomical groupwhole organism
...............................................................
35714ZFA:0007057ZFA:0005297rdfs:subClassOfNoRelationshipocular blood vesselcranial blood vesselUBERON:0002203UBERON:0011362vasculature of eyecranial blood vasculature
35715ZFA:0007058ZFA:0000012BFO:0000050NoRelationshiproof platecentral nervous systemUBERON:0003054UBERON:0001017roof platecentral nervous system
35716ZFA:0007058ZFA:0001488rdfs:subClassOfNoRelationshiproof platemulti-tissue structureUBERON:0003054UBERON:0000481roof platemulti-tissue structure
35720ZFA:0007071ZFA:0001477rdfs:subClassOfNoRelationshipflexural organportion of tissueUBERON:0011577UBERON:0000479flexural organtissue
35722ZFA:0007072ZFA:0001488rdfs:subClassOfNoRelationshipblood sinusmulti-tissue structureUBERON:0006615UBERON:0000481venous sinusmulti-tissue structure
\n", + "

1375 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " left_subject_id left_object_id left_predicate_id category \\\n", + "7 UBERON:0000004 UBERON:0000475 rdfs:subClassOf NoRelationship \n", + "188 UBERON:0000089 UBERON:0004750 BFO:0000050 NoRelationship \n", + "647 UBERON:0000471 UBERON:0003103 BFO:0000050 NoRelationship \n", + "661 UBERON:0000479 UBERON:0000468 BFO:0000050 NoRelationship \n", + "663 UBERON:0000480 UBERON:0000468 BFO:0000050 NoRelationship \n", + "... ... ... ... ... \n", + "35714 ZFA:0007057 ZFA:0005297 rdfs:subClassOf NoRelationship \n", + "35715 ZFA:0007058 ZFA:0000012 BFO:0000050 NoRelationship \n", + "35716 ZFA:0007058 ZFA:0001488 rdfs:subClassOf NoRelationship \n", + "35720 ZFA:0007071 ZFA:0001477 rdfs:subClassOf NoRelationship \n", + "35722 ZFA:0007072 ZFA:0001488 rdfs:subClassOf NoRelationship \n", + "\n", + " left_subject_label left_object_label left_predicate_label \\\n", + "7 nose organism subdivision \n", + "188 hypoblast (generic) blastoderm \n", + "647 compound organ component compound organ \n", + "661 tissue multicellular organism \n", + "663 anatomical group multicellular organism \n", + "... ... ... ... \n", + "35714 ocular blood vessel cranial blood vessel \n", + "35715 roof plate central nervous system \n", + "35716 roof plate multi-tissue structure \n", + "35720 flexural organ portion of tissue \n", + "35722 blood sinus multi-tissue structure \n", + "\n", + " right_subject_id right_object_id right_predicate_ids \\\n", + "7 ZFA:0000047 ZFA:0001308 \n", + "188 ZFA:0000117 ZFA:0001176 \n", + "647 ZFA:0001489 ZFA:0000496 \n", + "661 ZFA:0001477 ZFA:0001094 \n", + "663 ZFA:0001512 ZFA:0001094 \n", + "... ... ... ... 
\n", + "35714 UBERON:0002203 UBERON:0011362 \n", + "35715 UBERON:0003054 UBERON:0001017 \n", + "35716 UBERON:0003054 UBERON:0000481 \n", + "35720 UBERON:0011577 UBERON:0000479 \n", + "35722 UBERON:0006615 UBERON:0000481 \n", + "\n", + " right_subject_label right_object_label \\\n", + "7 peripheral olfactory organ organism subdivision \n", + "188 hypoblast blastoderm \n", + "647 compound organ component compound organ \n", + "661 portion of tissue whole organism \n", + "663 anatomical group whole organism \n", + "... ... ... \n", + "35714 vasculature of eye cranial blood vasculature \n", + "35715 roof plate central nervous system \n", + "35716 roof plate multi-tissue structure \n", + "35720 flexural organ tissue \n", + "35722 venous sinus multi-tissue structure \n", + "\n", + " right_predicate_labels left_subject_is_functional \\\n", + "7 \n", + "188 \n", + "647 \n", + "661 \n", + "663 \n", + "... ... ... \n", + "35714 \n", + "35715 \n", + "35716 \n", + "35720 \n", + "35722 \n", + "\n", + " left_object_is_functional subject_mapping_predicate \\\n", + "7 \n", + "188 \n", + "647 \n", + "661 \n", + "663 \n", + "... ... ... \n", + "35714 \n", + "35715 \n", + "35716 \n", + "35720 \n", + "35722 \n", + "\n", + " object_mapping_predicate right_intermediate_ids \\\n", + "7 \n", + "188 \n", + "647 \n", + "661 \n", + "663 \n", + "... ... ... \n", + "35714 \n", + "35715 \n", + "35716 \n", + "35720 \n", + "35722 \n", + "\n", + " subject_mapping_cardinality object_mapping_cardinality \n", + "7 \n", + "188 \n", + "647 \n", + "661 \n", + "663 \n", + "... ... ... 
\n", + "35714 \n", + "35715 \n", + "35716 \n", + "35720 \n", + "35722 \n", + "\n", + "[1375 rows x 20 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query(\"category=='NoRelationship'\")" + ] + }, + { + "cell_type": "markdown", + "id": "a931a547", + "metadata": {}, + "source": [ + "### LeftEntailedByRight\n", + "\n", + "In this case, UBERON has a direct edge that corresponds to a chain of two or more direct edges in ZFA (or vice versa).\n", + "\n", + "The edges must chain together via OWL semantics.\n", + "\n", + "For example, UBERON has `ganglion part-of nervous system`, while ZFA has `ganglion part-of PNS is-a nervous system`." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "188a5c06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
left_subject_idleft_object_idleft_predicate_idcategoryleft_subject_labelleft_object_labelleft_predicate_labelright_subject_idright_object_idright_predicate_ids...right_object_labelright_predicate_labelsleft_subject_is_functionalleft_object_is_functionalsubject_mapping_predicateobject_mapping_predicateright_intermediate_idssubject_mapping_cardinalityobject_mapping_cardinalityleft_source
35UBERON:0000016UBERON:0000949BFO:0000050LeftEntailedByRightendocrine pancreasendocrine systemZFA:0001260ZFA:0001158BFO:0000050|RO:0002131|RO:0002202...endocrine systemUBERON
91UBERON:0000045UBERON:0001016BFO:0000050LeftEntailedByRightganglionnervous systemZFA:0000190ZFA:0000396BFO:0000050|RO:0002131...nervous systemUBERON
182UBERON:0000086UBERON:0000992BFO:0000050LeftEntailedByRightzona pellucidaovaryZFA:0001111ZFA:0000403BFO:0000050|RO:0002131...ovaryUBERON
645UBERON:0000467UBERON:0000061rdfs:subClassOfLeftEntailedByRightanatomical systemanatomical structureZFA:0001439ZFA:0000037BFO:0000050|RO:0002131|rdfs:subClassOf...anatomical structureUBERON
658UBERON:0000477UBERON:0001062rdfs:subClassOfLeftEntailedByRightanatomical clusteranatomical entityZFA:0001478ZFA:0100000rdfs:subClassOf...zebrafish anatomical entityUBERON
..................................................................
35706ZFA:0007043ZFA:0001477rdfs:subClassOfLeftEntailedByRighthindbrain neural tubeportion of tissueUBERON:2007043UBERON:0000479BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R......tissueZFA
35711ZFA:0007048ZFA:0005145rdfs:subClassOfLeftEntailedByRightventral intermandibularis posteriormuscleUBERON:2007048UBERON:0001630RO:0002131|RO:0002202|RO:0002254|RO:0002258|RO......muscle organZFA
35717ZFA:0007068ZFA:0001486rdfs:subClassOfLeftEntailedByRightotic epitheliumepitheliumUBERON:0003249UBERON:0000483RO:0002131|RO:0002323|rdfs:subClassOf...epitheliumZFA
35721ZFA:0007072ZFA:0000010BFO:0000050LeftEntailedByRightblood sinuscardiovascular systemUBERON:0006615UBERON:0004535BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R......cardiovascular systemZFA
35723ZFA:0007073ZFA:0001643rdfs:subClassOfLeftEntailedByRightblood sinus cavityanatomical spaceUBERON:0034940UBERON:0000464rdfs:subClassOf...anatomical spaceZFA
\n", + "

883 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " left_subject_id left_object_id left_predicate_id category \\\n", + "35 UBERON:0000016 UBERON:0000949 BFO:0000050 LeftEntailedByRight \n", + "91 UBERON:0000045 UBERON:0001016 BFO:0000050 LeftEntailedByRight \n", + "182 UBERON:0000086 UBERON:0000992 BFO:0000050 LeftEntailedByRight \n", + "645 UBERON:0000467 UBERON:0000061 rdfs:subClassOf LeftEntailedByRight \n", + "658 UBERON:0000477 UBERON:0001062 rdfs:subClassOf LeftEntailedByRight \n", + "... ... ... ... ... \n", + "35706 ZFA:0007043 ZFA:0001477 rdfs:subClassOf LeftEntailedByRight \n", + "35711 ZFA:0007048 ZFA:0005145 rdfs:subClassOf LeftEntailedByRight \n", + "35717 ZFA:0007068 ZFA:0001486 rdfs:subClassOf LeftEntailedByRight \n", + "35721 ZFA:0007072 ZFA:0000010 BFO:0000050 LeftEntailedByRight \n", + "35723 ZFA:0007073 ZFA:0001643 rdfs:subClassOf LeftEntailedByRight \n", + "\n", + " left_subject_label left_object_label \\\n", + "35 endocrine pancreas endocrine system \n", + "91 ganglion nervous system \n", + "182 zona pellucida ovary \n", + "645 anatomical system anatomical structure \n", + "658 anatomical cluster anatomical entity \n", + "... ... ... \n", + "35706 hindbrain neural tube portion of tissue \n", + "35711 ventral intermandibularis posterior muscle \n", + "35717 otic epithelium epithelium \n", + "35721 blood sinus cardiovascular system \n", + "35723 blood sinus cavity anatomical space \n", + "\n", + " left_predicate_label right_subject_id right_object_id \\\n", + "35 ZFA:0001260 ZFA:0001158 \n", + "91 ZFA:0000190 ZFA:0000396 \n", + "182 ZFA:0001111 ZFA:0000403 \n", + "645 ZFA:0001439 ZFA:0000037 \n", + "658 ZFA:0001478 ZFA:0100000 \n", + "... ... ... ... \n", + "35706 UBERON:2007043 UBERON:0000479 \n", + "35711 UBERON:2007048 UBERON:0001630 \n", + "35717 UBERON:0003249 UBERON:0000483 \n", + "35721 UBERON:0006615 UBERON:0004535 \n", + "35723 UBERON:0034940 UBERON:0000464 \n", + "\n", + " right_predicate_ids ... \\\n", + "35 BFO:0000050|RO:0002131|RO:0002202 ... 
\n", + "91 BFO:0000050|RO:0002131 ... \n", + "182 BFO:0000050|RO:0002131 ... \n", + "645 BFO:0000050|RO:0002131|rdfs:subClassOf ... \n", + "658 rdfs:subClassOf ... \n", + "... ... ... \n", + "35706 BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R... ... \n", + "35711 RO:0002131|RO:0002202|RO:0002254|RO:0002258|RO... ... \n", + "35717 RO:0002131|RO:0002323|rdfs:subClassOf ... \n", + "35721 BFO:0000050|RO:0002131|RO:0002202|RO:0002254|R... ... \n", + "35723 rdfs:subClassOf ... \n", + "\n", + " right_object_label right_predicate_labels \\\n", + "35 endocrine system \n", + "91 nervous system \n", + "182 ovary \n", + "645 anatomical structure \n", + "658 zebrafish anatomical entity \n", + "... ... ... \n", + "35706 tissue \n", + "35711 muscle organ \n", + "35717 epithelium \n", + "35721 cardiovascular system \n", + "35723 anatomical space \n", + "\n", + " left_subject_is_functional left_object_is_functional \\\n", + "35 \n", + "91 \n", + "182 \n", + "645 \n", + "658 \n", + "... ... ... \n", + "35706 \n", + "35711 \n", + "35717 \n", + "35721 \n", + "35723 \n", + "\n", + " subject_mapping_predicate object_mapping_predicate \\\n", + "35 \n", + "91 \n", + "182 \n", + "645 \n", + "658 \n", + "... ... ... \n", + "35706 \n", + "35711 \n", + "35717 \n", + "35721 \n", + "35723 \n", + "\n", + " right_intermediate_ids subject_mapping_cardinality \\\n", + "35 \n", + "91 \n", + "182 \n", + "645 \n", + "658 \n", + "... ... ... \n", + "35706 \n", + "35711 \n", + "35717 \n", + "35721 \n", + "35723 \n", + "\n", + " object_mapping_cardinality left_source \n", + "35 UBERON \n", + "91 UBERON \n", + "182 UBERON \n", + "645 UBERON \n", + "658 UBERON \n", + "... ... ... 
\n", + "35706 ZFA \n", + "35711 ZFA \n", + "35717 ZFA \n", + "35721 ZFA \n", + "35723 ZFA \n", + "\n", + "[883 rows x 21 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query(\"category=='LeftEntailedByRight'\")" + ] + }, + { + "cell_type": "markdown", + "id": "06b37850", + "metadata": {}, + "source": [ + "## Breaking things down by direction\n", + "\n", + "By default, the command will do diffs in both directions, unless `--no-bidirectional` is passed.\n", + "\n", + "We can post-hoc break the summary statistics down based on the source of the left term" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e07d6097", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "unique_srcs = df['left_source'].unique()\n", + "n_unique = len(unique_srcs)\n", + "fig, axs = plt.subplots(nrows=n_unique, figsize=(10, 6*n_unique))\n", + "\n", + "for ax, left_source in zip(axs, unique_srcs):\n", + " sub_df = df[df['left_source'] == left_source]\n", + " sns.countplot(data=sub_df, x='category', hue='left_predicate_id', ax=ax)\n", + " ax.set_title(f'Count of each type broken down by left_source = {left_source}')\n", + " ax.set_xlabel('Type')\n", + " ax.set_ylabel('Count')\n", + " ax.set_yscale('log') # Make y-axis logarithmic\n", + " ax.tick_params(axis='x', rotation=45) # Make x-axis labels diagonal\n", + "\n", + "plt.tight_layout() # Ensure layout is tight so labels don't get cut off\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "5f2b0bc2", + "metadata": {}, + "source": [ + "As expected, `MissingMapping` dominates the UBERON->ZFA direction. Note that these mappings are not actually \"missing\" in the sense of being incomplete; we expect most edges in UBERON to be non-mappable to ZFA due to the difference in taxonomic scope."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cae729ec", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Commands/LogicalDefinitions.ipynb b/notebooks/Commands/LogicalDefinitions.ipynb new file mode 100644 index 000000000..a7c13c384 --- /dev/null +++ b/notebooks/Commands/LogicalDefinitions.ipynb @@ -0,0 +1,1002 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "738137ef", + "metadata": {}, + "source": [ + "# OAK logical-definitions command\n", + "\n", + "This notebook is intended as a supplement to the [main OAK CLI docs](https://incatools.github.io/ontology-access-kit/cli.html).\n", + "\n", + "This notebook provides examples for the `logical-definitions` command, which can be used to **lookup and summarize logical defs**\n", + "\n", + "For more on logical definitions, see [Logical Definitions](https://incatools.github.io/ontology-access-kit/guide/logical-definitions.html) in the OAK guide.\n", + "\n", + "## Help Option\n", + "\n", + "You can get help on any OAK command using `--help`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "838586ce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Usage: runoak logical-definitions [OPTIONS] [TERMS]...\r\n", + "\r\n", + " Show all logical definitions for a term or terms.\r\n", + "\r\n", + " To show all logical definitions in an ontology, pass the \".all\" query term\r\n", + "\r\n", + " Example; first create an alias:\r\n", + "\r\n", + " alias pato=\"runoak -i 
obo:sqlite:pato\"\r\n", + "\r\n", + " Then run the query:\r\n", + "\r\n", + " pato logical-definitions .all\r\n", + "\r\n", + " By default, \".all\" will query all axioms for all terms including merged\r\n", + " terms; to restrict to only the current terms, use an ID query:\r\n", + "\r\n", + " pato logical-definitions i^PATO\r\n", + "\r\n", + " You can also restrict to branches:\r\n", + "\r\n", + " pato logical-definitions .desc//p=i \"physical object quality\"\r\n", + "\r\n", + " By default, the output is a subset of OboGraph datamodel rendered as YAML,\r\n", + " e.g.\r\n", + "\r\n", + " definedClassId: PATO:0045071 genusIds: - PATO:0001439\r\n", + " restrictions: - fillerId: PATO:0000461 propertyId: RO:0015010\r\n", + "\r\n", + " You can also specify CSV to generate a flattened form of this.\r\n", + "\r\n", + " Example:\r\n", + "\r\n", + " pato logical-definitions .all --output-type csv\r\n", + "\r\n", + " You can optionally choose to \"--matrix-axes\" to transform the output to a\r\n", + " matrix form. This is a comma-separated pair of axes, where each element is a\r\n", + " logical definition element type: \"f\" for filler, \"p\" for predicate, \"g\" for\r\n", + " genus, \"d\" for defined class.\r\n", + "\r\n", + " Example:\r\n", + "\r\n", + " - Each property/predicate is a column - For repeated properties, columns of\r\n", + " the form prop_1, prop_2, ... 
are generated\r\n", + "\r\n", + " Example:\r\n", + "\r\n", + " pato logical-definitions .all --matrix-axes d,p --output-type csv\r\n", + "\r\n", + " This will generate a row for each defined class with a logical definition,\r\n", + " with columns for each predicate (\"genus\" is treated as a predicate here).\r\n", + "\r\n", + " Limitations:\r\n", + "\r\n", + " Currently this only works for definitions that follow a basic genus-\r\n", + " differentia pattern, which is what is currently represented in the OboGraph\r\n", + " datamodel.\r\n", + "\r\n", + " Consider using the \"axioms\" command for inspection of complex nested OWL\r\n", + " axioms.\r\n", + "\r\n", + " More examples:\r\n", + "\r\n", + " https://github.com/INCATools/ontology-access-\r\n", + " kit/blob/main/notebooks/Commands/LogicalDefinitions.ipynb\r\n", + "\r\n", + " Python API:\r\n", + "\r\n", + " https://incatools.github.io/ontology-access-kit/interfaces/obograph\r\n", + "\r\n", + " Data model:\r\n", + "\r\n", + " https://w3id.org/oak/obograph\r\n", + "\r\n", + "Options:\r\n", + " --unmelt / --no-unmelt Flatten to a wide table [default: no-\r\n", + " unmelt]\r\n", + " --matrix-axes TEXT If specified, transform results to matrix\r\n", + " using these row and column axes. Examples:\r\n", + " d,p; f,g\r\n", + " -p, --predicates TEXT A comma-separated list of predicates. This\r\n", + " may be a shorthand (i, p) or CURIE\r\n", + " --autolabel / --no-autolabel If set, results will automatically have\r\n", + " labels assigned [default: autolabel]\r\n", + " -O, --output-type TEXT Desired output type\r\n", + " -o, --output FILENAME Output file, e.g. 
obo file\r\n", + " --if-absent [absent-only|present-only]\r\n", + " determines behavior when the value is not\r\n", + " present or is empty.\r\n", + " -S, --set-value TEXT the value to set for all terms for the given\r\n", + " property.\r\n", + " --help Show this message and exit.\r\n" + ] + } + ], + "source": [ + "!runoak logical-definitions --help" + ] + }, + { + "cell_type": "markdown", + "id": "5b899d5c", + "metadata": {}, + "source": [ + "## Set up an alias\n", + "\n", + "For convenience we will set up an alias for use in this notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6050d1a5", + "metadata": {}, + "outputs": [], + "source": [ + "alias uberon runoak -i sqlite:obo:uberon" + ] + }, + { + "cell_type": "markdown", + "id": "c906c109", + "metadata": {}, + "source": [ + "## Fetching logical definitions for individual terms\n", + "\n", + "First we will pass in a simple list of terms to the command.\n", + "\n", + "Like most OAK commands, this command accepts lists of either IDs, labels, queries, or boolean combinations thereof" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "572be78c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "definedClassId: UBERON:0009565\r\n", + "genusIds:\r\n", + "- UBERON:0001705\r\n", + "restrictions:\r\n", + "- fillerId: UBERON:0002389\r\n", + " propertyId: BFO:0000050\r\n", + "\r\n", + "---\r\n", + "definedClassId: UBERON:0009567\r\n", + "genusIds:\r\n", + "- UBERON:0001705\r\n", + "restrictions:\r\n", + "- fillerId: UBERON:0001466\r\n", + " propertyId: BFO:0000050\r\n" + ] + } + ], + "source": [ + "uberon logical-definitions fingernail toenail" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3b3c4afa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"definedClassId\tdefinedClassId_label\tgenusIds\tgenusIds_label\trestrictions\trestrictionsPropertyIds\trestrictionsPropertyIds_label\trestrictionsFillerIds\trestrictionsFillerIds_label\r", + "\r\n", + "UBERON:0009565\tnail of manual digit\tUBERON:0001705\tnail\tBFO:0000050=UBERON:0002389\tBFO:0000050\tpart of\tUBERON:0002389\tmanual digit\r", + "\r\n", + "UBERON:0009567\tnail of pedal digit\tUBERON:0001705\tnail\tBFO:0000050=UBERON:0001466\tBFO:0000050\tpart of\tUBERON:0001466\tpedal digit\r", + "\r\n" + ] + } + ], + "source": [ + "uberon logical-definitions fingernail toenail -O csv" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c9900ca0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Term]\r\n", + "id: UBERON:0009565 ! nail of manual digit\r\n", + "intersection_of: UBERON:0001705 ! nail\r\n", + "intersection_of: BFO:0000050 UBERON:0002389 ! manual digit\r\n", + "\r\n", + "\r\n", + "[Term]\r\n", + "id: UBERON:0009567 ! nail of pedal digit\r\n", + "intersection_of: UBERON:0001705 ! nail\r\n", + "intersection_of: BFO:0000050 UBERON:0001466 ! 
pedal digit\r\n", + "\r\n", + "\r\n" + ] + } + ], + "source": [ + "uberon logical-definitions fingernail toenail -O obo" + ] + }, + { + "cell_type": "markdown", + "id": "d08c6000", + "metadata": {}, + "source": [ + "## Matrix views\n", + "\n", + "We can use the `--matrix-axes` option to summarize a large collection of logical definitions as a wide table.\n", + "\n", + "This takes two values, separated by a comma:\n", + "\n", + "- d: defined_class\n", + "- f: filler\n", + "- g: genus\n", + "- p: predicate\n", + "\n", + "### Defined class x Predicate\n", + "\n", + "In the following example `d,p` will create a matrix whose rows are defined classes and whose columns are predicates" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ae4748a0", + "metadata": {}, + "outputs": [], + "source": [ + "uberon logical-definitions -p p .desc//p=i \"bone element\" .and .desc//p=i,p UBERON:0002544 --matrix-axes d,p -O csv -o output/uberon-digit-defs-dp.tsv" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dcb7bb7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
defined_classdefined_class_labelgenusgenus_labelpart_ofpart_of_label
0UBERON:0001436phalanx of manusUBERON:0003221phalanxUBERON:0002389manual digit
1UBERON:0002234proximal phalanx of manusUBERON:0004302proximal phalanxUBERON:0002389manual digit
2UBERON:0004328proximal phalanx of manual digit 2UBERON:0004302proximal phalanxUBERON:0003622manual digit 2
3UBERON:0004329proximal phalanx of manual digit 3UBERON:0004302proximal phalanxUBERON:0003623manual digit 3
4UBERON:0004330proximal phalanx of manual digit 4UBERON:0004302proximal phalanxUBERON:0003624manual digit 4
.....................
59UBERON:0014503proximal phalanx of digit 3UBERON:0004302proximal phalanxUBERON:0006050digit 3
60UBERON:0014504proximal phalanx of digit 4UBERON:0004302proximal phalanxUBERON:0006051digit 4
61UBERON:0014505proximal phalanx of digit 5UBERON:0004302proximal phalanxUBERON:0006052digit 5
62UBERON:0004248pedal digit boneUBERON:0001474bone elementUBERON:0001466pedal digit
63UBERON:0004249manual digit boneUBERON:0001474bone elementUBERON:0002389manual digit
\n", + "

64 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " defined_class defined_class_label genus \\\n", + "0 UBERON:0001436 phalanx of manus UBERON:0003221 \n", + "1 UBERON:0002234 proximal phalanx of manus UBERON:0004302 \n", + "2 UBERON:0004328 proximal phalanx of manual digit 2 UBERON:0004302 \n", + "3 UBERON:0004329 proximal phalanx of manual digit 3 UBERON:0004302 \n", + "4 UBERON:0004330 proximal phalanx of manual digit 4 UBERON:0004302 \n", + ".. ... ... ... \n", + "59 UBERON:0014503 proximal phalanx of digit 3 UBERON:0004302 \n", + "60 UBERON:0014504 proximal phalanx of digit 4 UBERON:0004302 \n", + "61 UBERON:0014505 proximal phalanx of digit 5 UBERON:0004302 \n", + "62 UBERON:0004248 pedal digit bone UBERON:0001474 \n", + "63 UBERON:0004249 manual digit bone UBERON:0001474 \n", + "\n", + " genus_label part_of part_of_label \n", + "0 phalanx UBERON:0002389 manual digit \n", + "1 proximal phalanx UBERON:0002389 manual digit \n", + "2 proximal phalanx UBERON:0003622 manual digit 2 \n", + "3 proximal phalanx UBERON:0003623 manual digit 3 \n", + "4 proximal phalanx UBERON:0003624 manual digit 4 \n", + ".. ... ... ... \n", + "59 proximal phalanx UBERON:0006050 digit 3 \n", + "60 proximal phalanx UBERON:0006051 digit 4 \n", + "61 proximal phalanx UBERON:0006052 digit 5 \n", + "62 bone element UBERON:0001466 pedal digit \n", + "63 bone element UBERON:0002389 manual digit \n", + "\n", + "[64 rows x 6 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv(\"output/uberon-digit-defs-dp.tsv\", sep=\"\\t\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "7448e67f", + "metadata": {}, + "source": [ + "### Filler x Genus\n", + "\n", + "We can flip this around, and have each row be a filler (`f`) and each column be a genus (`g`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "27ef3274", + "metadata": {}, + "outputs": [], + "source": [ + "uberon logical-definitions -p p .desc//p=i \"bone element\" .and .desc//p=i,p UBERON:0002544 --matrix-axes f,g -O csv -o output/uberon-digit-defs-fg.tsv" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "d9357cb9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fillerfiller_labelphalanxphalanx_labelproximal_phalanxproximal_phalanx_labelmiddle_phalanxmiddle_phalanx_labeldistal_phalanxdistal_phalanx_labelbone_elementbone_element_label
0UBERON:0002387pesUBERON:0001449phalanx of pesNaNNaNNaNNaNNaNNaNNaNNaN
1UBERON:0009563pastern region of limbUBERON:0009558pastern boneNaNNaNNaNNaNNaNNaNNaNNaN
2UBERON:0002389manual digitUBERON:0001436phalanx of manusUBERON:0002234proximal phalanx of manusUBERON:0003864middle phalanx of manusUBERON:0003865distal phalanx of manusUBERON:0004249manual digit bone
3UBERON:0003622manual digit 2UBERON:0003636manual digit 2 phalanxUBERON:0004328proximal phalanx of manual digit 2UBERON:0004320middle phalanx of manual digit 2UBERON:0004311distal phalanx of manual digit 2NaNNaN
4UBERON:0003623manual digit 3UBERON:0003637manual digit 3 phalanxUBERON:0004329proximal phalanx of manual digit 3UBERON:0004321middle phalanx of manual digit 3UBERON:0004312distal phalanx of manual digit 3NaNNaN
5UBERON:0003624manual digit 4UBERON:0003638manual digit 4 phalanxUBERON:0004330proximal phalanx of manual digit 4UBERON:0004322middle phalanx of manual digit 4UBERON:0004313distal phalanx of manual digit 4NaNNaN
6UBERON:0001463manual digit 1UBERON:0003620manual digit 1 phalanxUBERON:0004338proximal phalanx of manual digit 1NaNNaNUBERON:0004337distal phalanx of manual digit 1NaNNaN
7UBERON:0003625manual digit 5UBERON:0003639manual digit 5 phalanxUBERON:0004331proximal phalanx of manual digit 5UBERON:0004323middle phalanx of manual digit 5UBERON:0004314distal phalanx of manual digit 5NaNNaN
8UBERON:0001466pedal digitNaNNaNUBERON:0003868proximal phalanx of pesUBERON:0003866middle phalanx of pesUBERON:0003867distal phalanx of pesUBERON:0004248pedal digit bone
9UBERON:0003632pedal digit 2UBERON:0003641pedal digit 2 phalanxUBERON:0004333proximal phalanx of pedal digit 2UBERON:0004324middle phalanx of pedal digit 2UBERON:0004316distal phalanx of pedal digit 2NaNNaN
10UBERON:0003633pedal digit 3UBERON:0003642pedal digit 3 phalanxUBERON:0004334proximal phalanx of pedal digit 3UBERON:0004325middle phalanx of pedal digit 3UBERON:0004317distal phalanx of pedal digit 3NaNNaN
11UBERON:0003634pedal digit 4UBERON:0003862pedal digit 4 phalanxUBERON:0004335proximal phalanx of pedal digit 4UBERON:0004326middle phalanx of pedal digit 4UBERON:0004318distal phalanx of pedal digit 4NaNNaN
12UBERON:0003631pedal digit 1UBERON:0003640pedal digit 1 phalanxUBERON:0004332proximal phalanx of pedal digit 1NaNNaNUBERON:0004315distal phalanx of pedal digit 1NaNNaN
13UBERON:0003635pedal digit 5UBERON:0003863pedal digit 5 phalanxUBERON:0004336proximal phalanx of pedal digit 5UBERON:0004327middle phalanx of pedal digit 5UBERON:0004319distal phalanx of pedal digit 5NaNNaN
14UBERON:0012137pedal digit 7UBERON:4100009pedal digit 7 phalanxNaNNaNNaNNaNNaNNaNNaNNaN
15UBERON:0006049digit 2NaNNaNUBERON:0014502proximal phalanx of digit 2UBERON:0014488middle phalanx of digit 2UBERON:0014484distal phalanx of digit 2NaNNaN
16UBERON:0006050digit 3NaNNaNUBERON:0014503proximal phalanx of digit 3UBERON:0014489middle phalanx of digit 3UBERON:0014485distal phalanx of digit 3NaNNaN
17UBERON:0006051digit 4NaNNaNUBERON:0014504proximal phalanx of digit 4UBERON:0014490middle phalanx of digit 4UBERON:0014486distal phalanx of digit 4NaNNaN
18UBERON:0006048digit 1NaNNaNUBERON:0014501proximal phalanx of digit 1NaNNaNUBERON:0014483distal phalanx of digit 1NaNNaN
19UBERON:0006052digit 5NaNNaNUBERON:0014505proximal phalanx of digit 5UBERON:0014491middle phalanx of digit 5UBERON:0014487distal phalanx of digit 5NaNNaN
\n", + "
" + ], + "text/plain": [ + " filler filler_label phalanx \\\n", + "0 UBERON:0002387 pes UBERON:0001449 \n", + "1 UBERON:0009563 pastern region of limb UBERON:0009558 \n", + "2 UBERON:0002389 manual digit UBERON:0001436 \n", + "3 UBERON:0003622 manual digit 2 UBERON:0003636 \n", + "4 UBERON:0003623 manual digit 3 UBERON:0003637 \n", + "5 UBERON:0003624 manual digit 4 UBERON:0003638 \n", + "6 UBERON:0001463 manual digit 1 UBERON:0003620 \n", + "7 UBERON:0003625 manual digit 5 UBERON:0003639 \n", + "8 UBERON:0001466 pedal digit NaN \n", + "9 UBERON:0003632 pedal digit 2 UBERON:0003641 \n", + "10 UBERON:0003633 pedal digit 3 UBERON:0003642 \n", + "11 UBERON:0003634 pedal digit 4 UBERON:0003862 \n", + "12 UBERON:0003631 pedal digit 1 UBERON:0003640 \n", + "13 UBERON:0003635 pedal digit 5 UBERON:0003863 \n", + "14 UBERON:0012137 pedal digit 7 UBERON:4100009 \n", + "15 UBERON:0006049 digit 2 NaN \n", + "16 UBERON:0006050 digit 3 NaN \n", + "17 UBERON:0006051 digit 4 NaN \n", + "18 UBERON:0006048 digit 1 NaN \n", + "19 UBERON:0006052 digit 5 NaN \n", + "\n", + " phalanx_label proximal_phalanx \\\n", + "0 phalanx of pes NaN \n", + "1 pastern bone NaN \n", + "2 phalanx of manus UBERON:0002234 \n", + "3 manual digit 2 phalanx UBERON:0004328 \n", + "4 manual digit 3 phalanx UBERON:0004329 \n", + "5 manual digit 4 phalanx UBERON:0004330 \n", + "6 manual digit 1 phalanx UBERON:0004338 \n", + "7 manual digit 5 phalanx UBERON:0004331 \n", + "8 NaN UBERON:0003868 \n", + "9 pedal digit 2 phalanx UBERON:0004333 \n", + "10 pedal digit 3 phalanx UBERON:0004334 \n", + "11 pedal digit 4 phalanx UBERON:0004335 \n", + "12 pedal digit 1 phalanx UBERON:0004332 \n", + "13 pedal digit 5 phalanx UBERON:0004336 \n", + "14 pedal digit 7 phalanx NaN \n", + "15 NaN UBERON:0014502 \n", + "16 NaN UBERON:0014503 \n", + "17 NaN UBERON:0014504 \n", + "18 NaN UBERON:0014501 \n", + "19 NaN UBERON:0014505 \n", + "\n", + " proximal_phalanx_label middle_phalanx \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 
proximal phalanx of manus UBERON:0003864 \n", + "3 proximal phalanx of manual digit 2 UBERON:0004320 \n", + "4 proximal phalanx of manual digit 3 UBERON:0004321 \n", + "5 proximal phalanx of manual digit 4 UBERON:0004322 \n", + "6 proximal phalanx of manual digit 1 NaN \n", + "7 proximal phalanx of manual digit 5 UBERON:0004323 \n", + "8 proximal phalanx of pes UBERON:0003866 \n", + "9 proximal phalanx of pedal digit 2 UBERON:0004324 \n", + "10 proximal phalanx of pedal digit 3 UBERON:0004325 \n", + "11 proximal phalanx of pedal digit 4 UBERON:0004326 \n", + "12 proximal phalanx of pedal digit 1 NaN \n", + "13 proximal phalanx of pedal digit 5 UBERON:0004327 \n", + "14 NaN NaN \n", + "15 proximal phalanx of digit 2 UBERON:0014488 \n", + "16 proximal phalanx of digit 3 UBERON:0014489 \n", + "17 proximal phalanx of digit 4 UBERON:0014490 \n", + "18 proximal phalanx of digit 1 NaN \n", + "19 proximal phalanx of digit 5 UBERON:0014491 \n", + "\n", + " middle_phalanx_label distal_phalanx \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 middle phalanx of manus UBERON:0003865 \n", + "3 middle phalanx of manual digit 2 UBERON:0004311 \n", + "4 middle phalanx of manual digit 3 UBERON:0004312 \n", + "5 middle phalanx of manual digit 4 UBERON:0004313 \n", + "6 NaN UBERON:0004337 \n", + "7 middle phalanx of manual digit 5 UBERON:0004314 \n", + "8 middle phalanx of pes UBERON:0003867 \n", + "9 middle phalanx of pedal digit 2 UBERON:0004316 \n", + "10 middle phalanx of pedal digit 3 UBERON:0004317 \n", + "11 middle phalanx of pedal digit 4 UBERON:0004318 \n", + "12 NaN UBERON:0004315 \n", + "13 middle phalanx of pedal digit 5 UBERON:0004319 \n", + "14 NaN NaN \n", + "15 middle phalanx of digit 2 UBERON:0014484 \n", + "16 middle phalanx of digit 3 UBERON:0014485 \n", + "17 middle phalanx of digit 4 UBERON:0014486 \n", + "18 NaN UBERON:0014483 \n", + "19 middle phalanx of digit 5 UBERON:0014487 \n", + "\n", + " distal_phalanx_label bone_element bone_element_label \n", + "0 NaN NaN 
NaN \n", + "1 NaN NaN NaN \n", + "2 distal phalanx of manus UBERON:0004249 manual digit bone \n", + "3 distal phalanx of manual digit 2 NaN NaN \n", + "4 distal phalanx of manual digit 3 NaN NaN \n", + "5 distal phalanx of manual digit 4 NaN NaN \n", + "6 distal phalanx of manual digit 1 NaN NaN \n", + "7 distal phalanx of manual digit 5 NaN NaN \n", + "8 distal phalanx of pes UBERON:0004248 pedal digit bone \n", + "9 distal phalanx of pedal digit 2 NaN NaN \n", + "10 distal phalanx of pedal digit 3 NaN NaN \n", + "11 distal phalanx of pedal digit 4 NaN NaN \n", + "12 distal phalanx of pedal digit 1 NaN NaN \n", + "13 distal phalanx of pedal digit 5 NaN NaN \n", + "14 NaN NaN NaN \n", + "15 distal phalanx of digit 2 NaN NaN \n", + "16 distal phalanx of digit 3 NaN NaN \n", + "17 distal phalanx of digit 4 NaN NaN \n", + "18 distal phalanx of digit 1 NaN NaN \n", + "19 distal phalanx of digit 5 NaN NaN " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"output/uberon-digit-defs-fg.tsv\", sep=\"\\t\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "9b6213e2", + "metadata": {}, + "source": [ + "Note that this view immediately shows the *density* of the lattice. We can identify what might potentially be gaps;\n", + "\n", + "for example, the cells for \"middle phalanx\" and digit 1 of the hand (manual) and foot (pedal) are empty. We might think\n", + "this means we left out a potential term. 
\n", + "\n", + "However, this omission is actually intentional due to the lack of a middle/intermediate phalanx on the thumb / big toe:\n", + " \n", + "![img](https://upload.wikimedia.org/wikipedia/commons/thumb/a/ab/Scheme_human_hand_bones-en.svg/800px-Scheme_human_hand_bones-en.svg.png)\n", + "\n", + "(it may be the case this phalanx is present in other species, in which case a term may be added with negative taxon constraints)" + ] + }, + { + "cell_type": "markdown", + "id": "78894237", + "metadata": {}, + "source": [ + "## Analyzing and gap filling\n", + "\n", + "OAK has experimental features for analyzing and gap-filling logical definitions; these are not yet exposed on the command line" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54e9d757", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py index 4bd7d14f6..2fd39b94e 100644 --- a/src/oaklib/cli.py +++ b/src/oaklib/cli.py @@ -43,6 +43,7 @@ from linkml_runtime.dumpers import json_dumper, yaml_dumper from linkml_runtime.utils.introspection import package_schemaview from prefixmaps.io.parser import load_multi_context +from pydantic import BaseModel from sssom.parsers import parse_sssom_table, to_mapping_set_document import oaklib.datamodels.taxon_constraints as tcdm @@ -56,6 +57,7 @@ Edge, Graph, GraphDocument, + LogicalDefinitionAxiom, Meta, PrefixDeclaration, ) @@ -141,12 +143,17 @@ from oaklib.utilities import table_filler from oaklib.utilities.apikey_manager import set_apikey_value from 
oaklib.utilities.associations.association_differ import AssociationDiffer +from oaklib.utilities.axioms import ( + logical_definition_analyzer, + logical_definition_summarizer, +) from oaklib.utilities.iterator_utils import chunk from oaklib.utilities.kgcl_utilities import ( generate_change_id, parse_kgcl_files, write_kgcl, ) +from oaklib.utilities.lexical import patternizer from oaklib.utilities.lexical.lexical_indexer import ( DEFAULT_QUALIFIER, add_labels_from_uris, @@ -571,9 +578,14 @@ def curies_from_file(file: IO) -> Iterator[CURIE]: :param file: :return: """ + line_no = 0 for line in file.readlines(): + line_no += 1 m = re.match(r"^(\S+)", line) - yield m.group(1) + curie = m.group(1) + if curie == "id" and line_no == 1: + continue + yield curie def query_terms_iterator(query_terms: NESTED_LIST, impl: BasicOntologyInterface) -> Iterator[CURIE]: @@ -1126,6 +1138,10 @@ def obsoletes( runoak -i obolibrary:go.obo obsoletes --show-migration-relationships GO:0000187 GO:0000188 + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/TaxonConstraints.ipynb + Python API: https://incatools.github.io/ontology-access-kit/interfaces/basic @@ -1846,9 +1862,9 @@ def tree( output: TextIO, ): """ - Display an ancestor graph as an ascii/markdown tree + Display an ancestor graph as an ascii/markdown tree. - For general instructions, see the viz command, which this is analogous too + For general instructions, see the viz command, which this is analogous to.
Example: @@ -1936,7 +1952,7 @@ def tree( if isinstance(impl, OboGraphInterface): graph = impl.relationships_to_graph(rels) else: - raise AssertionError(f"{impl} needs to of type OboGraphInterface") + raise AssertionError(f"{impl} needs to be of type OboGraphInterface") else: raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill") else: @@ -2148,7 +2164,11 @@ def paths( go paths -p i,p 'nuclear membrane' --target cytoplasm --narrow | go viz --fill-gaps - This visualizes the path by first exporting the path as a flat list, then passing the - results to viz, using the fill-gaps option + results to viz, using the fill-gaps option. + + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/Paths.ipynb """ impl = settings.impl writer = _get_writer(output_type, impl, StreamingCsvWriter) @@ -3158,6 +3178,10 @@ def relationships( runoak -i uberon.db relationships -p RO:0002178 .desc//p=i "artery" .and .desc//p=i,p "limb" + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/Relationships.ipynb + Python API: https://incatools.github.io/ontology-access-kit/interfaces/basic @@ -3232,6 +3256,10 @@ def relationships( @click.option( "--unmelt/--no-unmelt", default=False, show_default=True, help="Flatten to a wide table" ) +@click.option( + "--matrix-axes", + help="If specified, transform results to matrix using these row and column axes. Examples: d,p; f,g", +) @predicates_option @autolabel_option @output_type_option @@ -3246,6 +3274,7 @@ def logical_definitions( output: str, if_absent: bool, unmelt: bool, + matrix_axes: str, set_value: str, ): """ @@ -3285,14 +3314,21 @@ def logical_definitions( pato logical-definitions .all --output-type csv - You can optionally choose to "unmelt" or flatten this, such that: + You can optionally use "--matrix-axes" to transform the output to a matrix form.
+ This is a comma-separated pair of axes, where each element is a logical definition element + type: "f" for filler, "p" for predicate, "g" for genus, "d" for defined class. + + Example: - Each property/predicate is a column - For repeated properties, columns of the form prop_1, prop_2, ... are generated Example: - pato logical-definitions .all --unmelt --output-type csv + pato logical-definitions .all --matrix-axes d,p --output-type csv + + This will generate a row for each defined class with a logical definition, with columns + for each predicate ("genus" is treated as a predicate here). Limitations: @@ -3301,6 +3337,10 @@ def logical_definitions( Consider using the "axioms" command for inspection of complex nested OWL axioms. + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/LogicalDefinitions.ipynb + Python API: https://incatools.github.io/ontology-access-kit/interfaces/obograph @@ -3314,6 +3354,13 @@ def logical_definitions( writer.output = output writer.autolabel = autolabel actual_predicates = _process_predicates_arg(predicates) + + def _exclude_ldef(ldef: LogicalDefinitionAxiom) -> bool: + if actual_predicates: + if not any(r for r in ldef.restrictions if r.propertyId in actual_predicates): + return True + return False + if set_value: raise NotImplementedError label_fields = [ @@ -3323,39 +3370,54 @@ def logical_definitions( "restrictionsPropertyIds", "restrictionsFillerIds", ] - if isinstance(impl, OboGraphInterface): - # curies = list(query_terms_iterator(terms, impl)) - has_relationships = defaultdict(bool) - curies = [] - if unmelt: - ldef_flattener = LogicalDefinitionFlattener( - labeler=lambda x: impl.label(x), curie_converter=impl.converter - ) - writer.heterogeneous_keys = True + if not isinstance(impl, OboGraphInterface): + raise NotImplementedError(f"Cannot execute this using {type(impl)}") + # curies = list(query_terms_iterator(terms, impl)) + has_relationships = defaultdict(bool) + curies = [] + 
if matrix_axes: + config = logical_definition_summarizer.parse_axes_to_config(matrix_axes) + ldefs = [] for curie_it in chunk(query_terms_iterator(terms, impl)): - curie_chunk = list(curie_it) - curies += curie_chunk - for ldef in impl.logical_definitions(curie_chunk): - if actual_predicates: - if not any(r for r in ldef.restrictions if r.propertyId in actual_predicates): - continue - if ldef.definedClassId: - has_relationships[ldef.definedClassId] = True - if if_absent and if_absent == IfAbsent.absent_only.value: - continue - if unmelt: - flat_obj = ldef_flattener.convert(ldef) - writer.emit(flat_obj, label_fields=list(flat_obj.keys())) - else: - writer.emit(ldef, label_fields=label_fields) - if if_absent and if_absent == IfAbsent.absent_only.value: - for curie in curies: - if not has_relationships.get(curie, False): - writer.emit({"noLogicalDefinition": curie}) + ldefs.extend(list(impl.logical_definitions(curie_it))) + ldefs = [ldef for ldef in ldefs if not _exclude_ldef(ldef)] + objs = logical_definition_summarizer.logical_definitions_to_matrix(impl, ldefs, config) + writer.heterogeneous_keys = True + label_fields = None + for obj in objs: + if label_fields is None: + label_fields = list(obj.keys()) + writer.emit(obj, label_fields=label_fields) writer.finish() writer.file.close() - else: - raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}") + return + if unmelt: + logging.warning("Deprecated: use --matrix-type d,p instead") + ldef_flattener = LogicalDefinitionFlattener( + labeler=lambda x: impl.label(x), curie_converter=impl.converter + ) + writer.heterogeneous_keys = True + for curie_it in chunk(query_terms_iterator(terms, impl)): + curie_chunk = list(curie_it) + curies += curie_chunk + for ldef in impl.logical_definitions(curie_chunk): + if _exclude_ldef(ldef): + continue + if ldef.definedClassId: + has_relationships[ldef.definedClassId] = True + if if_absent and if_absent == IfAbsent.absent_only.value: + continue + if unmelt: + 
flat_obj = ldef_flattener.convert(ldef) + writer.emit(flat_obj, label_fields=list(flat_obj.keys())) + else: + writer.emit(ldef, label_fields=label_fields) + if if_absent and if_absent == IfAbsent.absent_only.value: + for curie in curies: + if not has_relationships.get(curie, False): + writer.emit({"noLogicalDefinition": curie}) + writer.finish() + writer.file.close() @main.command() @@ -3920,6 +3982,10 @@ def taxon_constraints( runoak -i sqlite:obo:uberon taxon-constraints UBERON:0003884 UBERON:0003941 -p i,p + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/TaxonConstraints.ipynb + This command is a wrapper onto taxon_constraints_utils: - https://incatools.github.io/ontology-access-kit/src/oaklib.utilities.taxon.taxon_constraints_utils @@ -3989,6 +4055,10 @@ def apply_taxon_constraints( runoak -i db/go.db eval-taxon-constraints -p i,p -E tests/input/go-evo-gains-losses.csv + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/Apply.ipynb + """ actual_predicates = _process_predicates_arg(predicates) impl = settings.impl @@ -4101,6 +4171,9 @@ def associations( runoak --i src/oaklib/conf/go-dictybase-input-spec.yaml associations -p i,p GO:0008104 + More examples: + + https://github.com/INCATools/ontology-access-kit/blob/main/notebooks/Commands/Associations.ipynb """ impl = settings.impl writer = _get_writer(output_type, impl, StreamingCsvWriter) @@ -4564,7 +4637,9 @@ def diff_associations( ) @output_option @output_type_option +@click.argument("terms", nargs=-1) def validate( + terms: List[str], output: str, cutoff: int, skip_structural_validation: bool, @@ -4603,32 +4678,41 @@ def validate( writer.output = output if rule: skip_ontology_rules = False - if isinstance(impl, ValidatorInterface): - if not skip_structural_validation: - counts = defaultdict(int) - for result in impl.validate(): - key = (result.type, result.predicate) - n = counts[key] - n += 1 - counts[key] = n - 
if n % 1000 == 0: - logging.info(f"Reached {n} results with {key}") - if n == cutoff: - print(f"**TRUNCATING RESULTS FOR {key} at {cutoff}") - elif n < cutoff: - writer.emit(result) - # print(yaml_dumper.dumps(result)) - for k, v in counts.items(): - print(f"{k}:: {v}") - if not skip_ontology_rules: - rr = RuleRunner() - if rule: - rr.set_rules(rule) - for result in rr.run(impl): - writer.emit(result) - writer.finish() - else: + if not isinstance(impl, ValidatorInterface): raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}") + if terms: + # note: currently the validate interface doesn't supported filtered lists, + # so we post-hoc filter. This is potentially inefficient. + entities = list(query_terms_iterator(terms, impl)) + else: + entities = None + if not skip_structural_validation: + counts = defaultdict(int) + for result in impl.validate(): + if entities and result.subject not in entities: + continue + key = (result.type, result.predicate) + n = counts[key] + n += 1 + counts[key] = n + if n % 1000 == 0: + logging.info(f"Reached {n} results with {key}") + if n == cutoff: + print(f"**TRUNCATING RESULTS FOR {key} at {cutoff}") + elif n < cutoff: + writer.emit(result) + # print(yaml_dumper.dumps(result)) + for k, v in counts.items(): + print(f"{k}:: {v}") + if not skip_ontology_rules: + rr = RuleRunner() + if rule: + rr.set_rules(rule) + for result in rr.run(impl): + if entities and result.subject not in entities: + continue + writer.emit(result) + writer.finish() @main.command() @@ -5286,9 +5370,15 @@ def apply( show_default=True, help="if true, expand complex changes to atomic changes", ) +@click.option( + "--ignore-invalid-changes/--no-ignore-invalid-changes", + default=False, + show_default=True, + help="if true, ignore invalid changes, e.g. 
obsoletions of dependent entities", +) @output_type_option @click.argument("terms", nargs=-1) -def apply_obsolete(output, output_type, expand: bool, terms): +def apply_obsolete(output, output_type, expand: bool, terms, **kwargs): """ Sets an ontology element to be obsolete @@ -5308,25 +5398,25 @@ def apply_obsolete(output, output_type, expand: bool, terms): This command is partially redundant with the more general "apply" command """ impl = settings.impl - if isinstance(impl, PatcherInterface): - impl.autosave = settings.autosave - for term in query_terms_iterator(terms, impl): - change = kgcl.NodeObsoletion(id=generate_change_id(), about_node=term) - if expand: - changes = impl.expand_change(change) - else: - changes = [change] - for change in changes: - impl.apply_patch(change) - if not settings.autosave and not output: - logging.warning("--autosave not passed, changes are NOT saved") - if output: - if output == "-": - output = sys.stdout - impl.dump(output, output_type) - # impl.save() - else: + if not isinstance(impl, PatcherInterface): raise NotImplementedError + impl.autosave = settings.autosave + for k, v in kwargs.items(): + setattr(impl, k, v) + for term in query_terms_iterator(terms, impl): + change = kgcl.NodeObsoletion(id=generate_change_id(), about_node=term) + if expand: + changes = impl.expand_change(change) + else: + changes = [change] + for change in changes: + impl.apply_patch(change) + if not settings.autosave and not output: + logging.warning("--autosave not passed, changes are NOT saved") + if output: + if output == "-": + output = sys.stdout + impl.dump(output, output_type) @main.command() @@ -5796,5 +5886,120 @@ def generate_synonyms(terms, rules_file, apply_patch, patch, patch_format, outpu _apply_changes(impl, change_list) +@main.command() +@click.argument("terms", nargs=-1) +@click.option( + "--patterns-file", + "-P", + multiple=True, + help="path to patterns file", +) +@click.option( + "--show-extract/--no-show-extract", + default=False, 
+ show_default=True, + help="Show the original extracted object.", +) +@click.option( + "--parse/--no-parse", + default=True, + show_default=True, + help="Parse the input terms according to the patterns.", +) +@click.option( + "--fill/--no-fill", + default=False, + show_default=True, + help="If true, fill in descendant logical definitions.", +) +@click.option( + "--analyze/--no-analyze", + default=False, + show_default=True, + help="Analyze consistency of logical definitions (in progress).", +) +@click.option( + "--unmelt/--no-unmelt", + default=False, + show_default=True, + help="Use a wide table for display.", +) +@autolabel_option +@output_option +@output_type_option +def generate_logical_definitions( + terms, patterns_file, show_extract, unmelt, autolabel, parse, fill, analyze, output, output_type +): + """ + Generate logical definitions based on patterns file. + """ + impl = settings.impl + writer = _get_writer(output_type, impl, StreamingYamlWriter, kgcl) + writer.output = output + writer.autolabel = autolabel + if not isinstance(impl, OboGraphInterface): + raise NotImplementedError + curies = list(query_terms_iterator(terms, impl)) + pattern_collection = None + for pf in patterns_file: + if pattern_collection is None: + pattern_collection = patternizer.load_pattern_collection(pf) + else: + pattern_collection.patterns.extend(patternizer.load_pattern_collection(pf).patterns) + if show_extract: + results = patternizer.lexical_pattern_instances(impl, pattern_collection.patterns, curies) + # label_fields = [] + if unmelt: + results = patternizer.as_matrix(results, pattern_collection) + # label_fields = [p.name for p in pattern_collection.patterns] + for result in results: + if isinstance(result, BaseModel): + result = result.dict() + writer.emit(result) + else: + label_fields = [ + "definedClassId", + "genusIds", + "restrictionFillerIds", + "restrictionsPropertyIds", + "restrictionsFillerIds", + ] + if parse: + if not pattern_collection: + raise ValueError("Must 
specify -P if --parse is set") + results = patternizer.lexical_pattern_instances( + impl, pattern_collection.patterns, curies + ) + ldefs = list(patternizer.as_logical_definitions(results)) + else: + ldefs = list(impl.logical_definitions(curies)) + if fill: + for ldef in ldefs: + for ( + filled_ldef + ) in logical_definition_analyzer.generate_descendant_logical_definitions( + impl, ldef + ): + writer.emit(filled_ldef, label_fields=label_fields) + if analyze: + logging.warning("Analyzing logical definitions is incomplete") + reports = logical_definition_analyzer.analyze_logical_definitions(impl, ldefs) + for report in reports: + print(report) + if unmelt: + ldef_flattener = LogicalDefinitionFlattener( + labeler=lambda x: impl.label(x), curie_converter=impl.converter + ) + writer.heterogeneous_keys = True + for ldef in ldefs: + flat_obj = ldef_flattener.convert(ldef) + writer.emit(flat_obj, label_fields=list(flat_obj.keys())) + else: + for ldef in ldefs: + writer.emit(ldef, label_fields=label_fields) + writer.finish() + writer.file.close() + + if __name__ == "__main__": main() diff --git a/src/oaklib/conf/obograph-style.json b/src/oaklib/conf/obograph-style.json index 6920198a0..1efeac4e8 100644 --- a/src/oaklib/conf/obograph-style.json +++ b/src/oaklib/conf/obograph-style.json @@ -152,6 +152,9 @@ "FBdv": { "fillcolor": "mediumturquoise" }, + "PR": { + "fillcolor": "mediumturquoise" + }, "RO": { "fillcolor": "pink" }, diff --git a/src/oaklib/datamodels/search.py b/src/oaklib/datamodels/search.py index 868d74738..071b8c9b7 100644 --- a/src/oaklib/datamodels/search.py +++ b/src/oaklib/datamodels/search.py @@ -25,7 +25,7 @@ def create_search_configuration(term: str) -> "SearchConfiguration": term is either a plaintext search term, or a search term prefixed by - 1. a property packages, one of t, ., l (for term, anything, label) - - 2. a match type indicator, one of "~","/","=","^" + - 2. 
a match type indicator, one of "~","/","=","^","@" For more documentation, see `Search docs `_ diff --git a/src/oaklib/datamodels/vocabulary.py b/src/oaklib/datamodels/vocabulary.py index e21148a0c..25f191c97 100644 --- a/src/oaklib/datamodels/vocabulary.py +++ b/src/oaklib/datamodels/vocabulary.py @@ -118,6 +118,9 @@ NEVER_IN_TAXON = "RO:0002161" IN_TAXON = "RO:0002162" PRESENT_IN_TAXON = "RO:0002175" +NEGATIVELY_REGULATES = "RO:0002212" +POSITIVELY_REGULATES = "RO:0002213" +REGULATES = "RO:0002211" OBO_PURL = "http://purl.obolibrary.org/obo/" diff --git a/src/oaklib/implementations/aggregator/aggregator_implementation.py b/src/oaklib/implementations/aggregator/aggregator_implementation.py index 6b79bf078..758c7f8e0 100644 --- a/src/oaklib/implementations/aggregator/aggregator_implementation.py +++ b/src/oaklib/implementations/aggregator/aggregator_implementation.py @@ -78,7 +78,7 @@ class AggregatorImplementation( implementations: List[BasicOntologyInterface] = None - def _delegate_iterator(self, func: Callable) -> Iterable: + def _delegate_iterator(self, func: Callable) -> Iterator: for i in self.implementations: for v in func(i): yield v @@ -113,16 +113,17 @@ def simple_mappings_by_curie(self, curie: CURIE) -> Iterable[Tuple[PRED_CURIE, C def get_sssom_mappings_by_curie(self, curie: CURIE) -> Iterable[Mapping]: return self._delegate_iterator(lambda i: i.get_sssom_mappings_by_curie(curie)) - def label(self, curie: CURIE) -> str: - return self._delegate_first(lambda i: i.label(curie)) + def label(self, curie: CURIE, **kwargs) -> str: + return self._delegate_first(lambda i: i.label(curie, **kwargs)) - def definition(self, curie: CURIE) -> str: - return self._delegate_first(lambda i: i.definition(curie)) + def curies_by_label(self, label: str) -> List[CURIE]: + return list(self._delegate_iterator(lambda i: i.curies_by_label(label))) - def definitions( - self, curies: Iterable[CURIE], include_metadata=False, include_missing=False - ) -> Iterator[DEFINITION]: - 
return self._delegate_iterator(lambda i: i.definitions(curies)) + def definition(self, curie: CURIE, **kwargs) -> str: + return self._delegate_first(lambda i: i.definition(curie, **kwargs)) + + def definitions(self, curies: Iterable[CURIE], **kwargs) -> Iterator[DEFINITION]: + return self._delegate_iterator(lambda i: i.definitions(curies, **kwargs)) def entity_alias_map(self, curie: CURIE) -> ALIAS_MAP: return self._delegate_simple_tuple_map(lambda i: i.entity_alias_map(curie)) diff --git a/src/oaklib/implementations/obograph/obograph_implementation.py b/src/oaklib/implementations/obograph/obograph_implementation.py index 3a5224f6b..35ddc2cad 100644 --- a/src/oaklib/implementations/obograph/obograph_implementation.py +++ b/src/oaklib/implementations/obograph/obograph_implementation.py @@ -48,6 +48,9 @@ from oaklib.interfaces.validator_interface import ValidatorInterface from oaklib.resource import OntologyResource from oaklib.types import CURIE, PRED_CURIE, SUBSET_CURIE, URI +from oaklib.utilities.axioms.logical_definition_utilities import ( + logical_definition_matches, +) from oaklib.utilities.basic_utils import pairs_as_dict RDFLIB_FORMAT_MAP = { @@ -433,12 +436,21 @@ def node( def as_obograph(self) -> Graph: return self._entire_graph() - def logical_definitions(self, subjects: Iterable[CURIE]) -> Iterable[LogicalDefinitionAxiom]: - subjects = list(subjects) + def logical_definitions( + self, + subjects: Iterable[CURIE], + predicates: Iterable[PRED_CURIE] = None, + objects: Iterable[CURIE] = None, + **kwargs, + ) -> Iterable[LogicalDefinitionAxiom]: + if subjects: + subjects = list(subjects) for g in self.obograph_document.graphs: - for lda in g.logicalDefinitionAxioms: - if lda.definedClassId in subjects: - yield lda + for ldef in g.logicalDefinitionAxioms: + if logical_definition_matches( + ldef, subjects=subjects, predicates=predicates, objects=objects + ): + yield ldef # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Implements: SearchInterface diff --git 
a/src/oaklib/implementations/pronto/pronto_implementation.py b/src/oaklib/implementations/pronto/pronto_implementation.py index b8825ed2a..dc68630ab 100644 --- a/src/oaklib/implementations/pronto/pronto_implementation.py +++ b/src/oaklib/implementations/pronto/pronto_implementation.py @@ -49,6 +49,7 @@ from oaklib.interfaces import TextAnnotatorInterface from oaklib.interfaces.basic_ontology_interface import ( ALIAS_MAP, + DEFINITION, LANGUAGE_TAG, METADATA_MAP, PRED_CURIE, @@ -74,6 +75,9 @@ from oaklib.interfaces.validator_interface import ValidatorInterface from oaklib.resource import OntologyResource from oaklib.types import CURIE, SUBSET_CURIE +from oaklib.utilities.axioms.logical_definition_utilities import ( + logical_definition_matches, +) from oaklib.utilities.kgcl_utilities import tidy_change_object from oaklib.utilities.mapping.sssom_utils import inject_mapping_sources @@ -490,6 +494,27 @@ def definition(self, curie: CURIE, lang: Optional[LANGUAGE_TAG] = None) -> Optio e = self._entity(curie) return e.definition if e else None + def definitions( + self, + curies: Iterable[CURIE], + include_metadata=False, + include_missing=False, + lang: Optional[LANGUAGE_TAG] = None, + ) -> Iterator[DEFINITION]: + for curie in curies: + e = self._entity(curie) + if not e: + continue + defn = e.definition + if not defn and not include_missing: + continue + metadata = {} + if include_metadata: + metadata[HAS_DBXREF] = [] + for x in defn.xrefs: + metadata[HAS_DBXREF].append(x.id) + yield curie, defn, metadata + def comments(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, str]]: for curie in curies: e = self._entity(curie) @@ -702,10 +727,11 @@ def as_obograph(self, expand_curies=False) -> Graph: Edge(sub=r[0], pred="is_a" if r[1] == IS_A else r[1], obj=r[2]) for r in self.relationships() ] + ldefs = list(self.logical_definitions(entities)) graph_id = om.ontology if not graph_id: graph_id = self.resource.slug - return Graph(id=graph_id, nodes=nodes, edges=edges) + 
return Graph(id=graph_id, nodes=nodes, edges=edges, logicalDefinitionAxioms=ldefs) def synonym_property_values( self, subject: Union[CURIE, Iterable[CURIE]] @@ -725,8 +751,14 @@ def synonym_property_values( yield curie, spv def logical_definitions( - self, subjects: Optional[Iterable[CURIE]] + self, + subjects: Optional[Iterable[CURIE]], + predicates: Iterable[PRED_CURIE] = None, + objects: Iterable[CURIE] = None, + **kwargs, ) -> Iterable[obograph.LogicalDefinitionAxiom]: + if not subjects: + subjects = self.entities() for s in subjects: term = self._entity(s) if term and term.intersection_of: @@ -741,7 +773,8 @@ def logical_definitions( propertyId=rel, fillerId=filler.id ) ldef.restrictions.append(er) - yield ldef + if logical_definition_matches(ldef, predicates=predicates, objects=objects): + yield ldef # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Implements: SearchInterface diff --git a/src/oaklib/implementations/simpleobo/simple_obo_implementation.py b/src/oaklib/implementations/simpleobo/simple_obo_implementation.py index c8a51b85c..698336c9e 100644 --- a/src/oaklib/implementations/simpleobo/simple_obo_implementation.py +++ b/src/oaklib/implementations/simpleobo/simple_obo_implementation.py @@ -120,6 +120,9 @@ from oaklib.interfaces.validator_interface import ValidatorInterface from oaklib.resource import OntologyResource from oaklib.types import CURIE, PRED_CURIE, SUBSET_CURIE +from oaklib.utilities.axioms.logical_definition_utilities import ( + logical_definition_matches, +) from oaklib.utilities.kgcl_utilities import tidy_change_object from oaklib.utilities.mapping.sssom_utils import inject_mapping_sources @@ -412,13 +415,13 @@ def create_entity( for filler in fillers: self.add_relationship(curie, pred, filler) - def add_relationship(self, curie: CURIE, predicate: PRED_CURIE, filler: CURIE): + def add_relationship(self, curie: CURIE, predicate: PRED_CURIE, filler: CURIE, **kwargs): t = self._stanza(curie) if predicate == IS_A: - t.add_tag_value(TAG_IS_A, 
filler) + t.add_tag_value(TAG_IS_A, filler, **kwargs) else: predicate_code = self.map_curie_to_shorthand(predicate) - t.add_tag_value_pair(TAG_RELATIONSHIP, predicate_code, filler) + t.add_tag_value_pair(TAG_RELATIONSHIP, predicate_code, filler, **kwargs) self._clear_relationship_index() def remove_relationship(self, curie: CURIE, predicate: Optional[PRED_CURIE], filler: CURIE): @@ -703,13 +706,29 @@ def node(self, curie: CURIE, strict=False, include_metadata=False) -> obograph.N meta.synonyms.append(syn) return obograph.Node(id=curie, lbl=self.label(curie), type=typ, meta=meta) - def as_obograph(self) -> Graph: - nodes = [self.node(curie) for curie in self.entities()] - edges = [Edge(sub=r[0], pred=r[1], obj=r[2]) for r in self.relationships()] - return Graph(id="TODO", nodes=nodes, edges=edges) + def as_obograph(self, expand_curies=False) -> Graph: + def expand(curie: CURIE) -> CURIE: + if expand_curies: + uri = self.curie_to_uri(curie, strict=False) + return uri if uri is not None else curie + else: + return curie + + entities = list(self.entities()) + nodes = [self.node(expand(curie)) for curie in entities] + edges = [ + Edge(sub=expand(r[0]), pred=expand(r[1]), obj=expand(r[2])) + for r in self.relationships() + ] + ldefs = list(self.logical_definitions(entities)) + return Graph(id="TODO", nodes=nodes, edges=edges, logicalDefinitionAxioms=ldefs) def logical_definitions( - self, subjects: Optional[Iterable[CURIE]] = None + self, + subjects: Optional[Iterable[CURIE]] = None, + predicates: Iterable[PRED_CURIE] = None, + objects: Iterable[CURIE] = None, + **kwargs, ) -> Iterable[LogicalDefinitionAxiom]: for s in subjects: t = self._stanza(s, strict=False) @@ -727,7 +746,8 @@ def logical_definitions( ) else: ldef.genusIds.append(m1) - yield ldef + if logical_definition_matches(ldef, predicates=predicates, objects=objects): + yield ldef # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Implements: SearchInterface @@ -865,7 +885,10 @@ def apply_patch( ) # Handling a bug where 
quotes are accidentally introduced. t.remove_tag_quoted_value(TAG_SYNONYM, v) elif isinstance(patch, kgcl.EdgeCreation): - self.add_relationship(patch.subject, patch.predicate, patch.object) + description = patch.change_description + self.add_relationship( + patch.subject, patch.predicate, patch.object, description=description + ) modified_entities.append(patch.subject) elif isinstance(patch, kgcl.EdgeDeletion): self.remove_relationship(patch.subject, patch.predicate, patch.object) diff --git a/src/oaklib/implementations/simpleobo/simple_obo_parser.py b/src/oaklib/implementations/simpleobo/simple_obo_parser.py index 323a877ca..b188ca0db 100644 --- a/src/oaklib/implementations/simpleobo/simple_obo_parser.py +++ b/src/oaklib/implementations/simpleobo/simple_obo_parser.py @@ -521,7 +521,20 @@ def remove_pairwise_tag_value(self, tag: TAG, val1: str, val2: str) -> None: logging.warning(f"No values to remove for {tag} = {val1} {val2} // {self}") self.tag_values = tvs - def add_tag_value(self, tag: TAG, val: str) -> None: + def _kwargs_to_qualifiers_string(self, **kwargs) -> str: + """ + Converts a set of kwargs to a qualifier string + + :param kwargs: + :return: + """ + if not kwargs: + return "" + quals = [f'{k}="{v}"' for k, v in kwargs.items()] + quals_str = ", ".join(quals) + return f" {{{quals_str}}}" + + def add_tag_value(self, tag: TAG, val: str, **kwargs) -> None: """ Adds a tag-value pair @@ -529,6 +542,8 @@ def add_tag_value(self, tag: TAG, val: str) -> None: :param val: :return: """ + if kwargs: + val += " " + self._kwargs_to_qualifiers_string(**kwargs) self.tag_values.append(TagValue(tag, val)) def add_quoted_tag_value(self, tag: TAG, val: str, xrefs: List[str]) -> None: @@ -541,7 +556,7 @@ def add_quoted_tag_value(self, tag: TAG, val: str, xrefs: List[str]) -> None: """ self.tag_values.append(TagValue(tag, f"\"{val}\" [{','.join(xrefs)}]")) - def add_tag_value_pair(self, tag: TAG, val1: str, val2: str) -> None: + def add_tag_value_pair(self, tag: TAG, val1: 
str, val2: str, **kwargs) -> None: """ Adds a tag-value pair where the value is a pair @@ -550,7 +565,10 @@ def add_tag_value_pair(self, tag: TAG, val1: str, val2: str) -> None: :param val2: :return: """ - self.tag_values.append(TagValue(tag, f"{val1} {val2}")) + v = f"{val1} {val2}" + if kwargs: + v += " " + self._kwargs_to_qualifiers_string(**kwargs) + self.tag_values.append(TagValue(tag, v)) def get_boolean_value(self, tag: TAG, strict=False) -> bool: """ diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py index 958e2a41b..d2e150216 100644 --- a/src/oaklib/implementations/sqldb/sql_implementation.py +++ b/src/oaklib/implementations/sqldb/sql_implementation.py @@ -148,6 +148,9 @@ from oaklib.interfaces.taxon_constraint_interface import TaxonConstraintInterface from oaklib.interfaces.validator_interface import ValidatorInterface from oaklib.types import CATEGORY_CURIE, CURIE, SUBSET_CURIE +from oaklib.utilities.axioms.logical_definition_utilities import ( + logical_definition_matches, +) from oaklib.utilities.graph.relationship_walker import walk_down, walk_up from oaklib.utilities.identifier_utils import ( string_as_base64_curie, @@ -207,6 +210,11 @@ def regex_to_sql_like(regex: str) -> str: """ convert a regex to a LIKE + * ``.*`` => ``%`` + * ``.`` => ``_`` + * ``^`` => ``%`` (at start of string) + * ``$`` => ``%`` (at end of string) + TODO: implement various different DBMS flavors https://stackoverflow.com/questions/20794860/regex-in-sql-to-detect-one-or-more-digit @@ -1515,21 +1523,34 @@ def _ixn_definition(self, ixn: str, subject: CURIE) -> Optional[LogicalDefinitio return ldef def logical_definitions( - self, subjects: Optional[Iterable[CURIE]] = None + self, + subjects: Optional[Iterable[CURIE]] = None, + predicates: Iterable[PRED_CURIE] = None, + objects: Iterable[CURIE] = None, + **kwargs, ) -> Iterable[LogicalDefinitionAxiom]: logging.info("Getting logical definitions") q = 
self.session.query(OwlEquivalentClassStatement) + if predicates is not None: + predicates = list(predicates) + if objects is not None: + objects = list(objects) if subjects is None: - for ldef in self._logical_definitions_from_eq_query(q): + for ldef in self._logical_definitions_from_eq_query(q, predicates, objects): yield ldef return for curie_it in chunk(subjects, self.max_items_for_in_clause): logging.info(f"Getting logical definitions for {curie_it} from {subjects}") q = q.filter(OwlEquivalentClassStatement.subject.in_(tuple(curie_it))) - for ldef in self._logical_definitions_from_eq_query(q): + for ldef in self._logical_definitions_from_eq_query(q, predicates, objects): yield ldef - def _logical_definitions_from_eq_query(self, query) -> Iterable[LogicalDefinitionAxiom]: + def _logical_definitions_from_eq_query( + self, + query, + predicates: Iterable[PRED_CURIE] = None, + objects: Iterable[CURIE] = None, + ) -> Iterable[LogicalDefinitionAxiom]: for eq_row in query: ixn_q = self.session.query(Statements).filter( and_( @@ -1540,6 +1561,8 @@ def _logical_definitions_from_eq_query(self, query) -> Iterable[LogicalDefinitio for ixn in ixn_q: ldef = self._ixn_definition(ixn.object, eq_row.subject) if ldef: + if not logical_definition_matches(ldef, predicates=predicates, objects=objects): + continue yield ldef # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/oaklib/implementations/ubergraph/ubergraph_implementation.py b/src/oaklib/implementations/ubergraph/ubergraph_implementation.py index b16e6bded..b650f60b5 100644 --- a/src/oaklib/implementations/ubergraph/ubergraph_implementation.py +++ b/src/oaklib/implementations/ubergraph/ubergraph_implementation.py @@ -18,7 +18,7 @@ from oaklib.interfaces import SubsetterInterface from oaklib.interfaces.basic_ontology_interface import RELATIONSHIP, RELATIONSHIP_MAP from oaklib.interfaces.mapping_provider_interface import MappingProviderInterface -from oaklib.interfaces.obograph_interface import OboGraphInterface +from 
oaklib.interfaces.obograph_interface import GraphTraversalMethod, OboGraphInterface from oaklib.interfaces.rdf_interface import TRIPLE from oaklib.interfaces.relation_graph_interface import RelationGraphInterface from oaklib.interfaces.search_interface import SearchInterface @@ -282,8 +282,14 @@ def relationships_to_graph(self, relationships: Iterable[RELATIONSHIP]) -> obogr return obograph.Graph(id="query", nodes=list(nodes.values()), edges=edges) def ancestors( - self, start_curies: Union[CURIE, List[CURIE]], predicates: List[PRED_CURIE] = None + self, + start_curies: Union[CURIE, List[CURIE]], + predicates: List[PRED_CURIE] = None, + reflexive=True, + method: Optional[GraphTraversalMethod] = None, ) -> Iterable[CURIE]: + if method and method == GraphTraversalMethod.HOP: + raise NotImplementedError("HOP not implemented for ubergraph") # TODO: DRY if not isinstance(start_curies, list): start_curies = [start_curies] @@ -303,8 +309,14 @@ def ancestors( yield self.uri_to_curie(row["o"]["value"]) def descendants( - self, start_curies: Union[CURIE, List[CURIE]], predicates: List[PRED_CURIE] = None + self, + start_curies: Union[CURIE, List[CURIE]], + predicates: List[PRED_CURIE] = None, + reflexive=True, + method: Optional[GraphTraversalMethod] = None, ) -> Iterable[CURIE]: + if method and method == GraphTraversalMethod.HOP: + raise NotImplementedError("HOP not implemented for ubergraph") # TODO: DRY query_uris = [self.curie_to_sparql(curie) for curie in start_curies] where = ["?s ?p ?o", "?s a owl:Class", f'VALUES ?o {{ {" ".join(query_uris)} }}'] diff --git a/src/oaklib/interfaces/basic_ontology_interface.py b/src/oaklib/interfaces/basic_ontology_interface.py index bfd52699f..1838af7bf 100644 --- a/src/oaklib/interfaces/basic_ontology_interface.py +++ b/src/oaklib/interfaces/basic_ontology_interface.py @@ -232,6 +232,10 @@ def curie_to_uri(self, curie: CURIE, strict: bool = False) -> Optional[URI]: :param strict: (Default is False) if True, exceptions will be raised 
if curie cannot be expanded :return: """ + if ":" not in curie: + if strict: + raise ValueError(f"Invalid CURIE: {curie}") + return None rv = self.converter.expand(curie) if rv is None and strict: prefix_map_text = "\n".join( diff --git a/src/oaklib/interfaces/obograph_interface.py b/src/oaklib/interfaces/obograph_interface.py index 0ec904f26..6fefea325 100644 --- a/src/oaklib/interfaces/obograph_interface.py +++ b/src/oaklib/interfaces/obograph_interface.py @@ -461,6 +461,14 @@ def paths( """ Returns all paths between sources and targets. + >>> from oaklib import get_adapter + >>> adapter = get_adapter("tests/input/go-nucleus.db", implements=OboGraphInterface) + >>> for path in sorted(list(adapter.paths(["GO:0005634"], ["GO:0005773"]))): + ... print(path) + ('GO:0005634', 'GO:0005773', 'GO:0005634') + ('GO:0005634', 'GO:0005773', 'GO:0005773') + ('GO:0005634', 'GO:0005773', 'GO:0043231') + :param start_curies: :param start_curies: :param predicates: @@ -491,12 +499,33 @@ def paths( yield s, o, intermediate def logical_definitions( - self, subjects: Optional[Iterable[CURIE]] = None + self, + subjects: Optional[Iterable[CURIE]] = None, + predicates: Iterable[PRED_CURIE] = None, + objects: Iterable[CURIE] = None, + **kwargs, ) -> Iterable[LogicalDefinitionAxiom]: """ - Yields all logical definitions for input subjects + Yields all logical definitions for input subjects. + + >>> from oaklib import get_adapter + >>> adapter = get_adapter("tests/input/go-nucleus.db", implements=OboGraphInterface) + >>> for ldef in adapter.logical_definitions(["GO:0009892"]): + ... print(f"Genus: {adapter.label(ldef.genusIds[0])}") + ... for r in ldef.restrictions: + ... print(f" Differentia: {adapter.label(r.propertyId)} SOME {adapter.label(r.fillerId)}") + Genus: biological regulation + Differentia: negatively regulates SOME metabolic process + + Leaving the subjects parameter as None will yield all logical definitions in the ontology. 
+ + >>> len(list(adapter.logical_definitions())) + 50 + + :param subjects: If specified, defined class must be in this set + :param predicates: If specified, only yields logical definitions with these predicates + :param objects: If specified, only yields logical definitions with genus or filler in this list - :param subjects: :return: """ return iter(()) diff --git a/src/oaklib/interfaces/patcher_interface.py b/src/oaklib/interfaces/patcher_interface.py index 9a3656584..2cae83b6f 100644 --- a/src/oaklib/interfaces/patcher_interface.py +++ b/src/oaklib/interfaces/patcher_interface.py @@ -34,6 +34,7 @@ class PatcherInterface(BasicOntologyInterface, ABC): If this is set then the recommended value is dct:contributor""" ignore_invalid_changes: bool = False + """If True, then invalid changes are ignored. If False, then invalid changes raise an exception""" def apply_patch( self, @@ -172,8 +173,13 @@ def expand_change(self, change: Change, configuration: Configuration = None) -> # edge previously existed continue new_edges.append(e) + desc = f"Rewired from link to {about_node} {self.label(about_node)}" ch = EdgeCreation( - generate_change_id(), subject=s, predicate=pred, object=o + generate_change_id(), + subject=s, + predicate=pred, + object=o, + change_description=desc, ) changes.append(ch) logging.info(f"Rewiring {s} {p1} {about_node} to {s} {pred} {o}") diff --git a/src/oaklib/interfaces/subsetter_interface.py b/src/oaklib/interfaces/subsetter_interface.py index f424cf918..bbf7c1a25 100644 --- a/src/oaklib/interfaces/subsetter_interface.py +++ b/src/oaklib/interfaces/subsetter_interface.py @@ -57,7 +57,7 @@ def gap_fill_relationships( self, seed_curies: List[CURIE], predicates: List[PRED_CURIE] = None ) -> Iterator[RELATIONSHIP]: """ - Given a term subset as a list of curies, find all non-redundant relationships connecting them + Given a term subset as a list of curies, find all non-redundant relationships connecting them. 
This assumes relation-graph entailed edges, so currently only implemented for ubergraph and sqlite diff --git a/src/oaklib/io/streaming_obo_writer.py b/src/oaklib/io/streaming_obo_writer.py index 9dbdc85ef..52f0be59b 100644 --- a/src/oaklib/io/streaming_obo_writer.py +++ b/src/oaklib/io/streaming_obo_writer.py @@ -7,7 +7,7 @@ from oaklib.converters.obo_graph_to_obo_format_converter import ( OboGraphToOboFormatConverter, ) -from oaklib.datamodels.obograph import GraphDocument +from oaklib.datamodels.obograph import GraphDocument, LogicalDefinitionAxiom from oaklib.datamodels.vocabulary import IS_A, RDF_TYPE, SYNONYM_PRED_TO_SCOPE_MAP from oaklib.interfaces.metadata_interface import MetadataInterface from oaklib.interfaces.obograph_interface import OboGraphInterface @@ -84,3 +84,16 @@ def emit_multiple(self, entities: Iterable[CURIE], **kwargs): obodoc.dump(self.file) else: super().emit_multiple(entities, **kwargs) + + def emit_obj(self, obj: Any, **kwargs): + oi = self.ontology_interface + if isinstance(obj, CURIE): + self.emit_curie(obj) + elif isinstance(obj, LogicalDefinitionAxiom): + self.line("[Term]") + self.line(f"id: {obj.definedClassId} ! {oi.label(obj.definedClassId)}") + for genus in obj.genusIds: + self.line(f"intersection_of: {genus} ! {oi.label(genus)}") + for r in obj.restrictions: + self.line(f"intersection_of: {r.propertyId} {r.fillerId} ! 
{oi.label(r.fillerId)}") + self.line("\n") diff --git a/src/oaklib/io/streaming_writer.py b/src/oaklib/io/streaming_writer.py index cffc92d31..b630ffdb9 100644 --- a/src/oaklib/io/streaming_writer.py +++ b/src/oaklib/io/streaming_writer.py @@ -121,6 +121,11 @@ def add_labels(self, obj_as_dict: Dict, label_fields: Optional[List[str]] = None :param label_fields: :return: """ + + def _label(c: CURIE) -> str: + lbl = self.ontology_interface.label(c, lang=self.settings.preferred_language) + return str(lbl) if lbl else "" + if label_fields and self.autolabel: for f in label_fields: curie = obj_as_dict.get(f, None) @@ -131,14 +136,7 @@ def add_labels(self, obj_as_dict: Dict, label_fields: Optional[List[str]] = None if delim and isinstance(curie, str) and delim in curie: curie = curie.split("|") if isinstance(curie, list): - label = [ - str( - self.ontology_interface.label( - c, lang=self.settings.preferred_language - ) - ) - for c in curie - ] + label = [_label(c) for c in curie] if delim: label = delim.join(label) else: diff --git a/src/oaklib/mappers/base_mapper.py b/src/oaklib/mappers/base_mapper.py index 30e28ae58..e1a4a4da6 100644 --- a/src/oaklib/mappers/base_mapper.py +++ b/src/oaklib/mappers/base_mapper.py @@ -2,7 +2,7 @@ from abc import ABC from collections import defaultdict from dataclasses import dataclass -from typing import Dict, Iterable, Iterator, List, Tuple +from typing import Any, Dict, Iterable, Iterator, List, Tuple from curies import Converter from sssom_schema import SEMAPV, Mapping @@ -27,6 +27,12 @@ class Mapper(ABC): _mappings_by_source: Dict[CURIE, List[CURIE]] = None + axiom_annotations_to_mapping_predicates: Dict[Tuple[CURIE, Any], CURIE] = None + """ + Maps axiom annotations to predicates. + GO, Mondo and other ontologies may use axiom annotations to encode mapping predicates. 
+ """ + def __post_init__(self): self._mappings_by_source = defaultdict(list) self.add_mappings(self.mappings) diff --git a/src/oaklib/mappers/ontology_metadata_mapper.py b/src/oaklib/mappers/ontology_metadata_mapper.py index 04daf9523..8001f53ed 100644 --- a/src/oaklib/mappers/ontology_metadata_mapper.py +++ b/src/oaklib/mappers/ontology_metadata_mapper.py @@ -54,3 +54,7 @@ def is_a_uri(self) -> URI: def use_skos_profile(self): """Sets the profile to SKOS.""" self.add_mappings(load_default_sssom("omo-to-skos")) + + def skos_encodings(self): + """Maps SKOS encodings.""" + self.add_mappings(load_default_sssom("skos-encodings")) diff --git a/src/oaklib/selector.py b/src/oaklib/selector.py index b163223ae..9c1e39bde 100644 --- a/src/oaklib/selector.py +++ b/src/oaklib/selector.py @@ -3,7 +3,7 @@ import logging import os from pathlib import Path -from typing import List, Optional, Type, Union +from typing import List, Optional, Type, TypeVar, Union import requests from deprecation import deprecated @@ -62,10 +62,15 @@ ), } +T = TypeVar("T", bound=BasicOntologyInterface) + def get_adapter( - descriptor: Union[str, Path, InputSpecification], format: str = None, **kwargs -) -> BasicOntologyInterface: + descriptor: Union[str, Path, InputSpecification], + format: str = None, + implements: Optional[Type[T]] = None, + **kwargs, +) -> T: """ Gets an adapter (implementation) for a given descriptor. 
@@ -119,7 +124,7 @@ def get_adapter( >>> from oaklib import get_adapter >>> from gilda import get_grounder - >>> grounder = get_grounder("~/.data/gilda/0.11.1/grounding_terms.tsv.gz") + >>> grounder = get_grounder() >>> adapter = get_adapter("gilda:", grounder=grounder) >>> annotations = adapter.annotate_text("nucleus") diff --git a/src/oaklib/utilities/axioms/__init__.py b/src/oaklib/utilities/axioms/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/oaklib/utilities/axioms/logical_definition_analyzer.py b/src/oaklib/utilities/axioms/logical_definition_analyzer.py new file mode 100644 index 000000000..1b8aaa481 --- /dev/null +++ b/src/oaklib/utilities/axioms/logical_definition_analyzer.py @@ -0,0 +1,111 @@ +import base64 +import logging +from copy import deepcopy +from itertools import product +from random import shuffle +from typing import Iterator, List, Set, Tuple + +from oaklib import BasicOntologyInterface +from oaklib.datamodels.obograph import ( + ExistentialRestrictionExpression, + LogicalDefinitionAxiom, +) +from oaklib.datamodels.vocabulary import IS_A +from oaklib.interfaces import OboGraphInterface +from oaklib.types import CURIE +from oaklib.utilities.lexical.patternizer import LexicalPattern + + +def logical_definition_to_set(ldef: LogicalDefinitionAxiom) -> Set[CURIE]: + return set( + ldef.genusIds + + [r.propertyId for r in ldef.restrictions] + + [r.fillerId for r in ldef.restrictions] + ) + + +def set_delta(s1: Set[CURIE], s2: Set[CURIE]) -> Tuple[Tuple[CURIE], Tuple[CURIE]]: + return tuple(s1.difference(s2)), tuple(s2.difference(s1)) + + +def reflexive_logical_definition(curie: CURIE) -> LogicalDefinitionAxiom: + return LogicalDefinitionAxiom(definedClassId=curie, genusIds=[curie], restrictions=[]) + + +def logical_definition_signature(ldef: LogicalDefinitionAxiom) -> List[CURIE]: + return ( + [ldef.definedClassId] + + ldef.genusIds + + [r.propertyId for r in ldef.restrictions] + + [r.fillerId for r in ldef.restrictions] 
+ ) + + +def analyze_logical_definitions( + adapter: BasicOntologyInterface, ldefs: List[LogicalDefinitionAxiom], reflexive=False +) -> Iterator: + if reflexive: + ldefs = deepcopy(ldefs) + signature = [] + for ldef in ldefs: + signature.extend(logical_definition_signature(ldef)) + for curie in signature: + ldefs.append(reflexive_logical_definition(curie)) + lmap = {ldef.definedClassId: logical_definition_to_set(ldef) for ldef in ldefs} + curies = list(lmap.keys()) + rels = [rel for rel in adapter.relationships(curies) if rel[2] in curies] + logging.info(f"Found {len(rels)} relationships") + abduced = [] + amap = {} + for rel in rels: + sx = lmap[rel[0]] + ox = lmap[rel[2]] + d = set_delta(sx, ox) + abduced.append((rel[1], d)) + amap[d] = rel + logging.debug(f"Indexing amap[{d}] = {rel}") + for i, ix in lmap.items(): + irels = [rel for rel in rels if rel[0] == i] + for j, jx in lmap.items(): + ijrels = [rel for rel in irels if rel[2] == j] + if i != j: + d = set_delta(ix, jx) + logging.debug(f"Checking if {i}, {j} = {d} in amap") + if d in amap: + if not ijrels: + print(i, j, d, amap[d]) + yield ("abduced", i, j, amap[d]) + + +def generate_descendant_logical_definitions( + adapter: OboGraphInterface, + ldef: LogicalDefinitionAxiom, + pattern: LexicalPattern = None, + random_sample=False, +) -> Iterator[LogicalDefinitionAxiom]: + existing = list(adapter.logical_definitions()) + dc_to_ldef = {ldef.definedClassId: ldef for ldef in existing} + ldef_to_dc = {str(ldef): dc for dc, ldef in dc_to_ldef.items()} + terms = ldef.genusIds + [r.fillerId for r in ldef.restrictions] + num_genus_ids = len(ldef.genusIds) + props = [r.propertyId for r in ldef.restrictions] + candidates_list = [adapter.descendants([t], [IS_A], reflexive=True) for t in terms] + if random_sample: + candidates_list = [shuffle(cs) for cs in candidates_list] + for tpl in product(*candidates_list): + if tpl == tuple(terms): + continue + restrictions = [ + ExistentialRestrictionExpression(propertyId=pred, 
fillerId=filler) + for pred, filler in zip(props, tpl[num_genus_ids:]) + ] + curie = base64.b64encode(str(tpl).encode("ascii")).decode("utf-8") + new_ldef = LogicalDefinitionAxiom( + definedClassId=curie, + genusIds=list(tpl[0:num_genus_ids]), + restrictions=restrictions, + ) + if str(new_ldef) in ldef_to_dc: + logging.debug(f"Skipping {new_ldef} because it already exists") + continue + yield new_ldef diff --git a/src/oaklib/utilities/axioms/logical_definition_summarizer.py b/src/oaklib/utilities/axioms/logical_definition_summarizer.py new file mode 100644 index 000000000..774e36fdf --- /dev/null +++ b/src/oaklib/utilities/axioms/logical_definition_summarizer.py @@ -0,0 +1,224 @@ +from collections import defaultdict +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from pydantic import BaseModel + +from oaklib.datamodels.obograph import ( + ExistentialRestrictionExpression, + LogicalDefinitionAxiom, +) +from oaklib.datamodels.vocabulary import IS_A +from oaklib.interfaces import OboGraphInterface +from oaklib.types import CURIE, PRED_CURIE +from oaklib.utilities.obograph_utils import depth_first_ordering + + +class LogicalDefinitionElementRole(Enum): + DEFINED_CLASS = "defined_class" + GENUS = "genus" + PREDICATE = "predicate" + FILLER = "filler" + SIGNATURE = "signature" + META = "meta" + + +class Config(BaseModel): + row_represents: Optional[List[LogicalDefinitionElementRole]] = None + column_represents: Optional[List[LogicalDefinitionElementRole]] = None + cell_represents: Optional[List[LogicalDefinitionElementRole]] = None + + +def parse_axes_to_config(config: str) -> Config: + sep = "," + parts = [parse_config_element(x) for x in config.split(sep)] + row_represents = parts[0] + column_represents = parts[1] if len(parts) > 1 else None + cell_represents = parts[2] if len(parts) > 2 else None + return Config( + row_represents=row_represents, + column_represents=column_represents, + cell_represents=cell_represents, + ) + + +def 
parse_config_element(config: str) -> [LogicalDefinitionElementRole]: + sep = "+" + if sep in config: + return [parse_config_element(v)[0] for v in config.split("+")] + for x in LogicalDefinitionElementRole: + if x.value.startswith(config): + return [x] + + +def sort_entities( + adapter: OboGraphInterface, + entities: List[CURIE], + traversal_order_predicates: Optional[List[PRED_CURIE]] = None, +) -> List[CURIE]: + graph = adapter.ancestor_graph(entities, predicates=traversal_order_predicates) + sorted_entities = [x for x in depth_first_ordering(graph) if x in entities] + return sorted_entities + [x for x in entities if x not in sorted_entities] + + +def logical_definitions_to_matrix( + adapter: OboGraphInterface, + ldefs: List[LogicalDefinitionAxiom], + config: Config = None, + traversal_order_predicates: Optional[List[PRED_CURIE]] = None, + sort_values: bool = True, +) -> List[Dict[str, List[Any]]]: + """ + Converts a list of logical definition axioms to a table. + + The axes are determined by the configuration object. The user can control both + what each row corresponds to and what each column corresponds to. 
+ + :param adapter: OAK adapter for performing lookups + :param ldefs: list of logical definition axioms to summarize + :param config: axis configuration + :param sort_values: sort values in each cell + :return: list of row objects + """ + if not config: + config = Config() + row_represents, column_represents = config.row_represents, config.column_represents + rows = [] + + def _cell_val( + ldef: LogicalDefinitionAxiom, + element: Optional[Union[CURIE, ExistentialRestrictionExpression]] = None, + cell_represents: List[LogicalDefinitionElementRole] = None, + ) -> Any: + if element is not None: + return element + else: + # if LogicalDefinitionElementRole.FILLER in cell_represents: + # return ldef.definedClassId + return ldef.definedClassId + + curie_to_col_name = {} + + def _col_name(curie: CURIE) -> str: + lbl = adapter.label(curie) + if lbl: + cn = lbl.replace(" ", "_") + else: + cn = curie + curie_to_col_name[curie] = cn + return cn + + if not row_represents: + row_represents = [LogicalDefinitionElementRole.DEFINED_CLASS] + + pk = None + if LogicalDefinitionElementRole.DEFINED_CLASS in row_represents: + if len(row_represents) > 1: + raise ValueError( + f"Cannot have more than one row_representing for defined_class: {row_represents}" + ) + for ldef in ldefs: + row = defaultdict(list) + pk = "defined_class" + row[pk] = [ldef.definedClassId] + ok = False + if LogicalDefinitionElementRole.PREDICATE in column_represents: + for x in ldef.genusIds: + row["genus"].append(x) + for x in ldef.restrictions: + row[_col_name(x.propertyId)].append(x.fillerId) + ok = True + if LogicalDefinitionElementRole.FILLER in column_represents: + for x in ldef.genusIds: + row["genus"].append(x) + for x in ldef.restrictions: + row[_col_name(x.fillerId)].append(x.propertyId) + ok = True + if LogicalDefinitionElementRole.GENUS in column_represents: + # for x in ldef.restrictions: + # row[_col_name(x.fillerId)].extend([x.propertyId, x.fillerId]) + for x in ldef.genusIds: + 
row[_col_name(x)].append(_cell_val(ldef)) + ok = True + if LogicalDefinitionElementRole.META in column_represents: + for x in ldef.genusIds: + row["genus"].append(x) + for x in ldef.restrictions: + row["differentia"].append((x.propertyId, x.fillerId)) + ok = True + if not ok: + raise ValueError( + f"Invalid column_represents: {column_represents} for row: {row_represents}" + ) + rows.append(row) + else: + row_ix = {} + for ldef in ldefs: + pk_vals = [] + if LogicalDefinitionElementRole.GENUS in row_represents: + pk_vals = ldef.genusIds + if LogicalDefinitionElementRole.PREDICATE in row_represents: + pk_vals = [x.propertyId for x in ldef.restrictions] + if LogicalDefinitionElementRole.FILLER in row_represents: + pk_vals = [x.fillerId for x in ldef.restrictions] + if column_represents is None: + column_represents = [LogicalDefinitionElementRole.PREDICATE] + if not pk_vals: + raise ValueError(f"Invalid row_represents: {row_represents}") + for pk_val in pk_vals: + if pk_val not in row_ix: + row = defaultdict(list) + row_ix[pk_val] = row + row = row_ix[pk_val] + pk = row_represents[0].value + row[pk] = [pk_val] + ok = False + if LogicalDefinitionElementRole.GENUS in column_represents: + genus_ids = ldef.genusIds if ldef.genusIds else ["NO_GENUS"] + for x in genus_ids: + row[_col_name(x)].append(_cell_val(ldef)) + ok = True + if LogicalDefinitionElementRole.FILLER in column_represents: + fillers = [x.fillerId for x in ldef.restrictions] + if not fillers: + fillers = ["NO_FILLER"] + for x in fillers: + row[_col_name(x)].append(_cell_val(ldef)) + ok = True + if LogicalDefinitionElementRole.PREDICATE in column_represents: + preds = [x.propertyId for x in ldef.restrictions] + if not preds: + preds = ["NO_PREDICATE"] + for x in preds: + row[_col_name(x)].append(_cell_val(ldef)) + ok = True + if not ok: + raise ValueError( + f"Invalid column_represents: {column_represents} for row: {row_represents}" + ) + rows = list(row_ix.values()) + cols = [] + for row in rows: + for k in 
row.keys(): + if k not in cols: + cols.append(k) + for row in rows: + for col in cols: + if col not in row: + row[col] = [""] + if sort_values: + row[col] = sorted(row[col]) + if traversal_order_predicates is None: + traversal_order_predicates = [IS_A] + if traversal_order_predicates: + sorted_row_ids = sort_entities( + adapter, [row[pk][0] for row in rows], traversal_order_predicates + ) + rows = sorted(rows, key=lambda row: sorted_row_ids.index(row[pk][0])) + sorted_col_ids = sort_entities( + adapter, list(curie_to_col_name.keys()), traversal_order_predicates + ) + fixed_cols = [col for col in cols if col not in curie_to_col_name.values()] + ordered_cols = fixed_cols + [curie_to_col_name[col] for col in sorted_col_ids] + rows = [{col: row[col] for col in ordered_cols} for row in rows] + return rows diff --git a/src/oaklib/utilities/axioms/logical_definition_utilities.py b/src/oaklib/utilities/axioms/logical_definition_utilities.py new file mode 100644 index 000000000..99e9bd031 --- /dev/null +++ b/src/oaklib/utilities/axioms/logical_definition_utilities.py @@ -0,0 +1,59 @@ +from typing import List, Optional + +from oaklib.datamodels.obograph import LogicalDefinitionAxiom +from oaklib.datamodels.vocabulary import IS_A +from oaklib.types import CURIE, PRED_CURIE + + +def logical_definition_matches( + ldef: LogicalDefinitionAxiom, + subjects: Optional[List[CURIE]] = None, + predicates: Optional[List[PRED_CURIE]] = None, + objects: Optional[List[CURIE]] = None, +) -> bool: + """ + Check if a logical definition matches a filter criteria. 
+ + >>> from oaklib.datamodels.obograph import LogicalDefinitionAxiom, ExistentialRestrictionExpression + >>> from oaklib.utilities.axioms.logical_definition_utilities import logical_definition_matches + >>> from oaklib.datamodels.vocabulary import IS_A + >>> differentia1 = ExistentialRestrictionExpression(propertyId="R:1", fillerId="X:Filler1") + >>> differentia2 = ExistentialRestrictionExpression(propertyId="R:1", fillerId="X:Filler2") + >>> ldef = LogicalDefinitionAxiom(definedClassId="X:1", + ... genusIds=["X:Genus"], restrictions=[differentia1, differentia2]) + >>> logical_definition_matches(ldef) + True + >>> logical_definition_matches(ldef, subjects=["X:Genus"]) + False + >>> logical_definition_matches(ldef, objects=["X:Genus"]) + True + >>> logical_definition_matches(ldef, subjects=["X:Filler1"]) + False + >>> logical_definition_matches(ldef, predicates=["R:1"]) + True + >>> logical_definition_matches(ldef, predicates=[IS_A]) + True + >>> logical_definition_matches(ldef, objects=["X:Filler1"]) + True + + :param ldef: + :param subjects: if specified, the logical definition must have one of these subjects + :param predicates: if specified, the logical definition must have one of these predicates + :param objects: if specified, the logical definition must have one of these objects + :return: + """ + if predicates or objects: + class_signature = set(ldef.genusIds + [r.fillerId for r in ldef.restrictions]) + pred_signature = set([r.propertyId for r in ldef.restrictions]) + if ldef.genusIds: + pred_signature.add(IS_A) + if predicates: + if not pred_signature.intersection(predicates): + return False + if objects: + if not class_signature.intersection(objects): + return False + if subjects: + if ldef.definedClassId not in subjects: + return False + return True diff --git a/src/oaklib/utilities/lexical/patternizer.py b/src/oaklib/utilities/lexical/patternizer.py new file mode 100644 index 000000000..4d82965c7 --- /dev/null +++ 
b/src/oaklib/utilities/lexical/patternizer.py @@ -0,0 +1,284 @@ +"""Detect logical definitions from lexical elements in an ontology.""" +import logging +import urllib +from typing import Dict, Iterator, List, Optional + +import yaml +from pydantic import BaseModel + +import oaklib.datamodels.obograph as obograph +from oaklib import BasicOntologyInterface +from oaklib.datamodels.vocabulary import IS_A +from oaklib.interfaces import OboGraphInterface +from oaklib.types import CURIE + + +class LexicalPattern(BaseModel): + """ + A lexical pattern is a string that is used to detect a logical definition. + + The data model here is similar to DOSDPs, but is geared towards parsing of lexical elements + in ontologies. + """ + + name: str + """Name of lexical pattern. Typically corresponds to pattern.""" + + pattern: Optional[str] = None + """String pattern to match. If None, defaults to name.""" + + is_regex: Optional[bool] = False + """If True, pattern is a regular expression. + If False, pattern is a string. Defaults to False. NOT IMPLEMENTED YET.""" + + pattern_position: Optional[int] = None + """If 0, then pattern must be at the beginning of the label. + If -1, then pattern must be at the end of the label. + If None, then pattern can be anywhere in the label. + No other options are supported. 
Defaults to None.""" + + description: Optional[str] = None + """Description of pattern.""" + + curie: Optional[CURIE] = None + """If the pattern maps to a logical definition, then this is the curie + of the term that is a fixed element of the definition (genus or differentia filler).""" + + curie_is_genus: Optional[bool] = True + """If True, then the curie is the genus and the extracted term is the differentia filler.""" + + differentia_predicate: Optional[CURIE] = None + """If the pattern maps to a logical definition, then this is the predicate + that is used in the differentia.""" + + +class Differentia(BaseModel): + """Discriminating relationship in a logical definition.""" + + predicate: CURIE + filler: CURIE + + +class LogicalDefinition(BaseModel): + """Logical definition of a term, following genus-differentia format.""" + + genus: CURIE + differentia: List[Differentia] + + +class Term(BaseModel): + curie: CURIE + label: str + logical_definition: Optional[LogicalDefinition] = None + pattern: Optional[str] = None + genus_not_in_descendants: Optional[bool] = False + differentia_not_in_descendants: Optional[bool] = False + + +class ExtractedConcept(BaseModel): + """ + A concept extracted from a lexical pattern. + + For example, in a pattern like "nuclear X", the concept is "X". + """ + + label: str + curies: List[CURIE] + in_ontology: Optional[bool] = None + instances: Dict[str, Optional[Term]] + + +class LexicalPatternCollection(BaseModel): + """Collection of lexical patterns""" + + patterns: List[LexicalPattern] + + +def match_and_extract(pattern: LexicalPattern, label: str) -> Optional[str]: + """ + Given a lexical pattern and a label, return the label with the pattern removed. 
+ + >>> from oaklib.utilities.lexical.patternizer import LexicalPattern, match_and_extract + >>> pattern = LexicalPattern(name="nuclear X", pattern="nuclear") + >>> print(match_and_extract(pattern, "nuclear membrane")) + membrane + + :param pattern: + :param label: + :return: + """ + if pattern.pattern in label: + if pattern.pattern_position is not None: + if pattern.pattern_position == 0: + if not label.startswith(pattern.pattern): + return None + elif pattern.pattern_position == -1: + if not label.endswith(pattern.pattern): + return None + else: + raise NotImplementedError("Pattern position must be 0 or -1.") + return label.replace(pattern.pattern, "").strip() + + +def lexical_pattern_instances( + adapter: BasicOntologyInterface, + patterns: List[LexicalPattern], + curies: Optional[List[CURIE]] = None, + new_concept_prefix=None, + strict=False, +) -> List[ExtractedConcept]: + """ + Given a list of lexical patterns, return a list of ExtractedConcepts. + + Each ExtractedConcepts contains a label and a dictionary of instances, keyed by the pattern name. 
+ + :param adapter: + :param patterns: + :param curies: + :param new_concept_prefix: + :return: + """ + if curies is None: + curies = list(adapter.entities()) + id_labels = list(adapter.labels(curies, allow_none=False)) + ecs = {} + injected_curies = [] + for pattern in patterns: + if pattern.pattern is None: + pattern.pattern = pattern.name + logging.info(f"Processing pattern {pattern.name} on {len(id_labels)} labels.") + for id, label in id_labels: + concept_label = match_and_extract(pattern, label) + if concept_label: + # concept_label = label.replace(pattern.pattern, "").strip() + if concept_label not in ecs: + concept_ids = adapter.curies_by_label(concept_label) + if len(concept_ids) > 1: + candidate_concept_ids = [id for id in concept_ids if id in curies] + if len(candidate_concept_ids) > 0: + concept_ids = candidate_concept_ids + in_ontology = len(concept_ids) == 1 + if not concept_ids: + if new_concept_prefix: + # make label safe by url encoding using library + concept_ids = [ + f"{new_concept_prefix}:{urllib.parse.quote(concept_label)}" + ] + injected_curies.append(concept_ids[0]) + ecs[concept_label] = ExtractedConcept( + label=concept_label, + curies=concept_ids, + in_ontology=in_ontology, + instances={}, + ) + concept_ids = ecs[concept_label].curies + if pattern.curie is not None and not concept_ids and strict: + raise ValueError( + f"Pattern {pattern.name} matched {concept_label} but no curie was found." 
+ ) + if pattern.curie is not None and concept_ids: + if pattern.curie_is_genus: + genus = pattern.curie + differentia = concept_ids[0] + else: + genus = concept_ids[0] + differentia = pattern.curie + ldef = LogicalDefinition( + genus=genus, + differentia=[ + Differentia(predicate=pattern.differentia_predicate, filler=differentia) + ], + ) + else: + ldef = None + term = Term(curie=id, label=label, logical_definition=ldef, pattern=pattern.name) + if ldef: + if isinstance(adapter, OboGraphInterface): + if ldef.genus not in injected_curies: + if ldef.genus not in adapter.ancestors(id, [IS_A]): + term.genus_not_in_descendants = True + differentia0 = ldef.differentia[0] + filler = differentia0.filler + if filler not in injected_curies: + # pred_closure = [IS_A, differentia0.predicate] + pred_closure = None + if filler not in adapter.ancestors(id, pred_closure): + term.differentia_not_in_descendants = True + ecs[concept_label].instances[pattern.name] = term + return list(ecs.values()) + + +def as_matrix( + ecs: List[ExtractedConcept], + pattern_collection: Optional[LexicalPatternCollection] = None, + fields: Optional[List[str]] = None, +) -> Iterator[dict]: + """ + Given a list of ExtractedConcepts, a matrix representation as a list of dicts. + + Each row is an ExtractedConcept, and each column is a pattern. 
+ + :param ecs: + :return: + """ + if not fields: + if pattern_collection: + fields = [p.name for p in pattern_collection.patterns] + if not fields: + fields = set() + for ec in ecs: + fields.update(ec.instances.keys()) + fields = list(fields) + + def cell_value(ec: ExtractedConcept, field: str) -> Optional[str]: + if field in ec.instances: + inst = ec.instances[field] + v = inst.curie + if inst.genus_not_in_descendants: + v = f"*{v}/GEN" + if inst.differentia_not_in_descendants: + v = f"+{v}/DF" + else: + v = "" + return v + + for ec in ecs: + curie = ec.curies[0] if ec.curies else None + row = {"id": curie, "label": ec.label, **{f: cell_value(ec, f) for f in fields}} + n = len([v for v in row.values() if v]) - 1 + row["num_concepts"] = n + yield row + + +def as_logical_definitions( + ecs: List[ExtractedConcept], +) -> Iterator[obograph.LogicalDefinitionAxiom]: + """ + Given a list of ExtractedConcepts, return a list of LogicalDefinitionAxioms. + + :param ecs: + :return: + """ + for ec in ecs: + for instance in ec.instances.values(): + if instance.logical_definition is not None: + yield obograph.LogicalDefinitionAxiom( + definedClassId=instance.curie, + genusIds=[instance.logical_definition.genus], + restrictions=[ + obograph.ExistentialRestrictionExpression( + propertyId=r.predicate, fillerId=r.filler + ) + for r in instance.logical_definition.differentia + ], + ) + + +def load_pattern_collection(patterns_file: str): + """ + Load a pattern collection from a file. 
+ + :param patterns_file: + :return: + """ + return LexicalPatternCollection(**yaml.safe_load(open(patterns_file))) diff --git a/src/oaklib/utilities/mapping/cross_ontology_diffs.py b/src/oaklib/utilities/mapping/cross_ontology_diffs.py index 0e582e4c2..3d0193d69 100644 --- a/src/oaklib/utilities/mapping/cross_ontology_diffs.py +++ b/src/oaklib/utilities/mapping/cross_ontology_diffs.py @@ -200,6 +200,8 @@ def calculate_pairwise_relational_diff( left_oi_entities = entities else: left_oi_entities = left_oi.entities() + # main diff calculation loop: + # iterate through all entities in left/subject ontology, treat as subject/child for subject_child in left_oi_entities: logging.info(f"Subject child: {subject_child}") if not curie_has_prefix(subject_child, sources): @@ -207,6 +209,7 @@ def calculate_pairwise_relational_diff( for pred, subject_parent in relation_dict_as_tuples( left_oi.outgoing_relationship_map(subject_child) ): + logging.debug(f"left edge: {subject_child} {pred} {subject_parent}") if entities is not None: if subject_child not in entities and subject_parent not in entities: continue diff --git a/src/oaklib/utilities/obograph_utils.py b/src/oaklib/utilities/obograph_utils.py index 001f4a8c5..37c775a8b 100644 --- a/src/oaklib/utilities/obograph_utils.py +++ b/src/oaklib/utilities/obograph_utils.py @@ -204,7 +204,10 @@ def as_multi_digraph( def as_digraph( - graph: Graph, reverse: bool = True, filter_reflexive: bool = True + graph: Graph, + reverse: bool = True, + filter_reflexive: bool = True, + predicates: Optional[List[PRED_CURIE]] = None, ) -> nx.MultiDiGraph: """ Convert to a networkx :class:`.DiGraph` @@ -213,8 +216,10 @@ def as_digraph( :param reverse: :return: """ - dg = nx.DiGraph() + dg = nx.MultiDiGraph() for edge in graph.edges: + if predicates is not None and edge.pred not in predicates: + continue if filter_reflexive and reflexive(edge): continue edge_attrs = {"predicate": edge.pred} @@ -231,7 +236,7 @@ def as_graph( filter_reflexive: bool = 
True, predicate_weights: PREDICATE_WEIGHT_MAP = None, default_weight=1.0, -) -> nx.MultiDiGraph: +) -> nx.Graph: """ Convert to a networkx :class:`.DiGraph` @@ -338,6 +343,26 @@ def shortest_paths( logging.info(f"No path between {start_curie} and {end_curie}") +def depth_first_ordering(graph: Graph) -> List[CURIE]: + """ + Return a depth-first ordering of the nodes in the graph. + + :param graph: + :return: + """ + six = index_graph_edges_by_subject(graph) + oix = index_graph_edges_by_object(graph) + stack = list(set(oix.keys()) - set(six.keys())) + visited = [] + while stack: + node = stack.pop() + visited.append(node) + for edge in oix[node]: + if edge.sub not in visited and edge.sub not in stack: + stack.append(edge.sub) + return visited + + def remove_nodes_from_graph(graph: Graph, node_ids: List[CURIE]): """ Remove the specified nodes from the graph, and cascade to any edges diff --git a/tests/test_implementations/__init__.py b/tests/test_implementations/__init__.py index b001e72cb..ecc0da4f3 100644 --- a/tests/test_implementations/__init__.py +++ b/tests/test_implementations/__init__.py @@ -64,6 +64,7 @@ ClassEnrichmentCalculationInterface, ) from oaklib.interfaces.differ_interface import DifferInterface +from oaklib.interfaces.dumper_interface import DumperInterface from oaklib.interfaces.merge_interface import MergeInterface from oaklib.interfaces.metadata_interface import MetadataInterface from oaklib.interfaces.obograph_interface import ( @@ -735,7 +736,7 @@ def _check(syns: List[obograph.SynonymPropertyValue]): syns = list(oi.synonym_property_values(NUCLEUS)) _check([syn[1] for syn in syns]) - def test_dump_obograph(self, oi: BasicOntologyInterface): + def test_dump_obograph(self, oi: DumperInterface): """ Tests conformance of dump method with obograph json syntax. 
@@ -972,6 +973,20 @@ def test_diff(self, oi: DifferInterface, oi_modified: DifferInterface): for typ, expected in cases: test.assertEqual(expected, residual[typ]) + def test_as_obograph(self, oi: OboGraphInterface): + """ + Tests as_obograph in OboGraphInterface + + :param oi: + :return: + """ + test = self.test + for expand in [False, True]: + g = oi.as_obograph(expand_curies=expand) + test.assertGreater(len(g.nodes), 0) + test.assertGreater(len(g.edges), 0) + test.assertGreater(len(g.logicalDefinitionAxioms), 0) + def test_subgraph_from_traversal(self, oi: OboGraphInterface): """ Tests subgraph_from_traversal in OboGraphInterface @@ -1027,8 +1042,8 @@ def test_subgraph_from_traversal(self, oi: OboGraphInterface): ) = case traversal = TraversalConfiguration(up_distance=up_dist, down_distance=down_dist) graph = oi.subgraph_from_traversal(seeds, predicates=predicates, traversal=traversal) - test.assertEqual(expected_num_nodes, len(graph.nodes)) - test.assertEqual(expected_num_edges, len(graph.edges)) + test.assertEqual(expected_num_nodes, len(graph.nodes), f"Failed for case: {case}") + test.assertEqual(expected_num_edges, len(graph.edges), f"Failed for case: {case}") node_ids = [n.id for n in graph.nodes] for node_id in expected_nodes_subset: test.assertIn(node_id, node_ids, f"Failed for case: {case}") diff --git a/tests/test_implementations/test_aggregator.py b/tests/test_implementations/test_aggregator.py index 5284a0283..03c37c130 100644 --- a/tests/test_implementations/test_aggregator.py +++ b/tests/test_implementations/test_aggregator.py @@ -19,6 +19,7 @@ TISSUE, VACUOLE, ) +from tests.test_implementations import ComplianceTester TEST_ONT = INPUT_DIR / "go-nucleus.obo" TEST_ONT2 = INPUT_DIR / "interneuron.obo" @@ -35,6 +36,7 @@ def setUp(self) -> None: oi1 = ProntoImplementation(resource1) oi2 = ProntoImplementation(resource2) self.oi = AggregatorImplementation(implementations=[oi1, oi2]) + self.compliance_tester = ComplianceTester(self) def 
test_relationships(self): oi = self.oi @@ -71,6 +73,9 @@ def test_metadata(self): assert "https://github.com/geneontology/go-ontology/issues/17776" in m["term_tracker_item"] def test_labels(self): + self.compliance_tester.test_labels(self.oi) + + def test_labels_extra(self): """ Tests labels can be retrieved, and no label is retrieved when a term does not exist :return: @@ -105,6 +110,13 @@ def test_synonyms(self): ["tissue", "simple tissue", "tissue portion", "portion of tissue"], ) + def test_definitions(self): + self.compliance_tester.test_definitions(self.oi, include_metadata=True) + + @unittest.skip("TODO") + def test_owl_types(self): + self.compliance_tester.test_owl_types(self.oi, skip_oio=True) + def test_subsets(self): oi = self.oi subsets = list(oi.subsets()) diff --git a/tests/test_implementations/test_pronto.py b/tests/test_implementations/test_pronto.py index 384d520cc..b89b30377 100644 --- a/tests/test_implementations/test_pronto.py +++ b/tests/test_implementations/test_pronto.py @@ -251,7 +251,7 @@ def test_subontology(self): ) def test_definitions(self): - self.compliance_tester.test_definitions(self.oi) + self.compliance_tester.test_definitions(self.oi, include_metadata=True) def test_store_associations(self): self.compliance_tester.test_store_associations(self.oi) @@ -336,6 +336,9 @@ def test_entailed_edges(self): def test_subgraph_from_traversal(self): self.compliance_tester.test_subgraph_from_traversal(self.oi) + def test_as_obograph(self): + self.compliance_tester.test_as_obograph(self.oi) + def test_save_extract(self): g = self.oi.ancestor_graph(VACUOLE) oi = ProntoImplementation() diff --git a/tests/test_implementations/test_simple_obo.py b/tests/test_implementations/test_simple_obo.py index aecedf266..3fa7315c9 100644 --- a/tests/test_implementations/test_simple_obo.py +++ b/tests/test_implementations/test_simple_obo.py @@ -304,6 +304,13 @@ def test_obograph(self): def test_extract_graph(self): 
self.compliance_tester.test_extract_graph(self.oi, test_metadata=False) # TODO + @unittest.skip("TODO") + def test_subgraph_from_traversal(self): + self.compliance_tester.test_subgraph_from_traversal(self.oi) + + def test_as_obograph(self): + self.compliance_tester.test_as_obograph(self.oi) + def test_ancestors_descendants(self): self.compliance_tester.test_ancestors_descendants(self.oi) diff --git a/tests/test_utilities/test_logical_definition_summarizer.py b/tests/test_utilities/test_logical_definition_summarizer.py new file mode 100644 index 000000000..0c91a2bb0 --- /dev/null +++ b/tests/test_utilities/test_logical_definition_summarizer.py @@ -0,0 +1,104 @@ +import unittest + +from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation +from oaklib.resource import OntologyResource +from oaklib.utilities.axioms.logical_definition_summarizer import ( + logical_definitions_to_matrix, + parse_axes_to_config, +) +from tests import INPUT_DIR + +TEST_ONT = INPUT_DIR / "go-nucleus.obo" + +CASES = [ + ( + "d,f", + [ + { + "defined_class": ["GO:0009893"], + "genus": ["GO:0065007"], + "metabolic_process": ["RO:0002213"], + } + ], + ), + ("d,g", [{"defined_class": ["GO:0009893"], "biological_regulation": ["GO:0009893"]}]), + ( + "d,p", + [ + { + "defined_class": ["GO:0009893"], + "genus": ["GO:0065007"], + "positively_regulates": ["GO:0008152"], + } + ], + ), + ( + "f,g", + [ + { + "filler": ["GO:0008152"], + "biological_regulation": ["GO:0009892", "GO:0009893", "GO:0019222"], + } + ], + ), + ( + "f,p", + [ + { + "filler": ["GO:0008152"], + "positively_regulates": ["GO:0009893"], + "negatively_regulates": ["GO:0009892"], + "regulates": ["GO:0019222"], + } + ], + ), + ( + "f", + [ + { + "filler": ["GO:0008152"], + "positively_regulates": ["GO:0009893"], + "negatively_regulates": ["GO:0009892"], + "regulates": ["GO:0019222"], + } + ], + ), + # use organelle + ( + "g,f", + [ + { + "genus": ["GO:0043226"], + "membrane": ["GO:0043227"], + 
"intracellular_anatomical_structure": ["GO:0043229"], + } + ], + ), + ( + "g,p", + [ + {"genus": ["GO:0043226"], "has_part": ["GO:0043227"], "part_of": ["GO:0043229"]}, + {"genus": ["GO:0016020"], "part_of": ["GO:0031090", "GO:0031965", "GO:0098590"]}, + ], + ), +] + + +class TestLogicalDefinitionSummarizer(unittest.TestCase): + def setUp(self) -> None: + resource = OntologyResource(slug="go-nucleus.obo", directory=INPUT_DIR, local=True) + oi = ProntoImplementation(resource) + self.oi = oi + self.ldefs = list(oi.logical_definitions(oi.entities())) + + def test_summarizer(self): + ldefs = self.ldefs + for case in CASES: + (cfg_str, expected) = case + cfg = parse_axes_to_config(cfg_str) + rows = logical_definitions_to_matrix(self.oi, ldefs, cfg) + for row in rows: + slim_row = {k: v for k, v in row.items() if v and v != [""]} + if slim_row in expected: + expected.remove(slim_row) + self.assertEqual([], expected, f"Expected rows not found in output in {case}") diff --git a/tests/test_utilities/test_obograph_utils.py b/tests/test_utilities/test_obograph_utils.py index 5f9795ea3..235244976 100644 --- a/tests/test_utilities/test_obograph_utils.py +++ b/tests/test_utilities/test_obograph_utils.py @@ -12,6 +12,7 @@ from oaklib.utilities.obograph_utils import ( as_multi_digraph, compress_all_graph_ids, + depth_first_ordering, expand_all_graph_ids, filter_by_predicates, graph_as_dict, @@ -23,6 +24,7 @@ ) from tests import ( CELLULAR_ANATOMICAL_ENTITY, + CELLULAR_COMPONENT, CELLULAR_ORGANISMS, CYTOPLASM, HUMAN, @@ -31,6 +33,7 @@ INTRACELLULAR, NUCLEAR_MEMBRANE, NUCLEUS, + ORGANELLE, OUTPUT_DIR, VACUOLE, ) @@ -183,3 +186,19 @@ def test_shortest_paths(self): self.assertIn(x, path) for x in excludes: self.assertNotIn(x, path) + + def test_depth_first_ordering(self): + oi = self.oi + graph = oi.descendant_graph([CELLULAR_COMPONENT], predicates=[IS_A, PART_OF]) + ordered = depth_first_ordering(graph) + self.assertEqual(ordered[0], CELLULAR_COMPONENT) + expected_order = [ + 
(CELLULAR_COMPONENT, CELLULAR_ANATOMICAL_ENTITY), + (CELLULAR_ANATOMICAL_ENTITY, ORGANELLE), + (ORGANELLE, NUCLEUS), + # (CYTOPLASM, NUCLEUS), + (IMBO, NUCLEUS), + (NUCLEUS, NUCLEAR_MEMBRANE), + ] + for parent, child in expected_order: + self.assertLess(ordered.index(parent), ordered.index(child), f"{parent} -> {child}") diff --git a/tests/test_utilities/test_patternizer.py b/tests/test_utilities/test_patternizer.py new file mode 100644 index 000000000..a377a08b2 --- /dev/null +++ b/tests/test_utilities/test_patternizer.py @@ -0,0 +1,95 @@ +import unittest + +import yaml + +from oaklib.datamodels.vocabulary import ( + NEGATIVELY_REGULATES, + PART_OF, + POSITIVELY_REGULATES, +) +from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation +from oaklib.resource import OntologyResource +from oaklib.utilities.lexical.patternizer import ( + Differentia, + LexicalPattern, + LexicalPatternCollection, + LogicalDefinition, + Term, + lexical_pattern_instances, + load_pattern_collection, +) +from tests import INPUT_DIR, MEMBRANE, NUCLEAR_MEMBRANE, NUCLEUS, OUTPUT_DIR + +TEST_ONT = INPUT_DIR / "go-nucleus.obo" +TEST_PATTERNS_OUT = OUTPUT_DIR / "go-patterns.yaml" + +PATTERNS = [ + LexicalPattern( + name="nucleus", + pattern="nuclear", + description="A nuclear X is an X that is part of the nucleus.", + curie=NUCLEUS, + curie_is_genus=False, + differentia_predicate=PART_OF, + ), + LexicalPattern( + name="negative regulation", + pattern="negative regulation of", + curie="GO:0065007", + differentia_predicate=NEGATIVELY_REGULATES, + ), + LexicalPattern( + name="positive regulation", + pattern="positive regulation of", + curie="GO:0065007", + differentia_predicate=POSITIVELY_REGULATES, + ), +] + + +class TestPatternizer(unittest.TestCase): + def setUp(self) -> None: + resource = OntologyResource(slug="go-nucleus.obo", directory=INPUT_DIR, local=True) + oi = ProntoImplementation(resource) + self.oi = oi + self.pattern_collection = 
LexicalPatternCollection(patterns=PATTERNS) + + def test_patternizer(self): + """Test that the patternizer works by extracting nucleus and regulation concepts.""" + expected = [ + Term( + curie=NUCLEAR_MEMBRANE, + label="nuclear membrane", + logical_definition=LogicalDefinition( + genus=MEMBRANE, differentia=[Differentia(predicate=PART_OF, filler=NUCLEUS)] + ), + pattern="nucleus", + ), + Term( + curie="GO:0009892", + label="negative regulation of metabolic process", + logical_definition=LogicalDefinition( + genus="GO:0065007", + differentia=[Differentia(predicate=NEGATIVELY_REGULATES, filler="GO:0008152")], + ), + pattern="negative regulation", + ), + ] + for new_concept_prefix in [None, "TEST"]: + todo = [yaml.dump(ec.dict()) for ec in expected] + ecs = lexical_pattern_instances( + self.oi, PATTERNS, new_concept_prefix=new_concept_prefix + ) + for ec in ecs: + print(yaml.dump(ec.dict())) + for inst in ec.instances.values(): + inst_yaml = yaml.dump(inst.dict()) + if inst_yaml in todo: + todo.remove(inst_yaml) + self.assertEqual(todo, []) + + def test_write_patterns(self): + """Test that a pattern collection can be written to YAML and read back by load_pattern_collection.""" + with open(TEST_PATTERNS_OUT, "w") as outf: + yaml.dump(self.pattern_collection.dict(), outf) + load_pattern_collection(TEST_PATTERNS_OUT)