Skip to content

Commit

Permalink
Merge pull request #89 from INCATools/additional-sources2
Browse files Browse the repository at this point in the history
additional sources
  • Loading branch information
cmungall authored Jul 5, 2024
2 parents d8a58bd + af957d1 commit 8b6cb59
Show file tree
Hide file tree
Showing 11 changed files with 292 additions and 28 deletions.
93 changes: 85 additions & 8 deletions ontologies.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,28 @@ db/oeo.owl: download/oeo.owl
cp $< $@


# taxslim: fetched from the ncbitaxon "subsets/taxslim.obo" PURL. The payload
# is OBO text stored under a .owl name; the db/ rule below runs it through
# `robot convert`.
#
# The download goes to $@.tmp, its checksum is written to $@.sha256, and only
# then is the temp file renamed to the real target, so the target never exists
# in a half-written state.
# curl flags: -L follow redirects, -s no progress meter, -S still print errors,
# -f exit non-zero on HTTP errors so a server error page is never checksummed
# and installed as if it were the ontology.
# STAMP is a prerequisite defined elsewhere in this makefile.
download/taxslim.owl: STAMP
	curl -L -s -S -f http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.obo > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

.PRECIOUS: download/taxslim.owl

db/taxslim.owl: download/taxslim.owl
	robot convert -i $< -o $@


# goldterms: gold.owl from the cmungall/gold-ontology GitHub repository (main
# branch). The db/ rule below chains `robot relax` and `robot reason`.
#
# The download goes to $@.tmp, its checksum is written to $@.sha256, and only
# then is the temp file renamed to the real target, so the target never exists
# in a half-written state.
# curl flags: -L follow redirects, -s no progress meter, -S still print errors,
# -f exit non-zero on HTTP errors so a server error page is never checksummed
# and installed as if it were the ontology.
# STAMP is a prerequisite defined elsewhere in this makefile.
download/goldterms.owl: STAMP
	curl -L -s -S -f https://raw.githubusercontent.com/cmungall/gold-ontology/main/gold.owl > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

.PRECIOUS: download/goldterms.owl

db/goldterms.owl: download/goldterms.owl
	robot relax -i $< reason -o $@


download/sdgio.owl: STAMP
curl -L -s https://raw.githubusercontent.com/SDG-InterfaceOntology/sdgio/master/sdgio.owl > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand All @@ -240,6 +262,17 @@ db/sdgio.owl: download/sdgio.owl
cp $< $@


# kin: fetched from the purl.org/ga4gh/kin.owl PURL. The db/ rule below runs
# `robot reason` over the download.
#
# The download goes to $@.tmp, its checksum is written to $@.sha256, and only
# then is the temp file renamed to the real target, so the target never exists
# in a half-written state.
# curl flags: -L follow redirects, -s no progress meter, -S still print errors,
# -f exit non-zero on HTTP errors so a server error page is never checksummed
# and installed as if it were the ontology.
# STAMP is a prerequisite defined elsewhere in this makefile.
download/kin.owl: STAMP
	curl -L -s -S -f http://purl.org/ga4gh/kin.owl > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

.PRECIOUS: download/kin.owl

db/kin.owl: download/kin.owl
	robot reason -i $< -o $@


download/biovoices.owl: STAMP
curl -L -s https://zenodo.org/record/5589773/files/ontology.owl?download=1 > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand All @@ -262,6 +295,17 @@ db/omop.owl: download/omop.owl
cp $< $@


# comet: linkml_common.owl.ttl from the linkml/linkml-common GitHub repository
# (main branch), stored under a .owl name. The db/ rule below chains
# `robot relax` and `robot merge`.
#
# The download goes to $@.tmp, its checksum is written to $@.sha256, and only
# then is the temp file renamed to the real target, so the target never exists
# in a half-written state.
# curl flags: -L follow redirects, -s no progress meter, -S still print errors,
# -f exit non-zero on HTTP errors so a server error page is never checksummed
# and installed as if it were the ontology.
# STAMP is a prerequisite defined elsewhere in this makefile.
download/comet.owl: STAMP
	curl -L -s -S -f https://raw.githubusercontent.com/linkml/linkml-common/main/project/owl/linkml_common.owl.ttl > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

.PRECIOUS: download/comet.owl

db/comet.owl: download/comet.owl
	robot relax -i $< merge -o $@


download/cco.owl: STAMP
curl -L -s http://www.ontologyrepository.com/CommonCoreOntologies/Mid/AllCoreOntology > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand Down Expand Up @@ -428,7 +472,7 @@ db/mlo.owl: download/mlo.owl


download/ito.owl: STAMP
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp {ont.zip_extract_file} > $@.tmp && rm $@.zip.tmp
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp ITO.owl > $@.tmp && rm $@.zip.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

Expand All @@ -438,14 +482,25 @@ db/ito.owl: download/ito.owl
cp $< $@


download/reactome-Homo-sapiens.owl: STAMP
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp {ont.zip_extract_file} > $@.tmp && rm $@.zip.tmp
download/reactome-hs.owl: STAMP
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Homo_sapiens.owl > $@.tmp && rm $@.zip.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

.PRECIOUS: download/reactome-hs.owl

db/reactome-hs.owl: download/reactome-hs.owl
cp $< $@


download/reactome-mm.owl: STAMP
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Mus_musculus.owl > $@.tmp && rm $@.zip.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

.PRECIOUS: download/reactome-Homo-sapiens.owl
.PRECIOUS: download/reactome-mm.owl

db/reactome-Homo-sapiens.owl: download/reactome-Homo-sapiens.owl
db/reactome-mm.owl: download/reactome-mm.owl
cp $< $@


Expand Down Expand Up @@ -823,6 +878,28 @@ db/nando.owl: download/nando.owl
cp $< $@


# ecso: ECSO submission 64 fetched from data.bioontology.org. The URL is
# single-quoted so the shell passes its query string through literally.
# The db/ rule below is a plain copy with no parsing step, so nothing later in
# the chain would notice a bad download.
#
# The download goes to $@.tmp, its checksum is written to $@.sha256, and only
# then is the temp file renamed to the real target, so the target never exists
# in a half-written state.
# curl flags: -L follow redirects, -s no progress meter, -S still print errors,
# -f exit non-zero on HTTP errors so a server error page is never checksummed
# and installed as if it were the ontology.
# STAMP is a prerequisite defined elsewhere in this makefile.
download/ecso.owl: STAMP
	curl -L -s -S -f 'https://data.bioontology.org/ontologies/ECSO/submissions/64/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb' > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

.PRECIOUS: download/ecso.owl

db/ecso.owl: download/ecso.owl
	cp $< $@


# enigma_context: context_measurement_ontology.obo from the jmchandonia/CORAL
# GitHub repository (main branch). The payload is OBO text stored under a .owl
# name; the db/ rule below runs it through `robot merge`.
#
# The download goes to $@.tmp, its checksum is written to $@.sha256, and only
# then is the temp file renamed to the real target, so the target never exists
# in a half-written state.
# curl flags: -L follow redirects, -s no progress meter, -S still print errors,
# -f exit non-zero on HTTP errors so a server error page is never checksummed
# and installed as if it were the ontology.
# STAMP is a prerequisite defined elsewhere in this makefile.
download/enigma_context.owl: STAMP
	curl -L -s -S -f https://raw.githubusercontent.com/jmchandonia/CORAL/main/example/enigma/ontologies/context_measurement_ontology.obo > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

.PRECIOUS: download/enigma_context.owl

db/enigma_context.owl: download/enigma_context.owl
	robot merge -i $< -o $@


download/ontie.owl: STAMP
curl -L -s https://ontology.iedb.org/file/ontie.owl > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand Down Expand Up @@ -857,14 +934,14 @@ db/nmdc_schema.owl: download/nmdc_schema.owl


download/mixs.owl: STAMP
curl -L -s https://raw.githubusercontent.com/microbiomedata/mixs-6-2-release-candidate/main/schema-derivatives/mixs_6_2_rc.owl.ttl > $@.tmp
curl -L -s https://raw.githubusercontent.com/GenomicsStandardsConsortium/mixs/main/project/owl/mixs.owl.ttl > $@.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

.PRECIOUS: download/mixs.owl

db/mixs.owl: download/mixs.owl
robot merge -i $< reason -o $@.tmp.owl && perl -npe 's@_6_2_rc@@g;s@-6-2-rc@@g' $@.tmp.owl > $@
robot merge -i $< reason -o $@


download/fibo.owl: STAMP
Expand Down Expand Up @@ -932,4 +1009,4 @@ download/%.owl: STAMP
# Pattern rule: build db/<name>.owl from download/<name>.owl by passing the
# download through `robot merge`.
db/%.owl: download/%.owl
	robot merge --input $< --output $@

EXTRA_ONTOLOGIES = upheno chiro ncit fma maxo foodon chebiplus msio modl phenio phenio_test comploinc bero aio reacto bcio icd10who ordo gard mondo-ingest oeo sdgio biovoices omop cco occo iof upa go go-lego go-amigo neo bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo hcao hpinternational edam sweetAll lov schema-dot-org prov cellosaurus cosmo fhkb dbpendiaont uberoncm icd10cm co_324 ppeo interpro hgnc.genegroup hgnc sgd dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal wikipathways drugmechdb rxnorm vccf ontobiotope nando ontie ecosim nmdc_schema mixs fibo bfo2020 bfo2020_core bfo2020_notime bfo2020_time
EXTRA_ONTOLOGIES = upheno chiro ncit fma maxo foodon chebiplus msio modl phenio phenio_test comploinc bero aio reacto bcio icd10who ordo gard mondo-ingest oeo taxslim goldterms sdgio kin biovoices omop comet cco occo iof upa go go-lego go-amigo neo bao orcid cpont biolink biopax enanomapper mlo ito reactome-hs reactome-mm efo hcao hpinternational edam sweetAll lov schema-dot-org prov cellosaurus cosmo fhkb dbpendiaont uberoncm icd10cm co_324 ppeo interpro hgnc.genegroup hgnc sgd dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal wikipathways drugmechdb rxnorm vccf ontobiotope nando ecso enigma_context ontie ecosim nmdc_schema mixs fibo bfo2020 bfo2020_core bfo2020_notime bfo2020_time
1 change: 1 addition & 0 deletions src/semsql/builder/build.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ PREFIX_YAML_PATH = $(PREFIX_DIR)/prefixes.yaml
gzip -f $*-$(RGSUFFIX).tsv && \
cat $(THIS_DIR)/indexes/*.sql | sqlite3 $@.tmp && \
echo "ALTER TABLE statements ADD COLUMN graph TEXT;" | sqlite3 $@.tmp && \
(test -d views && find views -maxdepth 1 -name '$(notdir $*)*.sql' -type f -print0 | xargs -0 -I{} sh -c 'sqlite3 $@.tmp< "$$1"' sh {} || echo no views ) && \
mv $@.tmp $@
.PRECIOUS: %.db

Expand Down
32 changes: 26 additions & 6 deletions src/semsql/builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import shutil
import subprocess
from pathlib import Path
from typing import Optional, TextIO
from typing import List, Optional, TextIO

import requests
from linkml_runtime.loaders import yaml_loader
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from semsql.builder.registry import registry_schema
from semsql.builder.registry import path_to_ontology_registry, registry_schema
from semsql.builder.registry.registry_schema import (CompressionEnum, Makefile,
MakefileRule, Ontology)
from semsql.utils.makefile_utils import makefile_to_string
Expand Down Expand Up @@ -117,6 +117,27 @@ def connect(owl_file: str):
return session


def get_postprocessing_steps(
    ontology: str, db: str, registry_path: Optional[str] = None
) -> List[str]:
    """
    Get the post-processing shell commands registered for an ontology.

    The registry is loaded from ``registry_path`` and the entry for
    ``ontology`` is looked up. Each of its ``post_processing_steps`` is
    returned with the placeholders ``{db}`` and ``{ont}`` replaced by the
    ``db`` and ``ontology`` arguments.

    :param ontology: registry key of the ontology (e.g. the ``foo`` in ``db/foo.db``)
    :param db: path of the database, substituted for ``{db}`` in each step
    :param registry_path: path to the registry YAML; defaults to the value
        returned by ``path_to_ontology_registry()``
    :return: list of command strings; empty when the ontology is not in the
        registry or declares no post-processing steps
    """
    if registry_path is None:
        registry_path = path_to_ontology_registry()
    registry: registry_schema.Registry = yaml_loader.load(
        str(registry_path), target_class=registry_schema.Registry
    )
    entry = registry.ontologies.get(ontology)
    if entry is None:
        return []
    # Return the steps themselves, not the registry entry: callers iterate the
    # result and execute every element as a shell command.
    raw_steps = getattr(entry, "post_processing_steps", None) or []
    if isinstance(raw_steps, str):
        # Tolerate a single step given as a bare string.
        raw_steps = [raw_steps]
    # Plain replacement rather than str.format, so steps that contain other
    # literal braces (e.g. an xargs "{}") do not raise.
    return [
        str(step).replace("{db}", db).replace("{ont}", ontology)
        for step in raw_steps
    ]


def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> str:
"""
Generate makefile content from registry
Expand All @@ -139,7 +160,7 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
elif ont.zip_extract_file:
command = (
f"curl -L -s {ont.url} > [email protected] && "
"unzip -p [email protected] {ont.zip_extract_file} "
f"unzip -p [email protected] {ont.zip_extract_file} "
"> [email protected] && rm [email protected]"
)
elif ont.compression:
Expand Down Expand Up @@ -169,9 +190,8 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
command = "robot merge -i $< -o $@"
else:
command = "cp $< $@"
rule = MakefileRule(
target=target, dependencies=dependencies, commands=[command]
)
commands = [command]
rule = MakefileRule(target=target, dependencies=dependencies, commands=commands)
makefile.rules.append(rule)
if not ont.suppress:
onts.append(ont.id)
Expand Down
11 changes: 11 additions & 0 deletions src/semsql/builder/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import subprocess
from itertools import chain, combinations

import click
Expand Down Expand Up @@ -62,6 +63,16 @@ def make(path, docker, **kwargs):
else:
docker_config = None
builder.make(path, docker_config=docker_config, **kwargs)
# check if path is db/{foo}.db using regular expression
import re

matches = re.match(r"db/(\w+).db", path)
if matches:
ontology = matches.group(1)
steps = builder.get_postprocessing_steps(ontology, path)
for step in steps:
print(f"RUNNING: {step}")
subprocess.run(step, shell=True)


@main.command()
Expand Down
5 changes: 5 additions & 0 deletions src/semsql/builder/prefixes/prefixes.csv
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,13 @@ OMIM,https://omim.org/entry/
OMIMPS,https://www.omim.org/phenotypicSeries/PS
CHR,http://purl.obolibrary.org/obo/CHR_
OEO,http://openenergy-platform.org/ontology/oeo/OEO_
GOLDTERMS,https://w3id.org/gold.path/
GOLDVOCAB,https://w3id.org/gold.vocab/
SDGIO,http://purl.unep.org/sdg/SDGIO_
KIN,http://purl.org/ga4gh/kin.owl#KIN_
ontorion,http://ontorion.com/namespace#
omop,https://athena.ohdsi.org/search-terms/terms/
comet,https://w3id.org/linkml-common/
CCO,http://www.ontologyrepository.com/CommonCoreOntologies/
OccO,http://purl.obolibrary.org/obo/OccO_
IOFcore,https://spec.industrialontologies.org/ontology/
Expand Down Expand Up @@ -115,6 +119,7 @@ MESH,http://id.nlm.nih.gov/mesh/
RXNORM,http://purl.bioontology.org/ontology/RXNORM/
OBT,http://purl.obolibrary.org/obo/OBT_
NANDO,http://nanbyodata.jp/ontology/NANDO_
ECSO,http://purl.dataone.org/odo/ECSO_
ONTIE,https://ontology.iedb.org/ontology/ONTIE_
ECOSIM,http://purl.obolibrary.org/obo/ECOSIM_
ECOSIMCONCEPT,http://purl.obolibrary.org/obo/ECOSIMCONCEPT_
Expand Down
5 changes: 5 additions & 0 deletions src/semsql/builder/prefixes/prefixes_local.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ OMIM,https://omim.org/entry/
OMIMPS,https://www.omim.org/phenotypicSeries/PS
CHR,http://purl.obolibrary.org/obo/CHR_
OEO,http://openenergy-platform.org/ontology/oeo/OEO_
GOLDTERMS,https://w3id.org/gold.path/
GOLDVOCAB,https://w3id.org/gold.vocab/
SDGIO,http://purl.unep.org/sdg/SDGIO_
KIN,http://purl.org/ga4gh/kin.owl#KIN_
ontorion,http://ontorion.com/namespace#
omop,https://athena.ohdsi.org/search-terms/terms/
comet,https://w3id.org/linkml-common/
CCO,http://www.ontologyrepository.com/CommonCoreOntologies/
OccO,http://purl.obolibrary.org/obo/OccO_
IOFcore,https://spec.industrialontologies.org/ontology/
Expand Down Expand Up @@ -52,6 +56,7 @@ MESH,http://id.nlm.nih.gov/mesh/
RXNORM,http://purl.bioontology.org/ontology/RXNORM/
OBT,http://purl.obolibrary.org/obo/OBT_
NANDO,http://nanbyodata.jp/ontology/NANDO_
ECSO,http://purl.dataone.org/odo/ECSO_
ONTIE,https://ontology.iedb.org/ontology/ONTIE_
ECOSIM,http://purl.obolibrary.org/obo/ECOSIM_
ECOSIMCONCEPT,http://purl.obolibrary.org/obo/ECOSIMCONCEPT_
Expand Down
46 changes: 43 additions & 3 deletions src/semsql/builder/registry/ontologies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,24 @@ ontologies:
url: https://openenergyplatform.org/ontology/oeo/releases/oeo-full.owl
prefixmap:
OEO: http://openenergy-platform.org/ontology/oeo/OEO_
taxslim:
url: http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.obo
build_command: "robot convert -i $< -o $@"
goldterms:
url: https://raw.githubusercontent.com/cmungall/gold-ontology/main/gold.owl
build_command: "robot relax -i $< reason -o $@"
prefixmap:
GOLDTERMS: https://w3id.org/gold.path/
GOLDVOCAB: https://w3id.org/gold.vocab/
sdgio:
url: https://raw.githubusercontent.com/SDG-InterfaceOntology/sdgio/master/sdgio.owl
prefixmap:
SDGIO: http://purl.unep.org/sdg/SDGIO_
kin:
url: http://purl.org/ga4gh/kin.owl
build_command: "robot reason -i $< -o $@"
prefixmap:
KIN: "http://purl.org/ga4gh/kin.owl#KIN_"
biovoices:
url: https://zenodo.org/record/5589773/files/ontology.owl?download=1
build_command: "robot relax -i $< merge -o $@"
Expand All @@ -110,6 +124,11 @@ ontologies:
omop:
prefixmap:
omop: https://athena.ohdsi.org/search-terms/terms/
comet:
url: https://raw.githubusercontent.com/linkml/linkml-common/main/project/owl/linkml_common.owl.ttl
build_command: "robot relax -i $< merge -o $@"
prefixmap:
comet: https://w3id.org/linkml-common/
cco:
url: http://www.ontologyrepository.com/CommonCoreOntologies/Mid/AllCoreOntology
build_command: "robot merge -i $< relax -o $@"
Expand Down Expand Up @@ -166,9 +185,16 @@ ontologies:
ito:
url: https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip
zip_extract_file: ITO.owl
reactome-Homo-sapiens:
reactome-hs:
url: https://reactome.org/download/current/biopax.zip
zip_extract_file: Homo_sapiens.owl
#post_processing_steps:
# - "sqlite3 {db} < views/reactome.sql"
reactome-mm:
url: https://reactome.org/download/current/biopax.zip
zip_extract_file: Mus_musculus.owl
post_processing_steps:
- "sqlite3 {db} < views/reactome.sql"
efo:
url: http://www.ebi.ac.uk/efo/efo.owl
has_imports: true
Expand Down Expand Up @@ -327,6 +353,17 @@ ontologies:
url: "'https://data.bioontology.org/ontologies/NANDO/submissions/15/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'"
prefixmap:
NANDO: http://nanbyodata.jp/ontology/NANDO_
ecso:
url: "'https://data.bioontology.org/ontologies/ECSO/submissions/64/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'"
prefixmap:
ECSO: http://purl.dataone.org/odo/ECSO_
enigma_context:
url: https://raw.githubusercontent.com/jmchandonia/CORAL/main/example/enigma/ontologies/context_measurement_ontology.obo
build_command: "robot merge -i $< -o $@"
#meo:
# url: "'https://data.bioontology.org/ontologies/MEO/submissions/9/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'"
# prefixmap:
# MEO: http://purl.jp/bio/11/meo/MEO_
ontie:
url: https://ontology.iedb.org/file/ontie.owl
prefixmap:
Expand All @@ -349,8 +386,8 @@ ontologies:
linkml: https://w3id.org/linkml/
mixs: https://w3id.org/mixs/
mixs:
url: https://raw.githubusercontent.com/microbiomedata/mixs-6-2-release-candidate/main/schema-derivatives/mixs_6_2_rc.owl.ttl
build_command: "robot merge -i $< reason -o $@.tmp.owl && perl -npe 's@_6_2_rc@@g;s@-6-2-rc@@g' $@.tmp.owl > $@"
url: https://raw.githubusercontent.com/GenomicsStandardsConsortium/mixs/main/project/owl/mixs.owl.ttl
build_command: "robot merge -i $< reason -o $@"
prefixmap:
mixs: https://w3id.org/mixs/
fibo:
Expand All @@ -363,6 +400,9 @@ ontologies:
# BFO variant products
bfo2020:
url: http://purl.obolibrary.org/obo/bfo/2020/bfo.owl
#post_processing_steps:
# - "echo hello {db}"
# - "echo goodbye {db}"
bfo2020_core:
url: http://purl.obolibrary.org/obo/bfo/2020/bfo-core.owl
bfo2020_notime:
Expand Down
Loading

0 comments on commit 8b6cb59

Please sign in to comment.