-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from Cellular-Semantics/obask-refactoring
Obask refactoring
- Loading branch information
Showing
65 changed files
with
1,710 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,7 @@ jobs: | |
- name: Build and push Docker image | ||
uses: docker/[email protected] | ||
with: | ||
context: . | ||
context: "./anndata2rdf/" | ||
push: true | ||
platforms: linux/amd64, linux/arm64 | ||
tags: ${{ steps.meta.outputs.tags }} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Validates the neo4j2owl config file on pull requests and prepares a comment.md describing the result. | ||
name: YAML schema validator | ||
on: | ||
# Triggers on pull requests targeting main, and only when the config file listed under `paths` is changed | ||
pull_request: | ||
branches: [ main ] | ||
paths: | ||
- 'cl_kb_pipeline/config/dumps/neo4j2owl-config.yaml' | ||
# Write access to pull requests is requested; presumably needed by the final comment-posting step (confirm) | ||
permissions: | ||
pull-requests: write | ||
|
||
jobs: | ||
yaml-schema-validation: | ||
runs-on: macos-latest | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- name: Set up Python 3.8 | ||
uses: actions/setup-python@v3 | ||
with: | ||
python-version: 3.8 | ||
# Pinned versions of the two packages installed for the validation script | ||
- name: Install dependencies | ||
run: pip install ruamel.yaml==0.17.21 jsonschema==4.4.0 | ||
# Runs the validation script; a non-zero exit fails this step and enables the failure() step below | ||
- name: Schema validation | ||
id: schema | ||
run: | | ||
python cl_kb_pipeline/src/test_neo2owl_config.py | ||
# Runs only after a failure: writes a failure message to comment.md and appends validation.report | ||
# NOTE(review): validation.report is presumably written by the validation script - confirm | ||
- name: Prepare schema validator comment | ||
if: failure() | ||
run: | | ||
echo "cl_kb_pipeline/config/dumps/neo4j2owl-config.yaml file failed the schema validation check " > comment.md; cat validation.report >> comment.md | ||
# No explicit `if:`, so the default success() condition applies and this is skipped once a step has failed | ||
- name: Prepare success comment | ||
run: | | ||
echo "cl_kb_pipeline/config/dumps/neo4j2owl-config.yaml file passed validation check " > comment.md | ||
# always() makes this step run whether validation passed or failed | ||
# NOTE(review): the `uses:` value below looks garbled by e-mail obfuscation in this capture; restore the real `owner/action@version` | ||
- name: Post comment validator comment | ||
if: always() | ||
env: | ||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} | ||
uses: NejcZdovc/[email protected] | ||
with: | ||
file: "../../comment.md" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Image based on python:3.10 that runs src/process.py by default. | ||
FROM python:3.10 | ||
|
||
# Use bash (instead of the default shell) for the shell-form instructions that follow | ||
SHELL ["/bin/bash", "-c"] | ||
|
||
# Install system packages; the apt cache and package lists are removed in the same RUN instruction | ||
RUN apt-get update && apt-get install -y build-essential graphviz libgraphviz-dev pkg-config libhdf5-dev && apt-get clean && rm \ | ||
-rf /var/lib/apt/lists/* | ||
|
||
WORKDIR /app | ||
|
||
# Python dependencies are installed before the source files are copied | ||
COPY requirements.txt ./ | ||
RUN pip3 install --upgrade pip | ||
RUN pip3 install -r requirements.txt | ||
|
||
# Create the working directories under /app/src (relative to WORKDIR) | ||
RUN mkdir -p src/config src/curated_data src/dataset src/graph | ||
|
||
# Only these four scripts are copied into the image's src directory | ||
COPY src/csv_parser.py ./src | ||
COPY src/pull_anndata.py ./src | ||
COPY src/generate_rdf.py ./src | ||
COPY src/process.py ./src | ||
|
||
# Default command: run the process.py entry script | ||
CMD ["python", "src/process.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
pandasaurus-cxg | ||
pandas | ||
PyYAML~=6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
### `config` | ||
Contains YAML files generated from the CSV files in the `curated_data` directory. These configurations are used to | ||
guide the download process of datasets from CxG. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
499 changes: 499 additions & 0 deletions
499
...src/curated_data/CxG author category field names - Brain_CxG_Author_Category_Filtered.csv
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
### `curated_data` | ||
This directory holds the original CSV files that are used as the starting point of the data processing pipeline. These files are read and processed to generate corresponding YAML configurations. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
### `dataset` | ||
Stores the datasets downloaded according to the instructions specified in the YAML files located in the `config` directory. These datasets are then used for further processing and analysis. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
### `graph` | ||
Contains the OWL (Web Ontology Language) files that are generated from the datasets in the `dataset` directory. These files represent the structured data in a format that is suitable for semantic web applications. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# cl_kb_pipeline | ||
|
||
[OBASK pipeline](https://github.com/OBASKTools/obask) for cl_kb_pipeline. | ||
|
||
To run the pipeline, please follow the `Run your project` steps of the [OBASK quick start guide](https://obasktools.github.io/obask/quick_start/). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
EXPORT_KB_TO_OWL=false | ||
COLLECT_BIBLIO_DATA=false |
18 changes: 18 additions & 0 deletions
18
cl_kb_pipeline/config/collectdata/sparql/delete_blocked_entities.ru
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Deletes every triple whose subject is an IRI that carries <http://n2o.neo/custom/block> with the value true. | ||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> | ||
PREFIX owl: <http://www.w3.org/2002/07/owl#> | ||
PREFIX n2o: <http://n2o.neo/property/> | ||
PREFIX n2oc: <http://n2o.neo/custom/> | ||
PREFIX dct: <http://purl.org/dc/terms/> | ||
|
||
# The block triple is listed explicitly, although it is also matched by the generic ?s ?p ?o pattern | ||
DELETE { | ||
?s <http://n2o.neo/custom/block> ?blocked . | ||
?s ?p ?o . | ||
} | ||
WHERE { | ||
?s <http://n2o.neo/custom/block> ?blocked . | ||
?s ?p ?o . | ||
# Keep only subjects flagged as blocked, and skip blank-node subjects | ||
FILTER(?blocked=true) . | ||
FILTER(isIRI(?s)) | ||
} | ||
|
||
### EDIT: this was obsoleted in the end in favour of a cypher solution, see process.sh. |
30 changes: 30 additions & 0 deletions
30
cl_kb_pipeline/config/collectdata/sparql/delete_blocked_relations.ru
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Deletes each triple whose owl:Axiom annotation node is flagged with <http://n2o.neo/custom/block> = true, | ||
# together with all triples of that annotation node. | ||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> | ||
PREFIX owl: <http://www.w3.org/2002/07/owl#> | ||
PREFIX n2o: <http://n2o.neo/property/> | ||
PREFIX n2oc: <http://n2o.neo/custom/> | ||
PREFIX dct: <http://purl.org/dc/terms/> | ||
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> | ||
|
||
|
||
||
# ?s ?p ?o is the annotated triple; ?r is the axiom node that points at it; ?bp ?bo matches every property of ?r | ||
DELETE { | ||
?s ?p ?o . | ||
?r rdf:type owl:Axiom ; | ||
owl:annotatedSource ?s ; | ||
owl:annotatedProperty ?p ; | ||
owl:annotatedTarget ?o ; | ||
<http://n2o.neo/custom/block> ?blocked; | ||
?bp ?bo; | ||
|
||
} WHERE { | ||
?s ?p ?o . | ||
?r rdf:type owl:Axiom ; | ||
owl:annotatedSource ?s ; | ||
owl:annotatedProperty ?p ; | ||
owl:annotatedTarget ?o ; | ||
<http://n2o.neo/custom/block> ?blocked; | ||
?bp ?bo; | ||
|
||
# Only axioms whose block flag is true are affected | ||
FILTER(?blocked=true) . | ||
} | ||
|
||
### EDIT: this was obsoleted in the end in favour of a cypher solution, see process.sh. |
31 changes: 31 additions & 0 deletions
31
cl_kb_pipeline/config/collectdata/sparql/delete_embargoed_channels.ru
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Deletes all triples of channels that depict images sourced from a DataSet whose production flag is false or missing. | ||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> | ||
PREFIX owl: <http://www.w3.org/2002/07/owl#> | ||
PREFIX n2o: <http://n2o.neo/property/> | ||
PREFIX n2oc: <http://n2o.neo/custom/> | ||
PREFIX dct: <http://purl.org/dc/terms/> | ||
|
||
#Delete all ds:DataSet where ds.production is False | ||
#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False | ||
# NOTE(review): the DELETE template below removes only the ?channel triples, not the dataset or image triples - confirm intent | ||
|
||
DELETE { | ||
?channel ?channelrel ?channelval . | ||
} | ||
|
||
WHERE { | ||
|
||
?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets | ||
|
||
# OPTIONAL so that datasets without a production value still match (see the !bound check in the FILTER) | ||
OPTIONAL { | ||
?dataset n2oc:production ?production . | ||
# n2oc:production is a bit brittle because IRI might be changed (risk!) | ||
} | ||
|
||
?image dct:source ?dataset . | ||
?channel <http://xmlns.com/foaf/0.1/depicts> ?image . # There does not always seem to be a channel | ||
?channel ?channelrel ?channelval . | ||
|
||
# Match datasets whose production value is false or absent, restricted to nodes labelled "DataSet" | ||
FILTER(?production=false || !bound(?production)) . | ||
FILTER(?nodelabel="DataSet") | ||
} | ||
|
||
### EDIT: this update was ultimately made obsolete in favour of a ROBOT solution, see process.sh. Using SPARQL this way consumes too much memory. |
Oops, something went wrong.