Skip to content

Commit

Permalink
Fix curie issue oio tests api (#736)
Browse files Browse the repository at this point in the history
* - Add logic for curies format in linker

* - Add preferredPrefix in rdf2json
- Change curie logic linker pass 1

* - Add preferredPrefix in rdf2json
- Change curie logic linker pass 1

* - Fix isDefinedBy curie object

* - Fix isDefinedBy curie object array

* - Fix isDefinedBy value object

* - Fix defining entity

* - Update testcases

* - Update testcases

* - Update TestCases
  • Loading branch information
haideriqbal committed Sep 5, 2024
1 parent b95620e commit 8078aad
Show file tree
Hide file tree
Showing 2,853 changed files with 39,305 additions and 27,785 deletions.
2 changes: 2 additions & 0 deletions compare_testcase_output_mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Recursively compare the generated test-case output with the expected output,
# skipping .gitkeep placeholder files, and write the brief (-q) per-file report
# to testcases_compare_result.log.
diff -r -q -a -B -w --strip-trailing-cr --exclude=.gitkeep testcases_output/testcases testcases_expected_output/ > testcases_compare_result.log
33 changes: 33 additions & 0 deletions dataload/configs/idocovid19.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"ontologies": [
{
"id": "idocovid19",
"preferredPrefix": "IDO-COVID-19",
"title": "The COVID-19 Infectious Disease Ontology",
"uri": "https://raw.githubusercontent.com/infectious-disease-ontology-extensions/ido-covid-19/master/ontology/ido%20covid-19",
"description": "The COVID-19 Infectious Disease Ontology (IDO-COVID-19) is an extension of the Infectious Disease Ontology (IDO) and the Virus Infectious Disease Ontology (VIDO). IDO-COVID-19 follows OBO Foundry guidelines, employs the Basic Formal Ontology as its starting point, and covers epidemiology, classification, pathogenesis, and treatment of terms used to represent infection by the SARS-CoV-2 virus strain, and the associated COVID-19 disease.",
"homepage": "https://github.com/infectious-disease-ontology-extensions/ido-covid-19",
"mailing_list": "[email protected]",
"definition_property": [
"http://purl.obolibrary.org/obo/IAO_0000115"
],
"synonym_property": [
"http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"
],
"hierarchical_property": [
"http://purl.obolibrary.org/obo/BFO_0000050"
],
"base_uri": [
"http://purl.obolibrary.org/obo/IDO-COVID-19"
],
"oboSlims": false,
"reasoner": "OWL2",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/9b5245af626bd7687831c19c2c8076e8/raw/2c75495f31df0a379062bf12d3fab323eedbb7a9/idocovid19.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
53 changes: 40 additions & 13 deletions dataload/linker/src/main/java/LinkerPass1.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.SetMultimap;
import com.google.common.io.CountingInputStream;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.*;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;

Expand Down Expand Up @@ -168,15 +163,31 @@ public static LinkerPass1Result run(String inputJsonFilename) throws IOException
for(var entry : result.iriToDefinitions.entrySet()) {

EntityDefinitionSet definitions = entry.getValue();

// definingOntologyIris -> definingOntologyIds
for(String ontologyIri : definitions.definingOntologyIris) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
if (result.ontologyIriToOntologyIds.containsKey(ontologyIri)) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
}
}
}

for(EntityDefinition def : definitions.definitions) {
if(def.curie != null && entry.getValue().definingOntologyIds.iterator().hasNext()) {
JsonObject curieObject = def.curie.getAsJsonObject();
if(curieObject.has("value")) {
String curieValue = curieObject.get("value").getAsString();
if(!curieValue.contains(":")) {
var definingOntologyId = entry.getValue().definingOntologyIds.iterator().next();
EntityDefinition definingEntity = entry.getValue().ontologyIdToDefinitions.get(definingOntologyId);
if (definingEntity != null && definingEntity.curie != null) {
curieValue = definingEntity.curie.getAsJsonObject().get("value").getAsString();
curieObject.addProperty("value", curieValue);
result.iriToDefinitions.put(entry.getKey(), definitions);
}
}
}
}
if(definitions.definingOntologyIds.contains(def.ontologyId)) {
def.isDefiningOntology = true;
}
Expand Down Expand Up @@ -235,14 +246,30 @@ public static void parseEntity(JsonReader jsonReader, String entityType, String
curie = jsonParser.parse(jsonReader);
} else if(key.equals("type")) {
types = gson.fromJson(jsonReader, Set.class);
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#definedBy")) {
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#isDefinedBy")) {
JsonElement jsonDefinedBy = jsonParser.parse(jsonReader);
if(jsonDefinedBy.isJsonArray()) {
JsonArray arr = jsonDefinedBy.getAsJsonArray();
for(JsonElement el : arr) {
definedBy.add( el.getAsString() );
for(JsonElement isDefinedBy : arr) {
if (isDefinedBy.isJsonObject()) {
JsonObject obj = isDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
} else
definedBy.add( isDefinedBy.getAsString() );
}
} else {
} else if (jsonDefinedBy.isJsonObject()) {
JsonObject obj = jsonDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
}
else {
definedBy.add(jsonDefinedBy.getAsString());
}
} else {
Expand Down
88 changes: 87 additions & 1 deletion dataload/linker/src/main/java/LinkerPass2.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
Expand Down Expand Up @@ -151,6 +153,10 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
if(name.equals("iri")) {
entityIri = jsonReader.nextString();
jsonWriter.value(entityIri);
} else if (name.equalsIgnoreCase("curie")) {
processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else if (name.equalsIgnoreCase("shortForm")) {
processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else {
CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
}
Expand Down Expand Up @@ -320,7 +326,7 @@ private static void writeLinkedEntitiesFromGatheredStrings(JsonWriter jsonWriter
private static void writeIriMapping(JsonWriter jsonWriter, EntityDefinitionSet definitions, String ontologyId) throws IOException {

if(definitions.definingDefinitions.size() > 0) {

// There are ontologies which canonically define this term

jsonWriter.name("definedBy");
Expand Down Expand Up @@ -436,4 +442,84 @@ private static class CurieMapResult {
public String source;
}

/**
 * Copies a {@code shortForm} JSON object from {@code jsonReader} to {@code jsonWriter},
 * replacing its {@code value} with the entity's processed curie value in which every
 * {@code ":"} is replaced by {@code "_"}.
 *
 * <p>The output object contains {@code type} first and then {@code value}, regardless of
 * their order in the input. A field that is absent from the input is omitted from the
 * output, and any field other than {@code type}/{@code value} is skipped.
 *
 * @param jsonReader  reader positioned at the start of the short form object
 * @param jsonWriter  writer that receives the rewritten object
 * @param pass1Result pass 1 result used to look up the entity's curie
 * @param entityIri   IRI of the entity being written
 *                    (NOTE(review): unset if "shortForm" precedes "iri" in the input; confirm field order)
 * @throws IOException if reading or writing the JSON stream fails
 */
private static void processShortFormObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String shortFormValue = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String shortFormFieldName = jsonReader.nextName();
        if (shortFormFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (shortFormFieldName.equals("value")) {
            // The original value is consumed only to advance the reader; it is replaced below.
            jsonReader.nextString();
            shortFormValue = getProcessedCurieValue(pass1Result, entityIri).replace(":", "_");
        } else {
            // Every name must be followed by consuming its value, otherwise the next
            // nextName() call fails on the unread value token.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified short form object
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (shortFormValue != null) {
        jsonWriter.name("value").value(shortFormValue);
    }
    jsonWriter.endObject();
}

/**
 * Copies a {@code curie} JSON object from {@code jsonReader} to {@code jsonWriter},
 * replacing its {@code value} with the entity's processed curie value.
 *
 * <p>The output object contains {@code type} first and then {@code value}, regardless of
 * their order in the input. A field that is absent from the input is omitted from the
 * output, and any field other than {@code type}/{@code value} is skipped.
 *
 * @param jsonReader  reader positioned at the start of the curie object
 * @param jsonWriter  writer that receives the rewritten object
 * @param pass1Result pass 1 result used to look up the entity's curie
 * @param entityIri   IRI of the entity being written
 *                    (NOTE(review): unset if "curie" precedes "iri" in the input; confirm field order)
 * @throws IOException if reading or writing the JSON stream fails
 */
private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String curieValue = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String curieFieldName = jsonReader.nextName();
        if (curieFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (curieFieldName.equals("value")) {
            // The original value is consumed only to advance the reader; it is replaced below.
            jsonReader.nextString();
            curieValue = getProcessedCurieValue(pass1Result, entityIri);
        } else {
            // Every name must be followed by consuming its value, otherwise the next
            // nextName() call fails on the unread value token.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified curie object
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (curieValue != null) {
        jsonWriter.name("value").value(curieValue);
    }
    jsonWriter.endObject();
}

/**
 * Returns the curie {@code value} of the first definition (in iteration order) recorded
 * in {@code pass1Result.iriToDefinitions} for the given entity IRI.
 *
 * @param pass1Result pass 1 result holding the IRI-to-definitions map
 * @param entityIri   IRI of the entity to look up; may be {@code null}
 * @return the curie value, or an empty string when the IRI is {@code null} or unknown,
 *         there are no definitions, the first definition has no curie object, or that
 *         object has no {@code value} member
 */
private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) {
    if (entityIri == null) {
        return "";
    }
    var def = pass1Result.iriToDefinitions.get(entityIri);
    if (def == null) {
        return "";
    }
    // Use a single iterator so hasNext()/next() refer to the same traversal.
    var definitionIterator = def.definitions.iterator();
    if (!definitionIterator.hasNext()) {
        return "";
    }
    // A definition's curie can be null (pass 1 checks for this), so guard before use.
    var curie = definitionIterator.next().curie;
    if (curie == null || !curie.isJsonObject()) {
        return "";
    }
    JsonObject defCurieObject = curie.getAsJsonObject();
    if (defCurieObject.has("value")) {
        return defCurieObject.get("value").getAsString();
    }
    return "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public static void annotateShortForms(OntologyGraph graph) {
if(c.uri == null)
continue;

if (preferredPrefix == null || preferredPrefix.isEmpty()) {
preferredPrefix = graph.config.get("id").toString().toUpperCase();
}

String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri);
String curie = shortForm.replaceFirst("_", ":");
Expand Down
69 changes: 69 additions & 0 deletions dev-testing/teststack-mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash

# Both arguments are required: without <rel_output_dir> the script would go on
# to clean the databases and write output relative to an empty path.
if [ $# -lt 2 ]; then
    echo "Usage: $0 <rel_json_config_url> <rel_output_dir>"
    echo "If <rel_json_config_url> is a file it will read and load this single configuration."
    echo "If <rel_json_config_url> as a directory, it will read and load all json configuration in the directory and
subdirectories."
    exit 1
fi

config_url=$1
out_dir=$2

# Create or clean output directory
if [ -d "$out_dir" ]; then
    echo "$out_dir already exists and will now be cleaned."
    # ${out_dir:?} aborts instead of expanding to "/*" should the variable ever be empty.
    rm -Rf "${out_dir:?}"/*
else
    echo "$out_dir does not exist and will now be created."
    # Stop here if the directory cannot be created, before anything else is touched.
    mkdir -p "$out_dir" || exit 1
fi

# Loads one configuration file, or every *.json file in a directory tree.
#   $1  path to a JSON config file or to a directory of config files
#   $2  output directory; a sub-directory named after $1 is created inside it
# For a directory, each *.json file and then each sub-directory is processed
# recursively. For a file, create_datafiles.sh and load_test_into_solr.sh are
# run against the file's own output directory.
function process_config {
    echo "process_config param1=$1"
    echo "process_config param2=$2"

    local config_url=$1
    local out_dir=$2


    if [ -d "$config_url" ]; then
        echo "$config_url is a directory. Processing config files in $config_url"
        local basename
        basename=$(basename "$config_url")
        echo "basename for config_url=$basename"
        local out_dir_basename=$out_dir/$basename
        mkdir "$out_dir_basename"
        for filename in "$config_url"/*.json; do
            # An unmatched glob is left as the literal pattern; skip it.
            [ -e "$filename" ] || continue
            echo "filename=$filename"
            process_config "$filename" "$out_dir_basename"
        done
        for dir in "$config_url"/*/; do
            # Same for directories without sub-directories.
            [ -d "$dir" ] || continue
            process_config "$dir" "$out_dir_basename"
        done
    elif [ -f "$config_url" ]; then
        echo "$config_url is a file. Processing single config file."
        local basename
        basename=$(basename "$config_url" .json)

        local relative_out_dir=$out_dir/$basename
        mkdir "$relative_out_dir"

        local absolute_out_dir
        absolute_out_dir=$(realpath -q "$relative_out_dir")
        echo "absolute_out_dir=$absolute_out_dir"

        "$OLS4_HOME"/dataload/create_datafiles.sh "$config_url" "$absolute_out_dir" --noDates

        "$OLS4_HOME"/dev-testing/load_test_into_solr.sh "$absolute_out_dir"
    else
        echo "$config_url does not exist."
    fi
}

# Run the clean-up helpers for Neo4j and Solr, then the Solr start helper,
# before any configuration is processed.
"$OLS4_HOME"/dev-testing/clean-neo4j.sh
"$OLS4_HOME"/dev-testing/clean-solr.sh
"$OLS4_HOME"/dev-testing/start-solr.sh

# Quoted so that paths containing spaces reach process_config as single arguments.
process_config "$config_url" "$out_dir"

# Run the Neo4j load helper on the output directory, then the Neo4j start helper.
"$OLS4_HOME"/dev-testing/load_test_into_neo4j.sh "$out_dir"
"$OLS4_HOME"/dev-testing/start-neo4j.sh
5 changes: 5 additions & 0 deletions testcases/annotation-properties/gitIssue502.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
"id": "gitIssue502",
"preferredPrefix": "gitIssue502",
"ontology_purl": "./testcases/annotation-properties/gitIssue502.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -715,3 +715,32 @@
{"ontologyId":"gitissue502","id":"gitissue502+property+http://www.w3.org/2000/01/rdf-schema#label","label":"label"}
{"ontologyId":"gitissue502","id":"gitissue502+property+http://www.w3.org/2004/02/skos/core#closeMatch","label":"closeMatch"}
{"ontologyId":"gitissue502","id":"gitissue502+property+http://www.w3.org/2004/02/skos/core#exactMatch","label":"exactMatch"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#DbXref","label":"database_cross_reference"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#Definition","label":"definition"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#ObsoleteClass","label":"obsolete_class"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#Subset","label":"subset"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#Synonym","label":"synonym"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#SynonymType","label":"synonym_type"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#ObsoleteProperty","label":"obsolete_property"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#SubsetProperty","label":"subset_property"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#SynonymTypeProperty","label":"synonym_type_property"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#consider","label":"consider"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasAlternativeId","label":"has_alternative_id"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym","label":"has_broad_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDate","label":"has_date"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDbXref","label":"has_dbxref"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDefaultNamespace","label":"has_default_namespace"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDefinition","label":"has_definition"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasExactSynonym","label":"has_exact_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym","label":"has_narrow_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasOBONamespace","label":"has_obo_namespace"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym","label":"has_related_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasSubset","label":"has_subset"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasSynonym","label":"has_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasSynonymType","label":"has_synonym_type"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasURI","label":"has_URI"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasVersion","label":"has_version"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#inSubset","label":"in_subset"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#isCyclic","label":"is_cyclic"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#replacedBy","label":"replaced_by"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#savedBy","label":"saved_by"}
Loading

0 comments on commit 8078aad

Please sign in to comment.