diff --git a/erdiagram-autogen/genome_annotation.md b/erdiagram-autogen/genome_annotation.md new file mode 100644 index 00000000..f94fbb91 --- /dev/null +++ b/erdiagram-autogen/genome_annotation.md @@ -0,0 +1,85 @@ +```mermaid +erDiagram +AnnotationCollection { + +} +GenomeAssembly { + string version + string strain + label_type in_taxon_label + string id + iri_type iri + uriorcurieList category + stringList type + label_type name + narrative_text description + boolean deprecated + stringList provided_by + uriorcurieList xref + label_type full_name + label_typeList synonym +} +GenomeAnnotation { + string version + stringList digest + stringList content_url + AuthorityType authority + string reference_assembly + string id + iri_type iri + uriorcurieList category + stringList type + label_type name + narrative_text description + boolean deprecated + stringList provided_by + uriorcurieList xref + label_type full_name + label_typeList synonym + label_type in_taxon_label + biological_sequence has_biological_sequence +} +GeneAnnotation { + string molecular_type + string source_id + string referenced_in + string id + iri_type iri + uriorcurieList category + stringList type + label_type name + narrative_text description + boolean deprecated + stringList provided_by + label_type full_name + label_typeList synonym + label_type in_taxon_label + string symbol + uriorcurieList xref + biological_sequence has_biological_sequence +} +Checksum { + DigestType checksum_algorithm + string value + string id + iri_type iri + uriorcurieList category + stringList type + label_type name + narrative_text description + boolean deprecated +} + +AnnotationCollection ||--}o GeneAnnotation : "annotations" +AnnotationCollection ||--}o GenomeAnnotation : "genome_annotations" +AnnotationCollection ||--}o GenomeAssembly : "genome_assemblies" +GenomeAssembly ||--}o OrganismTaxon : "in taxon" +GenomeAssembly ||--}o Attribute : "has attribute" +GenomeAnnotation ||--}o Attribute : "has attribute" 
+GenomeAnnotation ||--}o OrganismTaxon : "in taxon" +GeneAnnotation ||--}o Attribute : "has attribute" +GeneAnnotation ||--}o OrganismTaxon : "in taxon" +Checksum ||--}o Attribute : "has attribute" + +``` + diff --git a/json-schema-autogen/genome_annotation.json b/json-schema-autogen/genome_annotation.json new file mode 100644 index 00000000..1a3e79cb --- /dev/null +++ b/json-schema-autogen/genome_annotation.json @@ -0,0 +1,1394 @@ +{ + "$defs": { + "Activity": { + "additionalProperties": false, + "description": "An activity is something that occurs over a period of time and acts upon or with entities; it may include consuming, processing, transforming, modifying, relocating, using, or generating entities.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:Activity" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "Activity", + "type": "object" + }, + "AnnotationCollection": { + "additionalProperties": false, + "description": "", + "properties": { + "annotations": { + "items": { + "$ref": "#/$defs/GeneAnnotation" + }, + "type": "array" + }, + "genome_annotations": { + "items": { + "$ref": "#/$defs/GenomeAnnotation" + }, + "type": "array" + }, + "genome_assemblies": { + "items": { + "$ref": "#/$defs/GenomeAssembly" + }, + "type": "array" + } + }, + "title": "AnnotationCollection", + "type": "object" + }, + "Attribute": { + "additionalProperties": false, + "description": "A property or characteristic of an entity. For example, an apple may have properties such as color, shape, age, crispiness. An environmental sample may have attributes such as depth, lat, long, material.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:Attribute" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_attribute_type": { + "description": "connects an attribute to a class that describes it", + "type": "string" + }, + "has_qualitative_value": { + "description": "connects an attribute to a value", + "type": "string" + }, + "has_quantitative_value": { + "description": "connects an attribute to a value", + "items": { + "$ref": "#/$defs/QuantityValue" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "The human-readable 'attribute name' can be set to a string which reflects its context of interpretation, e.g. SEPIO evidence/provenance/confidence annotation or it can default to the name associated with the 'has attribute type' slot ontology term.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id", + "has_attribute_type" + ], + "title": "Attribute", + "type": "object" + }, + "AuthorityType": { + "description": "", + "enum": [ + "ENSEMBL", + "NCBI" + ], + "title": "AuthorityType", + "type": "string" + }, + "BioType": { + "description": "", + "enum": [ + "protein_coding", + "noncoding" + ], + "title": "BioType", + "type": "string" + }, + "Checksum": { + "additionalProperties": false, + "description": "Checksum values associated with digital entities.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. 
This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "bican:Checksum" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "checksum_algorithm": { + "$ref": "#/$defs/DigestType", + "description": "The type of cryptographic hash function used to calculate the checksum value." + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "value": { + "description": "The checksum value obtained from a specific cryptographic hash function.", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Checksum", + "type": "object" + }, + "Dataset": { + "additionalProperties": false, + "description": "an item that refers to a collection of data from a data source.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. 
In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:Dataset" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "creation_date": { + "description": "date on which an entity was created. This can be applied to nodes or edges", + "format": "date", + "type": "string" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "format": { + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. 
This is determined by the id using expansion rules.", + "type": "string" + }, + "license": { + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "rights": { + "type": "string" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "Dataset", + "type": "object" + }, + "DigestType": { + "description": "", + "enum": [ + "spdx:checksumAlgorithm_sha1", + "spdx:checksumAlgorithm_md5", + "spdx:checksumAlgorithm_sha256" + ], + "title": "DigestType", + "type": "string" + }, + "Gene": { + "additionalProperties": false, + "description": "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene locus may include regulatory regions, transcribed regions and/or other functional sequence regions.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. 
In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:Gene" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_biological_sequence": { + "description": "connects a genomic feature to its sequence", + "type": "string" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "in_taxon": { + "description": "connects an entity to its taxonomic classification. 
Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'", + "items": { + "type": "string" + }, + "type": "array" + }, + "in_taxon_label": { + "description": "The human readable scientific name for the taxon of the entity.", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "symbol": { + "description": "Symbol for a particular thing", + "type": "string" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "Gene", + "type": "object" + }, + "GeneAnnotation": { + "additionalProperties": false, + "description": "An annotation describing the location, boundaries, and functions of individual genes within a genome annotation.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. 
Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "bican:GeneAnnotation" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_biological_sequence": { + "description": "connects a genomic feature to its sequence", + "type": "string" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "in_taxon": { + "description": "connects an entity to its taxonomic classification. 
Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'", + "items": { + "type": "string" + }, + "type": "array" + }, + "in_taxon_label": { + "description": "The human readable scientific name for the taxon of the entity.", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "molecular_type": { + "anyOf": [ + { + "$ref": "#/$defs/BioType" + }, + { + "type": "string" + } + ], + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "referenced_in": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ], + "description": "The genome annotation that this gene annotation was referenced from.", + "type": "string" + }, + "source_id": { + "description": "The authority specific identifier.", + "type": "string" + }, + "symbol": { + "description": "Symbol for a particular thing", + "type": "string" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "referenced_in", + "id" + ], + "title": "GeneAnnotation", + "type": "object" + }, + "Genome": { + "additionalProperties": false, + "description": "A genome is the sum of genetic material within a cell or virion.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:Genome" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_biological_sequence": { + "description": "connects a genomic feature to its sequence", + "type": "string" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "in_taxon": { + "description": "connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'", + "items": { + "type": "string" + }, + "type": "array" + }, + "in_taxon_label": { + "description": "The human readable scientific name for the taxon of the entity.", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "Genome", + "type": "object" + }, + "GenomeAnnotation": { + "additionalProperties": false, + "description": "Location and nomenclature of genes and all of the coding regions in a genome assembly and the classification of genes and transcripts into types.", + "properties": { + "authority": { + "$ref": "#/$defs/AuthorityType", + "description": "The organization responsible for publishing the data." + }, + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. 
For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "bican:GenomeAnnotation" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "content_url": { + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "digest": { + "description": "Stores checksum information.", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ], + "type": "string" + }, + "type": "array" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_biological_sequence": { + "description": "connects a genomic feature to its sequence", + "type": "string" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "in_taxon": { + "description": "connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'", + "items": { + "type": "string" + }, + "type": "array" + }, + "in_taxon_label": { + "description": "The human readable scientific name for the taxon of the entity.", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. 
This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "reference_assembly": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ], + "description": "The reference genome assembly that this genome annotation was created from.", + "type": "string" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "version": { + "type": "string" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "reference_assembly", + "id" + ], + "title": "GenomeAnnotation", + "type": "object" + }, + "GenomeAssembly": { + "additionalProperties": false, + "description": "Genome assembly to contain version and label information", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. 
It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "bican:GenomeAssembly" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "in_taxon": { + "description": "connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'", + "items": { + "type": "string" + }, + "type": "array" + }, + "in_taxon_label": { + "description": "The human readable scientific name for the taxon of the entity.", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. 
This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "strain": { + "description": "The genetic variant or subtype of a species or organism.", + "type": "string" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "version": { + "type": "string" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "GenomeAssembly", + "type": "object" + }, + "MaterialSample": { + "additionalProperties": false, + "description": "A sample is a limited quantity of something (e.g. an individual or set of individuals from a population, or a portion of a substance) to be used for testing, analysis, inspection, investigation, demonstration, or trial use. [SIO]", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. 
In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:MaterialSample" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "MaterialSample", + "type": "object" + }, + "NamedThing": { + "additionalProperties": false, + "description": "a databased entity or concept/class", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:NamedThing" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "NamedThing", + "type": "object" + }, + "OrganismTaxon": { + "additionalProperties": false, + "description": "A classification of a set of organisms. Example instances: NCBITaxon:9606 (Homo sapiens), NCBITaxon:2 (Bacteria). Can also be used to represent strains or subspecies.", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:OrganismTaxon" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "has_taxonomic_rank": { + "type": "string" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "OrganismTaxon", + "type": "object" + }, + "PhysicalEntity": { + "additionalProperties": false, + "description": "An entity that has material reality (a.k.a. physical essence).", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:PhysicalEntity" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "PhysicalEntity", + "type": "object" + }, + "Procedure": { + "additionalProperties": false, + "description": "A series of actions conducted in a certain order or manner", + "properties": { + "category": { + "description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}", + "enum": [ + "biolink:Procedure" + ], + "items": { + "type": "string" + }, + "type": "array" + }, + "deprecated": { + "description": "A boolean flag indicating that an entity is no longer considered current or valid.", + "type": "boolean" + }, + "description": { + "description": "a human-readable description of an entity", + "type": "string" + }, + "full_name": { + "description": "a long-form human readable name for a thing", + "type": "string" + }, + "has_attribute": { + "description": "connects any entity to an attribute", + "items": { + "type": "string" + }, + "type": "array" + }, + "id": { + "description": "A unique identifier for an entity. 
Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + }, + "iri": { + "description": "An IRI for an entity. This is determined by the id using expansion rules.", + "type": "string" + }, + "name": { + "description": "A human-readable name for an attribute or entity.", + "type": "string" + }, + "provided_by": { + "description": "The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.", + "items": { + "type": "string" + }, + "type": "array" + }, + "synonym": { + "description": "Alternate human-readable names for a thing", + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "items": { + "type": "string" + }, + "type": "array" + }, + "xref": { + "description": "A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "id" + ], + "title": "Procedure", + "type": "object" + }, + "QuantityValue": { + "additionalProperties": false, + "description": "A value of an attribute that is quantitative and measurable, expressed as a combination of a unit and a numeric value", + "properties": { + "has_numeric_value": { + "description": "connects a quantity value to a number", + "type": "number" + }, + "has_unit": { + "description": "connects a quantity value to a unit", + "type": "string" + } + }, + "title": "QuantityValue", + "type": "object" + }, + "TaxonomicRank": { + "additionalProperties": false, + "description": "A descriptor for the rank within a taxonomic classification. 
Example instance: TAXRANK:0000017 (kingdom)", + "properties": { + "id": { + "description": "A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "TaxonomicRank", + "type": "object" + } + }, + "$id": "https://identifiers.org/brain-bican/genome-annotation-schema", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "additionalProperties": true, + "description": "", + "metamodel_version": "1.7.0", + "properties": { + "annotations": { + "items": { + "$ref": "#/$defs/GeneAnnotation" + }, + "type": "array" + }, + "genome_annotations": { + "items": { + "$ref": "#/$defs/GenomeAnnotation" + }, + "type": "array" + }, + "genome_assemblies": { + "items": { + "$ref": "#/$defs/GenomeAssembly" + }, + "type": "array" + } + }, + "title": "genome-annotation-schema", + "type": "object", + "version": null +} diff --git a/jsonld-context-autogen/genome_annotation.context.jsonld b/jsonld-context-autogen/genome_annotation.context.jsonld new file mode 100644 index 00000000..2ce81166 --- /dev/null +++ b/jsonld-context-autogen/genome_annotation.context.jsonld @@ -0,0 +1,456 @@ +{ + "comments": { + "description": "Auto generated by LinkML jsonld context generator", + "source": "genome_annotation.yaml" + }, + "@context": { + "AGRKB": "https://www.alliancegenome.org/", + "AspGD": { + "@id": "http://www.aspergillusgenome.org/cgi-bin/locus.pl?dbid=", + "@prefix": true + }, + "BFO": { + "@id": "http://purl.obolibrary.org/obo/BFO_", + "@prefix": true + }, + "BIOGRID": "http://identifiers.org/biogrid/", + "BIOSAMPLE": "http://identifiers.org/biosample/", + "BTO": { + "@id": "http://purl.obolibrary.org/obo/BTO_", + "@prefix": true + }, + "CHEBI": { + "@id": "http://purl.obolibrary.org/obo/CHEBI_", + "@prefix": true + }, + "CHEMBL.TARGET": "http://identifiers.org/chembl.target/", + "CPT": "https://www.ama-assn.org/practice-management/cpt/", + "DRUGBANK": 
"http://identifiers.org/drugbank/", + "EDAM-DATA": { + "@id": "http://edamontology.org/data_", + "@prefix": true + }, + "EDAM-FORMAT": { + "@id": "http://edamontology.org/format_", + "@prefix": true + }, + "EDAM-OPERATION": { + "@id": "http://edamontology.org/operation_", + "@prefix": true + }, + "EDAM-TOPIC": { + "@id": "http://edamontology.org/topic_", + "@prefix": true + }, + "EFO": { + "@id": "http://www.ebi.ac.uk/efo/EFO_", + "@prefix": true + }, + "ENSEMBL": "http://identifiers.org/ensembl/", + "FB": "http://identifiers.org/fb/", + "FMA": { + "@id": "http://purl.obolibrary.org/obo/FMA_", + "@prefix": true + }, + "GENO": { + "@id": "http://purl.obolibrary.org/obo/GENO_", + "@prefix": true + }, + "GOLD.META": "http://identifiers.org/gold.meta/", + "GOREL": { + "@id": "http://purl.obolibrary.org/obo/GOREL_", + "@prefix": true + }, + "HANCESTRO": { + "@id": "http://www.ebi.ac.uk/ancestro/ancestro_", + "@prefix": true + }, + "HCPCS": "http://purl.bioontology.org/ontology/HCPCS/", + "HGNC": "http://identifiers.org/hgnc/", + "HMDB": "http://identifiers.org/hmdb/", + "IAO": { + "@id": "http://purl.obolibrary.org/obo/IAO_", + "@prefix": true + }, + "INO": { + "@id": "http://purl.obolibrary.org/obo/INO_", + "@prefix": true + }, + "IUPHAR.FAMILY": "http://identifiers.org/iuphar.family/", + "KEGG.BRITE": "https://bioregistry.io/kegg.brite:", + "KEGG.GENES": "https://bioregistry.io/kegg.genes:bsu:", + "LOINC": "http://loinc.org/rdf/", + "MESH": "http://id.nlm.nih.gov/mesh/", + "MGI": "http://identifiers.org/mgi/", + "MONDO": { + "@id": "http://purl.obolibrary.org/obo/MONDO_", + "@prefix": true + }, + "NBO-PROPERTY": "http://purl.obolibrary.org/obo/nbo#", + "NCBIAssembly": "https://www.ncbi.nlm.nih.gov/assembly/", + "NCBIGene": "http://identifiers.org/ncbigene/", + "NCBITaxon": { + "@id": "http://purl.obolibrary.org/obo/NCBITaxon_", + "@prefix": true + }, + "NCIT": { + "@id": "http://purl.obolibrary.org/obo/NCIT_", + "@prefix": true + }, + "OBAN": "http://purl.org/oban/", 
+ "OBI": { + "@id": "http://purl.obolibrary.org/obo/OBI_", + "@prefix": true + }, + "OMIM": { + "@id": "http://purl.obolibrary.org/obo/OMIM_", + "@prefix": true + }, + "PATO": { + "@id": "http://purl.obolibrary.org/obo/PATO_", + "@prefix": true + }, + "PHARMGKB.GENE": "https://www.pharmgkb.org/gene/", + "PomBase": "https://www.pombase.org/gene/", + "RGD": "http://identifiers.org/rgd/", + "RO": { + "@id": "http://purl.obolibrary.org/obo/RO_", + "@prefix": true + }, + "RXNORM": "http://purl.bioontology.org/ontology/RXNORM/", + "SEMMEDDB": { + "@id": "https://skr3.nlm.nih.gov/SemMedDB", + "@prefix": true + }, + "SGD": "http://identifiers.org/sgd/", + "SIO": { + "@id": "http://semanticscience.org/resource/SIO_", + "@prefix": true + }, + "SNOMED": { + "@id": "http://purl.obolibrary.org/obo/SNOMED_", + "@prefix": true + }, + "SO": { + "@id": "http://purl.obolibrary.org/obo/SO_", + "@prefix": true + }, + "STY": "http://purl.bioontology.org/ontology/STY/", + "TAXRANK": { + "@id": "http://purl.obolibrary.org/obo/TAXRANK_", + "@prefix": true + }, + "UBERON": { + "@id": "http://purl.obolibrary.org/obo/UBERON_", + "@prefix": true + }, + "UBERON_CORE": "http://purl.obolibrary.org/obo/uberon/core#", + "UBERON_NONAMESPACE": "http://purl.obolibrary.org/obo/core#", + "UMLS": "http://identifiers.org/umls/", + "UMLSSG": { + "@id": "https://lhncbc.nlm.nih.gov/semanticnetwork/download/sg_archive/SemGroups-v04.txt", + "@prefix": true + }, + "UO-PROPERTY": "http://purl.obolibrary.org/obo/uo#", + "WB": "http://identifiers.org/wb/", + "WIKIDATA": "https://www.wikidata.org/entity/", + "WIKIDATA_PROPERTY": "https://www.wikidata.org/prop/", + "WormBase": { + "@id": "https://www.wormbase.org/get?name=", + "@prefix": true + }, + "Xenbase": { + "@id": "http://www.xenbase.org/gene/showgene.do?method=display&geneId=", + "@prefix": true + }, + "ZFIN": "http://identifiers.org/zfin/", + "bican": "https://identifiers.org/brain-bican/vocab/", + "biolink": "https://w3id.org/biolink/vocab/", + 
"bioschemas": "https://bioschemas.org/", + "dcid": "https://datacommons.org/browser/", + "dct": "http://purl.org/dc/terms/", + "dctypes": "http://purl.org/dc/dcmitype/", + "dictyBase": "http://dictybase.org/gene/", + "doi": "https://doi.org/", + "gff3": "https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#", + "gpi": "https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#", + "linkml": "https://w3id.org/linkml/", + "ncbi": { + "@id": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=", + "@prefix": true + }, + "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", + "orphanet": { + "@id": "http://www.orpha.net/ORDO/Orphanet_", + "@prefix": true + }, + "owl": "http://www.w3.org/2002/07/owl#", + "pav": "http://purl.org/pav/", + "prov": "http://www.w3.org/ns/prov#", + "qud": "http://qudt.org/1.1/schema/qudt#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "http://schema.org/", + "skos": "http://www.w3.org/2004/02/skos/core#", + "spdx": "http://spdx.org/rdf/terms#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "@vocab": "https://identifiers.org/brain-bican/vocab/", + "annotations": { + "@type": "@id", + "@id": "annotations" + }, + "genome_annotations": { + "@type": "@id", + "@id": "genome_annotations" + }, + "genome_assemblies": { + "@type": "@id", + "@id": "genome_assemblies" + }, + "authority": { + "@context": { + "@vocab": "@null", + "text": "skos:notation", + "description": "skos:prefLabel", + "meaning": "@id" + }, + "@id": "authority" + }, + "category": { + "@type": "@id", + "@id": "biolink:category" + }, + "checksum_algorithm": { + "@context": { + "@vocab": "@null", + "text": "skos:notation", + "description": "skos:prefLabel", + "meaning": "@id" + }, + "@id": "checksum_algorithm" + }, + "value": { + "@id": "value" + }, + "content_url": { + "@id": "schema:url" + }, + "creation_date": { + "@type": "xsd:date", + 
"@id": "biolink:creation_date" + }, + "deprecated": { + "@type": "xsd:boolean", + "@id": "biolink:deprecated" + }, + "description": { + "@id": "dct:description" + }, + "digest": { + "@type": "@id", + "@id": "digest" + }, + "format": { + "@id": "biolink:format" + }, + "full_name": { + "@id": "biolink:full_name" + }, + "referenced_in": { + "@type": "@id", + "@id": "referenced_in" + }, + "reference_assembly": { + "@type": "@id", + "@id": "reference_assembly" + }, + "has_attribute": { + "@type": "@id", + "@id": "biolink:has_attribute" + }, + "has_attribute_type": { + "@type": "@id", + "@id": "biolink:has_attribute_type" + }, + "has_biological_sequence": { + "@id": "biolink:has_biological_sequence" + }, + "has_numeric_value": { + "@type": "xsd:double", + "@id": "biolink:has_numeric_value" + }, + "has_qualitative_value": { + "@type": "@id", + "@id": "biolink:has_qualitative_value" + }, + "has_quantitative_value": { + "@type": "@id", + "@id": "biolink:has_quantitative_value" + }, + "has_taxonomic_rank": { + "@type": "@id", + "@id": "biolink:has_taxonomic_rank" + }, + "has_unit": { + "@type": "UO:0000000", + "@id": "biolink:has_unit" + }, + "id": "@id", + "in_taxon": { + "@type": "@id", + "@id": "biolink:in_taxon" + }, + "in_taxon_label": { + "@id": "biolink:in_taxon_label" + }, + "iri": { + "@type": "@id", + "@id": "biolink:iri" + }, + "license": { + "@id": "biolink:license" + }, + "molecular_type": { + "@id": "molecular_type" + }, + "name": { + "@id": "rdfs:label" + }, + "node_property": { + "@id": "biolink:node_property" + }, + "provided_by": { + "@id": "biolink:provided_by" + }, + "related_to": { + "@type": "@id", + "@id": "biolink:related_to" + }, + "related_to_at_instance_level": { + "@type": "@id", + "@id": "biolink:related_to_at_instance_level" + }, + "rights": { + "@id": "biolink:rights" + }, + "source_id": { + "@id": "schema:identifier" + }, + "strain": { + "@id": "strain" + }, + "symbol": { + "@id": "biolink:symbol" + }, + "synonym": { + "@id": "biolink:synonym" 
+ }, + "type": { + "@id": "rdf:type" + }, + "version": { + "@id": "biolink:version" + }, + "xref": { + "@type": "@id", + "@id": "biolink:xref" + }, + "Activity": { + "@id": "biolink:Activity" + }, + "ActivityAndBehavior": { + "@id": "biolink:ActivityAndBehavior" + }, + "Annotation": { + "@id": "biolink:Annotation" + }, + "AnnotationCollection": { + "@id": "AnnotationCollection" + }, + "Attribute": { + "@id": "biolink:Attribute" + }, + "BiologicalEntity": { + "@id": "biolink:BiologicalEntity" + }, + "Checksum": { + "@id": "Checksum" + }, + "ChemicalEntityOrGeneOrGeneProduct": { + "@id": "biolink:ChemicalEntityOrGeneOrGeneProduct" + }, + "Dataset": { + "@id": "biolink:Dataset" + }, + "Entity": { + "@id": "biolink:Entity" + }, + "Gene": { + "@id": "biolink:Gene" + }, + "GeneAnnotation": { + "@id": "GeneAnnotation" + }, + "GeneOrGeneProduct": { + "@id": "biolink:GeneOrGeneProduct" + }, + "Genome": { + "@id": "biolink:Genome" + }, + "GenomeAnnotation": { + "@id": "GenomeAnnotation" + }, + "GenomeAssembly": { + "@id": "GenomeAssembly" + }, + "GenomicEntity": { + "@id": "biolink:GenomicEntity" + }, + "InformationContentEntity": { + "@id": "biolink:InformationContentEntity" + }, + "MacromolecularMachineMixin": { + "@id": "biolink:MacromolecularMachineMixin" + }, + "MaterialSample": { + "@id": "biolink:MaterialSample" + }, + "NamedThing": { + "@id": "biolink:NamedThing" + }, + "Occurrent": { + "@id": "biolink:Occurrent" + }, + "OntologyClass": { + "@id": "biolink:OntologyClass" + }, + "OrganismTaxon": { + "@id": "biolink:OrganismTaxon" + }, + "PhysicalEntity": { + "@id": "biolink:PhysicalEntity" + }, + "PhysicalEssence": { + "@id": "biolink:PhysicalEssence" + }, + "PhysicalEssenceOrOccurrent": { + "@id": "biolink:PhysicalEssenceOrOccurrent" + }, + "Procedure": { + "@id": "biolink:Procedure" + }, + "QuantityValue": { + "@id": "biolink:QuantityValue" + }, + "SubjectOfInvestigation": { + "@id": "biolink:SubjectOfInvestigation" + }, + "TaxonomicRank": { + "@id": 
"biolink:TaxonomicRank" + }, + "ThingWithTaxon": { + "@id": "biolink:ThingWithTaxon" + } + } +} + diff --git a/models_py-autogen/genome_annotation.py b/models_py-autogen/genome_annotation.py new file mode 100644 index 00000000..a10acbc4 --- /dev/null +++ b/models_py-autogen/genome_annotation.py @@ -0,0 +1,540 @@ +from __future__ import annotations +from datetime import ( + datetime, + date +) +from decimal import Decimal +from enum import Enum +import re +import sys +from typing import ( + Any, + List, + Literal, + Dict, + Optional, + Union +) +from pydantic.version import VERSION as PYDANTIC_VERSION +if int(PYDANTIC_VERSION[0])>=2: + from pydantic import ( + BaseModel, + ConfigDict, + Field, + field_validator + ) +else: + from pydantic import ( + BaseModel, + Field, + validator + ) + +metamodel_version = "None" +version = "None" + + +class ConfiguredBaseModel(BaseModel): + model_config = ConfigDict( + validate_assignment = True, + validate_default = True, + extra = "forbid", + arbitrary_types_allowed = True, + use_enum_values = True, + strict = False, + ) + pass + + +class DigestType(str, Enum): + SHA1 = "spdx:checksumAlgorithm_sha1" + MD5 = "spdx:checksumAlgorithm_md5" + SHA256 = "spdx:checksumAlgorithm_sha256" + + +class BioType(str, Enum): + protein_coding = "protein_coding" + noncoding = "noncoding" + + +class AuthorityType(str, Enum): + ENSEMBL = "ENSEMBL" + NCBI = "NCBI" + + +class OntologyClass(ConfiguredBaseModel): + """ + a concept or class in an ontology, vocabulary or thesaurus. Note that nodes in a biolink compatible KG can be considered both instances of biolink classes, and OWL classes in their own right. In general you should not need to use this class directly. Instead, use the appropriate biolink class. For example, for the GO concept of endocytosis (GO:0006897), use bl:BiologicalProcess as the type. + """ + id: str = Field(..., description="""A unique identifier for an entity. 
Must be either a CURIE shorthand for a URI or a complete URI""") + + +class Annotation(ConfiguredBaseModel): + """ + Biolink Model root class for entity annotations. + """ + pass + + +class QuantityValue(Annotation): + """ + A value of an attribute that is quantitative and measurable, expressed as a combination of a unit and a numeric value + """ + has_unit: Optional[str] = Field(None, description="""connects a quantity value to a unit""") + has_numeric_value: Optional[float] = Field(None, description="""connects a quantity value to a number""") + + +class Entity(ConfiguredBaseModel): + """ + Root Biolink Model class for all things and informational relationships, real or imagined. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/Entity","biolink:Entity"]] = Field(["biolink:Entity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + + +class NamedThing(Entity): + """ + a databased entity or concept/class + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/NamedThing","biolink:NamedThing"]] = Field(["biolink:NamedThing"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + + +class Attribute(NamedThing, OntologyClass): + """ + A property or characteristic of an entity. For example, an apple may have properties such as color, shape, age, crispiness. An environmental sample may have attributes such as depth, lat, long, material. + """ + id: str = Field(..., description="""A unique identifier for an entity. 
Must be either a CURIE shorthand for a URI or a complete URI""") + category: List[Literal["https://w3id.org/biolink/vocab/Attribute","biolink:Attribute"]] = Field(["biolink:Attribute"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + attribute_name: Optional[str] = Field(None, description="""The human-readable 'attribute name' can be set to a string which reflects its context of interpretation, e.g. SEPIO evidence/provenance/confidence annotation or it can default to the name associated with the 'has attribute type' slot ontology term.""") + has_attribute_type: str = Field(..., description="""connects an attribute to a class that describes it""") + has_quantitative_value: Optional[List[QuantityValue]] = Field(None, description="""connects an attribute to a value""") + has_qualitative_value: Optional[str] = Field(None, description="""connects an attribute to a value""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + name: Optional[str] = Field(None, description="""The human-readable 'attribute name' can be set to a string which reflects its context of interpretation, e.g. SEPIO evidence/provenance/confidence annotation or it can default to the name associated with the 'has attribute type' slot ontology term.""") + + +class TaxonomicRank(OntologyClass): + """ + A descriptor for the rank within a taxonomic classification. 
Example instance: TAXRANK:0000017 (kingdom) + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + + +class OrganismTaxon(NamedThing): + """ + A classification of a set of organisms. Example instances: NCBITaxon:9606 (Homo sapiens), NCBITaxon:2 (Bacteria). Can also be used to represent strains or subspecies. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/OrganismTaxon","biolink:OrganismTaxon"]] = Field(["biolink:OrganismTaxon"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + has_taxonomic_rank: Optional[str] = Field(None) + + +class InformationContentEntity(NamedThing): + """ + a piece of information that typically describes some topic of discourse or is used as support. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. 
This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/InformationContentEntity","biolink:InformationContentEntity"]] = Field(["biolink:InformationContentEntity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + license: Optional[str] = Field(None) + rights: Optional[str] = Field(None) + format: Optional[str] = Field(None) + creation_date: Optional[date] = Field(None, description="""date on which an entity was created. This can be applied to nodes or edges""") + + +class Dataset(InformationContentEntity): + """ + an item that refers to a collection of data from a data source. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/Dataset","biolink:Dataset"]] = Field(["biolink:Dataset"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. 
In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + license: Optional[str] = Field(None) + rights: Optional[str] = Field(None) + format: Optional[str] = Field(None) + creation_date: Optional[date] = Field(None, description="""date on which an entity was created. This can be applied to nodes or edges""") + + +class PhysicalEssenceOrOccurrent(ConfiguredBaseModel): + """ + Either a physical or processual entity. + """ + pass + + +class PhysicalEssence(PhysicalEssenceOrOccurrent): + """ + Semantic mixin concept. Pertains to entities that have physical properties such as mass, volume, or charge. + """ + pass + + +class PhysicalEntity(PhysicalEssence, NamedThing): + """ + An entity that has material reality (a.k.a. physical essence). + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/PhysicalEntity","biolink:PhysicalEntity"]] = Field(["biolink:PhysicalEntity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. 
This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + + +class Occurrent(PhysicalEssenceOrOccurrent): + """ + A processual entity. 
+ """ + pass + + +class ActivityAndBehavior(Occurrent): + """ + Activity or behavior of any independent integral living, organization or mechanical actor in the world + """ + pass + + +class Activity(ActivityAndBehavior, NamedThing): + """ + An activity is something that occurs over a period of time and acts upon or with entities; it may include consuming, processing, transforming, modifying, relocating, using, or generating entities. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/Activity","biolink:Activity"]] = Field(["biolink:Activity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. 
Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + + +class Procedure(ActivityAndBehavior, NamedThing): + """ + A series of actions conducted in a certain order or manner + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. 
This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/Procedure","biolink:Procedure"]] = Field(["biolink:Procedure"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + + +class SubjectOfInvestigation(ConfiguredBaseModel): + """ + An entity that has the role of being studied in an investigation, study, or experiment + """ + pass + + +class MaterialSample(SubjectOfInvestigation, PhysicalEntity): + """ + A sample is a limited quantity of something (e.g. an individual or set of individuals from a population, or a portion of a substance) to be used for testing, analysis, inspection, investigation, demonstration, or trial use. [SIO] + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/MaterialSample","biolink:MaterialSample"]] = Field(["biolink:MaterialSample"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. 
It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + + +class ThingWithTaxon(ConfiguredBaseModel): + """ + A mixin that can be used on any entity that can be taxonomically classified. This includes individual organisms; genes, their products and other molecular entities; body parts; biological processes + """ + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + + +class BiologicalEntity(ThingWithTaxon, NamedThing): + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/BiologicalEntity","biolink:BiologicalEntity"]] = Field(["biolink:BiologicalEntity"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. 
This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. 
Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + + +class GenomicEntity(ConfiguredBaseModel): + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""") + + +class ChemicalEntityOrGeneOrGeneProduct(ConfiguredBaseModel): + """ + A union of chemical entities and children, and gene or gene product. This mixin is helpful to use when searching across chemical entities that must include genes and their children as chemical entities. + """ + pass + + +class MacromolecularMachineMixin(ConfiguredBaseModel): + """ + A union of gene locus, gene product, and macromolecular complex. These are the basic units of function in a cell. They either carry out individual biological activities, or they encode molecules which do this. + """ + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + + +class GeneOrGeneProduct(MacromolecularMachineMixin): + """ + A union of gene loci or gene products. Frequently an identifier for one will be used as proxy for another + """ + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + + +class Gene(GeneOrGeneProduct, ChemicalEntityOrGeneOrGeneProduct, GenomicEntity, BiologicalEntity, PhysicalEssence, OntologyClass): + """ + A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene locus may include regulatory regions, transcribed regions and/or other functional sequence regions. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. 
This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/Gene","biolink:Gene"]] = Field(["biolink:Gene"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + symbol: Optional[str] = Field(None, description="""Symbol for a particular thing""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""") + + +class Genome(GenomicEntity, BiologicalEntity, PhysicalEssence, OntologyClass): + """ + A genome is the sum of genetic material within a cell or virion. + """ + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://w3id.org/biolink/vocab/Genome","biolink:Genome"]] = Field(["biolink:Genome"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. 
In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. 
This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""") + + +class GeneAnnotation(Gene): + """ + An annotation describing the location, boundaries, and functions of individual genes within a genome annotation. + """ + molecular_type: Optional[Union[BioType, str]] = Field(None) + source_id: Optional[str] = Field(None, description="""The authority specific identifier.""") + referenced_in: Union[GenomeAnnotation, str] = Field(..., description="""The genome annotation that this gene annotation was referenced from.""") + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://identifiers.org/brain-bican/vocab/GeneAnnotation","bican:GeneAnnotation"]] = Field(["bican:GeneAnnotation"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. 
It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. 
Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + symbol: Optional[str] = Field(None, description="""Symbol for a particular thing""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""") + + +class GenomeAnnotation(Genome): + """ + Location and nomenclature of genes and all of the coding regions in a genome assembly and the classification of genes and transcripts into types. + """ + version: Optional[str] = Field(None) + digest: Optional[List[Union[Checksum, str]]] = Field(default_factory=list, description="""Stores checksum information.""") + content_url: Optional[List[str]] = Field(default_factory=list) + authority: Optional[AuthorityType] = Field(None, description="""The organization responsible for publishing the data.""") + reference_assembly: Union[GenomeAssembly, str] = Field(..., description="""The reference genome assembly that this genome annotation was created from.""") + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. 
This is determined by the id using expansion rules.""") + category: List[Literal["https://identifiers.org/brain-bican/vocab/GenomeAnnotation","bican:GenomeAnnotation"]] = Field(["bican:GenomeAnnotation"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + has_biological_sequence: Optional[str] = Field(None, description="""connects a genomic feature to its sequence""") + + +class GenomeAssembly(ThingWithTaxon, NamedThing): + """ + Genome assembly to contain version and label information + """ + version: Optional[str] = Field(None) + strain: Optional[str] = Field(None, description="""The genetic variant or subtype of a species or organism.""") + in_taxon: Optional[List[str]] = Field(None, description="""connects an entity to its taxonomic classification. Only certain kinds of entities can be taxonomically classified; see 'thing with taxon'""") + in_taxon_label: Optional[str] = Field(None, description="""The human readable scientific name for the taxon of the entity.""") + id: str = Field(..., description="""A unique identifier for an entity. 
Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://identifiers.org/brain-bican/vocab/GenomeAssembly","bican:GenomeAssembly"]] = Field(["bican:GenomeAssembly"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + provided_by: Optional[List[str]] = Field(None, description="""The value in this node property represents the knowledge provider that created or assembled the node and all of its attributes. 
Used internally to represent how a particular node made its way into a knowledge provider or graph.""") + xref: Optional[List[str]] = Field(default_factory=list, description="""A database cross reference or alternative identifier for a NamedThing or edge between two NamedThings. This property should point to a database record or webpage that supports the existence of the edge, or gives more detail about the edge. This property can be used on a node or edge to provide multiple URIs or CURIE cross references.""") + full_name: Optional[str] = Field(None, description="""a long-form human readable name for a thing""") + synonym: Optional[List[str]] = Field(default_factory=list, description="""Alternate human-readable names for a thing""") + + +class Checksum(Entity): + """ + Checksum values associated with digital entities. + """ + checksum_algorithm: Optional[DigestType] = Field(None, description="""The type of cryptographic hash function used to calculate the checksum value.""") + value: Optional[str] = Field(None, description="""The checksum value obtained from a specific cryptographic hash function.""") + id: str = Field(..., description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI""") + iri: Optional[str] = Field(None, description="""An IRI for an entity. This is determined by the id using expansion rules.""") + category: List[Literal["https://identifiers.org/brain-bican/vocab/Checksum","bican:Checksum"]] = Field(["bican:Checksum"], description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. 
It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}""") + type: Optional[List[str]] = Field(default_factory=list) + name: Optional[str] = Field(None, description="""A human-readable name for an attribute or entity.""") + description: Optional[str] = Field(None, description="""a human-readable description of an entity""") + has_attribute: Optional[List[str]] = Field(None, description="""connects any entity to an attribute""") + deprecated: Optional[bool] = Field(None, description="""A boolean flag indicating that an entity is no longer considered current or valid.""") + + +class AnnotationCollection(ConfiguredBaseModel): + annotations: Optional[List[GeneAnnotation]] = Field(default_factory=list) + genome_annotations: Optional[List[GenomeAnnotation]] = Field(default_factory=list) + genome_assemblies: Optional[List[GenomeAssembly]] = Field(default_factory=list) + + +# Model rebuild +# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model +OntologyClass.model_rebuild() +Annotation.model_rebuild() +QuantityValue.model_rebuild() +Entity.model_rebuild() +NamedThing.model_rebuild() +Attribute.model_rebuild() +TaxonomicRank.model_rebuild() +OrganismTaxon.model_rebuild() +InformationContentEntity.model_rebuild() +Dataset.model_rebuild() +PhysicalEssenceOrOccurrent.model_rebuild() +PhysicalEssence.model_rebuild() +PhysicalEntity.model_rebuild() +Occurrent.model_rebuild() 
+ActivityAndBehavior.model_rebuild() +Activity.model_rebuild() +Procedure.model_rebuild() +SubjectOfInvestigation.model_rebuild() +MaterialSample.model_rebuild() +ThingWithTaxon.model_rebuild() +BiologicalEntity.model_rebuild() +GenomicEntity.model_rebuild() +ChemicalEntityOrGeneOrGeneProduct.model_rebuild() +MacromolecularMachineMixin.model_rebuild() +GeneOrGeneProduct.model_rebuild() +Gene.model_rebuild() +Genome.model_rebuild() +GeneAnnotation.model_rebuild() +GenomeAnnotation.model_rebuild() +GenomeAssembly.model_rebuild() +Checksum.model_rebuild() +AnnotationCollection.model_rebuild() +