diff --git a/_modules/pandasaurus_cxg/graph_generator/graph_generator.html b/_modules/pandasaurus_cxg/graph_generator/graph_generator.html index 58574fe..21f35ae 100644 --- a/_modules/pandasaurus_cxg/graph_generator/graph_generator.html +++ b/_modules/pandasaurus_cxg/graph_generator/graph_generator.html @@ -78,7 +78,6 @@
import re
+from typing import Dict
import networkx as nx
from rdflib import OWL, RDF, RDFS, BNode, Graph, Literal, Namespace, URIRef
@@ -87,6 +88,8 @@ Source code for pandasaurus_cxg.graph_generator.graph_generator_utils
"http://purl.obolibrary.org/obo/PCL_0010001": "cyan",
}
+citation_field_name = "citation"
+
def add_edge(nx_graph: nx.Graph, subject, predicate, obj):
edge_data = {
@@ -205,6 +208,38 @@ Source code for pandasaurus_cxg.graph_generator.graph_generator_utils
def remove_special_characters(input_string: str) -> str:
    """Return ``input_string`` reduced to letters, digits and underscores.

    Spaces are converted to underscores first; every remaining character that
    is not an ASCII letter, an ASCII digit or an underscore is then removed.
    """
    underscored = input_string.replace(" ", "_")
    return re.sub(r"[^a-zA-Z0-9_]", "", underscored)
+
+
+
+[docs]
def parse_citation_field_into_dict(value: str) -> Dict[str, str]:
    """
    Parses a citation string into a dictionary by extracting key citation fields.

    The input is split on single spaces. For each of the marker tokens
    ``Publication:``, ``Version:`` and ``Collection:`` that appears as a
    standalone token, the token immediately following its first occurrence is
    stored as that field's value.

    Args:
        value: The string containing citation fields and values.

    Returns:
        A dictionary that may contain the keys 'publication', 'download_link'
        (filled from the ``Version:`` marker) and 'collection'. A key is present
        only when its marker occurs in the input; a marker that is the last
        token maps to an empty string.
    """
    tokens = value.split(" ")
    # Marker token -> output key. "Version:" is reported under "download_link".
    # Insertion order here fixes the key order of the returned dictionary.
    marker_to_key = {
        "Publication:": "publication",
        "Version:": "download_link",
        "Collection:": "collection",
    }
    key_value_pairs: Dict[str, str] = {}
    for marker, key in marker_to_key.items():
        if marker not in tokens:
            continue
        # Only the first occurrence of a marker is considered.
        following = tokens.index(marker) + 1
        # A marker at the very end of the string has no value token after it.
        key_value_pairs[key] = tokens[following] if following < len(tokens) else ""
    return key_value_pairs
+
Parses a citation string into a dictionary by extracting key citation fields.
+value – The string containing citation fields and values.
+and corresponding values extracted from the input string.
+