Skip to content

Commit

Permalink
Merge pull request #34 from GenomicDataInfrastructure/add-creator-to-…
Browse files Browse the repository at this point in the history
…fairdatapointprofile

feat: Enhance creator parsing in DCAT profile
  • Loading branch information
hcvdwerf authored Jul 11, 2024
2 parents 18633fb + 4ccbd64 commit 2b2117d
Show file tree
Hide file tree
Showing 6 changed files with 873 additions and 787 deletions.
31 changes: 30 additions & 1 deletion ckanext/fairdatapoint/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dateutil.parser import ParserError
from json import JSONDecodeError
from typing import Dict, List
from rdflib import URIRef, Namespace, DCAT
from rdflib import URIRef, Namespace, DCAT, DCTERMS, FOAF

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)

Expand Down Expand Up @@ -107,6 +107,8 @@ def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
super(FAIRDataPointDCATAPProfile, self).parse_dataset(dataset_dict, dataset_ref)
dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)

dataset_dict = self._parse_creator(dataset_dict, dataset_ref)

dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)

dataset_dict['tags'] = validate_tags(dataset_dict['tags'])
Expand Down Expand Up @@ -144,3 +146,30 @@ def _parse_contact_point(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
dataset_dict['extras'] = \
[item for item in dataset_dict['extras'] if item.get('key') not in dcat_profile_contact_fields]
return dataset_dict

def _parse_creator(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
    """
    Collect the ``dct:creator`` nodes of a dataset into ``dataset_dict['creator']``.

    Every creator node found on ``dataset_ref`` becomes one dict:

    * ``creator_identifier`` -- the node's ``dct:identifier`` when it has one.
      If the node has neither a ``dct:identifier`` nor a ``foaf:name`` but is
      a ``URIRef``, the node's own URI is used instead. Otherwise the key is
      omitted.
    * ``creator_name`` -- the node's ``foaf:name`` when it has one, otherwise
      the string form of the node itself.

    :param dataset_dict: dataset dictionary being built; updated in place
    :param dataset_ref: graph node of the dataset whose creators are read
    :return: the same ``dataset_dict``; the ``creator`` key is only written
        when at least one creator was found
    """
    graph = self.g
    creators = []
    for creator_ref in graph.objects(dataset_ref, DCTERMS.creator):
        creator_identifier = graph.value(creator_ref, DCTERMS.identifier)
        creator_name = graph.value(creator_ref, FOAF.name)

        creator = {}
        if creator_identifier:
            creator['creator_identifier'] = str(creator_identifier)

        if creator_name:
            creator['creator_name'] = str(creator_name)
        else:
            # No foaf:name available: fall back to the node itself as the name.
            creator['creator_name'] = str(creator_ref)
            # A URI node can double as the identifier, but it must never
            # replace an explicitly stated dct:identifier.
            if isinstance(creator_ref, URIRef):
                creator.setdefault('creator_identifier', str(creator_ref))

        creators.append(creator)

    if creators:
        dataset_dict['creator'] = creators

    return dataset_dict
17 changes: 17 additions & 0 deletions ckanext/fairdatapoint/tests/test_data/creator_prisma.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-FileCopyrightText: 2024 Stichting Health-RI
#
# SPDX-License-Identifier: AGPL-3.0-only

@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

<http://example.org/dataset/1>
a dcat:Dataset ;
dct:title "Sample Dataset Title" ;
dct:description "This is a description of the sample dataset." ;
dct:creator [
a foaf:Agent ;
dct:identifier "https://orcid.org/0000-0002-9095-9201" ;
foaf:name "Marc Bonten"
] .
Loading

0 comments on commit 2b2117d

Please sign in to comment.