From 2c0ccc6ae352e1ea7e449472348b6e10e716193a Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Mon, 31 Jul 2023 14:19:24 -0400 Subject: [PATCH] osfmap updates - remove osf:affiliatedInstitution (use osf:affiliation instead) - use dcterms:conformsTo to link osf registration schema - remodel funding info (add osf:hasFunding, osf:FundingAward) --- osf/metadata/osf_gathering.py | 55 ++++++++++++---- .../datacite/datacite_tree_walker.py | 47 ++++++++------ .../project_full.turtle | 18 ++++-- .../registration_basic.turtle | 5 +- .../registration_full.turtle | 5 +- osf_tests/metadata/test_osf_gathering.py | 64 +++++++++++-------- 6 files changed, 125 insertions(+), 69 deletions(-) diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py index e9086b840da..58060037963 100644 --- a/osf/metadata/osf_gathering.py +++ b/osf/metadata/osf_gathering.py @@ -106,7 +106,7 @@ def osfmap_for_type(rdftype_iri: str): DCTERMS.title: None, DCTERMS.type: None, OSF.affiliation: None, - OSF.funder: None, + OSF.hasFunding: None, OSF.contains: OSF_FILE_REFERENCE, OSF.hasRoot: OSF_OBJECT_REFERENCE, OSF.keyword: None, @@ -135,6 +135,7 @@ def osfmap_for_type(rdftype_iri: str): OSF.Registration: { **OSF_OBJECT, OSF.archivedAt: None, + DCTERMS.conformsTo: None, OSF.hasAnalyticCodeResource: OSF_OBJECT_REFERENCE, OSF.hasDataResource: OSF_OBJECT_REFERENCE, OSF.hasMaterialsResource: OSF_OBJECT_REFERENCE, @@ -170,7 +171,7 @@ def osfmap_for_type(rdftype_iri: str): OSF.isContainedBy: OSF_OBJECT_REFERENCE, OSF.fileName: None, OSF.filePath: None, - OSF.funder: None, + OSF.hasFunding: None, OWL.sameAs: None, }, OSF.Agent: { @@ -271,7 +272,7 @@ def gather_identifiers(focus: gather.Focus): yield (DCTERMS.identifier, str(osfguid_iri)) if hasattr(focus.dbmodel, 'get_identifier_value'): doi = focus.dbmodel.get_identifier_value('doi') - if doi and doi.startswith('10.'): # HACK: skip malformed doi + if doi: doi_iri = DOI[doi] yield (OWL.sameAs, doi_iri) yield (DCTERMS.identifier, str(doi_iri)) @@ -679,19 +680,32 @@ def gather_affiliated_institutions(focus): yield (institution_iri, DCTERMS.identifier, osf_institution.identifier_domain) -@gather.er(OSF.funder) +@gather.er(OSF.hasFunding) def gather_funding(focus): if hasattr(focus, 'guid_metadata_record'): - for funding in focus.guid_metadata_record.funding_info: - funder_bnode = rdflib.BNode() - yield (OSF.funder, funder_bnode) - yield (funder_bnode, RDF.type, OSF.FundingReference) - yield (funder_bnode, FOAF.name, funding.get('funder_name')) - yield (funder_bnode, DCTERMS.identifier, funding.get('funder_identifier')) - yield (funder_bnode, OSF.funderIdentifierType, funding.get('funder_identifier_type')) - yield (funder_bnode, OSF.awardNumber, funding.get('award_number')) - yield (funder_bnode, OSF.awardUri, funding.get('award_uri')) - yield (funder_bnode, OSF.awardTitle, funding.get('award_title')) + for _funding in focus.guid_metadata_record.funding_info: + _award_uri = _funding.get('award_uri') + _award_ref = ( + rdflib.URIRef(_award_uri) + if _award_uri + else rdflib.BNode() + ) + yield (OSF.hasFunding, _award_ref) + yield (_award_ref, RDF.type, OSF.FundingAward) + if _award_uri: + yield (_award_ref, DCTERMS.identifier, _award_uri) + yield (_award_ref, DCTERMS.title, _funding.get('award_title')) + yield (_award_ref, OSF.awardNumber, _funding.get('award_number')) + _funder_uri = _funding.get('funder_identifier') + _funder_ref = ( + rdflib.URIRef(_funder_uri) + if _funder_uri + else rdflib.BNode() + ) + yield (_award_ref, OSF.funder, _funder_ref) + yield (_funder_ref, FOAF.name, _funding.get('funder_name')) + yield (_funder_ref, DCTERMS.identifier, _funder_uri) + yield (_funder_ref, OSF.funderIdentifierType, _funding.get('funder_identifier_type')) @gather.er(OSF.HostingInstitution) @@ -743,6 +757,19 @@ def gather_ia_url(focus): yield (OSF.archivedAt, focus.dbmodel.ia_url) +@gather.er( + DCTERMS.conformsTo, + focustype_iris=[OSF.Registration] +) +def gather_registration_type(focus): + _reg_schema = getattr(focus.dbmodel, 'registration_schema') + if _reg_schema: + _schema_url = rdflib.URIRef(_reg_schema.absolute_api_v2_url) + yield (DCTERMS.conformsTo, _schema_url) + yield (_schema_url, DCTERMS.title, _reg_schema.name) + yield (_schema_url, DCTERMS.description, _reg_schema.description) + + @gather.er(DCTERMS.publisher) def gather_publisher(focus): provider = getattr(focus.dbmodel, 'provider', None) diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index b52b6c44eca..4fe0d0177b9 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -258,7 +258,7 @@ def _visit_rights(self, parent_el): self.visit(rights_list_el, 'rights', text=name, attrib=attrib) def _visit_affiliations(self, parent_el, focus_iri): - for institution_iri in self.basket[focus_iri:OSF.affiliatedInstitution]: + for institution_iri in self.basket[focus_iri:OSF.affiliation]: try: name = next(self.basket[institution_iri:FOAF.name]) except StopIteration: @@ -284,26 +284,31 @@ def _visit_dates(self, parent_el): def _visit_funding_references(self, parent_el): fundrefs_el = self.visit(parent_el, 'fundingReferences', is_list=True) - for funding_ref in self.basket[OSF.funder]: - fundref_el = self.visit(fundrefs_el, 'fundingReference') - self.visit(fundref_el, 'funderName', text=next(self.basket[funding_ref:FOAF.name], '')) - self.visit( - fundref_el, - 'funderIdentifier', - text=next(self.basket[funding_ref:DCTERMS.identifier], ''), - attrib={ - 'funderIdentifierType': next(self.basket[funding_ref:OSF.funderIdentifierType], ''), - }, - ) - self.visit( - fundref_el, - 'awardNumber', - text=next(self.basket[funding_ref:OSF.awardNumber], ''), - attrib={ - 'awardURI': next(self.basket[funding_ref:OSF.awardUri], ''), - }, - ) - self.visit(fundref_el, 'awardTitle', text=next(self.basket[funding_ref:OSF.awardTitle], '')) + for _funding_award in self.basket[OSF.hasFunding]: + for _funder in self.basket[_funding_award:OSF.funder]: + fundref_el = self.visit(fundrefs_el, 'fundingReference') + self.visit(fundref_el, 'funderName', text=next(self.basket[_funder:FOAF.name], '')) + self.visit( + fundref_el, + 'funderIdentifier', + text=next(self.basket[_funder:DCTERMS.identifier], ''), + attrib={ + 'funderIdentifierType': next(self.basket[_funder:OSF.funderIdentifierType], ''), + }, + ) + self.visit( + fundref_el, + 'awardNumber', + text=next(self.basket[_funding_award:OSF.awardNumber], ''), + attrib={ + 'awardURI': ( + str(_funding_award) + if isinstance(_funding_award, rdflib.URIRef) + else '' + ) + }, + ) + self.visit(fundref_el, 'awardTitle', text=next(self.basket[_funding_award:DCTERMS.title], '')) def _visit_publication_year(self, parent_el, focus_iri): year_copyrighted = next(self.basket[focus_iri:DCTERMS.dateCopyrighted], None) diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle index dc9a2757cb1..d24be3c25d9 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle @@ -21,13 +21,7 @@ dcterms:type "Dataset" ; owl:sameAs ; osf:contains ; - osf:funder [ a osf:FundingReference ; - dcterms:identifier "https://doi.org/10.$$$$" ; - foaf:name "Mx. Moneypockets" ; - osf:awardNumber "10000000" ; - osf:awardTitle "because reasons" ; - osf:awardUri "https://moneypockets.example/millions" ; - osf:funderIdentifierType "Crossref Funder ID" ] ; + osf:hasFunding ; osf:supplements . a osf:Preprint ; @@ -54,6 +48,12 @@ osf:filePath "/my-file.blarg" ; osf:isContainedBy . + a osf:FundingAward ; + dcterms:identifier "https://moneypockets.example/millions" ; + dcterms:title "because reasons" ; + osf:awardNumber "10000000" ; + osf:funder . + dcterms:identifier "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode" ; foaf:name "CC-By Attribution-NonCommercial-NoDerivatives 4.0 International" . @@ -72,6 +72,10 @@ dcterms:type foaf:Person ; foaf:name "Person McNamington" . + dcterms:identifier "https://doi.org/10.$$$$" ; + foaf:name "Mx. Moneypockets" ; + osf:funderIdentifierType "Crossref Funder ID" . + a osf:Agent ; dcterms:identifier "http://localhost:5000" ; dcterms:type foaf:Organization ; diff --git a/osf_tests/metadata/expected_metadata_files/registration_basic.turtle b/osf_tests/metadata/expected_metadata_files/registration_basic.turtle index b8cfcd69469..b8f30ee2aac 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_basic.turtle @@ -4,6 +4,7 @@ @prefix owl: . a osf:Registration ; + dcterms:conformsTo ; dcterms:created "2123-05-04" ; dcterms:creator ; dcterms:dateCopyrighted "2252" ; @@ -39,4 +40,6 @@ a osf:Agent ; dcterms:identifier "http://localhost:5000" ; dcterms:type foaf:Organization ; - foaf:name "OSF" . \ No newline at end of file + foaf:name "OSF" . + + dcterms:title "Open-Ended Registration" . \ No newline at end of file diff --git a/osf_tests/metadata/expected_metadata_files/registration_full.turtle b/osf_tests/metadata/expected_metadata_files/registration_full.turtle index 72dcd4d28e6..3d4baa58f2d 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_full.turtle @@ -4,6 +4,7 @@ @prefix owl: . a osf:Registration ; + dcterms:conformsTo ; dcterms:created "2123-05-04" ; dcterms:creator ; dcterms:dateCopyrighted "2250-2254" ; @@ -43,4 +44,6 @@ a osf:Agent ; dcterms:identifier "http://localhost:5000" ; dcterms:type foaf:Organization ; - foaf:name "OSF" . \ No newline at end of file + foaf:name "OSF" . + + dcterms:title "Open-Ended Registration" . \ No newline at end of file diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py index a1b19f00c4b..4d7e8adfa6f 100644 --- a/osf_tests/metadata/test_osf_gathering.py +++ b/osf_tests/metadata/test_osf_gathering.py @@ -456,7 +456,7 @@ def test_gather_affiliated_institutions(self): self.user__admin.add_or_update_affiliated_institution(institution) self.project.add_affiliated_institution(institution, self.user__admin) _assert_triples(osf_gathering.gather_affiliated_institutions(self.projectfocus), { - (self.projectfocus.iri, OSF.affiliatedInstitution, institution_iri), + (self.projectfocus.iri, OSF.affiliation, institution_iri), (institution_iri, RDF.type, OSF.Agent), (institution_iri, DCTERMS.type, FOAF.Organization), (institution_iri, FOAF.name, Literal(institution.name)), @@ -464,7 +464,7 @@ def test_gather_affiliated_institutions(self): }) # focus: user _assert_triples(osf_gathering.gather_affiliated_institutions(self.userfocus__admin), { - (self.userfocus__admin.iri, OSF.affiliatedInstitution, institution_iri), + (self.userfocus__admin.iri, OSF.affiliation, institution_iri), (institution_iri, RDF.type, OSF.Agent), (institution_iri, DCTERMS.type, FOAF.Organization), (institution_iri, FOAF.name, Literal(institution.name)), @@ -489,42 +489,56 @@ def test_gather_funding(self): 'award_number': '27', }, ] - bnode1 = rdflib.BNode() - bnode2 = rdflib.BNode() + _bnode1 = rdflib.BNode() + _bnode2 = rdflib.BNode() + _award_uri = URIRef('https://nih.example/award') + _funder_uri = URIRef('https://doi.org/10.fake/NIH') _assert_triples(osf_gathering.gather_funding(self.projectfocus), { - (self.projectfocus.iri, OSF.funder, bnode1), - (bnode1, RDF.type, OSF.FundingReference), - (bnode1, FOAF.name, Literal('hooray')), - (self.projectfocus.iri, OSF.funder, bnode2), - (bnode2, RDF.type, OSF.FundingReference), - (bnode2, FOAF.name, Literal('NIH')), - (bnode2, DCTERMS.identifier, Literal('https://doi.org/10.fake/NIH')), - (bnode2, OSF.funderIdentifierType, Literal('Crossref Funder ID')), - (bnode2, OSF.awardNumber, Literal('27')), - (bnode2, OSF.awardUri, Literal('https://nih.example/award')), - (bnode2, OSF.awardTitle, Literal('big fun')), + (self.projectfocus.iri, OSF.hasFunding, _bnode1), + (_bnode1, RDF.type, OSF.FundingAward), + (_bnode1, OSF.funder, _bnode2), + (_bnode2, FOAF.name, Literal('hooray')), + (self.projectfocus.iri, OSF.hasFunding, _award_uri), + (_award_uri, RDF.type, OSF.FundingAward), + (_award_uri, DCTERMS.identifier, Literal(_award_uri)), + (_award_uri, DCTERMS.title, Literal('big fun')), + (_award_uri, OSF.awardNumber, Literal('27')), + (_award_uri, OSF.funder, _funder_uri), + (_funder_uri, FOAF.name, Literal('NIH')), + (_funder_uri, DCTERMS.identifier, Literal(_funder_uri)), + (_funder_uri, OSF.funderIdentifierType, Literal('Crossref Funder ID')), }) # focus: registration _assert_triples(osf_gathering.gather_funding(self.registrationfocus), set()) self.registrationfocus.guid_metadata_record.funding_info = [ - {'funder_name': 'blooray'}, + { + 'funder_name': 'blooray', + 'funder_identifier': 'https://doi.org/11.bloo', + }, ] - bnode1 = rdflib.BNode() + _funder_uri = rdflib.URIRef('https://doi.org/11.bloo') _assert_triples(osf_gathering.gather_funding(self.registrationfocus), { - (self.registrationfocus.iri, OSF.funder, bnode1), - (bnode1, RDF.type, OSF.FundingReference), - (bnode1, FOAF.name, Literal('blooray')), + (self.registrationfocus.iri, OSF.hasFunding, _bnode1), + (_bnode1, RDF.type, OSF.FundingAward), + (_bnode1, OSF.funder, _funder_uri), + (_funder_uri, DCTERMS.identifier, Literal(_funder_uri)), + (_funder_uri, FOAF.name, Literal('blooray')), }) # focus: file _assert_triples(osf_gathering.gather_funding(self.filefocus), set()) self.filefocus.guid_metadata_record.funding_info = [ - {'funder_name': 'exray'}, + { + 'funder_name': 'exray', + 'funder_identifier': 'https://doi.org/11.ex', + }, ] - bnode1 = rdflib.BNode() + _funder_uri = rdflib.URIRef('https://doi.org/11.ex') _assert_triples(osf_gathering.gather_funding(self.filefocus), { - (self.filefocus.iri, OSF.funder, bnode1), - (bnode1, RDF.type, OSF.FundingReference), - (bnode1, FOAF.name, Literal('exray')), + (self.filefocus.iri, OSF.hasFunding, _bnode1), + (_bnode1, RDF.type, OSF.FundingAward), + (_bnode1, OSF.funder, _funder_uri), + (_funder_uri, DCTERMS.identifier, Literal(_funder_uri)), + (_funder_uri, FOAF.name, Literal('exray')), }) def test_gather_user_basics(self):