Skip to content

Commit

Permalink
osf:usage supplementary metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 3, 2024
1 parent e08e332 commit aa78756
Show file tree
Hide file tree
Showing 13 changed files with 263 additions and 7 deletions.
27 changes: 27 additions & 0 deletions osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
without_namespace,
smells_like_iri,
)
from osf.metrics.reports import PublicItemUsageReport
from osf.utils import workflows as osfworkflows
from osf.utils.outcomes import ArtifactTypes
from website import settings as website_settings
Expand Down Expand Up @@ -220,16 +221,22 @@ def osfmap_supplement_for_type(rdftype_iri: str):

# supplementary metadata by rdf type: each listed type gets its own
# single-entry mapping with `OSF.usage` as the only key (value None)
OSFMAP_SUPPLEMENT = {
    _rdftype: {OSF.usage: None}
    for _rdftype in (
        OSF.Project,
        OSF.ProjectComponent,
        OSF.Registration,
        OSF.RegistrationComponent,
        OSF.Preprint,
        OSF.File,
    )
}

Expand Down Expand Up @@ -1051,3 +1058,23 @@ def gather_cedar_templates(focus):
template_iri = rdflib.URIRef(record.get_template_semantic_iri())
yield (OSF.hasCedarTemplate, template_iri)
yield (template_iri, DCTERMS.title, record.get_template_name())


@gather.er(OSF.usage)
def gather_last_month_usage(focus):
    """Gather usage counts for the focus from last month's usage report.

    Looks up `PublicItemUsageReport.for_last_month` using the osfguid taken
    from `focus.iri`. Yields nothing when no report is found; otherwise
    yields an `OSF.usage` blank node, followed by triples describing that
    node: access service, primary topic, report month, and the four
    view/download counts.
    """
    _osfid = osfguid_from_iri(focus.iri)
    _report = PublicItemUsageReport.for_last_month(item_osfid=_osfid)
    if _report is None:
        return  # no report found; nothing to gather
    _usage_bnode = rdflib.BNode()
    yield (OSF.usage, _usage_bnode)
    # the service is identified by the site domain, without trailing slash
    _service_iri = rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))
    yield (_usage_bnode, DCAT.accessService, _service_iri)
    yield (_usage_bnode, FOAF.primaryTopic, focus.iri)
    _yearmonth_literal = rdflib.Literal(
        str(_report.report_yearmonth),
        datatype=rdflib.XSD.gYearMonth,
    )
    yield (_usage_bnode, DCTERMS.temporal, _yearmonth_literal)
    yield (_usage_bnode, OSF.viewCount, _report.view_count)
    yield (_usage_bnode, OSF.viewSessionCount, _report.view_session_count)
    yield (_usage_bnode, OSF.downloadCount, _report.download_count)
    yield (_usage_bnode, OSF.downloadSessionCount, _report.download_session_count)
2 changes: 1 addition & 1 deletion osf/metrics/counted_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import pytz

from osf.metrics.utils import stable_key
from osf.models import Guid


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -87,6 +86,7 @@ def _autofill_fields(sender, instance, **kwargs):
_fill_pageview_info(instance)
item_guid = getattr(instance, 'item_guid', None)
if item_guid:
from osf.models import Guid
guid_instance = Guid.load(item_guid)
if guid_instance and guid_instance.referent:
_fill_osfguid_info(instance, guid_instance.referent)
Expand Down
34 changes: 33 additions & 1 deletion osf/metrics/reports.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from __future__ import annotations
from collections import abc
import datetime

from django.dispatch import receiver
from elasticsearch6_dsl import InnerDoc
from elasticsearch_metrics import metrics
from elasticsearch_metrics.signals import pre_save as metrics_pre_save
from elasticsearch_metrics.signals import (
pre_save as metrics_pre_save,
post_save as metrics_post_save,
)

from osf.metrics.utils import stable_key, YearMonth
from website import settings as website_settings


class ReportInvalid(Exception):
Expand Down Expand Up @@ -304,3 +309,30 @@ class PublicItemUsageReport(MonthlyReport):
# download counts of this item only (not including contained components or files)
download_count = metrics.Long() # counter:Total_Item_Requests
download_session_count = metrics.Long() # counter:Unique_Item_Requests

@classmethod
def for_last_month(cls, item_osfid: str) -> PublicItemUsageReport | None:
    """Find the usage report for the given item from last month.

    Args:
        item_osfid: value matched (as a term filter) against `item_osfid`.

    Returns:
        The first hit when sorted by `report_yearmonth` descending,
        or None when the search response is empty.
    """
    # only last month's report
    _last_month_range = {
        'gte': 'now-1M/M',
        'lt': 'now/M',
    }
    _search = PublicItemUsageReport.search()
    _search = _search.filter('term', item_osfid=item_osfid)
    _search = _search.filter('range', report_yearmonth=_last_month_range)
    _search = _search.sort('-report_yearmonth')
    _search = _search[:1]  # at most one result is needed
    _response = _search.execute()
    if not _response:
        return None
    return _response[0]


@receiver(metrics_post_save, sender=PublicItemUsageReport)
def update_supplementary_metadata(sender, instance, **kwargs):
    """Queue a supplementary SHARE update after a usage report is saved.

    Connected to the metrics `post_save` signal for `PublicItemUsageReport`.
    Does nothing unless `website_settings.SHARE_ENABLED` is truthy.
    """
    if not website_settings.SHARE_ENABLED:
        return
    # function-scope import (presumably to avoid an import cycle -- confirm)
    from api.share.utils import task__update_share
    _task_args = (instance.item_osfid,)
    _task_kwargs = {'is_supplementary': True, 'is_backfill': True}
    task__update_share.apply_async(
        args=_task_args,
        kwargs=_task_kwargs,
        countdown=30,  # delay 30 seconds; plenty of time for index refresh
    )
3 changes: 2 additions & 1 deletion osf/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
from api.caching.tasks import update_storage_usage
from api.caching import settings as cache_settings
from api.caching.utils import storage_usage_cache
from api.share.utils import update_share


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -711,6 +710,7 @@ def should_request_identifiers(self):

@classmethod
def bulk_update_search(cls, nodes, index=None):
from api.share.utils import update_share
for _node in nodes:
update_share(_node)
from website import search
Expand All @@ -722,6 +722,7 @@ def bulk_update_search(cls, nodes, index=None):
log_exception(e)

def update_search(self):
from api.share.utils import update_share
update_share(self)
from website import search
try:
Expand Down
2 changes: 1 addition & 1 deletion osf/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
MergeConflictError)
from framework.exceptions import PermissionsError
from framework.sessions.utils import remove_sessions_for_user
from api.share.utils import update_share
from osf.utils.requests import get_current_request
from osf.exceptions import reraise_django_validation_errors, UserStateError
from .base import BaseModel, GuidMixin, GuidMixinQuerySet
Expand Down Expand Up @@ -1451,6 +1450,7 @@ def is_assumed_ham(self):
return user_has_trusted_email

def update_search(self):
from api.share.utils import update_share
update_share(self)
from website.search.search import update_user
update_user(self)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://localhost:5000/w3ibb> osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ;
dcat:accessService <http://localhost:5000> ;
foaf:primaryTopic <http://localhost:5000/w3ibb> ;
osf:downloadCount 3 ;
osf:downloadSessionCount 2 ;
osf:viewCount 7 ;
osf:viewSessionCount 5 ] .

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://localhost:5000/w4ibb> osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ;
dcat:accessService <http://localhost:5000> ;
foaf:primaryTopic <http://localhost:5000/w4ibb> ;
osf:downloadCount 3 ;
osf:downloadSessionCount 2 ;
osf:viewCount 7 ;
osf:viewSessionCount 5 ] .

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://localhost:5000/w2ibb> osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ;
dcat:accessService <http://localhost:5000> ;
foaf:primaryTopic <http://localhost:5000/w2ibb> ;
osf:downloadCount 3 ;
osf:downloadSessionCount 2 ;
osf:viewCount 7 ;
osf:viewSessionCount 5 ] .

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://localhost:5000/w5ibb> osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ;
dcat:accessService <http://localhost:5000> ;
foaf:primaryTopic <http://localhost:5000/w5ibb> ;
osf:downloadCount 3 ;
osf:downloadSessionCount 2 ;
osf:viewCount 7 ;
osf:viewSessionCount 5 ] .

38 changes: 38 additions & 0 deletions osf_tests/metadata/test_osf_gathering.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
from unittest import mock

from django.test import TestCase
import rdflib
Expand All @@ -11,6 +12,7 @@
FOAF,
OSF,
OSFIO,
DCAT,
DCTERMS,
DCMITYPE,
DOI,
Expand All @@ -20,11 +22,14 @@
checksum_iri,
)
from osf import models as osfdb
from osf.metrics.reports import PublicItemUsageReport
from osf.metrics.utils import YearMonth
from osf.utils import permissions, workflows
from osf_tests import factories
from website import settings as website_settings
from website.project import new_bookmark_collection
from osf_tests.metadata._utils import assert_triples
from osf_tests.metrics.utils_for_tests import es_metrics_temps


class TestOsfGathering(TestCase):
Expand Down Expand Up @@ -750,3 +755,36 @@ def test_gather_cedar_templates(self):
(self.filefocus.iri, OSF.hasCedarTemplate, cedar_template_iri),
(cedar_template_iri, DCTERMS.title, Literal(self.cedar_template.schema_name))
})

@es_metrics_temps()
def test_gather_last_month_usage(self):
    """Check `gather_last_month_usage` both without and with a usage report.

    `PublicItemUsageReport.for_last_month` is patched in both cases, so the
    gatherer sees either None or a pre-built report.
    """
    _patch_target = 'osf.metrics.reports.PublicItemUsageReport.for_last_month'
    # no usage report:
    with mock.patch(_patch_target, return_value=None):
        _gathered = osf_gathering.gather_last_month_usage(self.projectfocus)
        assert_triples(_gathered, set())
    # yes usage report:
    _ym = YearMonth.from_date(datetime.datetime.now(tz=datetime.UTC))
    _stub_report = PublicItemUsageReport(
        item_osfid=self.project._id,
        report_yearmonth=_ym,
        view_count=71,
        view_session_count=13,
        download_count=43,
        download_session_count=11,
    )
    with mock.patch(_patch_target, return_value=_stub_report):
        _usage_bnode = rdflib.BNode()
        _focus_iri = self.projectfocus.iri
        _gathered = osf_gathering.gather_last_month_usage(self.projectfocus)
        _expected_triples = {
            (_focus_iri, OSF.usage, _usage_bnode),
            (_usage_bnode, DCTERMS.temporal, Literal(str(_ym), datatype=rdflib.XSD.gYearMonth)),
            (_usage_bnode, DCAT.accessService, rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))),
            (_usage_bnode, FOAF.primaryTopic, _focus_iri),
            (_usage_bnode, OSF.viewCount, Literal(71)),
            (_usage_bnode, OSF.viewSessionCount, Literal(13)),
            (_usage_bnode, OSF.downloadCount, Literal(43)),
            (_usage_bnode, OSF.downloadSessionCount, Literal(11)),
        }
        assert_triples(_gathered, _expected_triples)
40 changes: 38 additions & 2 deletions osf_tests/metadata/test_serialized_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from osf import models as osfdb
from osf.metadata.rdfutils import OSF, DCTERMS
from osf.metadata.tools import pls_gather_metadata_file
from osf.metrics.reports import PublicItemUsageReport
from osf.metrics.utils import YearMonth
from osf.models.licenses import NodeLicense
from api_tests.utils import create_test_file
from osf_tests import factories
Expand Down Expand Up @@ -72,6 +74,14 @@
},
}

# expected supplementary-metadata file name for each focus type;
# passed to `_assert_supplementary_files`, which compares each named file
# against the gathered turtle serialization for that focus type
EXPECTED_SUPPLEMENTARY_METADATA = {
OSF.Project: 'project_supplementary.turtle',
OSF.Preprint: 'preprint_supplementary.turtle',
OSF.Registration: 'registration_supplementary.turtle',
OSF.File: 'file_supplementary.turtle',
DCTERMS.Agent: 'agent_supplementary.turtle',
}

EXPECTED_MEDIATYPE = {
'turtle': 'text/turtle; charset=utf-8',
'datacite-xml': 'application/xml',
Expand Down Expand Up @@ -124,8 +134,7 @@ def setUp(self):
mock.patch('django.utils.timezone.now', new=forever_now),
mock.patch('osf.models.metaschema.RegistrationSchema.absolute_api_v2_url', new='http://fake.example/schema/for/test'),
):
patcher.start()
self.addCleanup(patcher.stop)
self.enterContext(patcher)
# build test objects
self.user = factories.AuthUserFactory(
fullname='Person McNamington',
Expand Down Expand Up @@ -211,6 +220,16 @@ def setUp(self):
osfdb.GuidMetadataRecord.objects.for_guid(self.registration._id).update({
'resource_type_general': 'StudyRegistration',
}, auth=self.user)
self.enterContext(mock.patch(
'osf.metrics.reports.PublicItemUsageReport.for_last_month',
return_value=PublicItemUsageReport(
report_yearmonth=YearMonth.from_date(forever_now()),
view_count=7,
view_session_count=5,
download_count=3,
download_session_count=2,
),
))
self.guid_dict = {
OSF.Project: self.project._id,
OSF.Preprint: self.preprint._id,
Expand Down Expand Up @@ -259,6 +278,7 @@ def test_serialized_metadata(self):
self._assert_scenario(BASIC_METADATA_SCENARIO)
self._setUp_full()
self._assert_scenario(FULL_METADATA_SCENARIO)
self._assert_supplementary_files(EXPECTED_SUPPLEMENTARY_METADATA)

def _assert_scenario(self, scenario_dict):
for focus_type, expected_files in scenario_dict.items():
Expand All @@ -283,6 +303,22 @@ def _assert_scenario(self, scenario_dict):
)
self._assert_expected_file(filename, resp.text)

def _assert_supplementary_files(self, expected_supplementary_files):
    """Compare gathered supplementary metadata against expected files.

    Args:
        expected_supplementary_files: mapping from focus type to the name of
            the file holding the expected serialization for that type.

    Each focus type runs in its own `subTest`; the osfguid for the type is
    looked up in `self.guid_dict`.
    """
    _format_key = 'turtle'  # supplementary metadata only in turtle, for now
    _serializer_config = {'is_supplementary': True}
    for _rdftype, _expected_filename in expected_supplementary_files.items():
        with self.subTest(focus_type=_rdftype, is_supplementary=True):
            _gathered = pls_gather_metadata_file(
                self.guid_dict[_rdftype],
                _format_key,
                serializer_config=_serializer_config,
            )
            self.assertEqual(_gathered.mediatype, EXPECTED_MEDIATYPE[_format_key])
            _actual_metadata = _gathered.serialized_metadata
            # to update expected metadata, uncomment `_write_expected_file` and this
            # next line (being careful not to leave it uncommented...) and run tests
            # self._write_expected_file(_expected_filename, _actual_metadata)
            self._assert_expected_file(_expected_filename, _actual_metadata)

def _assert_expected_file(self, filename, actual_metadata):
_open_mode = ('rb' if isinstance(actual_metadata, bytes) else 'r')
with open(METADATA_SCENARIO_DIR / filename, _open_mode) as _file:
Expand Down
Loading

0 comments on commit aa78756

Please sign in to comment.