From aa787564a0fbe0b7f1540af99508ebcdd29c4d2c Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 1 Oct 2024 13:17:15 -0400 Subject: [PATCH] osf:usage supplementary metadata --- osf/metadata/osf_gathering.py | 27 ++++++++ osf/metrics/counted_usage.py | 2 +- osf/metrics/reports.py | 34 +++++++++- osf/models/node.py | 3 +- osf/models/user.py | 2 +- .../agent_supplementary.turtle | 1 + .../file_supplementary.turtle | 14 ++++ .../preprint_supplementary.turtle | 14 ++++ .../project_supplementary.turtle | 14 ++++ .../registration_supplementary.turtle | 14 ++++ osf_tests/metadata/test_osf_gathering.py | 38 +++++++++++ .../metadata/test_serialized_metadata.py | 40 ++++++++++- osf_tests/metrics/test_monthly_report.py | 67 ++++++++++++++++++- 13 files changed, 263 insertions(+), 7 deletions(-) create mode 100644 osf_tests/metadata/expected_metadata_files/agent_supplementary.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/file_supplementary.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/preprint_supplementary.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/project_supplementary.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/registration_supplementary.turtle diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py index 9d97782dfff..3bce4b448e7 100644 --- a/osf/metadata/osf_gathering.py +++ b/osf/metadata/osf_gathering.py @@ -27,6 +27,7 @@ without_namespace, smells_like_iri, ) +from osf.metrics.reports import PublicItemUsageReport from osf.utils import workflows as osfworkflows from osf.utils.outcomes import ArtifactTypes from website import settings as website_settings @@ -220,16 +221,22 @@ def osfmap_supplement_for_type(rdftype_iri: str): OSFMAP_SUPPLEMENT = { OSF.Project: { + OSF.usage: None, }, OSF.ProjectComponent: { + OSF.usage: None, }, OSF.Registration: { + OSF.usage: None, }, OSF.RegistrationComponent: { + OSF.usage: None, }, OSF.Preprint: { + OSF.usage: 
None, }, OSF.File: { + OSF.usage: None, }, } @@ -1051,3 +1058,23 @@ def gather_cedar_templates(focus): template_iri = rdflib.URIRef(record.get_template_semantic_iri()) yield (OSF.hasCedarTemplate, template_iri) yield (template_iri, DCTERMS.title, record.get_template_name()) + + +@gather.er(OSF.usage) +def gather_last_month_usage(focus): + _usage_report = PublicItemUsageReport.for_last_month( + item_osfid=osfguid_from_iri(focus.iri), + ) + if _usage_report is not None: + _usage_report_ref = rdflib.BNode() + yield (OSF.usage, _usage_report_ref) + yield (_usage_report_ref, DCAT.accessService, rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))) + yield (_usage_report_ref, FOAF.primaryTopic, focus.iri) + yield (_usage_report_ref, DCTERMS.temporal, rdflib.Literal( + str(_usage_report.report_yearmonth), + datatype=rdflib.XSD.gYearMonth, + )) + yield (_usage_report_ref, OSF.viewCount, _usage_report.view_count) + yield (_usage_report_ref, OSF.viewSessionCount, _usage_report.view_session_count) + yield (_usage_report_ref, OSF.downloadCount, _usage_report.download_count) + yield (_usage_report_ref, OSF.downloadSessionCount, _usage_report.download_session_count) diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py index 393bd0558c0..c3c6d4cc1aa 100644 --- a/osf/metrics/counted_usage.py +++ b/osf/metrics/counted_usage.py @@ -10,7 +10,6 @@ import pytz from osf.metrics.utils import stable_key -from osf.models import Guid logger = logging.getLogger(__name__) @@ -87,6 +86,7 @@ def _autofill_fields(sender, instance, **kwargs): _fill_pageview_info(instance) item_guid = getattr(instance, 'item_guid', None) if item_guid: + from osf.models import Guid guid_instance = Guid.load(item_guid) if guid_instance and guid_instance.referent: _fill_osfguid_info(instance, guid_instance.referent) diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index bb0553f7f3b..5e47b2ddbf2 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -1,12 +1,17 @@ +from 
__future__ import annotations from collections import abc import datetime from django.dispatch import receiver from elasticsearch6_dsl import InnerDoc from elasticsearch_metrics import metrics -from elasticsearch_metrics.signals import pre_save as metrics_pre_save +from elasticsearch_metrics.signals import ( + pre_save as metrics_pre_save, + post_save as metrics_post_save, +) from osf.metrics.utils import stable_key, YearMonth +from website import settings as website_settings class ReportInvalid(Exception): @@ -304,3 +309,30 @@ class PublicItemUsageReport(MonthlyReport): # download counts of this item only (not including contained components or files) download_count = metrics.Long() # counter:Total_Item_Requests download_session_count = metrics.Long() # counter:Unique_Item_Requests + + @classmethod + def for_last_month(cls, item_osfid: str) -> PublicItemUsageReport | None: + _search = ( + PublicItemUsageReport.search() + .filter('term', item_osfid=item_osfid) + # only last month's report + .filter('range', report_yearmonth={ + 'gte': 'now-1M/M', + 'lt': 'now/M', + }) + .sort('-report_yearmonth') + [:1] + ) + _response = _search.execute() + return _response[0] if _response else None + + +@receiver(metrics_post_save, sender=PublicItemUsageReport) +def update_supplementary_metadata(sender, instance, **kwargs): + if website_settings.SHARE_ENABLED: + from api.share.utils import task__update_share + task__update_share.apply_async( + args=(instance.item_osfid,), + kwargs={'is_supplementary': True, 'is_backfill': True}, + countdown=30, # delay 30 seconds; plenty of time for index refresh + ) diff --git a/osf/models/node.py b/osf/models/node.py index 9e342308f44..62925966e2e 100644 --- a/osf/models/node.py +++ b/osf/models/node.py @@ -80,7 +80,6 @@ from api.caching.tasks import update_storage_usage from api.caching import settings as cache_settings from api.caching.utils import storage_usage_cache -from api.share.utils import update_share logger = 
logging.getLogger(__name__) @@ -711,6 +710,7 @@ def should_request_identifiers(self): @classmethod def bulk_update_search(cls, nodes, index=None): + from api.share.utils import update_share for _node in nodes: update_share(_node) from website import search @@ -722,6 +722,7 @@ def bulk_update_search(cls, nodes, index=None): log_exception(e) def update_search(self): + from api.share.utils import update_share update_share(self) from website import search try: diff --git a/osf/models/user.py b/osf/models/user.py index 29e10efa991..438c8d3938e 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -34,7 +34,6 @@ MergeConflictError) from framework.exceptions import PermissionsError from framework.sessions.utils import remove_sessions_for_user -from api.share.utils import update_share from osf.utils.requests import get_current_request from osf.exceptions import reraise_django_validation_errors, UserStateError from .base import BaseModel, GuidMixin, GuidMixinQuerySet @@ -1451,6 +1450,7 @@ def is_assumed_ham(self): return user_has_trusted_email def update_search(self): + from api.share.utils import update_share update_share(self) from website.search.search import update_user update_user(self) diff --git a/osf_tests/metadata/expected_metadata_files/agent_supplementary.turtle b/osf_tests/metadata/expected_metadata_files/agent_supplementary.turtle new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/agent_supplementary.turtle @@ -0,0 +1 @@ + diff --git a/osf_tests/metadata/expected_metadata_files/file_supplementary.turtle b/osf_tests/metadata/expected_metadata_files/file_supplementary.turtle new file mode 100644 index 00000000000..3724c914858 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/file_supplementary.turtle @@ -0,0 +1,14 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . 
+ + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . + diff --git a/osf_tests/metadata/expected_metadata_files/preprint_supplementary.turtle b/osf_tests/metadata/expected_metadata_files/preprint_supplementary.turtle new file mode 100644 index 00000000000..9e0ef035f18 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/preprint_supplementary.turtle @@ -0,0 +1,14 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . + + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . + diff --git a/osf_tests/metadata/expected_metadata_files/project_supplementary.turtle b/osf_tests/metadata/expected_metadata_files/project_supplementary.turtle new file mode 100644 index 00000000000..ac6021a334c --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/project_supplementary.turtle @@ -0,0 +1,14 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . + + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . + diff --git a/osf_tests/metadata/expected_metadata_files/registration_supplementary.turtle b/osf_tests/metadata/expected_metadata_files/registration_supplementary.turtle new file mode 100644 index 00000000000..81abd9d231d --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/registration_supplementary.turtle @@ -0,0 +1,14 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . 
+ + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . + diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py index 7bd72770aba..a91cca14931 100644 --- a/osf_tests/metadata/test_osf_gathering.py +++ b/osf_tests/metadata/test_osf_gathering.py @@ -1,4 +1,5 @@ import datetime +from unittest import mock from django.test import TestCase import rdflib @@ -11,6 +12,7 @@ FOAF, OSF, OSFIO, + DCAT, DCTERMS, DCMITYPE, DOI, @@ -20,11 +22,14 @@ checksum_iri, ) from osf import models as osfdb +from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth from osf.utils import permissions, workflows from osf_tests import factories from website import settings as website_settings from website.project import new_bookmark_collection from osf_tests.metadata._utils import assert_triples +from osf_tests.metrics.utils_for_tests import es_metrics_temps class TestOsfGathering(TestCase): @@ -750,3 +755,36 @@ def test_gather_cedar_templates(self): (self.filefocus.iri, OSF.hasCedarTemplate, cedar_template_iri), (cedar_template_iri, DCTERMS.title, Literal(self.cedar_template.schema_name)) }) + + @es_metrics_temps() + def test_gather_last_month_usage(self): + # no usage report: + with mock.patch( + 'osf.metrics.reports.PublicItemUsageReport.for_last_month', + return_value=None, + ): + assert_triples(osf_gathering.gather_last_month_usage(self.projectfocus), set()) + # yes usage report: + _ym = YearMonth.from_date(datetime.datetime.now(tz=datetime.UTC)) + with mock.patch( + 'osf.metrics.reports.PublicItemUsageReport.for_last_month', + return_value=PublicItemUsageReport( + item_osfid=self.project._id, + report_yearmonth=_ym, + view_count=71, + view_session_count=13, + download_count=43, + download_session_count=11, + ), + ): + _usage_bnode = rdflib.BNode() + 
assert_triples(osf_gathering.gather_last_month_usage(self.projectfocus), { + (self.projectfocus.iri, OSF.usage, _usage_bnode), + (_usage_bnode, DCTERMS.temporal, Literal(str(_ym), datatype=rdflib.XSD.gYearMonth)), + (_usage_bnode, DCAT.accessService, rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))), + (_usage_bnode, FOAF.primaryTopic, self.projectfocus.iri), + (_usage_bnode, OSF.viewCount, Literal(71)), + (_usage_bnode, OSF.viewSessionCount, Literal(13)), + (_usage_bnode, OSF.downloadCount, Literal(43)), + (_usage_bnode, OSF.downloadSessionCount, Literal(11)), + }) diff --git a/osf_tests/metadata/test_serialized_metadata.py b/osf_tests/metadata/test_serialized_metadata.py index 0c74961778a..f3712fc7481 100644 --- a/osf_tests/metadata/test_serialized_metadata.py +++ b/osf_tests/metadata/test_serialized_metadata.py @@ -7,6 +7,8 @@ from osf import models as osfdb from osf.metadata.rdfutils import OSF, DCTERMS from osf.metadata.tools import pls_gather_metadata_file +from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth from osf.models.licenses import NodeLicense from api_tests.utils import create_test_file from osf_tests import factories @@ -72,6 +74,14 @@ }, } +EXPECTED_SUPPLEMENTARY_METADATA = { + OSF.Project: 'project_supplementary.turtle', + OSF.Preprint: 'preprint_supplementary.turtle', + OSF.Registration: 'registration_supplementary.turtle', + OSF.File: 'file_supplementary.turtle', + DCTERMS.Agent: 'agent_supplementary.turtle', +} + EXPECTED_MEDIATYPE = { 'turtle': 'text/turtle; charset=utf-8', 'datacite-xml': 'application/xml', @@ -124,8 +134,7 @@ def setUp(self): mock.patch('django.utils.timezone.now', new=forever_now), mock.patch('osf.models.metaschema.RegistrationSchema.absolute_api_v2_url', new='http://fake.example/schema/for/test'), ): - patcher.start() - self.addCleanup(patcher.stop) + self.enterContext(patcher) # build test objects self.user = factories.AuthUserFactory( fullname='Person McNamington', @@ -211,6 
+220,16 @@ def setUp(self): osfdb.GuidMetadataRecord.objects.for_guid(self.registration._id).update({ 'resource_type_general': 'StudyRegistration', }, auth=self.user) + self.enterContext(mock.patch( + 'osf.metrics.reports.PublicItemUsageReport.for_last_month', + return_value=PublicItemUsageReport( + report_yearmonth=YearMonth.from_date(forever_now()), + view_count=7, + view_session_count=5, + download_count=3, + download_session_count=2, + ), + )) self.guid_dict = { OSF.Project: self.project._id, OSF.Preprint: self.preprint._id, @@ -259,6 +278,7 @@ def test_serialized_metadata(self): self._assert_scenario(BASIC_METADATA_SCENARIO) self._setUp_full() self._assert_scenario(FULL_METADATA_SCENARIO) + self._assert_supplementary_files(EXPECTED_SUPPLEMENTARY_METADATA) def _assert_scenario(self, scenario_dict): for focus_type, expected_files in scenario_dict.items(): @@ -283,6 +303,22 @@ def _assert_scenario(self, scenario_dict): ) self._assert_expected_file(filename, resp.text) + def _assert_supplementary_files(self, expected_supplementary_files): + _format_key = 'turtle' # supplementary metadata only in turtle, for now + for _focus_type, _filename in expected_supplementary_files.items(): + with self.subTest(focus_type=_focus_type, is_supplementary=True): + _osfguid = self.guid_dict[_focus_type] + _gathered_file = pls_gather_metadata_file( + _osfguid, + _format_key, + serializer_config={'is_supplementary': True}, + ) + self.assertEqual(_gathered_file.mediatype, EXPECTED_MEDIATYPE[_format_key]) + # to update expected metadata, uncomment `_write_expected_file` and this + # next line (being careful not to leave it uncommented...) 
and run tests + # self._write_expected_file(_filename, _gathered_file.serialized_metadata) + self._assert_expected_file(_filename, _gathered_file.serialized_metadata) + def _assert_expected_file(self, filename, actual_metadata): _open_mode = ('rb' if isinstance(actual_metadata, bytes) else 'r') with open(METADATA_SCENARIO_DIR / filename, _open_mode) as _file: diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index bc8d482a605..5687c9a7b02 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -1,9 +1,10 @@ +import datetime from unittest import mock import pytest from elasticsearch_metrics import metrics -from osf.metrics.reports import MonthlyReport, ReportInvalid +from osf.metrics.reports import MonthlyReport, ReportInvalid, PublicItemUsageReport from osf.metrics.utils import YearMonth @@ -70,3 +71,67 @@ class Meta: ): with pytest.raises(ReportInvalid): _bad_report.save() + + +@pytest.mark.es_metrics +class TestLastMonthReport: + @pytest.fixture + def osfid(self): + return 'abced' + + @pytest.fixture + def this_month(self): + return YearMonth.from_date(datetime.date.today()) + + @pytest.fixture + def last_month(self, this_month): + return _prior_yearmonth(this_month) + + @pytest.fixture + def prior_month(self, last_month): + return _prior_yearmonth(last_month) + + @pytest.fixture + def this_month_report(self, osfid, this_month): + return _item_usage_report(this_month, osfid, view_count=77) + + @pytest.fixture + def last_month_report(self, osfid, last_month): + return _item_usage_report(last_month, osfid, view_count=57) + + @pytest.fixture + def diff_last_month_report(self, last_month): + return _item_usage_report(last_month, 'zyxvt', view_count=17) + + @pytest.fixture + def prior_month_report(self, osfid, prior_month): + return _item_usage_report(prior_month, osfid, view_count=37) + + def test_with_none(self, osfid): + assert 
PublicItemUsageReport().for_last_month(osfid) is None + + def test_with_others(self, osfid, this_month_report, prior_month_report, diff_last_month_report): + assert PublicItemUsageReport().for_last_month(osfid) is None + + def test_with_last_month(self, osfid, this_month_report, last_month_report, diff_last_month_report, prior_month_report): + _report = PublicItemUsageReport().for_last_month(osfid) + assert _report is not None + assert _report.view_count == 57 + + +def _prior_yearmonth(ym: YearMonth) -> YearMonth: + return ( + YearMonth(ym.year - 1, 12)  # the month before January is December of the prior year + if ym.month == 1 + else YearMonth(ym.year, ym.month - 1) + ) + + +def _item_usage_report(ym: YearMonth, osfid: str, **kwargs): + _report = PublicItemUsageReport( + report_yearmonth=ym, + item_osfid=osfid, + **kwargs + ) + _report.save(refresh=True) + return _report