From 1cdb1d7ec7966ab1b5d63d081baf6acef9a4c64b Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Wed, 16 Oct 2024 14:07:45 -0400 Subject: [PATCH 1/3] add monthly active stat to users table --- api/institutions/serializers.py | 1 + api/institutions/views.py | 1 + osf/metrics/reporters/institutional_users.py | 18 ++++++++++++++++++ osf/metrics/reports.py | 1 + .../test_institutional_users_reporter.py | 18 ++++++++++++++++++ 5 files changed, 39 insertions(+) diff --git a/api/institutions/serializers.py b/api/institutions/serializers.py index 29c7c8b380a..e3679b2a9c5 100644 --- a/api/institutions/serializers.py +++ b/api/institutions/serializers.py @@ -334,6 +334,7 @@ class Meta: department = ser.CharField(read_only=True, source='department_name') orcid_id = ser.CharField(read_only=True) month_last_login = YearmonthField(read_only=True) + month_last_active = YearmonthField(read_only=True) account_creation_date = YearmonthField(read_only=True) public_projects = ser.IntegerField(read_only=True, source='public_project_count') diff --git a/api/institutions/views.py b/api/institutions/views.py index c13cb1f8d74..f5602bafcb2 100644 --- a/api/institutions/views.py +++ b/api/institutions/views.py @@ -561,6 +561,7 @@ class _NewInstitutionUserMetricsList(InstitutionMixin, ElasticsearchListView): 'user_name', 'department', 'month_last_login', + 'month_last_active', 'account_creation_date', 'public_projects', 'private_projects', diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py index 98e13003387..1e85b31a95f 100644 --- a/osf/metrics/reporters/institutional_users.py +++ b/osf/metrics/reporters/institutional_users.py @@ -52,6 +52,7 @@ def __post_init__(self): if self.user.date_last_login is not None else None ), + month_last_active=self._get_last_active(), account_creation_date=YearMonth.from_date(self.user.created), orcid_id=self.user.get_verified_external_id('ORCID', verified_only=True), 
public_project_count=self._public_project_queryset().count(), @@ -140,3 +141,20 @@ def _storage_byte_count(self): purged__isnull=True, basefilenode__in=self._public_osfstorage_file_queryset(), ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes'] + + def _get_last_active(self): + end_date = self.yearmonth.next_month() + + node_logs = self.user.logs.filter(created__lt=end_date).order_by('-created') + preprint_logs = self.user.preprint_logs.filter(created__lt=end_date).order_by('-created') + + latest_node_log_date = node_logs.first().created if node_logs.exists() else None + latest_preprint_log_date = preprint_logs.first().created if preprint_logs.exists() else None + dates = [date for date in [latest_node_log_date, latest_preprint_log_date] if date is not None] + + latest_activity_date = max(dates, default=None) + + if latest_activity_date: + return YearMonth.from_date(latest_activity_date) + else: + return None diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 08d14867ae0..76fa7e65b51 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -260,6 +260,7 @@ class InstitutionalUserReport(MonthlyReport): user_name = metrics.Keyword() department_name = metrics.Keyword() month_last_login = YearmonthField() + month_last_active = YearmonthField() account_creation_date = YearmonthField() orcid_id = metrics.Keyword() # counts: diff --git a/osf_tests/metrics/reporters/test_institutional_users_reporter.py b/osf_tests/metrics/reporters/test_institutional_users_reporter.py index 8fbb873083f..a0457fdcf1f 100644 --- a/osf_tests/metrics/reporters/test_institutional_users_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_users_reporter.py @@ -52,6 +52,11 @@ def _assert_report_matches_setup(self, report: InstitutionalUserReport, setup: _ self.assertEqual(report.user_name, setup.user.fullname) self.assertEqual(report.department_name, setup.department_name) self.assertEqual(report.month_last_login, 
YearMonth.from_date(setup.user.date_last_login)) + if setup.user.month_last_active: + self.assertEqual(report.month_last_active, YearMonth.from_date(setup.user.month_last_active)) + else: + self.assertEqual(report.month_last_active, setup.user.month_last_active) + self.assertEqual(report.account_creation_date, YearMonth.from_date(setup.user.created)) self.assertEqual(report.orcid_id, setup.orcid_id) # counts (NOTE: report.public_file_count and report.storage_byte_count tested separately) @@ -159,6 +164,19 @@ def __post_init__(self): ), ) self._add_affiliations(self._generate_counted_objects()) + recent_node_log = self.user.logs.order_by('-created').first() + + recent_preprint_log = self.user.preprint_logs.order_by('-created').first() + + recent_node_log_date = recent_node_log.created if recent_node_log else None + recent_preprint_log_date = recent_preprint_log.created if recent_preprint_log else None + + dates = [date for date in [recent_node_log_date, recent_preprint_log_date] if date is not None] + + if dates: + self.user.month_last_active = max(dates) + else: + self.user.month_last_active = None def affiliate_user(self): self.user.add_or_update_affiliated_institution( From e3669b7e2d700036f7825b8b9cca92eb4cde0a83 Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Fri, 18 Oct 2024 09:17:58 -0400 Subject: [PATCH 2/3] clean-up queries and use dataclass for setup tool --- osf/metrics/reporters/institutional_users.py | 7 +++--- .../test_institutional_users_reporter.py | 25 ++++++++----------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py index 1e85b31a95f..cd0afb4a6cd 100644 --- a/osf/metrics/reporters/institutional_users.py +++ b/osf/metrics/reporters/institutional_users.py @@ -148,9 +148,10 @@ def _get_last_active(self): node_logs = self.user.logs.filter(created__lt=end_date).order_by('-created') preprint_logs =
self.user.preprint_logs.filter(created__lt=end_date).order_by('-created') - latest_node_log_date = node_logs.first().created if node_logs.exists() else None - latest_preprint_log_date = preprint_logs.first().created if preprint_logs.exists() else None - dates = [date for date in [latest_node_log_date, latest_preprint_log_date] if date is not None] + dates = filter(bool, [ + node_logs.values_list('created', flat=True).first(), + preprint_logs.values_list('created', flat=True).first(), + ]) latest_activity_date = max(dates, default=None) diff --git a/osf_tests/metrics/reporters/test_institutional_users_reporter.py b/osf_tests/metrics/reporters/test_institutional_users_reporter.py index a0457fdcf1f..0aa8e5d0d74 100644 --- a/osf_tests/metrics/reporters/test_institutional_users_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_users_reporter.py @@ -52,10 +52,10 @@ def _assert_report_matches_setup(self, report: InstitutionalUserReport, setup: _ self.assertEqual(report.user_name, setup.user.fullname) self.assertEqual(report.department_name, setup.department_name) self.assertEqual(report.month_last_login, YearMonth.from_date(setup.user.date_last_login)) - if setup.user.month_last_active: - self.assertEqual(report.month_last_active, YearMonth.from_date(setup.user.month_last_active)) + if setup.month_last_active: + self.assertEqual(report.month_last_active, YearMonth.from_date(setup.month_last_active)) else: - self.assertEqual(report.month_last_active, setup.user.month_last_active) + self.assertEqual(report.month_last_active, setup.month_last_active) self.assertEqual(report.account_creation_date, YearMonth.from_date(setup.user.created)) self.assertEqual(report.orcid_id, setup.orcid_id) @@ -153,6 +153,7 @@ class _InstiUserSetup: department_name: str | None = None orcid_id: str | None = None user: osfdb.OSFUser = dataclasses.field(init=False) + month_last_active: YearMonth | None = dataclasses.field(init=False) def __post_init__(self): self.user = UserFactory( 
@@ -164,19 +165,15 @@ def __post_init__(self): ), ) self._add_affiliations(self._generate_counted_objects()) - recent_node_log = self.user.logs.order_by('-created').first() + node_logs = self.user.logs.order_by('-created') + preprint_logs = self.user.preprint_logs.order_by('-created') - recent_preprint_log = self.user.preprint_logs.order_by('-created').first() + dates = filter(bool, [ + node_logs.values_list('created', flat=True).first(), + preprint_logs.values_list('created', flat=True).first(), + ]) - recent_node_log_date = recent_node_log.created if recent_node_log else None - recent_preprint_log_date = recent_preprint_log.created if recent_preprint_log else None - - dates = [date for date in [recent_node_log_date, recent_preprint_log_date] if date is not None] - - if dates: - self.user.month_last_active = max(dates) - else: - self.user.month_last_active = None + self.month_last_active = max(dates, default=None) def affiliate_user(self): self.user.add_or_update_affiliated_institution( From b175bb5dd565312fbb92598addac3ae98ed90d52 Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Fri, 18 Oct 2024 15:16:23 -0400 Subject: [PATCH 3/3] change type annotations to be accurate --- .../metrics/reporters/test_institutional_users_reporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osf_tests/metrics/reporters/test_institutional_users_reporter.py b/osf_tests/metrics/reporters/test_institutional_users_reporter.py index 0aa8e5d0d74..cc5d1a70e59 100644 --- a/osf_tests/metrics/reporters/test_institutional_users_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_users_reporter.py @@ -153,7 +153,7 @@ class _InstiUserSetup: department_name: str | None = None orcid_id: str | None = None user: osfdb.OSFUser = dataclasses.field(init=False) - month_last_active: YearMonth | None = dataclasses.field(init=False) + month_last_active: datetime.datetime | None = dataclasses.field(init=False) def __post_init__(self): self.user = UserFactory(