Patch notes (free text ahead of the "diff --git" header; patch tools skip it):
  * generate_report: node queryset additionally filters created__lt=yearmonth.next_month() and excludes SpamStatus.SPAM; the preprint queryset is built once and passed to get_storage_size / get_files.
  * get_published_preprints: takes yearmonth and bounds the queryset by creation date.
  * get_month_start_end is removed; the login-count filters use yearmonth.target_month() / yearmonth.next_month().
  * Review fix: the preprint bound was created__lte=yearmonth.next_month(); it is now created__lt, matching the exclusive upper bound used by the node filter and both date_last_login filters in this same patch.
  * NOTE(review): whitespace in this copy of the patch is collapsed; restore the original line breaks before applying.
diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py index a08ac0fd8a5..61a86faf9d8 100644 --- a/osf/metrics/reporters/institution_summary_monthly.py +++ b/osf/metrics/reporters/institution_summary_monthly.py @@ -18,7 +18,14 @@ def report(self, yearmonth: YearMonth): yield self.generate_report(institution, yearmonth) def generate_report(self, institution, yearmonth): - node_queryset = institution.nodes.filter(deleted__isnull=True) + node_queryset = institution.nodes.filter( + deleted__isnull=True, + created__lt=yearmonth.next_month() + ).exclude( + spam_status=SpamStatus.SPAM, + ) + + preprint_queryset = self.get_published_preprints(institution, yearmonth) return InstitutionMonthlySummaryReport( institution_id=institution._id, @@ -27,9 +34,9 @@ def generate_report(self, institution, yearmonth): public_project_count=self._get_count(node_queryset, 'osf.node', is_public=True), public_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=True), embargoed_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=False), - published_preprint_count=self.get_published_preprints(institution).count(), - storage_byte_count=self.get_storage_size(node_queryset, institution), - public_file_count=self.get_files(node_queryset, institution, is_public=True).count(), + published_preprint_count=preprint_queryset.count(), + storage_byte_count=self.get_storage_size(node_queryset, preprint_queryset), + public_file_count=self.get_files(node_queryset, preprint_queryset, is_public=True).count(), monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, yearmonth), monthly_active_user_count=self.get_monthly_active_user_count(institution, yearmonth), ) @@ -37,12 +44,17 @@ def generate_report(self, institution, yearmonth): def _get_count(self, node_queryset, node_type, is_public): return node_queryset.filter(type=node_type, is_public=is_public, 
root_id=F('pk')).count() - def get_published_preprints(self, institution): - return Preprint.objects.can_view().filter( - affiliated_institutions=institution - ).exclude(spam_status=SpamStatus.SPAM) + def get_published_preprints(self, institution, yearmonth): + queryset = Preprint.objects.can_view().filter( + affiliated_institutions=institution, + created__lt=yearmonth.next_month() + ).exclude( + spam_status=SpamStatus.SPAM + ) - def get_files(self, node_queryset, institution, is_public=None): + return queryset + + def get_files(self, node_queryset, preprint_queryset, is_public=None): public_kwargs = {} if is_public: public_kwargs = {'is_public': is_public} @@ -52,42 +64,30 @@ def get_files(self, node_queryset, institution, is_public=None): target_content_type=ContentType.objects.get_for_model(AbstractNode), ) target_preprint_q = Q( - target_object_id__in=self.get_published_preprints(institution).values('pk'), + target_object_id__in=preprint_queryset.values('pk'), target_content_type=ContentType.objects.get_for_model(Preprint), ) return OsfStorageFile.objects.filter( deleted__isnull=True, purged__isnull=True ).filter(target_node_q | target_preprint_q) - def get_storage_size(self, node_queryset, institution): - files = self.get_files(node_queryset, institution) + def get_storage_size(self, node_queryset, preprint_queryset): + files = self.get_files(node_queryset, preprint_queryset) return FileVersion.objects.filter( size__gt=0, purged__isnull=True, basefilenode__in=files ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes'] - def get_month_start_end(self, yearmonth): - # Get the first day of the month - start_date = datetime(yearmonth.year, yearmonth.month, 1) - # Calculate the first day of the next month - if yearmonth.month == 12: - end_date = datetime(yearmonth.year + 1, 1, 1) - else: - end_date = datetime(yearmonth.year, yearmonth.month + 1, 1) - return start_date, end_date - def get_monthly_logged_in_user_count(self, institution, yearmonth): - 
start_date, end_date = self.get_month_start_end(yearmonth) return institution.get_institution_users().filter( - date_last_login__gte=start_date, - date_last_login__lt=end_date + date_last_login__gte=yearmonth.target_month(), + date_last_login__lt=yearmonth.next_month() ).count() def get_monthly_active_user_count(self, institution, yearmonth): - start_date, end_date = self.get_month_start_end(yearmonth) return institution.get_institution_users().filter( date_disabled__isnull=True, - date_last_login__gte=start_date, - date_last_login__lt=end_date + date_last_login__gte=yearmonth.target_month(), + date_last_login__lt=yearmonth.next_month() ).count()