Skip to content

Commit

Permalink
less loopy institutional-users metrics reporter
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Sep 5, 2024
1 parent 48a6e16 commit f1b6d79
Showing 1 changed file with 76 additions and 79 deletions.
155 changes: 76 additions & 79 deletions osf/metrics/reporters/institutional_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
import datetime

from django.contrib.contenttypes.models import ContentType
from django.db.models import Q, F, Sum

from osf import models as osfdb
from osf.model.spam import SpamStatus
from addons.osfstorage.models import OsfStorageFile
from api.caching.settings import STORAGE_USAGE_KEY
from api.caching.utils import storage_usage_cache
from api.caching.tasks import update_storage_usage_cache
from osf.metrics.reports import InstitutionalUserReport
from osf.metrics.utils import YearMonth
from website import settings as website_settings
from ._base import MonthlyReporter


Expand Down Expand Up @@ -46,93 +44,92 @@ def __post_init__(self):
month_last_login=YearMonth.from_date(self.user.date_last_login),
account_creation_date=YearMonth.from_date(self.user.created),
orcid_id=self.user.get_verified_external_id('ORCID', verified_only=True),
# initialize counts to 0:
public_project_count=0,
private_project_count=0,
public_registration_count=0,
embargoed_registration_count=0,
storage_byte_count=0,
public_file_count=0,
published_preprint_count=0,
public_project_count=self._public_project_queryset().count(),
private_project_count=self._private_project_queryset().count(),
public_registration_count=self._public_registration_queryset().count(),
embargoed_registration_count=self._embargoed_registration_queryset().count(),
public_file_count=self._public_osfstorage_file_queryset().count(),
published_preprint_count=self._published_preprint_queryset().count(),
storage_byte_count=self._storage_byte_count(),
)
self._fill_counts()

def _fill_counts(self) -> None:
    """Walk the user's preprints and nodes, adding each one to the report counts.

    Preprints go to `_add_counts_for_preprint`. Nodes are dispatched by shape:
    non-root nodes are counted as components, root `Node`s as projects and
    root `Registration`s as registrations.

    Raises:
        ValueError: if a root node is neither a `Node` nor a `Registration`.
    """
    _preprint_iter = self._preprint_queryset().iterator(chunk_size=_CHUNK_SIZE)
    for _each_preprint in _preprint_iter:
        self._add_counts_for_preprint(_each_preprint)

    _node_iter = self._node_queryset().iterator(chunk_size=_CHUNK_SIZE)
    for _node in _node_iter:
        # a node whose root is some other node is a component
        if _node.pk != _node.root_id:
            self._add_counts_for_component(_node)
            continue
        if isinstance(_node, osfdb.Node):
            self._add_counts_for_project(_node)
            continue
        if isinstance(_node, osfdb.Registration):
            self._add_counts_for_registration(_node)
            continue
        raise ValueError(f'expected "node" to be project, component, or registration; got {_node} (type {type(_node)})')

def _node_queryset(self):
    """Build the base queryset of institution nodes visible to this user.

    Starts from `self.institution.nodes`, keeps projects and registrations
    created before `self.before_datetime` that are not deleted, and drops
    anything flagged as spam. That queryset is handed to
    `get_nodes_for_user` as `base_queryset` so the result is limited to
    `self.user`'s nodes (exact membership rule lives in that manager method).

    Returns:
        a queryset with `embargo` pre-joined, so per-registration embargo
        checks do not trigger one extra query per row.
    """
    _institution_node_qs = (
        self.institution.nodes
        .filter(
            type__in=('osf.node', 'osf.registration'),  # `type` field from TypedModel
            created__lt=self.before_datetime,
            is_deleted=False,
        )
        # spam exclusion belongs on the base queryset, not on a separate
        # (argument-less) `get_nodes_for_user()` call
        .exclude(spam_status=SpamStatus.SPAM)
    )
    _user_institution_node_qs = osfdb.Node.objects.get_nodes_for_user(
        user=self.user,
        base_queryset=_institution_node_qs,
    )
    return _user_institution_node_qs.select_related('embargo')

def _preprint_queryset(self):
def _public_project_queryset(self):
    """Narrow `_node_queryset()` to public, top-level projects."""
    _user_nodes = self._node_queryset()
    _public_root_projects = _user_nodes.filter(
        type='osf.node',  # `type` field from TypedModel
        is_public=True,
        root_id=F('pk'),  # a node that is its own root is a top-level node
    )
    return _public_root_projects

def _private_project_queryset(self):
    """Narrow `_node_queryset()` to non-public, top-level projects."""
    _user_nodes = self._node_queryset()
    _private_root_projects = _user_nodes.filter(
        type='osf.node',  # `type` field from TypedModel
        is_public=False,
        root_id=F('pk'),  # a node that is its own root is a top-level node
    )
    return _private_root_projects

def _public_registration_queryset(self):
    """Narrow `_node_queryset()` to public, top-level registrations."""
    _user_nodes = self._node_queryset()
    _public_root_registrations = _user_nodes.filter(
        type='osf.registration',  # `type` field from TypedModel
        is_public=True,
        root_id=F('pk'),  # a node that is its own root is a top-level node
    )
    return _public_root_registrations

def _embargoed_registration_queryset(self):
    """Narrow `_node_queryset()` to top-level registrations still under embargo.

    A registration counts as embargoed when it is not public and its embargo
    ends on or after `self.before_datetime`.
    """
    _user_nodes = self._node_queryset()
    _embargoed_root_registrations = _user_nodes.filter(
        type='osf.registration',  # `type` field from TypedModel
        is_public=False,
        root_id=F('pk'),  # a node that is its own root is a top-level node
        embargo__end_date__gte=self.before_datetime,
    )
    return _embargoed_root_registrations

def _preprint_queryset(self):
    """Query published preprints of this institution that the user contributed to.

    Goes through the institution's `preprints` relation and keeps those with
    `self.user` as a contributor, marked published, and published before
    `self.before_datetime`. Returns an empty queryset when `Preprint` has no
    `affiliated_institutions` attribute.
    """
    if not hasattr(osfdb.Preprint, 'affiliated_institutions'):
        return osfdb.Preprint.objects.none()  # HACK: preprints affiliation project still in-progress
    return self.institution.preprints.filter(
        _contributors=self.user,
        is_published=True,
        date_published__lt=self.before_datetime,
    )

def _published_preprint_queryset(self):
    """Query viewable, non-spam preprints of this institution for this user.

    Starts from `Preprint.objects.can_view()`, keeps preprints affiliated with
    `self.institution`, contributed to by `self.user` and published before
    `self.before_datetime`, then drops anything flagged as spam.
    """
    return (
        osfdb.Preprint.objects.can_view()  # published/publicly-viewable
        .filter(
            affiliated_institutions=self.institution,
            _contributors=self.user,
            date_published__lt=self.before_datetime,
        )
        .exclude(spam_status=SpamStatus.SPAM)
    )

def _add_counts_for_project(self, project: osfdb.Node) -> None:
    """Add one top-level project to the report.

    Storage usage is always added. A public project bumps the public-project
    count and has its files counted; otherwise only the private-project count
    is bumped.
    """
    self._add_storage_usage(project)
    if not project.is_public:
        self.report.private_project_count += 1
        return
    self.report.public_project_count += 1
    self._add_public_file_count(project)

def _add_counts_for_registration(self, reg: osfdb.Registration) -> None:
    """Add one top-level registration to the report.

    Storage usage is always added. A registration whose embargo ends on or
    after `self.before_datetime` is counted as embargoed; failing that, a
    public registration is counted as public and has its files counted.
    A registration that is neither changes no counter.
    """
    self._add_storage_usage(reg)
    _still_embargoed = reg.embargo and (reg.embargo.end_date >= self.before_datetime)
    if _still_embargoed:
        self.report.embargoed_registration_count += 1
        return
    if reg.is_public:
        self.report.public_registration_count += 1
        self._add_public_file_count(reg)

def _add_counts_for_component(self, component: osfdb.AbstractNode) -> None:
    """Add one component (non-root node) to the report.

    Components add storage usage and, when public, file counts; they do not
    touch any project or registration counter.
    """
    self._add_storage_usage(component)
    if not component.is_public:
        return
    self._add_public_file_count(component)

def _add_counts_for_preprint(self, preprint: osfdb.Preprint) -> None:
    """Add one preprint to the report, but only if it is `verified_publishable`.

    A preprint that passes bumps the published-preprint count and adds its
    storage usage and file count; any other preprint is ignored entirely.
    """
    if not preprint.verified_publishable:
        return
    self.report.published_preprint_count += 1
    self._add_storage_usage(preprint)
    self._add_public_file_count(preprint)

def _add_public_file_count(self, filetarget: osfdb.AbstractNode | osfdb.Preprint) -> None:
    """Add the number of active osfstorage files on `filetarget` to the report.

    Only files created before `self.before_datetime` are counted.
    """
    _file_queryset = OsfStorageFile.active.filter(
        target_object_id=filetarget.pk,
        target_content_type=ContentType.objects.get_for_model(filetarget),
        created__lt=self.before_datetime,
    )
    self.report.public_file_count += _file_queryset.count()

def _public_osfstorage_file_queryset(self):
    """Query osfstorage files attached to the user's public nodes and preprints.

    A file qualifies when its target is either a public node from
    `_node_queryset()` or a preprint from `_published_preprint_queryset()`,
    and the file itself was created before `self.before_datetime` and has
    neither `deleted` nor `purged` set.
    """
    _target_node_q = Q(
        # any public project, registration, project component, or registration component
        target_object_id__in=self._node_queryset().filter(is_public=True).values('pk'),
        target_content_type=ContentType.objects.get_for_model(osfdb.AbstractNode),
    )
    _target_preprint_q = Q(
        target_object_id__in=self._published_preprint_queryset().values('pk'),
        target_content_type=ContentType.objects.get_for_model(osfdb.Preprint),
    )
    return (
        OsfStorageFile.objects
        .filter(
            created__lt=self.before_datetime,
            deleted__isnull=True,
            purged__isnull=True,
        )
        .filter(_target_node_q | _target_preprint_q)
    )

def _add_storage_usage(self, obj: osfdb.AbstractNode | osfdb.Preprint) -> None:
    """Add the cached storage usage of `obj` to the report's byte count.

    Does nothing unless `ENABLE_STORAGE_USAGE_CACHE` is on. On a cache miss
    the cache is refreshed once and re-read; if the value is still missing,
    the report is left unchanged.
    """
    if website_settings.ENABLE_STORAGE_USAGE_CACHE:
        _cache_key = STORAGE_USAGE_KEY.format(target_id=obj._id)
        _byte_count = storage_usage_cache.get(_cache_key)
        if _byte_count is None:
            update_storage_usage_cache(obj.id, obj._id)
            _byte_count = storage_usage_cache.get(_cache_key)
        if _byte_count is not None:
            self.report.storage_byte_count += _byte_count

def _storage_byte_count(self):
    """Sum the sizes of file versions belonging to the user's public osfstorage files.

    Only versions with a positive size, created before `self.before_datetime`
    and with neither `deleted` nor `purged` set are included.

    Returns:
        the total size in bytes, or 0 when no version matches.
    """
    # queries go through the model's manager; the model class itself has no `filter`
    _version_qs = osfdb.FileVersion.objects.filter(
        size__gt=0,
        created__lt=self.before_datetime,
        # NOTE(review): confirm `FileVersion` actually has a `deleted` field
        deleted__isnull=True,
        purged__isnull=True,
        # NOTE(review): reverse lookups normally use the related *query* name
        # (e.g. `basefilenode`), not the `_set` accessor -- verify against the model
        basefilenode_set__in=self._public_osfstorage_file_queryset(),
    )
    _aggregated = _version_qs.aggregate(storage_bytes=Sum('size'))
    # `Sum` aggregates to None over an empty queryset; report that as zero bytes
    return _aggregated['storage_bytes'] or 0

0 comments on commit f1b6d79

Please sign in to comment.