institution-user metrics view

- add base `ElasticsearchListView` - use `elasticsearch_dsl.Search` as queryset-analogue for code reuse - allows filtering (with 'eq' and 'ne' operators) following `filterable_fields` on the serializer - allows sorting, following `default_ordering` and `ordering_fields` on the view - allows pagination (with `page` and `page[size]` params) - update "new" institution user metrics view with ElasticsearchListView - add `MonthlyReport.most_recent_yearmonth`
CenterForOpenScience · Sep 10, 2024 · 6cf406e · 6cf406e
1 parent b552b91
commit 6cf406e
Show file tree

Hide file tree

Showing 6 changed files with 320 additions and 7 deletions.
diff --git a/api/base/elasticsearch_dsl_views.py b/api/base/elasticsearch_dsl_views.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+import abc
+
+import elasticsearch_dsl as edsl
+from rest_framework import generics
+from rest_framework import exceptions as drf_exceptions
+from rest_framework.settings import api_settings as drf_settings
+
+from api.base.filters import FilterMixin
+from api.base.views import JSONAPIBaseView
+
+
+class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, abc.ABC):
+    '''abstract view class using `elasticsearch_dsl.Search` as a queryset-analogue
+    '''
+    default_ordering: str | None = None
+    ordering_fields: frozenset[str] = frozenset()
+
+    @abc.abstractmethod
+    def get_default_search(self) -> edsl.Search | None:
+        ...
+
+    ###
+    # beware! inheritance shenanigans below
+
+    # override FilterMixin to disable all operators besides 'eq' and 'ne'
+    MATCHABLE_FIELDS = ()
+    COMPARABLE_FIELDS = ()
+    DEFAULT_OPERATOR_OVERRIDES = {}
+    # (if you want to add fulltext-search or range-filter support, remove the override
+    #  and update `__add_search_filter` to handle those operators -- tho note that the
+    #  underlying elasticsearch field mapping will need to be compatible with the query)
+
+    # override DEFAULT_FILTER_BACKENDS rest_framework setting
+    # (filtering handled in-view to reuse logic from FilterMixin)
+    filter_backends = ()
+
+    # note: because elasticsearch_dsl.Search supports slicing and gives results when iterated on,
+    #       it works fine with default pagination
+
+    # override rest_framework.generics.GenericAPIView
+    def get_queryset(self):
+        _search = self.get_default_search()
+        if _search is None:
+            return []
+        # using parsing logic from FilterMixin (oddly nested dict and all)
+        for _parsed_param in self.parse_query_params(self.request.query_params).values():
+            for _parsed_filter in _parsed_param.values():
+                _search = self.__add_search_filter(
+                    _search,
+                    elastic_field_name=_parsed_filter['source_field_name'],
+                    operator=_parsed_filter['op'],
+                    value=_parsed_filter['value'],
+                )
+        return self.__add_sort(_search)
+
+    ###
+    # private methods
+
+    def __add_sort(self, search: edsl.Search) -> edsl.Search:
+        _elastic_sort = self.__get_elastic_sort()
+        return (search if _elastic_sort is None else search.sort(_elastic_sort))
+
+    def __get_elastic_sort(self) -> str | None:
+        _sort_param = self.request.query_params.get(drf_settings.ORDERING_PARAM, self.default_ordering)
+        if not _sort_param:
+            return None
+        _sort_field, _ascending = (
+            (_sort_param[1:], False)
+            if _sort_param.startswith('-')
+            else (_sort_param, True)
+        )
+        if _sort_field not in self.ordering_fields:
+            raise drf_exceptions.ValidationError(
+                f'invalid value for {drf_settings.ORDERING_PARAM} query param (valid values: {", ".join(self.ordering_fields)})',
+            )
+        _serializer_field = self.get_serializer().fields[_sort_field]
+        _elastic_sort_field = _serializer_field.source
+        return (_elastic_sort_field if _ascending else f'-{_elastic_sort_field}')
+
+    def __add_search_filter(
+        self,
+        search: edsl.Search,
+        elastic_field_name: str,
+        operator: str,
+        value: str,
+    ) -> edsl.Search:
+        match operator:  # operators from FilterMixin
+            case 'eq':
+                if value == '':
+                    return search.exclude('exists', field=elastic_field_name)
+                return search.filter('term', **{elastic_field_name: value})
+            case 'ne':
+                if value == '':
+                    return search.filter('exists', field=elastic_field_name)
+                return search.exclude('term', **{elastic_field_name: value})
+            case _:
+                raise NotImplementedError(f'unsupported filter operator "{operator}"')
diff --git a/api/institutions/serializers.py b/api/institutions/serializers.py
@@ -314,6 +314,11 @@ class NewInstitutionUserMetricsSerializer(JSONAPISerializer):
     class Meta:
         type_ = 'institution-users'
 
+    filterable_fields = frozenset({
+        'department',
+        'orcid_id',
+    })
+
     id = IDField(source='meta.id', read_only=True)
     user_name = ser.CharField(read_only=True)
     department = ser.CharField(read_only=True, source='department_name')
@@ -339,3 +344,6 @@ class Meta:
     )
 
     links = LinksField({})
+
+    def get_absolute_url(self):
+        return None  # there is no detail view for institution-users
diff --git a/api/institutions/views.py b/api/institutions/views.py
@@ -12,10 +12,12 @@
 from osf.metrics import InstitutionProjectCounts
 from osf.models import OSFUser, Node, Institution, Registration
 from osf.metrics import UserInstitutionProjectCounts
+from osf.metrics.reports import InstitutionalUserReport
 from osf.utils import permissions as osf_permissions
 
 from api.base import permissions as base_permissions
-from api.base.filters import ListFilterMixin, FilterMixin
+from api.base.elasticsearch_dsl_views import ElasticsearchListView
+from api.base.filters import ListFilterMixin
 from api.base.views import JSONAPIBaseView
 from api.base.serializers import JSONAPISerializer
 from api.base.utils import get_object_or_error, get_user_auth
@@ -528,7 +530,7 @@ def get_default_queryset(self):
         return self._make_elasticsearch_results_filterable(search, id=institution._id, department=DEFAULT_ES_NULL_VALUE)
 
 
-class _NewInstitutionUserMetricsList(InstitutionMixin, FilterMixin, JSONAPIBaseView):
+class _NewInstitutionUserMetricsList(InstitutionMixin, ElasticsearchListView):
     permission_classes = (
         drf_permissions.IsAuthenticatedOrReadOnly,
         base_permissions.TokenHasScope,
@@ -543,6 +545,31 @@ class _NewInstitutionUserMetricsList(InstitutionMixin, FilterMixin, JSONAPIBaseV
 
     serializer_class = NewInstitutionUserMetricsSerializer
 
+    default_ordering = '-storage_byte_count'
+    ordering_fields = frozenset((
+        'user_name',
+        'department',
+        'month_last_login',
+        'account_creation_date',
+        'public_projects',
+        'private_projects',
+        'public_registration_count',
+        'embargoed_registration_count',
+        'published_preprint_count',
+        'public_file_count',
+        'storage_byte_count',
+    ))
+
+    def get_default_search(self):
+        _yearmonth = InstitutionalUserReport.most_recent_yearmonth()
+        if _yearmonth is None:
+            return None
+        return (
+            InstitutionalUserReport.search()
+            .filter('term', report_yearmonth=str(_yearmonth))
+            .filter('term', institution_id=self.get_institution()._id)
+        )
+
 
 institution_user_metrics_list_view = view_toggled_by_feature_flag(
     flag_name=osf.features.INSTITUTIONAL_DASHBOARD_2024,

diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py
@@ -2,6 +2,7 @@
 import csv
 from io import StringIO
 from random import random
+from urllib.parse import urlencode
 
 import pytest
 from waffle.testutils import override_flag
@@ -14,10 +15,11 @@
 )
 
 from osf.metrics import UserInstitutionProjectCounts
+from osf.metrics.reports import InstitutionalUserReport
 
 @pytest.mark.es
 @pytest.mark.django_db
-class TestInstitutionUserMetricList:
+class TestOldInstitutionUserMetricList:
 
     @pytest.fixture(autouse=True)
     def _waffled(self):
@@ -262,3 +264,159 @@ def test_filter_and_sort(self, app, url, user, user2, user3, admin, user4, popul
         assert data[0]['attributes']['department'] == 'Biology dept'
         assert data[1]['attributes']['department'] == 'N/A'
         assert data[2]['attributes']['department'] == 'Psychology dept'
+
+
+@pytest.mark.django_db
+class TestNewInstitutionUserMetricList:
+    @pytest.fixture(autouse=True)
+    def _waffled(self):
+        with override_flag(osf.features.INSTITUTIONAL_DASHBOARD_2024, active=True):
+            yield  # these tests apply only after institution dashboard improvements
+
+    @pytest.fixture()
+    def institution(self):
+        return InstitutionFactory()
+
+    @pytest.fixture()
+    def rando(self):
+        return AuthUserFactory()
+
+    @pytest.fixture()
+    def institutional_admin(self, institution):
+        _admin_user = AuthUserFactory()
+        institution.get_group('institutional_admins').user_set.add(_admin_user)
+        return _admin_user
+
+    @pytest.fixture()
+    def unshown_reports(self, institution):
+        # unshown because another institution
+        _another_institution = InstitutionFactory()
+        _report_factory('2024-08', _another_institution, user_id='nother_inst')
+        # unshown because old
+        _report_factory('2024-07', institution, user_id='old')
+
+    @pytest.fixture()
+    def reports(self, institution):
+        return [
+            _report_factory(
+                '2024-08', institution,
+                user_id='u_sparse',
+                storage_byte_count=53,
+            ),
+            _report_factory(
+                '2024-08', institution,
+                user_id='u_orc',
+                orcid_id='5555-4444-3333-2222',
+                storage_byte_count=8277,
+            ),
+            _report_factory(
+                '2024-08', institution,
+                user_id='u_blargl',
+                department_name='blargl',
+                storage_byte_count=34834834,
+            ),
+            _report_factory(
+                '2024-08', institution,
+                user_id='u_orcomma',
+                orcid_id='4444-3333-2222-1111',
+                department_name='a department, or so, that happens, incidentally, to have commas',
+                storage_byte_count=736662999298,
+            ),
+        ]
+
+    @pytest.fixture()
+    def url(self, institution):
+        return f'/{API_BASE}institutions/{institution._id}/metrics/users/'
+
+    def test_anon(self, app, url):
+        _resp = app.get(url, expect_errors=True)
+        assert _resp.status_code == 401
+
+    def test_rando(self, app, url, rando):
+        _resp = app.get(url, auth=rando.auth, expect_errors=True)
+        assert _resp.status_code == 403
+
+    @pytest.mark.es
+    def test_get_empty(self, app, url, institutional_admin):
+        _resp = app.get(url, auth=institutional_admin.auth)
+        assert _resp.status_code == 200
+        assert _resp.json['data'] == []
+
+    @pytest.mark.es
+    def test_get_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        _resp = app.get(url, auth=institutional_admin.auth)
+        assert _resp.status_code == 200
+        assert len(_resp.json['data']) == len(reports)
+        _expected_user_ids = {_report.user_id for _report in reports}
+        assert set(_user_ids(_resp)) == _expected_user_ids
+
+    @pytest.mark.es
+    def test_filter_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        for _query, _expected_user_ids in (
+            ({'filter[department]': 'nunavum'}, set()),
+            ({'filter[department]': 'incidentally'}, set()),
+            ({'filter[department]': 'blargl'}, {'u_blargl'}),
+            ({'filter[department]': 'a department, or so, that happens, incidentally, to have commas'}, {'u_orcomma'}),
+            ({'filter[department][eq]': 'nunavum'}, set()),
+            ({'filter[department][eq]': 'blargl'}, {'u_blargl'}),
+            ({'filter[department][eq]': 'a department, or so, that happens, incidentally, to have commas'}, {'u_orcomma'}),
+            ({'filter[department][ne]': 'nunavum'}, {'u_sparse', 'u_blargl', 'u_orc', 'u_orcomma'}),
+
+            ({'filter[orcid_id][eq]': '5555-4444-3333-2222'}, {'u_orc'}),
+            ({'filter[orcid_id][ne]': ''}, {'u_orc', 'u_orcomma'}),
+            ({'filter[orcid_id][eq]': ''}, {'u_sparse', 'u_blargl'}),
+            ({
+                'filter[orcid_id]': '',
+                'filter[department]': 'blargl',
+            }, {'u_blargl'}),
+            ({
+                'filter[orcid_id]': '',
+                'filter[department][ne]': 'blargl',
+            }, {'u_sparse'}),
+            ({
+                'filter[orcid_id]': '5555-4444-3333-2222',
+                'filter[department][ne]': 'blargl',
+            }, {'u_orc'}),
+            ({
+                'filter[orcid_id]': '5555-4444-3333-2222',
+                'filter[department][ne]': '',
+            }, set()),
+        ):
+            _resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth)
+            assert _resp.status_code == 200
+            assert set(_user_ids(_resp)) == _expected_user_ids
+
+    @pytest.mark.es
+    def test_sort_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        for _query, _expected_user_id_list in (
+            ({'sort': 'storage_byte_count'}, ['u_sparse', 'u_orc', 'u_blargl', 'u_orcomma']),
+            ({'sort': '-storage_byte_count'}, ['u_orcomma', 'u_blargl', 'u_orc', 'u_sparse']),
+        ):
+            _resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth)
+            assert _resp.status_code == 200
+            assert list(_user_ids(_resp)) == _expected_user_id_list
+
+    @pytest.mark.es
+    def test_paginate_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        for _query, _expected_user_id_list in (
+            ({'sort': 'storage_byte_count', 'page[size]': 2}, ['u_sparse', 'u_orc']),
+            ({'sort': 'storage_byte_count', 'page[size]': 2, 'page': 2}, ['u_blargl', 'u_orcomma']),
+            ({'sort': '-storage_byte_count', 'page[size]': 3}, ['u_orcomma', 'u_blargl', 'u_orc']),
+            ({'sort': '-storage_byte_count', 'page[size]': 3, 'page': 2}, ['u_sparse']),
+        ):
+            _resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth)
+            assert _resp.status_code == 200
+            assert list(_user_ids(_resp)) == _expected_user_id_list
+
+def _user_ids(api_response):
+    for _datum in api_response.json['data']:
+        yield _datum['relationships']['user']['data']['id']
+
+def _report_factory(yearmonth, institution, **kwargs):
+    _report = InstitutionalUserReport(
+        report_yearmonth=yearmonth,
+        institution_id=institution._id,
+        **kwargs,
+    )
+    _report.save(refresh=True)
+    return _report