From ada8f8cc7578670b08dd8cd1dca1c67a707fd29b Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Mon, 13 Nov 2023 11:42:16 -0500
Subject: [PATCH 01/23] Get justification from property - Avoids AttributeError
 for component Registrations

---
 osf/external/internet_archive/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/external/internet_archive/tasks.py b/osf/external/internet_archive/tasks.py
index c4247343196..ea2757167af 100644
--- a/osf/external/internet_archive/tasks.py
+++ b/osf/external/internet_archive/tasks.py
@@ -38,7 +38,7 @@ def update_ia_metadata(node, data=None):
             data[Registration.IA_MAPPED_NAMES.get(key, key)] = data.pop(key)
 
         if node.moderation_state == RegistrationModerationStates.WITHDRAWN.db_name:
-            data['withdrawal_justification'] = node.retraction.justification
+            data['withdrawal_justification'] = node.withdrawal_justification
 
         if getattr(node, 'ia_url', None) and node.is_public:
             task = get_task_from_postcommit_queue(

From d8ab85a4efa05f46aa8751fb4b5e91a9e7da88f0 Mon Sep 17 00:00:00 2001
From: John Tordoff <Johnetordoff@users.noreply.github.com>
Date: Tue, 14 Nov 2023 15:37:22 -0500
Subject: [PATCH 02/23] [ENG-2714] Turn on institutional affiliation by default
 for registrations (#10466)

* fix draft registrations affiliated institution default value for no-project registrations
---
 api/nodes/serializers.py                      | 10 ----
 .../views/test_draft_registration_list.py     | 57 +++++++++++++++----
 osf/models/registrations.py                   | 18 +++---
 3 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/api/nodes/serializers.py b/api/nodes/serializers.py
index aaff14ac1b9..918f156ce3d 100644
--- a/api/nodes/serializers.py
+++ b/api/nodes/serializers.py
@@ -1557,12 +1557,6 @@ class DraftRegistrationLegacySerializer(JSONAPISerializer):
         'html': 'get_absolute_url',
     })
 
-    affiliate_user_institutions = ser.BooleanField(
-        required=False,
-        default=True,
-        help_text='Specify whether user institution affiliations should be copied over to the draft registration.',
-    )
-
     def get_absolute_url(self, obj):
         return obj.absolute_url
 
@@ -1603,7 +1597,6 @@ def create(self, validated_data):
         registration_responses = validated_data.pop('registration_responses', None)
         schema = validated_data.pop('registration_schema')
         provider = validated_data.pop('provider', None)
-        affiliate_user_institutions = validated_data.pop('affiliate_user_institutions', True)
 
         self.enforce_metadata_or_registration_responses(metadata, registration_responses)
 
@@ -1618,9 +1611,6 @@ def create(self, validated_data):
         if registration_responses:
             self.update_registration_responses(draft, registration_responses)
 
-        if affiliate_user_institutions and draft.branched_from_type == DraftNode:
-            draft.affiliated_institutions.set(draft.creator.affiliated_institutions.all())
-
         return draft
 
     class Meta:
diff --git a/api_tests/draft_registrations/views/test_draft_registration_list.py b/api_tests/draft_registrations/views/test_draft_registration_list.py
index c317d8836fc..4c4a7148d55 100644
--- a/api_tests/draft_registrations/views/test_draft_registration_list.py
+++ b/api_tests/draft_registrations/views/test_draft_registration_list.py
@@ -9,7 +9,7 @@
 from api.base.settings.defaults import API_BASE
 
 from osf.migrations import ensure_invisible_and_inactive_schema
-from osf.models import DraftRegistration, NodeLicense, RegistrationProvider, Institution
+from osf.models import DraftRegistration, NodeLicense, RegistrationProvider
 from osf_tests.factories import (
     RegistrationFactory,
     CollectionFactory,
@@ -260,27 +260,64 @@ def test_create_project_based_draft_does_not_email_initiator(
 
         assert not mock_send_mail.called
 
-    def test_affiliated_institutions_are_copied_from_user(
-            self, app, user, url_draft_registrations, payload):
+    def test_affiliated_institutions_are_copied_from_node_no_institutions(self, app, user, url_draft_registrations, payload):
+        """
+        Draft registrations that are based on projects get that project's institutional affiliations, while
+        "no-project" registrations inherit the user's institutional affiliations.
+
+        This tests a scenario where a user bases a registration on a node without affiliations, and so the
+        draft registration has no institutional affiliation from the user or the node.
+        """
         project = ProjectFactory(is_public=True, creator=user)
-        InstitutionFactory()
         payload['data']['relationships']['branched_from']['data']['id'] = project._id
         res = app.post_json_api(
-            url_draft_registrations, payload,
-            auth=user.auth, expect_errors=True)
+            url_draft_registrations,
+            payload,
+            auth=user.auth,
+        )
         assert res.status_code == 201
         draft_registration = DraftRegistration.load(res.json['data']['id'])
         assert not draft_registration.affiliated_institutions.exists()
 
+    def test_affiliated_institutions_are_copied_from_node(self, app, user, url_draft_registrations, payload):
+        """
+        Draft registrations that are based on projects get that project's institutional affiliations, while
+        "no-project" registrations inherit the user's institutional affiliations.
+
+        This tests a scenario where a user bases their registration on a project that has a current institutional
+        affiliation which is copied over to the draft registration.
+        """
+        institution = InstitutionFactory()
+
         project = ProjectFactory(is_public=True, creator=user)
+        project.affiliated_institutions.add(institution)
         payload['data']['relationships']['branched_from']['data']['id'] = project._id
-        user.add_multiple_institutions_non_sso(Institution.objects.filter(id__lt=3))
         res = app.post_json_api(
-            url_draft_registrations, payload,
-            auth=user.auth, expect_errors=True)
+            url_draft_registrations,
+            payload,
+            auth=user.auth,
+        )
+        assert res.status_code == 201
+        draft_registration = DraftRegistration.load(res.json['data']['id'])
+        assert list(draft_registration.affiliated_institutions.all()) == list(project.affiliated_institutions.all())
+
+    def test_affiliated_institutions_are_copied_from_user(self, app, user, url_draft_registrations, payload):
+        """
+        Draft registrations that are based on projects get that project's institutional affiliations, while
+        "no-project" registrations inherit the user's institutional affiliations.
+        """
+        institution = InstitutionFactory()
+        user.add_or_update_affiliated_institution(institution)
+
+        del payload['data']['relationships']['branched_from']
+        res = app.post_json_api(
+            url_draft_registrations,
+            payload,
+            auth=user.auth,
+        )
         assert res.status_code == 201
         draft_registration = DraftRegistration.load(res.json['data']['id'])
-        assert not draft_registration.affiliated_institutions.all() == user.get_affiliated_institutions()
+        assert list(draft_registration.affiliated_institutions.all()) == list(user.get_affiliated_institutions())
 
 
 class TestDraftRegistrationCreateWithoutNode(TestDraftRegistrationCreate):
diff --git a/osf/models/registrations.py b/osf/models/registrations.py
index ceff62c98a2..92e1f32bcfb 100644
--- a/osf/models/registrations.py
+++ b/osf/models/registrations.py
@@ -1262,31 +1262,31 @@ def create_from_node(cls, user, schema, node=None, data=None, provider=None):
             provider.validate_schema(schema)
 
         excluded_attributes = []
-        if not node:
-            # If no node provided, a DraftNode is created for you
-            node = DraftNode.objects.create(creator=user, title=settings.DEFAULT_DRAFT_NODE_TITLE)
-            # Force the user to add their own title for no-project
+        if node:
+            branched_from = node
+        else:
+            branched_from = DraftNode.objects.create(creator=user, title=settings.DEFAULT_DRAFT_NODE_TITLE)
             excluded_attributes.append('title')
 
-        if not (isinstance(node, Node) or isinstance(node, DraftNode)):
+        if not isinstance(branched_from, (Node, DraftNode)):
             raise DraftRegistrationStateError()
 
         draft = cls(
             initiator=user,
-            branched_from=node,
+            branched_from=branched_from,
             registration_schema=schema,
             registration_metadata=data or {},
             provider=provider,
         )
         draft.save()
         draft.copy_editable_fields(
-            node,
-            save=True,
+            branched_from,
             excluded_attributes=excluded_attributes
         )
         draft.update(data, auth=Auth(user))
 
-        if node.type == 'osf.draftnode':
+        if not node:
+            draft.affiliated_institutions.add(*draft.creator.get_affiliated_institutions())
             initiator_permissions = draft.contributor_set.get(user=user).permission
             signals.contributor_added.send(
                 draft,

From 314105711230cdb66de1be9dbc6f3da697681249 Mon Sep 17 00:00:00 2001
From: John Tordoff <Johnetordoff@users.noreply.github.com>
Date: Wed, 15 Nov 2023 09:06:08 -0500
Subject: [PATCH 03/23] [ENG-1058] Update GDPR for draftregistrations and
 draftnodes (#10462)

* update gdpr to delete draftregistrations and draftnode

---------

Co-authored-by: John Tordoff <>
---
 osf/models/user.py     | 149 ++++++++++++++++++++++++++++++-----------
 osf_tests/test_user.py |  46 ++++++++++++-
 2 files changed, 152 insertions(+), 43 deletions(-)

diff --git a/osf/models/user.py b/osf/models/user.py
index ba8a6fb59f9..a16a617cca0 100644
--- a/osf/models/user.py
+++ b/osf/models/user.py
@@ -1896,69 +1896,138 @@ def check_spam(self, saved_fields, request_headers):
 
         return is_spam
 
+    def _validate_admin_status_for_gdpr_delete(self, resource):
+        """
+        Ensure that deleting the user won't leave the node without an admin.
+
+        Args:
+        - resource: An instance of a resource, probably AbstractNode or DraftRegistration.
+        """
+        alternate_admins = OSFUser.objects.filter(
+            groups__name=resource.format_group(ADMIN),
+            is_active=True
+        ).exclude(id=self.id).exists()
+
+        if not alternate_admins:
+            raise UserStateError(
+                f'You cannot delete {resource.__class__.__name__} {resource._id} because it would be '
+                f'a {resource.__class__.__name__} with contributors, but with no admin.'
+            )
+
+    def _validate_addons_for_gdpr_delete(self, resource):
+        """
+        Ensure that the user's external accounts on the node won't cause issues upon deletion.
+
+        Args:
+        - resource: An instance of a resource, probably AbstractNode or DraftRegistration.
+        """
+        for addon in resource.get_addons():
+            if addon.short_name not in ('osfstorage', 'wiki') and \
+                    addon.user_settings and addon.user_settings.owner.id == self.id:
+                raise UserStateError(
+                    f'You cannot delete this user because they have an external account for {addon.short_name} '
+                    f'attached to {resource.__class__.__name__} {resource._id}, which has other contributors.'
+                )
+
     def gdpr_delete(self):
         """
-        This function does not remove the user object reference from our database, but it does disable the account and
-        remove identifying in a manner compliant with GDPR guidelines.
+        Complies with GDPR guidelines by disabling the account and removing identifying information.
+        """
+
+        # Check if user has something intentionally public, like preprints or registrations
+        self._validate_no_public_entities()
+
+        # Check if user has any non-registration AbstractNodes or DraftRegistrations that they might still share with
+        # other contributors
+        self._validate_and_remove_resource_for_gdpr_delete(
+            self.nodes.exclude(type='osf.registration'),  # Includes DraftNodes and other typed nodes
+            hard_delete=False
+        )
+        self._validate_and_remove_resource_for_gdpr_delete(
+            self.draft_registrations.all(),
+            hard_delete=True
+        )
 
-        Follows the protocol described in
-        https://openscience.atlassian.net/wiki/spaces/PRODUC/pages/482803755/GDPR-Related+protocols
+        # A potentially out-of-date check that the user isn't a member of an OSFGroup
+        self._validate_osf_groups()
 
+        # Finally delete the user's info.
+        self._clear_identifying_information()
+
+    def _validate_no_public_entities(self):
+        """
+        Ensure that the user doesn't have any public facing resources like Registrations or Preprints
         """
-        from osf.models import Preprint, AbstractNode
+        from osf.models import Preprint
 
-        user_nodes = self.nodes.exclude(is_deleted=True)
-        #  Validates the user isn't trying to delete things they deliberately made public.
-        if user_nodes.filter(type='osf.registration').exists():
+        if self.nodes.filter(deleted__isnull=True, type='osf.registration').exists():
             raise UserStateError('You cannot delete this user because they have one or more registrations.')
 
         if Preprint.objects.filter(_contributors=self, ever_public=True, deleted__isnull=True).exists():
             raise UserStateError('You cannot delete this user because they have one or more preprints.')
 
-        # Validates that the user isn't trying to delete things nodes they are the only admin on.
-        personal_nodes = (
-            AbstractNode.objects.annotate(contrib_count=Count('_contributors'))
-            .filter(contrib_count__lte=1)
-            .filter(contributor__user=self)
-            .exclude(is_deleted=True)
-        )
-        shared_nodes = user_nodes.exclude(id__in=personal_nodes.values_list('id'))
+    def _validate_and_remove_resource_for_gdpr_delete(self, resources, hard_delete):
+        """
+        This method ensures a user's resources are properly disposed of during a GDPR delete request.
 
-        for node in shared_nodes.exclude(type__in=['osf.quickfilesnode', 'osf.draftnode']):
-            alternate_admins = OSFUser.objects.filter(groups__name=node.format_group(ADMIN)).filter(is_active=True).exclude(id=self.id)
-            if not alternate_admins:
-                raise UserStateError(
-                    'You cannot delete node {} because it would be a node with contributors, but with no admin.'.format(
-                        node._id))
+        Args:
+        - resources: A queryset of resources probably of AbstractNode or DraftRegistration.
+        - hard_delete: A boolean indicating whether the resource should be permanently deleted or just marked as such.
+        """
+        model = resources.query.model
 
-            for addon in node.get_addons():
-                if addon.short_name not in ('osfstorage', 'wiki') and addon.user_settings and addon.user_settings.owner.id == self.id:
-                    raise UserStateError('You cannot delete this user because they '
-                                         'have an external account for {} attached to Node {}, '
-                                         'which has other contributors.'.format(addon.short_name, node._id))
+        filter_deleted = {}
+        if not hard_delete:
+            filter_deleted = {'deleted__isnull': True}
 
-        for group in self.osf_groups:
-            if not group.managers.exclude(id=self.id).filter(is_registered=True).exists() and group.members.exclude(id=self.id).exists():
-                raise UserStateError('You cannot delete this user because they are the only registered manager of OSFGroup {} that contains other members.'.format(group._id))
+        personal_resources = model.objects.annotate(
+            contrib_count=Count('_contributors')
+        ).filter(
+            contrib_count__lte=1,
+            _contributors=self
+        ).filter(
+            **filter_deleted
+        )
 
-        for node in shared_nodes.all():
-            logger.info('Removing {self._id} as a contributor to node (pk:{node_id})...'.format(self=self, node_id=node.pk))
-            node.remove_contributor(self, auth=Auth(self), log=False)
+        shared_resources = resources.exclude(id__in=personal_resources.values_list('id'))
+        for node in shared_resources:
+            self._validate_admin_status_for_gdpr_delete(node)
+            self._validate_addons_for_gdpr_delete(node)
 
-        # This is doesn't to remove identifying info, but ensures other users can't see the deleted user's profile etc.
-        self.deactivate_account()
+        for resource in shared_resources.all():
+            logger.info(f'Removing {self._id} as a contributor to {resource.__class__.__name__} (pk:{resource.pk})...')
+            resource.remove_contributor(self, auth=Auth(self), log=False)
 
-        # delete all personal nodes (one contributor), bookmarks, quickfiles etc.
-        for node in personal_nodes.all():
-            logger.info('Soft-deleting node (pk: {node_id})...'.format(node_id=node.pk))
-            node.remove_node(auth=Auth(self))
+        # Delete all personal entities
+        for entity in personal_resources.all():
+            if hard_delete:
+                logger.info(f'Hard-deleting {entity.__class__.__name__} (pk: {entity.pk})...')
+                entity.delete()
+            else:
+                logger.info(f'Soft-deleting {entity.__class__.__name__} (pk: {entity.pk})...')
+                entity.remove_node(auth=Auth(self))
 
+    def _validate_osf_groups(self):
+        """
+        Ensure the user is not the only registered manager of an OSFGroup with other members, then remove them from (or delete) their groups.
+        """
         for group in self.osf_groups:
-            if len(group.managers) == 1 and group.managers[0] == self:
+            if not group.managers.exclude(id=self.id).filter(is_registered=True).exists() and group.members.exclude(
+                    id=self.id).exists():
+                raise UserStateError(
+                    f'You cannot delete this user because they are the only registered manager of OSFGroup {group._id} that contains other members.')
+            elif len(group.managers) == 1 and group.managers[0] == self:
                 group.remove_group()
             else:
                 group.remove_member(self)
 
+    def _clear_identifying_information(self):
+        '''
+        This method ensures a user's info is deleted during a GDPR delete
+        '''
+        # This doesn't remove identifying info, but ensures other users can't see the deleted user's profile etc.
+        self.deactivate_account()
+
         logger.info('Clearing identifying information...')
         # This removes identifying info
         # hard-delete all emails associated with the user
diff --git a/osf_tests/test_user.py b/osf_tests/test_user.py
index 0c67bc4ed43..53e717df2d1 100644
--- a/osf_tests/test_user.py
+++ b/osf_tests/test_user.py
@@ -37,6 +37,8 @@
     NotableDomain,
     PreprintContributor,
     DraftRegistrationContributor,
+    DraftRegistration,
+    DraftNode,
     UserSessionMap,
 )
 from osf.models.institution_affiliation import get_user_by_institution_identity
@@ -66,7 +68,8 @@
     UnregUserFactory,
     UserFactory,
     RegistrationFactory,
-    PreprintFactory
+    PreprintFactory,
+    DraftNodeFactory
 )
 from tests.base import OsfTestCase
 from tests.utils import run_celery_tasks
@@ -2387,6 +2390,12 @@ def registration(self, user):
         registration.save()
         return registration
 
+    @pytest.fixture()
+    def registration_with_draft_node(self, user, registration):
+        registration.branched_from = DraftNodeFactory(creator=user)
+        registration.save()
+        return registration
+
     @pytest.fixture()
     def project(self, user):
         project = ProjectFactory(creator=user)
@@ -2433,11 +2442,42 @@ def test_can_gdpr_delete_personal_nodes(self, user):
         user.gdpr_delete()
         assert user.nodes.exclude(is_deleted=True).count() == 0
 
+    def test_can_gdpr_delete_personal_registrations(self, user, registration_with_draft_node):
+        assert DraftRegistration.objects.all().count() == 1
+        assert DraftNode.objects.all().count() == 1
+
+        with pytest.raises(UserStateError) as exc_info:
+            user.gdpr_delete()
+
+        assert exc_info.value.args[0] == 'You cannot delete this user because they have one or more registrations.'
+        assert DraftRegistration.objects.all().count() == 1
+        assert DraftNode.objects.all().count() == 1
+
+        registration_with_draft_node.remove_node(Auth(user))
+        assert DraftRegistration.objects.all().count() == 1
+        assert DraftNode.objects.all().count() == 1
+        user.gdpr_delete()
+
+        # DraftNodes soft-deleted, DraftRegistrations hard-deleted
+        assert user.nodes.exclude(is_deleted=True).count() == 0
+        assert DraftRegistration.objects.all().count() == 0
+
     def test_can_gdpr_delete_shared_nodes_with_multiple_admins(self, user, project_with_two_admins):
 
         user.gdpr_delete()
         assert user.nodes.all().count() == 0
 
+    def test_can_gdpr_delete_shared_draft_registration_with_multiple_admins(self, user, registration):
+        other_admin = AuthUserFactory()
+        draft_registrations = user.draft_registrations.get()
+        draft_registrations.add_contributor(other_admin, permissions='admin')
+        assert draft_registrations.contributors.all().count() == 2
+        registration.delete_registration_tree(save=True)
+
+        user.gdpr_delete()
+        assert draft_registrations.contributors.get() == other_admin
+        assert user.nodes.filter(deleted__isnull=True).count() == 0
+
     def test_cant_gdpr_delete_registrations(self, user, registration):
 
         with pytest.raises(UserStateError) as exc_info:
@@ -2457,8 +2497,8 @@ def test_cant_gdpr_delete_shared_node_if_only_admin(self, user, project_user_is_
         with pytest.raises(UserStateError) as exc_info:
             user.gdpr_delete()
 
-        assert exc_info.value.args[0] == 'You cannot delete node {} because it would' \
-                                         ' be a node with contributors, but with no admin.'.format(project_user_is_only_admin._id)
+        assert exc_info.value.args[0] == 'You cannot delete Node {} because it would' \
+                                         ' be a Node with contributors, but with no admin.'.format(project_user_is_only_admin._id)
 
     def test_cant_gdpr_delete_osf_group_if_only_manager(self, user):
         group = OSFGroupFactory(name='My Group', creator=user)

From df34b8c6b1a792cd9c570b92948df9a47ad3c4ac Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Wed, 22 Nov 2023 15:50:49 -0500
Subject: [PATCH 04/23] Update ReviewActionListCreate write scopes

---
 api/actions/views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/actions/views.py b/api/actions/views.py
index 2c7276873fa..be0f47a0a8d 100644
--- a/api/actions/views.py
+++ b/api/actions/views.py
@@ -159,7 +159,7 @@ class ReviewActionListCreate(JSONAPIBaseView, generics.ListCreateAPIView, ListFi
     )
 
     required_read_scopes = [CoreScopes.ACTIONS_READ]
-    required_write_scopes = [CoreScopes.NULL]
+    required_write_scopes = [CoreScopes.ACTIONS_WRITE]
 
     parser_classes = (JSONAPIMultipleRelationshipsParser, JSONAPIMultipleRelationshipsParserForRegularJSON,)
     serializer_class = ReviewActionSerializer

From 207de84f9b8a3361f62f2662ac7f5755f732793a Mon Sep 17 00:00:00 2001
From: Yuhuai Liu <yuhuai@cos.io>
Date: Wed, 1 Nov 2023 10:46:01 -0400
Subject: [PATCH 05/23] Routing changes for Preprints Modernization - Phase 1

---
 api/base/serializers.py |  3 +++
 website/routes.py       | 13 ++++++++++---
 website/views.py        |  4 +++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/api/base/serializers.py b/api/base/serializers.py
index c21c76dc394..c8cdff04c7e 100644
--- a/api/base/serializers.py
+++ b/api/base/serializers.py
@@ -915,6 +915,9 @@ def to_representation(self, value):
                             or related_class.view_name == 'registration-citation':
                         related_id = resolved_url.kwargs['node_id']
                         related_type = 'citation'
+                    elif related_class.view_name == 'preprint-citation':
+                        related_id = resolved_url.kwargs['preprint_id']
+                        related_type = 'citation'
                     elif related_type in ('preprint_providers', 'preprint-providers', 'registration-providers'):
                         related_id = resolved_url.kwargs['provider_id']
                     elif related_type in ('registrations', 'draft_nodes'):
diff --git a/website/routes.py b/website/routes.py
index 6634c6a97f5..787fe2e367b 100644
--- a/website/routes.py
+++ b/website/routes.py
@@ -260,9 +260,16 @@ def ember_app(path=None):
     for k in EXTERNAL_EMBER_APPS.keys():
         if request.path.strip('/').startswith(k):
             ember_app = EXTERNAL_EMBER_APPS[k]
-            if k == 'preprints' and request.path.rstrip('/').endswith('discover'):
-                # Route preprint discover pages to new search page in EOW
-                ember_app = EXTERNAL_EMBER_APPS.get('ember_osf_web', False) or ember_app
+            if k == 'preprints':
+                if request.path.rstrip('/').endswith('edit'):
+                    # Route preprint edit pages to old preprint app
+                    ember_app = EXTERNAL_EMBER_APPS.get('preprints', False) or ember_app
+                elif request.path.rstrip('/').endswith('submit'):
+                    # Route preprint submit pages to old preprint app
+                    ember_app = EXTERNAL_EMBER_APPS.get('preprints', False) or ember_app
+                else:
+                    # Route other preprint pages to EOW
+                    ember_app = EXTERNAL_EMBER_APPS.get('ember_osf_web', False) or ember_app
             break
 
     if not ember_app:
diff --git a/website/views.py b/website/views.py
index c3051861791..a8f70421069 100644
--- a/website/views.py
+++ b/website/views.py
@@ -332,7 +332,9 @@ def resolve_guid(guid, suffix=None):
     if isinstance(resource, Preprint):
         if resource.provider.domain_redirect_enabled:
             return redirect(resource.absolute_url, http_status.HTTP_301_MOVED_PERMANENTLY)
-        return stream_emberapp(EXTERNAL_EMBER_APPS['preprints']['server'], preprints_dir)
+        if clean_suffix.endswith('edit'):
+            return stream_emberapp(EXTERNAL_EMBER_APPS['preprints']['server'], preprints_dir)
+        return use_ember_app()
 
     elif isinstance(resource, Registration) and (clean_suffix in ('', 'comments', 'links', 'components', 'resources',)) and waffle.flag_is_active(request, features.EMBER_REGISTRIES_DETAIL_PAGE):
         return use_ember_app()

From b69ab6528810de5afe47aebafd53aed7a941b4f5 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Thu, 30 Nov 2023 13:03:17 -0500
Subject: [PATCH 06/23] Fix ResolveGuid tests

---
 tests/test_views.py | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/tests/test_views.py b/tests/test_views.py
index 920d46a0d07..f1a8c1966ce 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -5018,28 +5018,22 @@ class TestResolveGuid(OsfTestCase):
     def setUp(self):
         super(TestResolveGuid, self).setUp()
 
-    def test_preprint_provider_without_domain(self):
+    @mock.patch('website.views.use_ember_app')
+    def test_preprint_provider_without_domain(self, mock_use_ember_app):
         provider = PreprintProviderFactory(domain='')
         preprint = PreprintFactory(provider=provider)
         url = web_url_for('resolve_guid', _guid=True, guid=preprint._id)
         res = self.app.get(url)
-        assert_equal(res.status_code, 200)
-        assert_equal(
-            res.request.path,
-            '/{}/'.format(preprint._id)
-        )
+        mock_use_ember_app.assert_called_with()
 
-    def test_preprint_provider_with_domain_without_redirect(self):
+    @mock.patch('website.views.use_ember_app')
+    def test_preprint_provider_with_domain_without_redirect(self, mock_use_ember_app):
         domain = 'https://test.com/'
         provider = PreprintProviderFactory(_id='test', domain=domain, domain_redirect_enabled=False)
         preprint = PreprintFactory(provider=provider)
         url = web_url_for('resolve_guid', _guid=True, guid=preprint._id)
         res = self.app.get(url)
-        assert_equal(res.status_code, 200)
-        assert_equal(
-            res.request.path,
-            '/{}/'.format(preprint._id)
-        )
+        mock_use_ember_app.assert_called_with()
 
     def test_preprint_provider_with_domain_with_redirect(self):
         domain = 'https://test.com/'
@@ -5062,16 +5056,13 @@ def test_preprint_provider_with_domain_with_redirect(self):
 
 
 
-    def test_preprint_provider_with_osf_domain(self):
+    @mock.patch('website.views.use_ember_app')
+    def test_preprint_provider_with_osf_domain(self, mock_use_ember_app):
         provider = PreprintProviderFactory(_id='osf', domain='https://osf.io/')
         preprint = PreprintFactory(provider=provider)
         url = web_url_for('resolve_guid', _guid=True, guid=preprint._id)
         res = self.app.get(url)
-        assert_equal(res.status_code, 200)
-        assert_equal(
-            res.request.path,
-            '/{}/'.format(preprint._id)
-        )
+        mock_use_ember_app.assert_called_with()
 
 
 class TestConfirmationViewBlockBingPreview(OsfTestCase):

From 7810a1e86ac520d5ea1ca54a0b91ab58040f69ad Mon Sep 17 00:00:00 2001
From: John Tordoff <>
Date: Mon, 20 Nov 2023 15:06:06 -0500
Subject: [PATCH 07/23] Add AGU Conference campaign

---
 framework/auth/campaigns.py                   |  9 +++++++
 tests/test_campaigns.py                       |  1 +
 website/mails/mails.py                        |  4 +++
 .../confirm_agu_conference_2024.html.mako     | 25 +++++++++++++++++++
 website/util/metrics.py                       |  1 +
 5 files changed, 40 insertions(+)
 create mode 100644 website/templates/emails/confirm_agu_conference_2024.html.mako

diff --git a/framework/auth/campaigns.py b/framework/auth/campaigns.py
index 64552a8f5ef..9d418e863e0 100644
--- a/framework/auth/campaigns.py
+++ b/framework/auth/campaigns.py
@@ -91,6 +91,15 @@ def get_campaigns():
                 }
             })
 
+            newest_campaigns.update({
+                'agu_conference_2023': {
+                    'system_tag': CampaignSourceTags.AguConference2023.value,
+                    'redirect_url': '',
+                    'confirmation_email_template': mails.CONFIRM_EMAIL_AGU_CONFERENCE_2023,
+                    'login_type': 'native',
+                }
+            })
+
             CAMPAIGNS = newest_campaigns
             CAMPAIGNS_LAST_REFRESHED = timezone.now()
 
diff --git a/tests/test_campaigns.py b/tests/test_campaigns.py
index 442d1d1f931..66cb7f348db 100644
--- a/tests/test_campaigns.py
+++ b/tests/test_campaigns.py
@@ -44,6 +44,7 @@ def setUp(self):
             'psyarxiv-preprints',
             'osf-registries',
             'osf-registered-reports',
+            'agu_conference_2023',
         ]
         self.refresh = timezone.now()
         campaigns.CAMPAIGNS = None  # force campaign refresh now that preprint providers are populated
diff --git a/website/mails/mails.py b/website/mails/mails.py
index d0263c59f95..4ecb438a7e8 100644
--- a/website/mails/mails.py
+++ b/website/mails/mails.py
@@ -188,6 +188,10 @@ def get_english_article(word):
     'confirm_erpc',
     subject='OSF Account Verification, Election Research Preacceptance Competition'
 )
+CONFIRM_EMAIL_AGU_CONFERENCE_2023 = Mail(
+    'confirm_agu_conference_2023',
+    subject='OSF Account Verification, from the American Geophysical Union Conference'
+)
 CONFIRM_EMAIL_PREPRINTS = lambda name, provider: Mail(
     'confirm_preprints_{}'.format(name),
     subject='OSF Account Verification, {}'.format(provider)
diff --git a/website/templates/emails/confirm_agu_conference_2024.html.mako b/website/templates/emails/confirm_agu_conference_2024.html.mako
new file mode 100644
index 00000000000..6d61636068c
--- /dev/null
+++ b/website/templates/emails/confirm_agu_conference_2024.html.mako
@@ -0,0 +1,25 @@
+<%inherit file="notify_base.mako" />
+
+<%def name="content()">
+<tr>
+  <td style="border-collapse: collapse;">
+    Hello ${user.fullname},<br>
+    <br>
+
+    Thank you for joining us at the AGU Open Science Pavilion, and welcome to the Open Science Framework.
+
+    We are pleased to offer a special AGU attendees exclusive community call to continue our conversation and to help
+    you get oriented on the OSF. This is an opportunity for us to show you useful OSF features, talk about
+    open science in your domains, and for you to ask any questions you may have.
+    You can register for this free event here:
+    <br>
+    https://cos-io.zoom.us/meeting/register/tZAuceCvrjotHNG3n6XzLFDv1Rnn2hkjczHr
+    <br>
+    To continue, please verify your email address by visiting this link:<br>
+    <br>
+    ${confirmation_url}<br>
+    <br>
+    From the team at the Center for Open Science<br>
+  </td>
+</tr>
+</%def>
diff --git a/website/util/metrics.py b/website/util/metrics.py
index 4416b4f5cd4..19c9773e935 100644
--- a/website/util/metrics.py
+++ b/website/util/metrics.py
@@ -49,6 +49,7 @@ class CampaignSourceTags(Enum):
     ErpChallenge = campaign_source_tag('erp_challenge')
     OsfRegisteredReports = campaign_source_tag('osf_registered_reports')
     Osf4m = campaign_source_tag('osf4m')
+    AguConference2023 = campaign_source_tag('agu_conference_2023')
 
 
 class OsfClaimedTags(Enum):

From 9e0322d9ba888900a9fdc04b794af9abd50b0ad0 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Tue, 28 Nov 2023 18:18:35 -0500
Subject: [PATCH 08/23] Rename, reword template - Fix redirect

---
 framework/auth/campaigns.py                                   | 2 +-
 ...e_2024.html.mako => confirm_agu_conference_2023.html.mako} | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename website/templates/emails/{confirm_agu_conference_2024.html.mako => confirm_agu_conference_2023.html.mako} (88%)

diff --git a/framework/auth/campaigns.py b/framework/auth/campaigns.py
index 9d418e863e0..95203e058ca 100644
--- a/framework/auth/campaigns.py
+++ b/framework/auth/campaigns.py
@@ -94,7 +94,7 @@ def get_campaigns():
             newest_campaigns.update({
                 'agu_conference_2023': {
                     'system_tag': CampaignSourceTags.AguConference2023.value,
-                    'redirect_url': '',
+                    'redirect_url': furl.furl(DOMAIN).add(path='dashboard/').url,
                     'confirmation_email_template': mails.CONFIRM_EMAIL_AGU_CONFERENCE_2023,
                     'login_type': 'native',
                 }
diff --git a/website/templates/emails/confirm_agu_conference_2024.html.mako b/website/templates/emails/confirm_agu_conference_2023.html.mako
similarity index 88%
rename from website/templates/emails/confirm_agu_conference_2024.html.mako
rename to website/templates/emails/confirm_agu_conference_2023.html.mako
index 6d61636068c..429ec911410 100644
--- a/website/templates/emails/confirm_agu_conference_2024.html.mako
+++ b/website/templates/emails/confirm_agu_conference_2023.html.mako
@@ -14,8 +14,8 @@
     You can register for this free event here:
     <br>
     https://cos-io.zoom.us/meeting/register/tZAuceCvrjotHNG3n6XzLFDv1Rnn2hkjczHr
-    <br>
-    To continue, please verify your email address by visiting this link:<br>
+    <br><br>
+    To confirm your OSF account, please verify your email address by visiting this link:<br>
     <br>
     ${confirmation_url}<br>
     <br>

From 561d81ec46ae3bd05a7159387f4e10ef12e092e2 Mon Sep 17 00:00:00 2001
From: John Tordoff <Johnetordoff@users.noreply.github.com>
Date: Thu, 14 Dec 2023 11:55:25 -0500
Subject: [PATCH 09/23] [ENG-4823] Add Collection Metadata Options (#10499)

* add collection metadata options for ibdgc

---------

Co-authored-by: John Tordoff <>
---
 admin/collection_providers/forms.py           | 62 +++++++++++++++++++
 admin/collection_providers/views.py           | 59 +++++++++---------
 .../js/pages/collection-provider-page.js      | 26 ++++++++
 .../collection_providers/detail.html          |  8 +++
 .../update_collection_provider_form.html      | 12 ++++
 api/collections/serializers.py                | 20 ++++++
 api_tests/search/views/test_views.py          | 20 ++++++
 osf/migrations/0017_auto_20231212_1843.py     | 34 ++++++++++
 osf/models/collection.py                      | 20 +++++-
 osf/models/collection_submission.py           | 10 +++
 website/project/views/node.py                 |  2 +
 website/search/elastic_search.py              |  2 +
 website/templates/project/project.mako        | 27 ++++++++
 13 files changed, 273 insertions(+), 29 deletions(-)
 create mode 100644 osf/migrations/0017_auto_20231212_1843.py

diff --git a/admin/collection_providers/forms.py b/admin/collection_providers/forms.py
index ca0358a126d..4b8af62bb82 100644
--- a/admin/collection_providers/forms.py
+++ b/admin/collection_providers/forms.py
@@ -15,6 +15,8 @@ class CollectionProviderForm(forms.ModelForm):
     program_area_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
     school_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
     study_design_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
+    data_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
+    disease_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
     _id = forms.SlugField(
         required=True,
         help_text='URL Slug',
@@ -268,3 +270,63 @@ def clean_study_design_choices(self):
             if choices:
                 added_choices = json.loads(choices)
         return {'added': added_choices, 'removed': removed_choices}
+
+    def clean_disease_choices(self):
+        if not self.data.get('disease_choices'):
+            return {'added': [], 'removed': []}
+
+        collection_provider = self.instance
+        primary_collection = collection_provider.primary_collection
+        if primary_collection:  # Modifying an existing CollectionProvider
+            old_choices = {c.strip(' ') for c in primary_collection.disease_choices}
+            updated_choices = {c.strip(' ') for c in json.loads(self.data.get('disease_choices'))}
+            added_choices = updated_choices - old_choices
+            removed_choices = old_choices - updated_choices
+
+            active_removed_choices = set(
+                primary_collection.collectionsubmission_set.filter(
+                    disease__in=removed_choices
+                ).values_list('disease', flat=True)
+            )
+            if active_removed_choices:
+                raise forms.ValidationError(
+                    'Cannot remove the following choices for "disease", as they are '
+                    f'currently in use: {active_removed_choices}'
+                )
+        else:  # Creating a new CollectionProvider
+            added_choices = set()
+            removed_choices = set()
+            choices = self.data.get('disease_choices')
+            if choices:
+                added_choices = json.loads(choices)
+        return {'added': added_choices, 'removed': removed_choices}
+
+    def clean_data_type_choices(self):
+        if not self.data.get('data_type_choices'):
+            return {'added': [], 'removed': []}
+
+        collection_provider = self.instance
+        primary_collection = collection_provider.primary_collection
+        if primary_collection:  # Modifying an existing CollectionProvider
+            old_choices = {c.strip(' ') for c in primary_collection.data_type_choices}
+            updated_choices = {c.strip(' ') for c in json.loads(self.data.get('data_type_choices'))}
+            added_choices = updated_choices - old_choices
+            removed_choices = old_choices - updated_choices
+
+            active_removed_choices = set(
+                primary_collection.collectionsubmission_set.filter(
+                    data_type__in=removed_choices
+                ).values_list('data_type', flat=True)
+            )
+            if active_removed_choices:
+                raise forms.ValidationError(
+                    'Cannot remove the following choices for "data_type", as they are '
+                    f'currently in use: {active_removed_choices}'
+                )
+        else:  # Creating a new CollectionProvider
+            added_choices = set()
+            removed_choices = set()
+            choices = self.data.get('data_type_choices')
+            if choices:
+                added_choices = json.loads(choices)
+        return {'added': added_choices, 'removed': removed_choices}
diff --git a/admin/collection_providers/views.py b/admin/collection_providers/views.py
index d5c950ed4fd..699d82cf533 100644
--- a/admin/collection_providers/views.py
+++ b/admin/collection_providers/views.py
@@ -21,6 +21,17 @@
 from admin.providers.views import AddAdminOrModerator, RemoveAdminsAndModerators
 
 
+def _process_collection_choices(provider, choices_name, form):
+    collection = provider.primary_collection
+    choices_name_attr = f'{choices_name}_choices'
+    choices_added = form.cleaned_data[choices_name_attr]['added']
+    choices_removed = form.cleaned_data[choices_name_attr]['removed']
+
+    getattr(collection, choices_name_attr).extend(choices_added)
+    for item in choices_removed:
+        getattr(collection, choices_name_attr).remove(item)
+
+
 class CreateCollectionProvider(PermissionRequiredMixin, CreateView):
     raise_exception = True
     permission_required = 'osf.change_collectionprovider'
@@ -47,6 +58,10 @@ def form_valid(self, form):
             self.object.primary_collection.school_type_choices.append(item)
         for item in form.cleaned_data['study_design_choices']['added']:
             self.object.primary_collection.study_design_choices.append(item)
+        for item in form.cleaned_data['data_type_choices']['added']:
+            self.object.primary_collection.data_type_choices.append(item)
+        for item in form.cleaned_data['disease_choices']['added']:
+            self.object.primary_collection.disease_choices.append(item)
         self.object.primary_collection.save()
         return super().form_valid(form)
 
@@ -163,6 +178,16 @@ def get_context_data(self, *args, **kwargs):
         ))
         kwargs['study_design_choices'] = study_design_choices_html
 
+        disease_choices_html = '<ul>{choices}</ul>'.format(choices=''.join(
+            f'<li>{choice}</li>' for choice in primary_collection.disease_choices
+        ))
+        kwargs['disease_choices'] = disease_choices_html
+
+        data_type_choices_html = '<ul>{choices}</ul>'.format(choices=''.join(
+            f'<li>{choice}</li>' for choice in primary_collection.data_type_choices
+        ))
+        kwargs['data_type_choices'] = data_type_choices_html
+
         # get a dict of model fields so that we can set the initial value for the update form
         fields = model_to_dict(collection_provider)
         fields['collected_type_choices'] = json.dumps(primary_collection.collected_type_choices)
@@ -175,6 +200,8 @@ def get_context_data(self, *args, **kwargs):
 
         fields['school_type_choices'] = json.dumps(primary_collection.school_type_choices)
         fields['study_design_choices'] = json.dumps(primary_collection.study_design_choices)
+        fields['data_type_choices'] = json.dumps(primary_collection.data_type_choices)
+        fields['disease_choices'] = json.dumps(primary_collection.disease_choices)
 
         # compile html list of collected_type_choices
         if collection_provider.primary_collection:
@@ -235,34 +262,8 @@ class CollectionProviderChangeForm(PermissionRequiredMixin, UpdateView):
 
     def form_valid(self, form):
         if self.object.primary_collection:
-            self.object.primary_collection.collected_type_choices.extend(form.cleaned_data['collected_type_choices']['added'])
-            for item in form.cleaned_data['collected_type_choices']['removed']:
-                self.object.primary_collection.collected_type_choices.remove(item)
-
-            self.object.primary_collection.status_choices.extend(form.cleaned_data['status_choices']['added'])
-            for item in form.cleaned_data['status_choices']['removed']:
-                self.object.primary_collection.status_choices.remove(item)
-
-            self.object.primary_collection.issue_choices.extend(form.cleaned_data['issue_choices']['added'])
-            for item in form.cleaned_data['issue_choices']['removed']:
-                self.object.primary_collection.issue_choices.remove(item)
-
-            self.object.primary_collection.volume_choices.extend(form.cleaned_data['volume_choices']['added'])
-            for item in form.cleaned_data['volume_choices']['removed']:
-                self.object.primary_collection.volume_choices.remove(item)
-
-            self.object.primary_collection.program_area_choices.extend(form.cleaned_data['program_area_choices']['added'])
-            for item in form.cleaned_data['program_area_choices']['removed']:
-                self.object.primary_collection.program_area_choices.remove(item)
-
-        self.object.primary_collection.school_type_choices.extend(form.cleaned_data['school_type_choices']['added'])
-        for item in form.cleaned_data['school_type_choices']['removed']:
-            self.object.primary_collection.school_type_choices.remove(item)
-
-        self.object.primary_collection.study_design_choices.extend(form.cleaned_data['study_design_choices']['added'])
-        for item in form.cleaned_data['study_design_choices']['removed']:
-            self.object.primary_collection.study_design_choices.remove(item)
-
+            for choices_name in ['collected_type', 'status', 'issue', 'volume', 'program_area', 'school_type', 'study_design', 'data_type', 'disease']:
+                _process_collection_choices(self.object, choices_name, form)
         self.object.primary_collection.save()
         return super().form_valid(form)
 
@@ -399,6 +400,8 @@ def create_or_update_provider(self, provider_data):
             provider.primary_collection.program_area_choices = primary_collection['fields']['program_area_choices']
             provider.primary_collection.school_type_choices = primary_collection['fields']['school_type_choices']
             provider.primary_collection.study_design_choices = primary_collection['fields']['study_design_choices']
+            provider.primary_collection.disease_choices = primary_collection['fields']['disease_choices']
+            provider.primary_collection.data_type_choices = primary_collection['fields']['data_type_choices']
             provider.primary_collection.save()
         if licenses:
             provider.licenses_acceptable.set(licenses)
diff --git a/admin/static/js/pages/collection-provider-page.js b/admin/static/js/pages/collection-provider-page.js
index 2964fdc5a56..11e6e2302b4 100644
--- a/admin/static/js/pages/collection-provider-page.js
+++ b/admin/static/js/pages/collection-provider-page.js
@@ -57,6 +57,22 @@ $('#tags-input-study-design').on('itemRemoved', function(event) {
     $('#id_study_design_choices').val(JSON.stringify($('#tags-input-study-design').tagsinput('items')));
 });
 
+$('#tags-input-data-type').on('itemAdded', function(event) {
+    $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items')));
+});
+
+$('#tags-input-data-type').on('itemRemoved', function(event) {
+    $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items')));
+});
+
+$('#tags-input-disease').on('itemAdded', function(event) {
+    $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items')));
+});
+
+$('#tags-input-disease').on('itemRemoved', function(event) {
+    $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items')));
+});
+
 
 $(document).ready(function() {
    var collectedTypeItems = JSON.parse($('#id_collected_type_choices').val());
@@ -93,4 +109,14 @@ $(document).ready(function() {
    studyDesignItems.forEach(function(element){
        $('#tags-input-study-design').tagsinput('add', element)
    });
+
+   var diseaseItems = JSON.parse($('#id_disease_choices').val());
+   diseaseItems.forEach(function(element){
+       $('#tags-input-disease').tagsinput('add', element)
+   });
+
+   var dataTypeItems = JSON.parse($('#id_data_type_choices').val());
+   dataTypeItems.forEach(function(element){
+       $('#tags-input-data-type').tagsinput('add', element)
+   });
 });
diff --git a/admin/templates/collection_providers/detail.html b/admin/templates/collection_providers/detail.html
index 7d488dd974a..c015a90fe8b 100644
--- a/admin/templates/collection_providers/detail.html
+++ b/admin/templates/collection_providers/detail.html
@@ -66,6 +66,14 @@ <h2>{{ collection_provider.name }}</h2>
 						<th>study_design_choices</th>
 						<td>{{ study_design_choices | safe}}</td>
 					</tr>
+					<tr>
+						<th>disease_choices</th>
+						<td>{{ disease_choices | safe}}</td>
+					</tr>
+					<tr>
+						<th>data_type_choices</th>
+						<td>{{ data_type_choices | safe}}</td>
+					</tr>
                 </table>
             </div>
         </div>
diff --git a/admin/templates/collection_providers/update_collection_provider_form.html b/admin/templates/collection_providers/update_collection_provider_form.html
index 05422db6d9a..c64198c2e95 100644
--- a/admin/templates/collection_providers/update_collection_provider_form.html
+++ b/admin/templates/collection_providers/update_collection_provider_form.html
@@ -98,6 +98,18 @@
 					<input id="tags-input-study-design" type="text" data-role="tagsinput"/>
 				</div>
 			</div>
+			<div>
+				<label>Disease choices:</label>
+				<div class=#bootstrap-tagsinput">
+					<input id="tags-input-disease" type="text" data-role="tagsinput"/>
+				</div>
+			</div>
+			<div>
+				<label>Data Type choices:</label>
+				<div class=#bootstrap-tagsinput">
+					<input id="tags-input-data-type" type="text" data-role="tagsinput"/>
+				</div>
+			</div>
             <input class="form-button" type="submit" value="Save" />
         </form>
     </div>
diff --git a/api/collections/serializers.py b/api/collections/serializers.py
index 3b5a10c7ec6..7499015aaa0 100644
--- a/api/collections/serializers.py
+++ b/api/collections/serializers.py
@@ -68,6 +68,14 @@ class CollectionSerializer(JSONAPISerializer):
         child=ser.CharField(max_length=127),
         default=list(),
     )
+    data_type_choices = ser.ListField(
+        child=ser.CharField(max_length=127),
+        default=list(),
+    )
+    disease_choices = ser.ListField(
+        child=ser.CharField(max_length=127),
+        default=list(),
+    )
 
     links = LinksField({})
 
@@ -241,6 +249,8 @@ def subjects_view_kwargs(self):
     program_area = ser.CharField(required=False)
     school_type = ser.CharField(required=False)
     study_design = ser.CharField(required=False)
+    data_type = ser.CharField(required=False)
+    disease = ser.CharField(required=False)
 
     def get_absolute_url(self, obj):
         return absolute_reverse(
@@ -272,6 +282,10 @@ def update(self, obj, validated_data):
             obj.school_Type = validated_data.pop('school_type')
         if 'study_design' in validated_data:
             obj.study_design = validated_data.pop('study_design')
+        if 'data_type' in validated_data:
+            obj.data_type = validated_data.pop('data_type')
+        if 'disease' in validated_data:
+            obj.disease = validated_data.pop('disease')
 
         obj.save()
         return obj
@@ -337,6 +351,8 @@ def subjects_view_kwargs(self):
     program_area = ser.CharField(required=False)
     school_type = ser.CharField(required=False)
     study_design = ser.CharField(required=False)
+    data_type = ser.CharField(required=False)
+    disease = ser.CharField(required=False)
 
     def get_absolute_url(self, obj):
         return absolute_reverse(
@@ -368,6 +384,10 @@ def update(self, obj, validated_data):
             obj.school_Type = validated_data.pop('school_type')
         if 'study_design' in validated_data:
             obj.study_design = validated_data.pop('study_design')
+        if 'data_type' in validated_data:
+            obj.data_type = validated_data.pop('data_type')
+        if 'disease' in validated_data:
+            obj.disease = validated_data.pop('disease')
 
         obj.save()
         return obj
diff --git a/api_tests/search/views/test_views.py b/api_tests/search/views/test_views.py
index 4fc5e0e1196..7ebf6e769d0 100644
--- a/api_tests/search/views/test_views.py
+++ b/api_tests/search/views/test_views.py
@@ -50,6 +50,7 @@ def collection_public(self, user):
         return CollectionFactory(creator=user, provider=CollectionProviderFactory(), is_public=True,
                                  status_choices=['', 'asdf', 'lkjh'], collected_type_choices=['', 'asdf', 'lkjh'],
                                  issue_choices=['', '0', '1', '2'], volume_choices=['', '0', '1', '2'],
+                                 disease_choices=['illness'], data_type_choices=['realness'],
                                  program_area_choices=['', 'asdf', 'lkjh'])
 
     @pytest.fixture()
@@ -1000,3 +1001,22 @@ def test_POST_search_collections(
         assert res.json['links']['meta']['total'] == 1
         assert len(res.json['data']) == 1
         assert res.json['data'][0]['id'] == node_with_abstract._id
+
+    def test_POST_search_collections_disease_data_type(
+            self, app, url_collection_search, user, node_one, node_two, collection_public,
+            node_with_abstract, node_private, registration_collection, registration_one,
+            registration_two, registration_private, reg_with_abstract):
+
+        collection_public.collect_object(node_one, user, disease='illness', data_type='realness')
+        collection_public.collect_object(node_two, user, data_type='realness')
+
+        payload = self.post_payload(disease='illness')
+        res = app.post_json_api(url_collection_search, payload)
+        assert res.status_code == 200
+        assert res.json['links']['meta']['total'] == 1
+
+        payload = self.post_payload(dataType='realness')
+        res = app.post_json_api(url_collection_search, payload)
+        assert res.status_code == 200
+        assert res.json['links']['meta']['total'] == 2
+        assert len(res.json['data']) == 2
diff --git a/osf/migrations/0017_auto_20231212_1843.py b/osf/migrations/0017_auto_20231212_1843.py
new file mode 100644
index 00000000000..5c13864dfc7
--- /dev/null
+++ b/osf/migrations/0017_auto_20231212_1843.py
@@ -0,0 +1,34 @@
+# Generated by Django 3.2.17 on 2023-12-12 18:43
+
+import django.contrib.postgres.fields
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('osf', '0016_auto_20230828_1810'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='collection',
+            name='data_type_choices',
+            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None),
+        ),
+        migrations.AddField(
+            model_name='collection',
+            name='disease_choices',
+            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None),
+        ),
+        migrations.AddField(
+            model_name='collectionsubmission',
+            name='data_type',
+            field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127),
+        ),
+        migrations.AddField(
+            model_name='collectionsubmission',
+            name='disease',
+            field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127),
+        ),
+    ]
diff --git a/osf/models/collection.py b/osf/models/collection.py
index 36fabff27af..ca2620a5e7b 100644
--- a/osf/models/collection.py
+++ b/osf/models/collection.py
@@ -55,6 +55,8 @@ class Meta:
     program_area_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
     school_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
     study_design_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
+    disease_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
+    data_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
     is_public = models.BooleanField(default=False, db_index=True)
     is_promoted = models.BooleanField(default=False, db_index=True)
     is_bookmark_collection = models.BooleanField(default=False, db_index=True)
@@ -160,7 +162,7 @@ def has_permission(self, user, perm):
 
     def collect_object(
             self, obj, collector, collected_type=None, status=None, volume=None, issue=None,
-            program_area=None, school_type=None, study_design=None):
+            program_area=None, school_type=None, study_design=None, data_type=None, disease=None):
         """ Adds object to collection, creates CollectionSubmission reference
             Performs type / metadata validation. User permissions checked in view.
 
@@ -177,6 +179,8 @@ def collect_object(
         program_area = program_area or ''
         school_type = school_type or ''
         study_design = study_design or ''
+        data_type = data_type or ''
+        disease = disease or ''
 
         if not self.collected_type_choices and collected_type:
             raise ValidationError('May not specify "type" for this collection')
@@ -220,6 +224,18 @@ def collect_object(
             elif study_design not in self.study_design_choices:
                 raise ValidationError(f'"{study_design}" is not an acceptable "study_design" for this collection')
 
+        if disease:
+            if not self.disease_choices:
+                raise ValidationError('May not specify "disease" for this collection')
+            elif disease not in self.disease_choices:
+                raise ValidationError(f'"{disease}" is not an acceptable "disease" for this collection')
+
+        if data_type:
+            if not self.data_type_choices:
+                raise ValidationError('May not specify "data_type" for this collection')
+            elif data_type not in self.data_type_choices:
+                raise ValidationError(f'"{data_type}" is not an acceptable "data_type" for this collection')
+
         if not any([isinstance(obj, t.model_class()) for t in self.collected_types.all()]):
             # Not all objects have a content_type_pk, have to look the other way.
             # Ideally, all objects would, and we could do:
@@ -248,6 +264,8 @@ def collect_object(
             collection_submission.program_area = program_area
             collection_submission.school_type = school_type
             collection_submission.study_design = study_design
+            collection_submission.data_type = data_type
+            collection_submission.disease = disease
             collection_submission.save()
 
             return collection_submission
diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py
index d4819255991..963d38b116b 100644
--- a/osf/models/collection_submission.py
+++ b/osf/models/collection_submission.py
@@ -39,6 +39,16 @@ class Meta:
     program_area = models.CharField(blank=True, max_length=127)
     school_type = models.CharField(blank=True, max_length=127)
     study_design = models.CharField(blank=True, max_length=127)
+    disease = models.CharField(
+        help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium',
+        blank=True,
+        max_length=127
+    )
+    data_type = models.CharField(
+        help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium',
+        blank=True,
+        max_length=127
+    )
     machine_state = models.IntegerField(
         choices=CollectionSubmissionStates.int_field_choices(),
         default=CollectionSubmissionStates.IN_PROGRESS,
diff --git a/website/project/views/node.py b/website/project/views/node.py
index c2ee444ab88..70a47c1b4d8 100644
--- a/website/project/views/node.py
+++ b/website/project/views/node.py
@@ -911,6 +911,8 @@ def serialize_collections(collection_submissions, auth):
         'node_id': collection_submission.guid._id,
         'study_design': collection_submission.study_design,
         'program_area': collection_submission.program_area,
+        'disease': collection_submission.disease,
+        'data_type': collection_submission.data_type,
         'state': collection_submission.state.db_name,
         'subjects': list(collection_submission.subjects.values_list('text', flat=True)),
         'is_public': collection_submission.collection.is_public,
diff --git a/website/search/elastic_search.py b/website/search/elastic_search.py
index bc418d3c940..6b479c29de5 100644
--- a/website/search/elastic_search.py
+++ b/website/search/elastic_search.py
@@ -618,6 +618,8 @@ def serialize_collection_submission(collection_submission):
         'programArea': collection_submission.program_area,
         'schoolType': collection_submission.school_type,
         'studyDesign': collection_submission.study_design,
+        'disease': collection_submission.disease,
+        'dataType': collection_submission.data_type,
         'subjects': list(collection_submission.subjects.values_list('text', flat=True)),
         'title': getattr(obj, 'title', ''),
         'url': getattr(obj, 'url', ''),
diff --git a/website/templates/project/project.mako b/website/templates/project/project.mako
index d220853e1b9..356fa93a562 100644
--- a/website/templates/project/project.mako
+++ b/website/templates/project/project.mako
@@ -401,6 +401,20 @@
                                     </div>
                                 % endif
                                 <hr>
+                                % if collection['disease'] and collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i> |&nbsp; Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % elif collection['disease']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i>
+                                    </div>
+                                % elif collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % endif
+                                <hr>
                             % elif collection['state'] == 'pending' and user['is_contributor_or_group_member']:
                                 % if user['is_admin']:
                                     <a class="fa fa-close collections-cancel-icon pull-right" collection_id=${collection['collection_id']} node_id=${collection['node_id']} aria-label="Cancel Submission Request Button"></a>
@@ -448,6 +462,19 @@
                                         Program Area: <i>${collection['program_area']}</i>
                                     </div>
                                 % endif
+                                % if collection['disease'] and collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i> |&nbsp; Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % elif collection['disease']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i>
+                                    </div>
+                                % elif collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % endif
                                 <hr>
                             % elif collection['state'] == 'rejected' and user['is_contributor_or_group_member']:
                                 % if user['is_admin']:

From af35c28a493c29161424fc4f9c7df9d662c0e3c1 Mon Sep 17 00:00:00 2001
From: John Tordoff <>
Date: Fri, 15 Dec 2023 09:59:09 -0500
Subject: [PATCH 10/23] Add UNVERIFIED Domain classification

Squashed commit of the following:

commit 5615a3adf65b941f3929f4f8201a0eb0d1fe1c88
Author: John Tordoff <>
Date:   Tue Dec 12 16:42:54 2023 -0500

    update is_triaged behavior

commit a9a49f281c993fef425fa0db24ec2c33924e34c8
Author: John Tordoff <>
Date:   Tue Dec 12 15:53:49 2023 -0500

    change test case to account for new exception handling for domain sniffer

commit 096e1ab68b5e7d5c2615bdd0f064bcddaad85d16
Author: John Tordoff <>
Date:   Tue Dec 12 14:03:25 2023 -0500

    redo exception handling and add migration file

commit 89b37f32ce7c22076e0ef7fa7c2feceb81e24b86
Author: John Tordoff <>
Date:   Mon Dec 11 12:13:31 2023 -0500

    make timeouts classify notable domains as unverified
---
 osf/external/spam/tasks.py                    | 21 +++++++++----
 .../0017_alter_notabledomain_note.py          | 19 ++++++++++++
 osf/models/notable_domain.py                  |  1 +
 osf_tests/test_notable_domains.py             | 31 ++++++++++---------
 4 files changed, 52 insertions(+), 20 deletions(-)
 create mode 100644 osf/migrations/0017_alter_notabledomain_note.py

diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py
index cc3f9e16a16..fabb7dfb935 100644
--- a/osf/external/spam/tasks.py
+++ b/osf/external/spam/tasks.py
@@ -46,15 +46,20 @@ def _check_resource_for_domains(guid, content):
     resource = guid.referent
     spammy_domains = []
     referrer_content_type = ContentType.objects.get_for_model(resource)
-    for domain in _extract_domains(content):
-        notable_domain, _ = NotableDomain.objects.get_or_create(domain=domain)
+    for domain, note in _extract_domains(content):
+        notable_domain, _ = NotableDomain.objects.get_or_create(
+            domain=domain,
+            defaults={'note': note}
+        )
         if notable_domain.note == NotableDomain.Note.EXCLUDE_FROM_ACCOUNT_CREATION_AND_CONTENT:
             spammy_domains.append(notable_domain.domain)
         DomainReference.objects.get_or_create(
             domain=notable_domain,
             referrer_object_id=resource.id,
             referrer_content_type=referrer_content_type,
-            defaults={'is_triaged': notable_domain.note != NotableDomain.Note.UNKNOWN}
+            defaults={
+                'is_triaged': notable_domain.note not in (NotableDomain.Note.UNKNOWN, NotableDomain.Note.UNVERIFIED)
+            }
         )
     if spammy_domains:
         resource.confirm_spam(save=True, domains=list(spammy_domains))
@@ -72,8 +77,11 @@ def check_resource_for_domains_async(guid, content):
 
 
 def _extract_domains(content):
+    from osf.models import NotableDomain
+
     extracted_domains = set()
     for match in DOMAIN_REGEX.finditer(content):
+        note = NotableDomain.Note.UNKNOWN
         domain = match.group('domain')
         if not domain or domain in extracted_domains:
             continue
@@ -85,10 +93,11 @@ def _extract_domains(content):
 
         try:
             response = requests.head(constructed_url, timeout=settings.DOMAIN_EXTRACTION_TIMEOUT)
-        except (requests.exceptions.ConnectionError, requests.exceptions.InvalidURL):
+        except requests.exceptions.InvalidURL:
+            # Likely false-positive from a filename.ext
             continue
         except requests.exceptions.RequestException:
-            pass
+            note = NotableDomain.Note.UNVERIFIED
         else:
             # Store the redirect location (to help catch link shorteners)
             if response.status_code in REDIRECT_CODES and 'location' in response.headers:
@@ -99,7 +108,7 @@ def _extract_domains(content):
         # Avoid returning a duplicate domain discovered via redirect
         if domain not in extracted_domains:
             extracted_domains.add(domain)
-            yield domain
+            yield domain, note
 
 
 @run_postcommit(once_per_request=False, celery=True)
diff --git a/osf/migrations/0017_alter_notabledomain_note.py b/osf/migrations/0017_alter_notabledomain_note.py
new file mode 100644
index 00000000000..056568cffbe
--- /dev/null
+++ b/osf/migrations/0017_alter_notabledomain_note.py
@@ -0,0 +1,19 @@
+# Generated by Django 3.2.17 on 2023-12-12 19:02
+
+from django.db import migrations, models
+import osf.models.notable_domain
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('osf', '0016_auto_20230828_1810'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='notabledomain',
+            name='note',
+            field=models.IntegerField(choices=[(0, 'EXCLUDE_FROM_ACCOUNT_CREATION_AND_CONTENT'), (1, 'ASSUME_HAM_UNTIL_REPORTED'), (2, 'UNKNOWN'), (3, 'IGNORED'), (4, 'UNVERIFIED')], default=osf.models.notable_domain.NotableDomain.Note['UNKNOWN']),
+        ),
+    ]
diff --git a/osf/models/notable_domain.py b/osf/models/notable_domain.py
index 5b960718ed9..03ebcfd6e40 100644
--- a/osf/models/notable_domain.py
+++ b/osf/models/notable_domain.py
@@ -14,6 +14,7 @@ class Note(IntEnum):
         ASSUME_HAM_UNTIL_REPORTED = 1
         UNKNOWN = 2
         IGNORED = 3
+        UNVERIFIED = 4  # Request failed (e.g. timed out), so the domain could not be checked
 
         @classmethod
         def choices(cls):
diff --git a/osf_tests/test_notable_domains.py b/osf_tests/test_notable_domains.py
index 4c9e39908dd..78edd11e967 100644
--- a/osf_tests/test_notable_domains.py
+++ b/osf_tests/test_notable_domains.py
@@ -30,31 +30,34 @@ def test_extract_domains__optional_components(self, protocol_component, www_comp
         sample_text = f'This is a link: {test_url}'
         with mock.patch.object(spam_tasks.requests, 'head'):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['osf.io']
+        assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__url_in_quotes(self):
         sample_text = '"osf.io"'
         with mock.patch.object(spam_tasks.requests, 'head'):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['osf.io']
+        assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__url_in_parens(self):
         sample_text = '(osf.io)'
         with mock.patch.object(spam_tasks.requests, 'head'):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['osf.io']
+        assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__captures_domain_with_multiple_subdomains(self):
         sample_text = 'This is a link: https://api.test.osf.io'
         with mock.patch.object(spam_tasks.requests, 'head'):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['api.test.osf.io']
+        assert domains == [('api.test.osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__captures_multiple_domains(self):
         sample_text = 'This is a domain: http://osf.io. This is another domain: www.cos.io'
         with mock.patch.object(spam_tasks.requests, 'head'):
             domains = set(spam_tasks._extract_domains(sample_text))
-        assert domains == {'osf.io', 'cos.io'}
+        assert domains == {
+            ('osf.io', NotableDomain.Note.UNKNOWN),
+            ('cos.io', NotableDomain.Note.UNKNOWN),
+        }
 
     def test_extract_domains__no_domains(self):
         sample_text = 'http://fakeout!'
@@ -63,19 +66,19 @@ def test_extract_domains__no_domains(self):
         assert not domains
         mock_head.assert_not_called()
 
-    def test_extract_domains__ignored_if_does_not_resolve(self):
+    def test_extract_domains__unverfied_if_does_not_resolve(self):
         sample_text = 'This.will.not.connect'
         with mock.patch.object(spam_tasks.requests, 'head') as mock_head:
             mock_head.side_effect = spam_tasks.requests.exceptions.ConnectionError
             domains = set(spam_tasks._extract_domains(sample_text))
-        assert not domains
+        assert domains == {('This.will.not.connect', NotableDomain.Note.UNVERIFIED)}
 
     def test_actract_domains__returned_on_error(self):
         sample_text = 'This.will.timeout'
         with mock.patch.object(spam_tasks.requests, 'head') as mock_head:
             mock_head.side_effect = spam_tasks.requests.exceptions.Timeout
             domains = set(spam_tasks._extract_domains(sample_text))
-        assert domains == {sample_text}
+        assert domains == {(sample_text, NotableDomain.Note.UNVERIFIED)}
 
     @pytest.mark.parametrize('status_code', [301, 302, 303, 307, 308])
     def test_extract_domains__follows_redirect(self, status_code):
@@ -85,7 +88,7 @@ def test_extract_domains__follows_redirect(self, status_code):
         sample_text = 'redirect.me'
         with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['redirected.com']
+        assert domains == [('redirected.com', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__redirect_code_no_location(self):
         mock_response = SimpleNamespace()
@@ -94,7 +97,7 @@ def test_extract_domains__redirect_code_no_location(self):
         sample_text = 'redirect.me'
         with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['redirect.me']
+        assert domains == [('redirect.me', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__redirect_code_bad_location(self):
         mock_response = SimpleNamespace()
@@ -103,7 +106,7 @@ def test_extract_domains__redirect_code_bad_location(self):
         sample_text = 'redirect.me'
         with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['redirect.me']
+        assert domains == [('redirect.me', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__redirect_with_full_url_no_protocol(self):
         mock_response = SimpleNamespace()
@@ -114,7 +117,7 @@ def test_extract_domains__redirect_with_full_url_no_protocol(self):
         with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response) as mock_object:
             domains = list(spam_tasks._extract_domains(sample_text))
             mock_object.assert_called_once_with(f'https://{target_url}', timeout=60)
-        assert domains == ['osf.io']
+        assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__redirect_with_full_url_and_protocol(self):
         mock_response = SimpleNamespace()
@@ -125,13 +128,13 @@ def test_extract_domains__redirect_with_full_url_and_protocol(self):
         with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response) as mock_object:
             domains = list(spam_tasks._extract_domains(sample_text))
             mock_object.assert_called_once_with(target_url, timeout=60)
-        assert domains == ['osf.io']
+        assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__deduplicates(self):
         sample_text = 'osf.io osf.io osf.io and, oh, yeah, osf.io'
         with mock.patch.object(spam_tasks.requests, 'head'):
             domains = list(spam_tasks._extract_domains(sample_text))
-        assert domains == ['osf.io']
+        assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)]
 
     def test_extract_domains__ignores_floats(self):
         sample_text = 'this is a number 3.1415 not a domain'

From 1722bbd83cf2777d8f936b274df54bdcca384ea9 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Mon, 18 Dec 2023 09:46:20 -0500
Subject: [PATCH 11/23] Add merge migration

---
 osf/migrations/0018_merge_20231218_1446.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 osf/migrations/0018_merge_20231218_1446.py

diff --git a/osf/migrations/0018_merge_20231218_1446.py b/osf/migrations/0018_merge_20231218_1446.py
new file mode 100644
index 00000000000..f76317978f7
--- /dev/null
+++ b/osf/migrations/0018_merge_20231218_1446.py
@@ -0,0 +1,14 @@
+# Generated by Django 3.2.17 on 2023-12-18 14:46
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('osf', '0017_alter_notabledomain_note'),
+        ('osf', '0017_auto_20231212_1843'),
+    ]
+
+    operations = [
+    ]

From 1d626fade18230ada50ece3dd4159a6bc36ade12 Mon Sep 17 00:00:00 2001
From: John Tordoff <Johnetordoff@users.noreply.github.com>
Date: Thu, 14 Dec 2023 11:55:25 -0500
Subject: [PATCH 12/23] [ENG-4823] Add Collection Metadata Options (#10499)

* add collection metadata options for ibdgc

---------

Co-authored-by: John Tordoff <>
---
 admin/collection_providers/forms.py           | 62 +++++++++++++++++++
 admin/collection_providers/views.py           | 59 +++++++++---------
 .../js/pages/collection-provider-page.js      | 26 ++++++++
 .../collection_providers/detail.html          |  8 +++
 .../update_collection_provider_form.html      | 12 ++++
 api/collections/serializers.py                | 20 ++++++
 api_tests/search/views/test_views.py          | 20 ++++++
 osf/migrations/0017_auto_20231212_1843.py     | 34 ++++++++++
 osf/models/collection.py                      | 20 +++++-
 osf/models/collection_submission.py           | 10 +++
 website/project/views/node.py                 |  2 +
 website/search/elastic_search.py              |  2 +
 website/templates/project/project.mako        | 27 ++++++++
 13 files changed, 273 insertions(+), 29 deletions(-)
 create mode 100644 osf/migrations/0017_auto_20231212_1843.py

diff --git a/admin/collection_providers/forms.py b/admin/collection_providers/forms.py
index ca0358a126d..4b8af62bb82 100644
--- a/admin/collection_providers/forms.py
+++ b/admin/collection_providers/forms.py
@@ -15,6 +15,8 @@ class CollectionProviderForm(forms.ModelForm):
     program_area_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
     school_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
     study_design_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
+    data_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
+    disease_choices = forms.CharField(widget=forms.HiddenInput(), required=False)
     _id = forms.SlugField(
         required=True,
         help_text='URL Slug',
@@ -268,3 +270,63 @@ def clean_study_design_choices(self):
             if choices:
                 added_choices = json.loads(choices)
         return {'added': added_choices, 'removed': removed_choices}
+
+    def clean_disease_choices(self):
+        if not self.data.get('disease_choices'):
+            return {'added': [], 'removed': []}
+
+        collection_provider = self.instance
+        primary_collection = collection_provider.primary_collection
+        if primary_collection:  # Modifying an existing CollectionProvider
+            old_choices = {c.strip(' ') for c in primary_collection.disease_choices}
+            updated_choices = {c.strip(' ') for c in json.loads(self.data.get('disease_choices'))}
+            added_choices = updated_choices - old_choices
+            removed_choices = old_choices - updated_choices
+
+            active_removed_choices = set(
+                primary_collection.collectionsubmission_set.filter(
+                    disease__in=removed_choices
+                ).values_list('disease', flat=True)
+            )
+            if active_removed_choices:
+                raise forms.ValidationError(
+                    'Cannot remove the following choices for "disease", as they are '
+                    f'currently in use: {active_removed_choices}'
+                )
+        else:  # Creating a new CollectionProvider
+            added_choices = set()
+            removed_choices = set()
+            choices = self.data.get('disease_choices')
+            if choices:
+                added_choices = json.loads(choices)
+        return {'added': added_choices, 'removed': removed_choices}
+
+    def clean_data_type_choices(self):
+        if not self.data.get('data_type_choices'):
+            return {'added': [], 'removed': []}
+
+        collection_provider = self.instance
+        primary_collection = collection_provider.primary_collection
+        if primary_collection:  # Modifying an existing CollectionProvider
+            old_choices = {c.strip(' ') for c in primary_collection.data_type_choices}
+            updated_choices = {c.strip(' ') for c in json.loads(self.data.get('data_type_choices'))}
+            added_choices = updated_choices - old_choices
+            removed_choices = old_choices - updated_choices
+
+            active_removed_choices = set(
+                primary_collection.collectionsubmission_set.filter(
+                    data_type__in=removed_choices
+                ).values_list('data_type', flat=True)
+            )
+            if active_removed_choices:
+                raise forms.ValidationError(
+                    'Cannot remove the following choices for "data_type", as they are '
+                    f'currently in use: {active_removed_choices}'
+                )
+        else:  # Creating a new CollectionProvider
+            added_choices = set()
+            removed_choices = set()
+            choices = self.data.get('data_type_choices')
+            if choices:
+                added_choices = json.loads(choices)
+        return {'added': added_choices, 'removed': removed_choices}
diff --git a/admin/collection_providers/views.py b/admin/collection_providers/views.py
index d5c950ed4fd..699d82cf533 100644
--- a/admin/collection_providers/views.py
+++ b/admin/collection_providers/views.py
@@ -21,6 +21,17 @@
 from admin.providers.views import AddAdminOrModerator, RemoveAdminsAndModerators
 
 
+def _process_collection_choices(provider, choices_name, form):
+    collection = provider.primary_collection
+    choices_name_attr = f'{choices_name}_choices'
+    choices_added = form.cleaned_data[choices_name_attr]['added']
+    choices_removed = form.cleaned_data[choices_name_attr]['removed']
+
+    getattr(collection, choices_name_attr).extend(choices_added)
+    for item in choices_removed:
+        getattr(collection, choices_name_attr).remove(item)
+
+
 class CreateCollectionProvider(PermissionRequiredMixin, CreateView):
     raise_exception = True
     permission_required = 'osf.change_collectionprovider'
@@ -47,6 +58,10 @@ def form_valid(self, form):
             self.object.primary_collection.school_type_choices.append(item)
         for item in form.cleaned_data['study_design_choices']['added']:
             self.object.primary_collection.study_design_choices.append(item)
+        for item in form.cleaned_data['data_type_choices']['added']:
+            self.object.primary_collection.data_type_choices.append(item)
+        for item in form.cleaned_data['disease_choices']['added']:
+            self.object.primary_collection.disease_choices.append(item)
         self.object.primary_collection.save()
         return super().form_valid(form)
 
@@ -163,6 +178,16 @@ def get_context_data(self, *args, **kwargs):
         ))
         kwargs['study_design_choices'] = study_design_choices_html
 
+        disease_choices_html = '<ul>{choices}</ul>'.format(choices=''.join(
+            f'<li>{choice}</li>' for choice in primary_collection.disease_choices
+        ))
+        kwargs['disease_choices'] = disease_choices_html
+
+        data_type_choices_html = '<ul>{choices}</ul>'.format(choices=''.join(
+            f'<li>{choice}</li>' for choice in primary_collection.data_type_choices
+        ))
+        kwargs['data_type_choices'] = data_type_choices_html
+
         # get a dict of model fields so that we can set the initial value for the update form
         fields = model_to_dict(collection_provider)
         fields['collected_type_choices'] = json.dumps(primary_collection.collected_type_choices)
@@ -175,6 +200,8 @@ def get_context_data(self, *args, **kwargs):
 
         fields['school_type_choices'] = json.dumps(primary_collection.school_type_choices)
         fields['study_design_choices'] = json.dumps(primary_collection.study_design_choices)
+        fields['data_type_choices'] = json.dumps(primary_collection.data_type_choices)
+        fields['disease_choices'] = json.dumps(primary_collection.disease_choices)
 
         # compile html list of collected_type_choices
         if collection_provider.primary_collection:
@@ -235,34 +262,8 @@ class CollectionProviderChangeForm(PermissionRequiredMixin, UpdateView):
 
     def form_valid(self, form):
         if self.object.primary_collection:
-            self.object.primary_collection.collected_type_choices.extend(form.cleaned_data['collected_type_choices']['added'])
-            for item in form.cleaned_data['collected_type_choices']['removed']:
-                self.object.primary_collection.collected_type_choices.remove(item)
-
-            self.object.primary_collection.status_choices.extend(form.cleaned_data['status_choices']['added'])
-            for item in form.cleaned_data['status_choices']['removed']:
-                self.object.primary_collection.status_choices.remove(item)
-
-            self.object.primary_collection.issue_choices.extend(form.cleaned_data['issue_choices']['added'])
-            for item in form.cleaned_data['issue_choices']['removed']:
-                self.object.primary_collection.issue_choices.remove(item)
-
-            self.object.primary_collection.volume_choices.extend(form.cleaned_data['volume_choices']['added'])
-            for item in form.cleaned_data['volume_choices']['removed']:
-                self.object.primary_collection.volume_choices.remove(item)
-
-            self.object.primary_collection.program_area_choices.extend(form.cleaned_data['program_area_choices']['added'])
-            for item in form.cleaned_data['program_area_choices']['removed']:
-                self.object.primary_collection.program_area_choices.remove(item)
-
-        self.object.primary_collection.school_type_choices.extend(form.cleaned_data['school_type_choices']['added'])
-        for item in form.cleaned_data['school_type_choices']['removed']:
-            self.object.primary_collection.school_type_choices.remove(item)
-
-        self.object.primary_collection.study_design_choices.extend(form.cleaned_data['study_design_choices']['added'])
-        for item in form.cleaned_data['study_design_choices']['removed']:
-            self.object.primary_collection.study_design_choices.remove(item)
-
+            for choices_name in ['collected_type', 'status', 'issue', 'volume', 'program_area', 'school_type', 'study_design', 'data_type', 'disease']:
+                _process_collection_choices(self.object, choices_name, form)
         self.object.primary_collection.save()
         return super().form_valid(form)
 
@@ -399,6 +400,8 @@ def create_or_update_provider(self, provider_data):
             provider.primary_collection.program_area_choices = primary_collection['fields']['program_area_choices']
             provider.primary_collection.school_type_choices = primary_collection['fields']['school_type_choices']
             provider.primary_collection.study_design_choices = primary_collection['fields']['study_design_choices']
+            provider.primary_collection.disease_choices = primary_collection['fields']['disease_choices']
+            provider.primary_collection.data_type_choices = primary_collection['fields']['data_type_choices']
             provider.primary_collection.save()
         if licenses:
             provider.licenses_acceptable.set(licenses)
diff --git a/admin/static/js/pages/collection-provider-page.js b/admin/static/js/pages/collection-provider-page.js
index 2964fdc5a56..11e6e2302b4 100644
--- a/admin/static/js/pages/collection-provider-page.js
+++ b/admin/static/js/pages/collection-provider-page.js
@@ -57,6 +57,22 @@ $('#tags-input-study-design').on('itemRemoved', function(event) {
     $('#id_study_design_choices').val(JSON.stringify($('#tags-input-study-design').tagsinput('items')));
 });
 
+$('#tags-input-data-type').on('itemAdded', function(event) {
+    $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items')));
+});
+
+$('#tags-input-data-type').on('itemRemoved', function(event) {
+    $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items')));
+});
+
+$('#tags-input-disease').on('itemAdded', function(event) {
+    $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items')));
+});
+
+$('#tags-input-disease').on('itemRemoved', function(event) {
+    $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items')));
+});
+
 
 $(document).ready(function() {
    var collectedTypeItems = JSON.parse($('#id_collected_type_choices').val());
@@ -93,4 +109,14 @@ $(document).ready(function() {
    studyDesignItems.forEach(function(element){
        $('#tags-input-study-design').tagsinput('add', element)
    });
+
+   var diseaseItems = JSON.parse($('#id_disease_choices').val());
+   diseaseItems.forEach(function(element){
+       $('#tags-input-disease').tagsinput('add', element)
+   });
+
+   var dataTypeItems = JSON.parse($('#id_data_type_choices').val());
+   dataTypeItems.forEach(function(element){
+       $('#tags-input-data-type').tagsinput('add', element)
+   });
 });
diff --git a/admin/templates/collection_providers/detail.html b/admin/templates/collection_providers/detail.html
index 7d488dd974a..c015a90fe8b 100644
--- a/admin/templates/collection_providers/detail.html
+++ b/admin/templates/collection_providers/detail.html
@@ -66,6 +66,14 @@ <h2>{{ collection_provider.name }}</h2>
 						<th>study_design_choices</th>
 						<td>{{ study_design_choices | safe}}</td>
 					</tr>
+					<tr>
+						<th>disease_choices</th>
+						<td>{{ disease_choices | safe}}</td>
+					</tr>
+					<tr>
+						<th>data_type_choices</th>
+						<td>{{ data_type_choices | safe}}</td>
+					</tr>
                 </table>
             </div>
         </div>
diff --git a/admin/templates/collection_providers/update_collection_provider_form.html b/admin/templates/collection_providers/update_collection_provider_form.html
index 05422db6d9a..c64198c2e95 100644
--- a/admin/templates/collection_providers/update_collection_provider_form.html
+++ b/admin/templates/collection_providers/update_collection_provider_form.html
@@ -98,6 +98,18 @@
 					<input id="tags-input-study-design" type="text" data-role="tagsinput"/>
 				</div>
 			</div>
+			<div>
+				<label>Disease choices:</label>
+				<div class=#bootstrap-tagsinput">
+					<input id="tags-input-disease" type="text" data-role="tagsinput"/>
+				</div>
+			</div>
+			<div>
+				<label>Data Type choices:</label>
+				<div class=#bootstrap-tagsinput">
+					<input id="tags-input-data-type" type="text" data-role="tagsinput"/>
+				</div>
+			</div>
             <input class="form-button" type="submit" value="Save" />
         </form>
     </div>
diff --git a/api/collections/serializers.py b/api/collections/serializers.py
index 3b5a10c7ec6..7499015aaa0 100644
--- a/api/collections/serializers.py
+++ b/api/collections/serializers.py
@@ -68,6 +68,14 @@ class CollectionSerializer(JSONAPISerializer):
         child=ser.CharField(max_length=127),
         default=list(),
     )
+    data_type_choices = ser.ListField(
+        child=ser.CharField(max_length=127),
+        default=list(),
+    )
+    disease_choices = ser.ListField(
+        child=ser.CharField(max_length=127),
+        default=list(),
+    )
 
     links = LinksField({})
 
@@ -241,6 +249,8 @@ def subjects_view_kwargs(self):
     program_area = ser.CharField(required=False)
     school_type = ser.CharField(required=False)
     study_design = ser.CharField(required=False)
+    data_type = ser.CharField(required=False)
+    disease = ser.CharField(required=False)
 
     def get_absolute_url(self, obj):
         return absolute_reverse(
@@ -272,6 +282,10 @@ def update(self, obj, validated_data):
             obj.school_Type = validated_data.pop('school_type')
         if 'study_design' in validated_data:
             obj.study_design = validated_data.pop('study_design')
+        if 'data_type' in validated_data:
+            obj.data_type = validated_data.pop('data_type')
+        if 'disease' in validated_data:
+            obj.disease = validated_data.pop('disease')
 
         obj.save()
         return obj
@@ -337,6 +351,8 @@ def subjects_view_kwargs(self):
     program_area = ser.CharField(required=False)
     school_type = ser.CharField(required=False)
     study_design = ser.CharField(required=False)
+    data_type = ser.CharField(required=False)
+    disease = ser.CharField(required=False)
 
     def get_absolute_url(self, obj):
         return absolute_reverse(
@@ -368,6 +384,10 @@ def update(self, obj, validated_data):
             obj.school_Type = validated_data.pop('school_type')
         if 'study_design' in validated_data:
             obj.study_design = validated_data.pop('study_design')
+        if 'data_type' in validated_data:
+            obj.data_type = validated_data.pop('data_type')
+        if 'disease' in validated_data:
+            obj.disease = validated_data.pop('disease')
 
         obj.save()
         return obj
diff --git a/api_tests/search/views/test_views.py b/api_tests/search/views/test_views.py
index 4fc5e0e1196..7ebf6e769d0 100644
--- a/api_tests/search/views/test_views.py
+++ b/api_tests/search/views/test_views.py
@@ -50,6 +50,7 @@ def collection_public(self, user):
         return CollectionFactory(creator=user, provider=CollectionProviderFactory(), is_public=True,
                                  status_choices=['', 'asdf', 'lkjh'], collected_type_choices=['', 'asdf', 'lkjh'],
                                  issue_choices=['', '0', '1', '2'], volume_choices=['', '0', '1', '2'],
+                                 disease_choices=['illness'], data_type_choices=['realness'],
                                  program_area_choices=['', 'asdf', 'lkjh'])
 
     @pytest.fixture()
@@ -1000,3 +1001,22 @@ def test_POST_search_collections(
         assert res.json['links']['meta']['total'] == 1
         assert len(res.json['data']) == 1
         assert res.json['data'][0]['id'] == node_with_abstract._id
+
+    def test_POST_search_collections_disease_data_type(
+            self, app, url_collection_search, user, node_one, node_two, collection_public,
+            node_with_abstract, node_private, registration_collection, registration_one,
+            registration_two, registration_private, reg_with_abstract):
+
+        collection_public.collect_object(node_one, user, disease='illness', data_type='realness')
+        collection_public.collect_object(node_two, user, data_type='realness')
+
+        payload = self.post_payload(disease='illness')
+        res = app.post_json_api(url_collection_search, payload)
+        assert res.status_code == 200
+        assert res.json['links']['meta']['total'] == 1
+
+        payload = self.post_payload(dataType='realness')
+        res = app.post_json_api(url_collection_search, payload)
+        assert res.status_code == 200
+        assert res.json['links']['meta']['total'] == 2
+        assert len(res.json['data']) == 2
diff --git a/osf/migrations/0017_auto_20231212_1843.py b/osf/migrations/0017_auto_20231212_1843.py
new file mode 100644
index 00000000000..5c13864dfc7
--- /dev/null
+++ b/osf/migrations/0017_auto_20231212_1843.py
@@ -0,0 +1,34 @@
+# Generated by Django 3.2.17 on 2023-12-12 18:43
+
+import django.contrib.postgres.fields
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('osf', '0016_auto_20230828_1810'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='collection',
+            name='data_type_choices',
+            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None),
+        ),
+        migrations.AddField(
+            model_name='collection',
+            name='disease_choices',
+            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None),
+        ),
+        migrations.AddField(
+            model_name='collectionsubmission',
+            name='data_type',
+            field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127),
+        ),
+        migrations.AddField(
+            model_name='collectionsubmission',
+            name='disease',
+            field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127),
+        ),
+    ]
diff --git a/osf/models/collection.py b/osf/models/collection.py
index 36fabff27af..ca2620a5e7b 100644
--- a/osf/models/collection.py
+++ b/osf/models/collection.py
@@ -55,6 +55,8 @@ class Meta:
     program_area_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
     school_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
     study_design_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
+    disease_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
+    data_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list)
     is_public = models.BooleanField(default=False, db_index=True)
     is_promoted = models.BooleanField(default=False, db_index=True)
     is_bookmark_collection = models.BooleanField(default=False, db_index=True)
@@ -160,7 +162,7 @@ def has_permission(self, user, perm):
 
     def collect_object(
             self, obj, collector, collected_type=None, status=None, volume=None, issue=None,
-            program_area=None, school_type=None, study_design=None):
+            program_area=None, school_type=None, study_design=None, data_type=None, disease=None):
         """ Adds object to collection, creates CollectionSubmission reference
             Performs type / metadata validation. User permissions checked in view.
 
@@ -177,6 +179,8 @@ def collect_object(
         program_area = program_area or ''
         school_type = school_type or ''
         study_design = study_design or ''
+        data_type = data_type or ''
+        disease = disease or ''
 
         if not self.collected_type_choices and collected_type:
             raise ValidationError('May not specify "type" for this collection')
@@ -220,6 +224,18 @@ def collect_object(
             elif study_design not in self.study_design_choices:
                 raise ValidationError(f'"{study_design}" is not an acceptable "study_design" for this collection')
 
+        if disease:
+            if not self.disease_choices:
+                raise ValidationError('May not specify "disease" for this collection')
+            elif disease not in self.disease_choices:
+                raise ValidationError(f'"{disease}" is not an acceptable "disease" for this collection')
+
+        if data_type:
+            if not self.data_type_choices:
+                raise ValidationError('May not specify "data_type" for this collection')
+            elif data_type not in self.data_type_choices:
+                raise ValidationError(f'"{data_type}" is not an acceptable "data_type" for this collection')
+
         if not any([isinstance(obj, t.model_class()) for t in self.collected_types.all()]):
             # Not all objects have a content_type_pk, have to look the other way.
             # Ideally, all objects would, and we could do:
@@ -248,6 +264,8 @@ def collect_object(
             collection_submission.program_area = program_area
             collection_submission.school_type = school_type
             collection_submission.study_design = study_design
+            collection_submission.data_type = data_type
+            collection_submission.disease = disease
             collection_submission.save()
 
             return collection_submission
diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py
index d4819255991..963d38b116b 100644
--- a/osf/models/collection_submission.py
+++ b/osf/models/collection_submission.py
@@ -39,6 +39,16 @@ class Meta:
     program_area = models.CharField(blank=True, max_length=127)
     school_type = models.CharField(blank=True, max_length=127)
     study_design = models.CharField(blank=True, max_length=127)
+    disease = models.CharField(
+        help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium',
+        blank=True,
+        max_length=127
+    )
+    data_type = models.CharField(
+        help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium',
+        blank=True,
+        max_length=127
+    )
     machine_state = models.IntegerField(
         choices=CollectionSubmissionStates.int_field_choices(),
         default=CollectionSubmissionStates.IN_PROGRESS,
diff --git a/website/project/views/node.py b/website/project/views/node.py
index c2ee444ab88..70a47c1b4d8 100644
--- a/website/project/views/node.py
+++ b/website/project/views/node.py
@@ -911,6 +911,8 @@ def serialize_collections(collection_submissions, auth):
         'node_id': collection_submission.guid._id,
         'study_design': collection_submission.study_design,
         'program_area': collection_submission.program_area,
+        'disease': collection_submission.disease,
+        'data_type': collection_submission.data_type,
         'state': collection_submission.state.db_name,
         'subjects': list(collection_submission.subjects.values_list('text', flat=True)),
         'is_public': collection_submission.collection.is_public,
diff --git a/website/search/elastic_search.py b/website/search/elastic_search.py
index bc418d3c940..6b479c29de5 100644
--- a/website/search/elastic_search.py
+++ b/website/search/elastic_search.py
@@ -618,6 +618,8 @@ def serialize_collection_submission(collection_submission):
         'programArea': collection_submission.program_area,
         'schoolType': collection_submission.school_type,
         'studyDesign': collection_submission.study_design,
+        'disease': collection_submission.disease,
+        'dataType': collection_submission.data_type,
         'subjects': list(collection_submission.subjects.values_list('text', flat=True)),
         'title': getattr(obj, 'title', ''),
         'url': getattr(obj, 'url', ''),
diff --git a/website/templates/project/project.mako b/website/templates/project/project.mako
index d220853e1b9..356fa93a562 100644
--- a/website/templates/project/project.mako
+++ b/website/templates/project/project.mako
@@ -401,6 +401,20 @@
                                     </div>
                                 % endif
                                 <hr>
+                                % if collection['disease'] and collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i> |&nbsp; Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % elif collection['disease']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i>
+                                    </div>
+                                % elif collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % endif
+                                <hr>
                             % elif collection['state'] == 'pending' and user['is_contributor_or_group_member']:
                                 % if user['is_admin']:
                                     <a class="fa fa-close collections-cancel-icon pull-right" collection_id=${collection['collection_id']} node_id=${collection['node_id']} aria-label="Cancel Submission Request Button"></a>
@@ -448,6 +462,19 @@
                                         Program Area: <i>${collection['program_area']}</i>
                                     </div>
                                 % endif
+                                % if collection['disease'] and collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i> |&nbsp; Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % elif collection['disease']:
+                                    <div  style="padding-left: 30px;">
+                                        Disease: <i>${collection['disease']}</i>
+                                    </div>
+                                % elif collection['data_type']:
+                                    <div  style="padding-left: 30px;">
+                                        Data Type: <i>${collection['data_type']}</i>
+                                    </div>
+                                % endif
                                 <hr>
                             % elif collection['state'] == 'rejected' and user['is_contributor_or_group_member']:
                                 % if user['is_admin']:

From 54ad5d85d61b32326c1065f4fc61005248608df2 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Mon, 18 Dec 2023 09:46:20 -0500
Subject: [PATCH 13/23] Add merge migration

---
 osf/migrations/0018_merge_20231218_1446.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 osf/migrations/0018_merge_20231218_1446.py

diff --git a/osf/migrations/0018_merge_20231218_1446.py b/osf/migrations/0018_merge_20231218_1446.py
new file mode 100644
index 00000000000..f76317978f7
--- /dev/null
+++ b/osf/migrations/0018_merge_20231218_1446.py
@@ -0,0 +1,14 @@
+# Generated by Django 3.2.17 on 2023-12-18 14:46
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('osf', '0017_alter_notabledomain_note'),
+        ('osf', '0017_auto_20231212_1843'),
+    ]
+
+    operations = [
+    ]

From f9b2d2828031569db386a37e414b9144eb2e8ef3 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Mon, 8 Jan 2024 10:59:13 -0500
Subject: [PATCH 14/23] Update sitemap for preprint routes, file downloads
 [ENG-4919]

---
 osf_tests/test_generate_sitemap.py |  6 ++++--
 scripts/generate_sitemap.py        | 20 +++++++++++++++++---
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/osf_tests/test_generate_sitemap.py b/osf_tests/test_generate_sitemap.py
index 2b821945fd1..f8f8ab4a3ef 100644
--- a/osf_tests/test_generate_sitemap.py
+++ b/osf_tests/test_generate_sitemap.py
@@ -118,8 +118,10 @@ def all_included_links(self, user_admin_project_public, user_admin_project_priva
             project_preprint_osf.url,
             project_preprint_other.url,
             registration_active.url,
-            '/{}/'.format(preprint_osf._id),
-            '/preprints/{}/{}/'.format(provider_other._id, preprint_other._id),
+            '/preprints/{}/{}'.format(preprint_osf.provider._id, preprint_osf._id),
+            '/preprints/{}/{}'.format(provider_other._id, preprint_other._id),
+            '/{}/download/?format=pdf'.format(preprint_osf._id),
+            '/{}/download/?format=pdf'.format(preprint_other._id)
         ])
         urls_to_include = [urljoin(settings.DOMAIN, item) for item in urls_to_include]
 
diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py
index 5249b4fb8a3..9db08928807 100644
--- a/scripts/generate_sitemap.py
+++ b/scripts/generate_sitemap.py
@@ -203,17 +203,31 @@ def generate(self):
         objs = (Preprint.objects.can_view()
                     .select_related('node', 'provider', 'primary_file'))
         progress.start(objs.count() * 2, 'PREP: ')
-        osf = PreprintProvider.objects.get(_id='osf')
         for obj in objs:
             try:
                 preprint_date = obj.modified.strftime('%Y-%m-%d')
                 config = settings.SITEMAP_PREPRINT_CONFIG
-                preprint_url = obj.url
-                provider = obj.provider
+                preprint_url = os.path.join('preprints', obj.provider._id, obj._id)
                 config['loc'] = urljoin(settings.DOMAIN, preprint_url)
                 config['lastmod'] = preprint_date
                 self.add_url(config)
 
+                # Preprint file urls
+                try:
+                    file_config = settings.SITEMAP_PREPRINT_FILE_CONFIG
+                    file_config['loc'] = urljoin(
+                        settings.DOMAIN,
+                        os.path.join(
+                            obj._id,
+                            'download',
+                            '?format=pdf'
+                        )
+                    )
+                    file_config['lastmod'] = preprint_date
+                    self.add_url(file_config)
+                except Exception as e:
+                    self.log_errors(obj.primary_file, obj.primary_file._id, e)
+
             except Exception as e:
                 self.log_errors(obj, obj._id, e)
             progress.increment(2)

From 900b33a3f3819d795f69948d845989eadc15bb8c Mon Sep 17 00:00:00 2001
From: John Tordoff <>
Date: Fri, 5 Jan 2024 12:20:48 -0500
Subject: [PATCH 15/23] use content instead of sanitized text for spam filter.

---
 addons/wiki/models.py             | 6 +-----
 osf_tests/test_notable_domains.py | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/addons/wiki/models.py b/addons/wiki/models.py
index 0c057342fd8..f2da7b41d9a 100644
--- a/addons/wiki/models.py
+++ b/addons/wiki/models.py
@@ -203,11 +203,7 @@ def check_spam(self):
         )
 
     def _get_spam_content(self, node):
-        content = []
-        content.append(self.raw_text(node))
-        if not content:
-            return None
-        return ' '.join(content)
+        return self.content or None
 
     def clone_version(self, wiki_page, user):
         """Clone a node wiki page.
diff --git a/osf_tests/test_notable_domains.py b/osf_tests/test_notable_domains.py
index 78edd11e967..dde9970e4b7 100644
--- a/osf_tests/test_notable_domains.py
+++ b/osf_tests/test_notable_domains.py
@@ -268,7 +268,7 @@ def test_extract_domains_from_wiki__public_project_extracts_domains_on_wiki_save
         project = wiki_version.wiki_page.node
         project.is_public = True
         project.save()
-        wiki_version.content = 'This has a domain: https://cos.io'
+        wiki_version.content = '[EXTREME VIDEO] <b><a href="https://cos.io/JAkeEloit">WATCH VIDEO</a></b>'
 
         request_context.g.current_session = {'auth_user_id': project.creator._id}
         with mock.patch.object(spam_tasks.requests, 'head'):

From a8ace8ec28a2a7bcebfd8730df7dc82f2510e2a9 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Wed, 10 Jan 2024 09:29:32 -0500
Subject: [PATCH 16/23] Avoid 401 when indexing withdrawn preprints

---
 osf_tests/test_generate_sitemap.py | 17 ++++++++++++++---
 scripts/generate_sitemap.py        | 28 +++++++++++++++-------------
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/osf_tests/test_generate_sitemap.py b/osf_tests/test_generate_sitemap.py
index f8f8ab4a3ef..b8429cca77a 100644
--- a/osf_tests/test_generate_sitemap.py
+++ b/osf_tests/test_generate_sitemap.py
@@ -6,6 +6,7 @@
 import tempfile
 import xml
 from future.moves.urllib.parse import urljoin
+from django.utils import timezone
 
 from scripts import generate_sitemap
 from osf_tests.factories import (AuthUserFactory, ProjectFactory, RegistrationFactory, CollectionFactory,
@@ -98,6 +99,15 @@ def preprint_osf(self, project_preprint_osf, user_admin_project_public, provider
                                              creator=user_admin_project_public,
                                              provider=provider_osf)
 
+    @pytest.fixture(autouse=True)
+    def preprint_withdrawn(self, project_preprint_osf, user_admin_project_public, provider_osf):
+        preprint = PreprintFactory(project=project_preprint_osf,
+                                             creator=user_admin_project_public,
+                                             provider=provider_osf)
+        preprint.date_withdrawn = timezone.now()
+        preprint.save()
+        return preprint
+
     @pytest.fixture(autouse=True)
     def preprint_other(self, project_preprint_other, user_admin_project_public, provider_other):
         return PreprintFactory(project=project_preprint_other,
@@ -107,8 +117,8 @@ def preprint_other(self, project_preprint_other, user_admin_project_public, prov
     @pytest.fixture(autouse=True)
     def all_included_links(self, user_admin_project_public, user_admin_project_private, project_registration_public,
                              project_preprint_osf, project_preprint_other,
-                             registration_active, provider_other, preprint_osf,
-                             preprint_other):
+                             registration_active, provider_other, provider_osf,
+                             preprint_osf, preprint_other, preprint_withdrawn):
         # Return urls of all fixtures
         urls_to_include = [item['loc'] for item in settings.SITEMAP_STATIC_URLS]
         urls_to_include.extend([
@@ -118,8 +128,9 @@ def all_included_links(self, user_admin_project_public, user_admin_project_priva
             project_preprint_osf.url,
             project_preprint_other.url,
             registration_active.url,
-            '/preprints/{}/{}'.format(preprint_osf.provider._id, preprint_osf._id),
+            '/preprints/{}/{}'.format(provider_osf._id, preprint_osf._id),
             '/preprints/{}/{}'.format(provider_other._id, preprint_other._id),
+            '/preprints/{}/{}'.format(provider_osf._id, preprint_withdrawn._id),
             '/{}/download/?format=pdf'.format(preprint_osf._id),
             '/{}/download/?format=pdf'.format(preprint_other._id)
         ])
diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py
index 9db08928807..a31a9febe1a 100644
--- a/scripts/generate_sitemap.py
+++ b/scripts/generate_sitemap.py
@@ -213,20 +213,22 @@ def generate(self):
                 self.add_url(config)
 
                 # Preprint file urls
-                try:
-                    file_config = settings.SITEMAP_PREPRINT_FILE_CONFIG
-                    file_config['loc'] = urljoin(
-                        settings.DOMAIN,
-                        os.path.join(
-                            obj._id,
-                            'download',
-                            '?format=pdf'
+                if not obj.is_retracted:
+                    # Withdrawn preprints may be viewed but not downloaded
+                    try:
+                        file_config = settings.SITEMAP_PREPRINT_FILE_CONFIG
+                        file_config['loc'] = urljoin(
+                            settings.DOMAIN,
+                            os.path.join(
+                                obj._id,
+                                'download',
+                                '?format=pdf'
+                            )
                         )
-                    )
-                    file_config['lastmod'] = preprint_date
-                    self.add_url(file_config)
-                except Exception as e:
-                    self.log_errors(obj.primary_file, obj.primary_file._id, e)
+                        file_config['lastmod'] = preprint_date
+                        self.add_url(file_config)
+                    except Exception as e:
+                        self.log_errors(obj.primary_file, obj.primary_file._id, e)
 
             except Exception as e:
                 self.log_errors(obj, obj._id, e)

From e48061310064835028295ed76a2433ba1927f887 Mon Sep 17 00:00:00 2001
From: Matt Frazier <maf7sm@virginia.edu>
Date: Wed, 10 Jan 2024 10:29:08 -0500
Subject: [PATCH 17/23] Add actions to update domain notes

---
 osf/admin.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/osf/admin.py b/osf/admin.py
index 40434c36b71..7b8257389c5 100644
--- a/osf/admin.py
+++ b/osf/admin.py
@@ -7,6 +7,7 @@
 from django.http import HttpResponseRedirect
 from django.urls import reverse
 
+from osf.external.spam.tasks import reclassify_domain_references
 from osf.models import OSFUser, Node, NotableDomain, NodeLicense
 from osf.models.notable_domain import DomainReference
 
@@ -50,11 +51,40 @@ class NotableDomainAdmin(admin.ModelAdmin):
     list_display = ('domain', 'note', 'number_of_references')
     list_filter = ('note',)
     search_fields = ('domain',)
+    actions = ['make_ignored', 'make_excluded']
 
     @admin.display(ordering='number_of_references')
     def number_of_references(self, obj):
         return obj.number_of_references
 
+    @admin.action(description='Mark selected as IGNORED')
+    def make_ignored(self, request, queryset):
+        signatures = []
+        target_note = 3  # IGNORED
+        for obj in queryset:
+            signatures.append({
+                'notable_domain_id': obj.pk,
+                'current_note': target_note,
+                'previous_note': obj.note
+            })
+        queryset.update(note=target_note)
+        for sig in signatures:
+            reclassify_domain_references.apply_async(kwargs=sig)
+
+    @admin.action(description='Mark selected as EXCLUDED')
+    def make_excluded(self, request, queryset):
+        signatures = []
+        target_note = 0  # EXCLUDE_FROM_ACCOUNT_CREATION_AND_CONTENT
+        for obj in queryset:
+            signatures.append({
+                'notable_domain_id': obj.pk,
+                'current_note': target_note,
+                'previous_note': obj.note
+            })
+        queryset.update(note=target_note)
+        for sig in signatures:
+            reclassify_domain_references.apply_async(kwargs=sig)
+
     def get_urls(self):
         urls = super().get_urls()
         return [

From 45029e47ff98d8ba57b8f3ab9ebf7be04c191cd0 Mon Sep 17 00:00:00 2001
From: Abram Booth <aaxelb@users.noreply.github.com>
Date: Wed, 17 Jan 2024 16:00:45 -0500
Subject: [PATCH 18/23] fix: multiple funding awards from the same funder
 (#10512)

would previously generate invalid xml for datacite,
now should not.
[ENG-5044]
---
 .../datacite/datacite_tree_walker.py          | 53 +++++++++++--------
 .../expected_metadata_files/file_full.turtle  | 16 +++++-
 .../preprint_full.turtle                      | 16 +++++-
 .../project_full.datacite.json                | 19 +++++++
 .../project_full.datacite.xml                 | 10 ++++
 .../project_full.turtle                       | 16 +++++-
 .../registration_full.turtle                  | 16 +++++-
 .../metadata/test_serialized_metadata.py      | 16 +++++-
 8 files changed, 131 insertions(+), 31 deletions(-)

diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py
index 08990d466a9..bddc64fac6a 100644
--- a/osf/metadata/serializers/datacite/datacite_tree_walker.py
+++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py
@@ -268,33 +268,42 @@ def _visit_dates(self, parent_el):
 
     def _visit_funding_references(self, parent_el):
         fundrefs_el = self.visit(parent_el, 'fundingReferences', is_list=True)
+        _visited_funders = set()
+        for _funding_award in sorted(self.basket[OSF.hasFunding]):
+            # datacite allows at most one funder per funding reference
+            _funder = next(self.basket[_funding_award:DCTERMS.contributor])
+            self._funding_reference(fundrefs_el, _funder, _funding_award)
+            _visited_funders.add(_funder)
         for _funder in self.basket[OSF.funder]:
-            fundref_el = self.visit(fundrefs_el, 'fundingReference')
-            self.visit(fundref_el, 'funderName', text=next(self.basket[_funder:FOAF.name], ''))
-            funder_identifier = next(self.basket[_funder:DCTERMS.identifier], '')
+            if _funder not in _visited_funders:
+                self._funding_reference(fundrefs_el, _funder)
+
+    def _funding_reference(self, fundrefs_el, funder, funding_award=None):
+        _fundref_el = self.visit(fundrefs_el, 'fundingReference')
+        self.visit(_fundref_el, 'funderName', text=next(self.basket[funder:FOAF.name], ''))
+        _funder_identifier = next(self.basket[funder:DCTERMS.identifier], '')
+        self.visit(
+            _fundref_el,
+            'funderIdentifier',
+            text=_funder_identifier,
+            attrib={
+                'funderIdentifierType': self._funder_identifier_type(_funder_identifier),
+            },
+        )
+        if funding_award is not None:
             self.visit(
-                fundref_el,
-                'funderIdentifier',
-                text=funder_identifier,
+                _fundref_el,
+                'awardNumber',
+                text=next(self.basket[funding_award:OSF.awardNumber], ''),
                 attrib={
-                    'funderIdentifierType': self._funder_identifier_type(funder_identifier),
+                    'awardURI': (
+                        str(funding_award)
+                        if isinstance(funding_award, rdflib.URIRef)
+                        else ''
+                    )
                 },
             )
-            for _funding_award in self.basket[OSF.hasFunding]:
-                if _funder in self.basket[_funding_award:DCTERMS.contributor]:
-                    self.visit(
-                        fundref_el,
-                        'awardNumber',
-                        text=next(self.basket[_funding_award:OSF.awardNumber], ''),
-                        attrib={
-                            'awardURI': (
-                                str(_funding_award)
-                                if isinstance(_funding_award, rdflib.URIRef)
-                                else ''
-                            )
-                        },
-                    )
-                    self.visit(fundref_el, 'awardTitle', text=next(self.basket[_funding_award:DCTERMS.title], ''))
+            self.visit(_fundref_el, 'awardTitle', text=next(self.basket[funding_award:DCTERMS.title], ''))
 
     def _visit_publication_year(self, parent_el, focus_iri):
         year_copyrighted = next(self.basket[focus_iri:DCTERMS.dateCopyrighted], None)
diff --git a/osf_tests/metadata/expected_metadata_files/file_full.turtle b/osf_tests/metadata/expected_metadata_files/file_full.turtle
index d04eca39e8e..4859b2bf84a 100644
--- a/osf_tests/metadata/expected_metadata_files/file_full.turtle
+++ b/osf_tests/metadata/expected_metadata_files/file_full.turtle
@@ -28,8 +28,10 @@
     dcterms:title "this is a project title!"@en ;
     dcterms:type <https://schema.datacite.org/meta/kernel-4.4/#Dataset> ;
     owl:sameAs <https://doi.org/10.70102/FK2osf.io/w2ibb> ;
-    osf:funder <https://doi.org/10.$$$$> ;
-    osf:hasFunding <https://moneypockets.example/millions> .
+    osf:funder <https://doi.org/10.$>,
+        <https://doi.org/10.$$$$> ;
+    osf:hasFunding <https://moneypockets.example/millions>,
+        <https://moneypockets.example/millions-more> .
 
 <http://localhost:5000/w3ibb?revision=1> a osf:FileVersion ;
     dcterms:created "2123-05-04" ;
@@ -46,6 +48,12 @@
     dcterms:title "because reasons" ;
     osf:awardNumber "10000000" .
 
+<https://moneypockets.example/millions-more> a osf:FundingAward ;
+    dcterms:contributor <https://doi.org/10.$$$$> ;
+    dcterms:identifier "https://moneypockets.example/millions-more" ;
+    dcterms:title "because reasons!" ;
+    osf:awardNumber "2000000" .
+
 <https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode> dcterms:identifier "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode" ;
     foaf:name "CC-By Attribution-NonCommercial-NoDerivatives 4.0 International" .
 
@@ -53,6 +61,10 @@
     dcterms:identifier "https://doi.org/10.$$$$" ;
     foaf:name "Mx. Moneypockets" .
 
+<https://doi.org/10.$> a dcterms:Agent ;
+    dcterms:identifier "https://doi.org/10.$" ;
+    foaf:name "Caring Fan" .
+
 <http://localhost:5000/w1ibb> a dcterms:Agent,
         foaf:Person ;
     dcterms:identifier "http://localhost:5000/w1ibb" ;
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle
index 59943430882..10ae10a7741 100644
--- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle
+++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle
@@ -50,8 +50,10 @@
     dcterms:title "this is a project title!"@en ;
     dcterms:type <https://schema.datacite.org/meta/kernel-4.4/#Dataset> ;
     owl:sameAs <https://doi.org/10.70102/FK2osf.io/w2ibb> ;
-    osf:funder <https://doi.org/10.$$$$> ;
-    osf:hasFunding <https://moneypockets.example/millions> .
+    osf:funder <https://doi.org/10.$>,
+        <https://doi.org/10.$$$$> ;
+    osf:hasFunding <https://moneypockets.example/millions>,
+        <https://moneypockets.example/millions-more> .
 
 <http://localhost:8000/v2/subjects/subjwobb/> a skos:Concept ;
     skos:broader <http://localhost:8000/v2/subjects/subjwibb/> ;
@@ -77,6 +79,12 @@
     dcterms:title "because reasons" ;
     osf:awardNumber "10000000" .
 
+<https://moneypockets.example/millions-more> a osf:FundingAward ;
+    dcterms:contributor <https://doi.org/10.$$$$> ;
+    dcterms:identifier "https://moneypockets.example/millions-more" ;
+    dcterms:title "because reasons!" ;
+    osf:awardNumber "2000000" .
+
 <https://schema.datacite.org/meta/kernel-4.4/#Dataset> rdfs:label "Dataset"@en .
 
 <https://schema.datacite.org/meta/kernel-4.4/#Preprint> rdfs:label "Preprint"@en .
@@ -106,6 +114,10 @@
     dcterms:identifier "https://doi.org/10.$$$$" ;
     foaf:name "Mx. Moneypockets" .
 
+<https://doi.org/10.$> a dcterms:Agent ;
+    dcterms:identifier "https://doi.org/10.$" ;
+    foaf:name "Caring Fan" .
+
 <http://localhost:8000/v2/subjects/subjwibbb/> a skos:Concept ;
     skos:inScheme <https://bepress.com/reference_guide_dc/disciplines/> ;
     skos:prefLabel "wibbble" .
diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json
index d77541c609e..43d3373c9f1 100644
--- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json
+++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json
@@ -59,6 +59,25 @@
         "funderIdentifierType": "Crossref Funder ID"
       },
       "funderName": "Mx. Moneypockets"
+    },
+    {
+      "awardNumber": {
+        "awardNumber": "2000000",
+        "awardURI": "https://moneypockets.example/millions-more"
+      },
+      "awardTitle": "because reasons!",
+      "funderIdentifier": {
+        "funderIdentifier": "https://doi.org/10.$$$$",
+        "funderIdentifierType": "Crossref Funder ID"
+      },
+      "funderName": "Mx. Moneypockets"
+    },
+    {
+      "funderIdentifier": {
+        "funderIdentifier": "https://doi.org/10.$",
+        "funderIdentifierType": "Crossref Funder ID"
+      },
+      "funderName": "Caring Fan"
     }
   ],
   "identifier": {
diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml
index 95f15129b3f..8cf7efb1221 100644
--- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml
+++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml
@@ -42,6 +42,16 @@
       <awardNumber awardURI="https://moneypockets.example/millions">10000000</awardNumber>
       <awardTitle>because reasons</awardTitle>
     </fundingReference>
+    <fundingReference>
+      <funderName>Mx. Moneypockets</funderName>
+      <funderIdentifier funderIdentifierType="Crossref Funder ID">https://doi.org/10.$$$$</funderIdentifier>
+      <awardNumber awardURI="https://moneypockets.example/millions-more">2000000</awardNumber>
+      <awardTitle>because reasons!</awardTitle>
+    </fundingReference>
+    <fundingReference>
+      <funderName>Caring Fan</funderName>
+      <funderIdentifier funderIdentifierType="Crossref Funder ID">https://doi.org/10.$</funderIdentifier>
+    </fundingReference>
   </fundingReferences>
   <relatedIdentifiers>
     <relatedIdentifier relatedIdentifierType="URL" relationType="HasVersion">http://localhost:5000/w5ibb</relatedIdentifier>
diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle
index 5fa0dad1229..4a601897f11 100644
--- a/osf_tests/metadata/expected_metadata_files/project_full.turtle
+++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle
@@ -24,8 +24,10 @@
     owl:sameAs <https://doi.org/10.70102/FK2osf.io/w2ibb> ;
     dcat:accessService <http://localhost:5000> ;
     osf:contains <http://localhost:5000/w3ibb> ;
-    osf:funder <https://doi.org/10.$$$$> ;
-    osf:hasFunding <https://moneypockets.example/millions> ;
+    osf:funder <https://doi.org/10.$>,
+        <https://doi.org/10.$$$$> ;
+    osf:hasFunding <https://moneypockets.example/millions>,
+        <https://moneypockets.example/millions-more> ;
     osf:hostingInstitution <https://cos.io/> ;
     osf:supplements <http://localhost:5000/w4ibb> .
 
@@ -64,6 +66,12 @@
     dcterms:title "because reasons" ;
     osf:awardNumber "10000000" .
 
+<https://moneypockets.example/millions-more> a osf:FundingAward ;
+    dcterms:contributor <https://doi.org/10.$$$$> ;
+    dcterms:identifier "https://moneypockets.example/millions-more" ;
+    dcterms:title "because reasons!" ;
+    osf:awardNumber "2000000" .
+
 <https://cos.io/> a dcterms:Agent,
         foaf:Organization ;
     dcterms:identifier "https://cos.io/",
@@ -88,6 +96,10 @@
     dcterms:identifier "https://doi.org/10.$$$$" ;
     foaf:name "Mx. Moneypockets" .
 
+<https://doi.org/10.$> a dcterms:Agent ;
+    dcterms:identifier "https://doi.org/10.$" ;
+    foaf:name "Caring Fan" .
+
 <http://localhost:5000/w1ibb> a dcterms:Agent,
         foaf:Person ;
     dcterms:identifier "http://localhost:5000/w1ibb" ;
diff --git a/osf_tests/metadata/expected_metadata_files/registration_full.turtle b/osf_tests/metadata/expected_metadata_files/registration_full.turtle
index 9101e9f64b3..2fe48ce7fae 100644
--- a/osf_tests/metadata/expected_metadata_files/registration_full.turtle
+++ b/osf_tests/metadata/expected_metadata_files/registration_full.turtle
@@ -35,8 +35,10 @@
     dcterms:title "this is a project title!"@en ;
     dcterms:type <https://schema.datacite.org/meta/kernel-4.4/#Dataset> ;
     owl:sameAs <https://doi.org/10.70102/FK2osf.io/w2ibb> ;
-    osf:funder <https://doi.org/10.$$$$> ;
-    osf:hasFunding <https://moneypockets.example/millions> .
+    osf:funder <https://doi.org/10.$>,
+        <https://doi.org/10.$$$$> ;
+    osf:hasFunding <https://moneypockets.example/millions>,
+        <https://moneypockets.example/millions-more> .
 
 <https://moneypockets.example/millions> a osf:FundingAward ;
     dcterms:contributor <https://doi.org/10.$$$$> ;
@@ -44,6 +46,12 @@
     dcterms:title "because reasons" ;
     osf:awardNumber "10000000" .
 
+<https://moneypockets.example/millions-more> a osf:FundingAward ;
+    dcterms:contributor <https://doi.org/10.$$$$> ;
+    dcterms:identifier "https://moneypockets.example/millions-more" ;
+    dcterms:title "because reasons!" ;
+    osf:awardNumber "2000000" .
+
 <https://cos.io/> a dcterms:Agent,
         foaf:Organization ;
     dcterms:identifier "https://cos.io/",
@@ -63,6 +71,10 @@
     dcterms:identifier "https://doi.org/10.$$$$" ;
     foaf:name "Mx. Moneypockets" .
 
+<https://doi.org/10.$> a dcterms:Agent ;
+    dcterms:identifier "https://doi.org/10.$" ;
+    foaf:name "Caring Fan" .
+
 <http://localhost:5000/w1ibb> a dcterms:Agent,
         foaf:Person ;
     dcterms:identifier "http://localhost:5000/w1ibb" ;
diff --git a/osf_tests/metadata/test_serialized_metadata.py b/osf_tests/metadata/test_serialized_metadata.py
index ec9eb6b4af2..bc6b1387c60 100644
--- a/osf_tests/metadata/test_serialized_metadata.py
+++ b/osf_tests/metadata/test_serialized_metadata.py
@@ -222,13 +222,27 @@ def _setUp_full(self):
             'language': 'en',
             'resource_type_general': 'Dataset',
             'funding_info': [
-                {
+                {  # full funding reference:
                     'funder_name': 'Mx. Moneypockets',
                     'funder_identifier': 'https://doi.org/10.$$$$',
                     'funder_identifier_type': 'Crossref Funder ID',
                     'award_number': '10000000',
                     'award_uri': 'https://moneypockets.example/millions',
                     'award_title': 'because reasons',
+                }, {  # second funding award from the same funder:
+                    'funder_name': 'Mx. Moneypockets',
+                    'funder_identifier': 'https://doi.org/10.$$$$',
+                    'funder_identifier_type': 'Crossref Funder ID',
+                    'award_number': '2000000',
+                    'award_uri': 'https://moneypockets.example/millions-more',
+                    'award_title': 'because reasons!',
+                }, {  # no award info, just a funder:
+                    'funder_name': 'Caring Fan',
+                    'funder_identifier': 'https://doi.org/10.$',
+                    'funder_identifier_type': 'Crossref Funder ID',
+                    'award_number': '',
+                    'award_uri': '',
+                    'award_title': '',
                 },
             ],
         }, auth=self.user)

From 0c1ec2514e031c3b8bfd5da24e898f46c295a235 Mon Sep 17 00:00:00 2001
From: Abram Booth <aaxelb@users.noreply.github.com>
Date: Thu, 18 Jan 2024 11:23:10 -0500
Subject: [PATCH 19/23] [ENG-4335] subjects on projects (#10324)

* add root-level subjects list

* add NodeSerializer.subjects_acceptable

* simplify query
---
 api/nodes/serializers.py           | 6 ++++++
 api/subjects/urls.py               | 1 +
 api/subjects/views.py              | 4 +++-
 api_tests/base/test_serializers.py | 2 +-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/api/nodes/serializers.py b/api/nodes/serializers.py
index 918f156ce3d..b4b9e27a50b 100644
--- a/api/nodes/serializers.py
+++ b/api/nodes/serializers.py
@@ -537,6 +537,12 @@ class NodeSerializer(TaxonomizableSerializerMixin, JSONAPISerializer):
         related_view_kwargs={'node_id': '<_id>'},
     )
 
+    subjects_acceptable = HideIfRegistration(RelationshipField(
+        related_view='subjects:subject-list',
+        related_view_kwargs={},
+        read_only=True,
+    ))
+
     @property
     def subjects_related_view(self):
         # Overrides TaxonomizableSerializerMixin
diff --git a/api/subjects/urls.py b/api/subjects/urls.py
index e80581e179d..31b6d8d4505 100644
--- a/api/subjects/urls.py
+++ b/api/subjects/urls.py
@@ -5,6 +5,7 @@
 app_name = 'osf'
 
 urlpatterns = [
+    re_path(r'^$', views.SubjectList.as_view(), name=views.SubjectList.view_name),
     re_path(r'^(?P<subject_id>\w+)/$', views.SubjectDetail.as_view(), name=views.SubjectDetail.view_name),
     re_path(r'^(?P<subject_id>\w+)/children/$', views.SubjectChildrenList.as_view(), name=views.SubjectChildrenList.view_name),
 ]
diff --git a/api/subjects/views.py b/api/subjects/views.py
index 2b5ff3deae8..281f5e18019 100644
--- a/api/subjects/views.py
+++ b/api/subjects/views.py
@@ -108,7 +108,9 @@ class SubjectList(JSONAPIBaseView, generics.ListAPIView, ListFilterMixin):
     ordering = ('is_other', '-id',)
 
     def get_default_queryset(self):
-        return optimize_subject_query(Subject.objects.all())
+        return optimize_subject_query(
+            Subject.objects.filter(bepress_subject__isnull=True),
+        )
 
     def get_queryset(self):
         return self.get_queryset_from_request()
diff --git a/api_tests/base/test_serializers.py b/api_tests/base/test_serializers.py
index d523263b582..701bab085bd 100644
--- a/api_tests/base/test_serializers.py
+++ b/api_tests/base/test_serializers.py
@@ -196,7 +196,7 @@ def test_registration_serializer(self):
             'subjects',
             'wiki_enabled']
         # fields that do not appear on registrations
-        non_registration_fields = ['registrations', 'draft_registrations', 'templated_by_count', 'settings', 'storage', 'children', 'groups']
+        non_registration_fields = ['registrations', 'draft_registrations', 'templated_by_count', 'settings', 'storage', 'children', 'groups', 'subjects_acceptable']
 
         for field in NodeSerializer._declared_fields:
             assert_in(field, RegistrationSerializer._declared_fields)

From 754e0627c6d903314f13c3852b029c8e7fdf0ab0 Mon Sep 17 00:00:00 2001
From: Abram Booth <boothaa@gmail.com>
Date: Thu, 18 Jan 2024 11:25:35 -0500
Subject: [PATCH 20/23] [ENG-5011] Subject.get_semantic_iri: use the iri for a
 subject's bepress synonym only when it has the same text -- it was instead
 doing the opposite

---
 .../subjects/views/test_subject_detail.py     |  1 +
 osf/metadata/osf_gathering.py                 |  2 +-
 osf/models/subject.py                         |  2 +-
 .../preprint_basic.turtle                     | 24 +++++++++----------
 .../preprint_full.turtle                      | 24 +++++++++----------
 osf_tests/metadata/test_osf_gathering.py      | 10 ++++----
 tests/test_subjects.py                        | 16 +++++++++++++
 7 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/api_tests/subjects/views/test_subject_detail.py b/api_tests/subjects/views/test_subject_detail.py
index ca7bd592666..20a96c758ed 100644
--- a/api_tests/subjects/views/test_subject_detail.py
+++ b/api_tests/subjects/views/test_subject_detail.py
@@ -44,6 +44,7 @@ def test_get_subject_detail(self, app, url_subject_detail, subject, subject_chil
         assert 'parent' in data['relationships']
         assert data['relationships']['parent']['data'] is None
         assert data['relationships']['children']['links']['related']['meta']['count'] == 2
+        assert data['links']['iri'] == subject.get_semantic_iri()
 
         # Follow children link
         children_link = data['relationships']['children']['links']['related']['href']
diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py
index 268fb6cc733..fb8dc55b8f5 100644
--- a/osf/metadata/osf_gathering.py
+++ b/osf/metadata/osf_gathering.py
@@ -539,7 +539,7 @@ def _subject_triples(dbsubject, *, child_ref=None, related_ref=None):
     _is_bepress = (not dbsubject.bepress_subject)
     _is_distinct_from_bepress = (dbsubject.text != dbsubject.bepress_text)
     if _is_bepress or _is_distinct_from_bepress:
-        _subject_ref = rdflib.URIRef(dbsubject.absolute_api_v2_subject_url)
+        _subject_ref = rdflib.URIRef(dbsubject.get_semantic_iri())
         yield (DCTERMS.subject, _subject_ref)
         yield (_subject_ref, RDF.type, SKOS.Concept)
         yield (_subject_ref, SKOS.prefLabel, dbsubject.text)
diff --git a/osf/models/subject.py b/osf/models/subject.py
index f2b5bdeaa48..ff96d0bbb3e 100644
--- a/osf/models/subject.py
+++ b/osf/models/subject.py
@@ -57,7 +57,7 @@ def get_absolute_url(self):
     def get_semantic_iri(self) -> str:
         _identified_subject = (
             self.bepress_subject
-            if self.bepress_subject and (self.text != self.bepress_subject.text)
+            if self.bepress_subject and (self.text == self.bepress_subject.text)
             else self
         )
         return _identified_subject.absolute_api_v2_subject_url.rstrip('/')
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle
index a7e0e717a26..1218c1054a7 100644
--- a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle
+++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle
@@ -15,10 +15,10 @@
         "https://doi.org/11.pp/FK2osf.io/w4ibb" ;
     dcterms:modified "2123-05-04" ;
     dcterms:publisher <http://localhost:5000/preprints/preprovi> ;
-    dcterms:subject <http://localhost:8000/v2/subjects/subjwibb/>,
-        <http://localhost:8000/v2/subjects/subjwibbb/>,
-        <http://localhost:8000/v2/subjects/subjwobb/>,
-        <http://localhost:8000/v2/subjects/subjwobbb/> ;
+    dcterms:subject <http://localhost:8000/v2/subjects/subjwibb>,
+        <http://localhost:8000/v2/subjects/subjwibbb>,
+        <http://localhost:8000/v2/subjects/subjwobb>,
+        <http://localhost:8000/v2/subjects/subjwobbb> ;
     dcterms:title "this is a preprint title!" ;
     dcterms:type <https://schema.datacite.org/meta/kernel-4.4/#Preprint> ;
     owl:sameAs <https://doi.org/11.pp/FK2osf.io/w4ibb> ;
@@ -50,11 +50,11 @@
     dcterms:title "this is a project title!" ;
     owl:sameAs <https://doi.org/10.70102/FK2osf.io/w2ibb> .
 
-<http://localhost:8000/v2/subjects/subjwobb/> a skos:Concept ;
-    skos:broader <http://localhost:8000/v2/subjects/subjwibb/> ;
+<http://localhost:8000/v2/subjects/subjwobb> a skos:Concept ;
+    skos:broader <http://localhost:8000/v2/subjects/subjwibb> ;
     skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
     skos:prefLabel "wobble" ;
-    skos:related <http://localhost:8000/v2/subjects/subjwobbb/> .
+    skos:related <http://localhost:8000/v2/subjects/subjwobbb> .
 
 <https://cos.io/> a dcterms:Agent,
         foaf:Organization ;
@@ -75,20 +75,20 @@
 <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> a skos:ConceptScheme ;
     dcterms:title "preprovi" .
 
-<http://localhost:8000/v2/subjects/subjwibb/> a skos:Concept ;
+<http://localhost:8000/v2/subjects/subjwibb> a skos:Concept ;
     skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
     skos:prefLabel "wibble" ;
-    skos:related <http://localhost:8000/v2/subjects/subjwibbb/> .
+    skos:related <http://localhost:8000/v2/subjects/subjwibbb> .
 
-<http://localhost:8000/v2/subjects/subjwobbb/> a skos:Concept ;
-    skos:broader <http://localhost:8000/v2/subjects/subjwibbb/> ;
+<http://localhost:8000/v2/subjects/subjwobbb> a skos:Concept ;
+    skos:broader <http://localhost:8000/v2/subjects/subjwibbb> ;
     skos:inScheme <https://bepress.com/reference_guide_dc/disciplines/> ;
     skos:prefLabel "wobbble" .
 
 <https://bepress.com/reference_guide_dc/disciplines/> a skos:ConceptScheme ;
     dcterms:title "bepress Digital Commons Three-Tiered Taxonomy" .
 
-<http://localhost:8000/v2/subjects/subjwibbb/> a skos:Concept ;
+<http://localhost:8000/v2/subjects/subjwibbb> a skos:Concept ;
     skos:inScheme <https://bepress.com/reference_guide_dc/disciplines/> ;
     skos:prefLabel "wibbble" .
 
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle
index 59943430882..9f23bd22895 100644
--- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle
+++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle
@@ -15,10 +15,10 @@
         "https://doi.org/11.pp/FK2osf.io/w4ibb" ;
     dcterms:modified "2123-05-04" ;
     dcterms:publisher <http://localhost:5000/preprints/preprovi> ;
-    dcterms:subject <http://localhost:8000/v2/subjects/subjwibb/>,
-        <http://localhost:8000/v2/subjects/subjwibbb/>,
-        <http://localhost:8000/v2/subjects/subjwobb/>,
-        <http://localhost:8000/v2/subjects/subjwobbb/> ;
+    dcterms:subject <http://localhost:8000/v2/subjects/subjwibb>,
+        <http://localhost:8000/v2/subjects/subjwibbb>,
+        <http://localhost:8000/v2/subjects/subjwobb>,
+        <http://localhost:8000/v2/subjects/subjwobbb> ;
     dcterms:title "this is a preprint title!" ;
     dcterms:type <https://schema.datacite.org/meta/kernel-4.4/#Preprint> ;
     owl:sameAs <https://doi.org/11.pp/FK2osf.io/w4ibb> ;
@@ -53,11 +53,11 @@
     osf:funder <https://doi.org/10.$$$$> ;
     osf:hasFunding <https://moneypockets.example/millions> .
 
-<http://localhost:8000/v2/subjects/subjwobb/> a skos:Concept ;
-    skos:broader <http://localhost:8000/v2/subjects/subjwibb/> ;
+<http://localhost:8000/v2/subjects/subjwobb> a skos:Concept ;
+    skos:broader <http://localhost:8000/v2/subjects/subjwibb> ;
     skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
     skos:prefLabel "wobble" ;
-    skos:related <http://localhost:8000/v2/subjects/subjwobbb/> .
+    skos:related <http://localhost:8000/v2/subjects/subjwobbb> .
 
 <https://cos.io/> a dcterms:Agent,
         foaf:Organization ;
@@ -89,13 +89,13 @@
 <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> a skos:ConceptScheme ;
     dcterms:title "preprovi" .
 
-<http://localhost:8000/v2/subjects/subjwibb/> a skos:Concept ;
+<http://localhost:8000/v2/subjects/subjwibb> a skos:Concept ;
     skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
     skos:prefLabel "wibble" ;
-    skos:related <http://localhost:8000/v2/subjects/subjwibbb/> .
+    skos:related <http://localhost:8000/v2/subjects/subjwibbb> .
 
-<http://localhost:8000/v2/subjects/subjwobbb/> a skos:Concept ;
-    skos:broader <http://localhost:8000/v2/subjects/subjwibbb/> ;
+<http://localhost:8000/v2/subjects/subjwobbb> a skos:Concept ;
+    skos:broader <http://localhost:8000/v2/subjects/subjwibbb> ;
     skos:inScheme <https://bepress.com/reference_guide_dc/disciplines/> ;
     skos:prefLabel "wobbble" .
 
@@ -106,7 +106,7 @@
     dcterms:identifier "https://doi.org/10.$$$$" ;
     foaf:name "Mx. Moneypockets" .
 
-<http://localhost:8000/v2/subjects/subjwibbb/> a skos:Concept ;
+<http://localhost:8000/v2/subjects/subjwibbb> a skos:Concept ;
     skos:inScheme <https://bepress.com/reference_guide_dc/disciplines/> ;
     skos:prefLabel "wibbble" .
 
diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py
index a0b95d51d94..54fe1fdc302 100644
--- a/osf_tests/metadata/test_osf_gathering.py
+++ b/osf_tests/metadata/test_osf_gathering.py
@@ -348,7 +348,7 @@ def test_gather_subjects(self):
         assert_triples(osf_gathering.gather_subjects(self.projectfocus), set())
         _bloo_subject = factories.SubjectFactory(text='Bloomy', provider=_osf_provider)
         self.project.set_subjects([[_bloo_subject._id]], auth=Auth(self.user__admin))
-        _bloo_iri = URIRef(_bloo_subject.absolute_api_v2_subject_url)
+        _bloo_iri = URIRef(_bloo_subject.get_semantic_iri())
         _bepress_iri = rdflib.URIRef('https://bepress.com/reference_guide_dc/disciplines/')
         assert_triples(osf_gathering.gather_subjects(self.projectfocus), {
             (self.projectfocus.iri, DCTERMS.subject, _bloo_iri),
@@ -368,10 +368,10 @@ def test_gather_subjects(self):
             [_customchild_subj._id, _customparent_subj._id],
             [_bloo_subject._id],
         ], auth=Auth(self.user__admin))
-        _parent_iri = URIRef(_parent_subj.absolute_api_v2_subject_url)
-        _child_iri = URIRef(_child_subj.absolute_api_v2_subject_url)
-        _customparent_iri = URIRef(_customparent_subj.absolute_api_v2_subject_url)
-        _customchild_iri = URIRef(_customchild_subj.absolute_api_v2_subject_url)
+        _parent_iri = URIRef(_parent_subj.get_semantic_iri())
+        _child_iri = URIRef(_child_subj.get_semantic_iri())
+        _customparent_iri = URIRef(_customparent_subj.get_semantic_iri())
+        _customchild_iri = URIRef(_customchild_subj.get_semantic_iri())
         _customtax_iri = URIRef(f'{self.registration.provider.absolute_api_v2_url}subjects/')
         assert_triples(osf_gathering.gather_subjects(self.registrationfocus), {
             (self.registrationfocus.iri, DCTERMS.subject, _bloo_iri),
diff --git a/tests/test_subjects.py b/tests/test_subjects.py
index 68a39c2c31d..f3f246a7f7a 100644
--- a/tests/test_subjects.py
+++ b/tests/test_subjects.py
@@ -175,3 +175,19 @@ def test_path(self):
         assert self.bepress_child.path == 'bepress|BePress Text|BePress Child'
         assert self.other_subj.path == 'asdf|Other Text'
         assert self.other_child.path == 'asdf|Other Text|Other Child'
+
+    def test_get_semantic_iri(self):
+        _bepress_iri = self.bepress_subj.get_semantic_iri()
+        _other_iri = self.other_subj.get_semantic_iri()
+        assert _bepress_iri != _other_iri
+        assert _bepress_iri.endswith(self.bepress_subj._id)
+        assert _other_iri.endswith(self.other_subj._id)
+
+        # if a subject has the exact same text as its bepress synonym, expect the bepress subject iri
+        _sametext_subj = SubjectFactory(
+            text=self.bepress_subj.text,
+            bepress_subject=self.bepress_subj,
+            provider=self.asdf_provider,
+        )
+        _sametext_iri = _sametext_subj.get_semantic_iri()
+        assert _bepress_iri == _sametext_iri

From e6b0e29a7582d795695726ad386b9c46d101f2bb Mon Sep 17 00:00:00 2001
From: John Tordoff <Johnetordoff@users.noreply.github.com>
Date: Tue, 23 Jan 2024 12:58:14 -0500
Subject: [PATCH 21/23] [ENG-3696] Make gotoFileEvents always open in new tab
 (#10482)

Co-authored-by: John Tordoff <>
---
 website/static/js/fangorn.js | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/website/static/js/fangorn.js b/website/static/js/fangorn.js
index f346789e938..ba4e159d716 100644
--- a/website/static/js/fangorn.js
+++ b/website/static/js/fangorn.js
@@ -85,8 +85,6 @@ var OPERATIONS = {
     }
 };
 
-// Cross browser key codes for the Command key
-var COMMAND_KEYS = [224, 17, 91, 93];
 var ESCAPE_KEY = 27;
 var ENTER_KEY = 13;
 
@@ -1573,12 +1571,7 @@ function gotoFileEvent (item, toUrl) {
             }
         }
     }
-
-    if (COMMAND_KEYS.indexOf(tb.pressedKey) !== -1) {
-        window.open(fileurl, '_blank');
-    } else {
-        window.open(fileurl, '_self');
-    }
+    window.open(fileurl, '_blank');
 }
 
 /**

From 57547043c3ef530572f8ae9553d4151630996313 Mon Sep 17 00:00:00 2001
From: Mariia Lychko <95318818+ly-mariia@users.noreply.github.com>
Date: Thu, 25 Jan 2024 20:49:38 +0200
Subject: [PATCH 22/23] [ENG-5208]: Fix python bootstrapping in docker build
 (#10518)

## Purpose

Fixed issue with Python installation.

## Changes

Updated the Dockerfile step to bootstrap pip with `python3 -m ensurepip` instead of downloading `get-pip.py` with curl.
---
 Dockerfile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e3ef594b54a..ace0492965a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,10 +26,8 @@ RUN apk add --no-cache --virtual .run-deps \
     libevent \
     && yarn global add bower
 
-RUN apk add curl
-RUN curl https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py
-RUN python3 get-pip.py --force-reinstall pip==21.0
-RUN apk del curl
+RUN python3 -m ensurepip && \
+    pip3 install --upgrade pip==21.0
 
 WORKDIR /code
 

From 13f633db88ea7403ccd8ff0ec83f800f0b992360 Mon Sep 17 00:00:00 2001
From: Mariia Lychko <95318818+ly-mariia@users.noreply.github.com>
Date: Mon, 29 Jan 2024 19:45:14 +0200
Subject: [PATCH 23/23] ENG-5208 (#10522)

## Purpose

The `populate_fake_providers` command can fail with an error about missing data.

## Changes

Updated the instructions to add a new command in case of an error during the 'Populate preprint, registration, and collection providers' step.
---
 README-docker-compose.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README-docker-compose.md b/README-docker-compose.md
index 7cb95efd75f..2bf9df3d26f 100644
--- a/README-docker-compose.md
+++ b/README-docker-compose.md
@@ -229,6 +229,8 @@
 - Populate preprint, registration, and collection providers:
   - After resetting your database or with a new install, the required providers and subjects will be created automatically **when you run migrations.** To create more:
     - `docker-compose run --rm web python3 manage.py populate_fake_providers`
+    - _NOTE: If you encounter an error about missing data when running the `populate_fake_providers` command, fix it by running the `update_taxonomies` script:_
+      - `docker-compose run --rm web python3 -m scripts.update_taxonomies`
 - Populate citation styles
   - Needed for api v2 citation style rendering.
     - `docker-compose run --rm web python3 -m scripts.parse_citation_styles`