From ada8f8cc7578670b08dd8cd1dca1c67a707fd29b Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 13 Nov 2023 11:42:16 -0500 Subject: [PATCH 01/23] Get justification from property - Avoids AttributeError for component Registrations --- osf/external/internet_archive/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osf/external/internet_archive/tasks.py b/osf/external/internet_archive/tasks.py index c4247343196..ea2757167af 100644 --- a/osf/external/internet_archive/tasks.py +++ b/osf/external/internet_archive/tasks.py @@ -38,7 +38,7 @@ def update_ia_metadata(node, data=None): data[Registration.IA_MAPPED_NAMES.get(key, key)] = data.pop(key) if node.moderation_state == RegistrationModerationStates.WITHDRAWN.db_name: - data['withdrawal_justification'] = node.retraction.justification + data['withdrawal_justification'] = node.withdrawal_justification if getattr(node, 'ia_url', None) and node.is_public: task = get_task_from_postcommit_queue( From d8ab85a4efa05f46aa8751fb4b5e91a9e7da88f0 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Tue, 14 Nov 2023 15:37:22 -0500 Subject: [PATCH 02/23] [ENG-2714] Turn on institutional affiliation by default for registrations (#10466) * fix draft registrations affiliated institution default value for no-project registrations --- api/nodes/serializers.py | 10 ---- .../views/test_draft_registration_list.py | 57 +++++++++++++++---- osf/models/registrations.py | 18 +++--- 3 files changed, 56 insertions(+), 29 deletions(-) diff --git a/api/nodes/serializers.py b/api/nodes/serializers.py index aaff14ac1b9..918f156ce3d 100644 --- a/api/nodes/serializers.py +++ b/api/nodes/serializers.py @@ -1557,12 +1557,6 @@ class DraftRegistrationLegacySerializer(JSONAPISerializer): 'html': 'get_absolute_url', }) - affiliate_user_institutions = ser.BooleanField( - required=False, - default=True, - help_text='Specify whether user institution affiliations should be copied over to the draft registration.', - ) - def 
get_absolute_url(self, obj): return obj.absolute_url @@ -1603,7 +1597,6 @@ def create(self, validated_data): registration_responses = validated_data.pop('registration_responses', None) schema = validated_data.pop('registration_schema') provider = validated_data.pop('provider', None) - affiliate_user_institutions = validated_data.pop('affiliate_user_institutions', True) self.enforce_metadata_or_registration_responses(metadata, registration_responses) @@ -1618,9 +1611,6 @@ def create(self, validated_data): if registration_responses: self.update_registration_responses(draft, registration_responses) - if affiliate_user_institutions and draft.branched_from_type == DraftNode: - draft.affiliated_institutions.set(draft.creator.affiliated_institutions.all()) - return draft class Meta: diff --git a/api_tests/draft_registrations/views/test_draft_registration_list.py b/api_tests/draft_registrations/views/test_draft_registration_list.py index c317d8836fc..4c4a7148d55 100644 --- a/api_tests/draft_registrations/views/test_draft_registration_list.py +++ b/api_tests/draft_registrations/views/test_draft_registration_list.py @@ -9,7 +9,7 @@ from api.base.settings.defaults import API_BASE from osf.migrations import ensure_invisible_and_inactive_schema -from osf.models import DraftRegistration, NodeLicense, RegistrationProvider, Institution +from osf.models import DraftRegistration, NodeLicense, RegistrationProvider from osf_tests.factories import ( RegistrationFactory, CollectionFactory, @@ -260,27 +260,64 @@ def test_create_project_based_draft_does_not_email_initiator( assert not mock_send_mail.called - def test_affiliated_institutions_are_copied_from_user( - self, app, user, url_draft_registrations, payload): + def test_affiliated_institutions_are_copied_from_node_no_institutions(self, app, user, url_draft_registrations, payload): + """ + Draft registrations that are based on projects get those project's user institutional affiliation, + those "no-project" registrations inherit the 
user's institutional affiliation. + + This tests a scenario where a user bases a registration on a node without affiliations, and so the + draft registration has no institutional affiliation from the user or the node. + """ project = ProjectFactory(is_public=True, creator=user) - InstitutionFactory() payload['data']['relationships']['branched_from']['data']['id'] = project._id res = app.post_json_api( - url_draft_registrations, payload, - auth=user.auth, expect_errors=True) + url_draft_registrations, + payload, + auth=user.auth, + ) assert res.status_code == 201 draft_registration = DraftRegistration.load(res.json['data']['id']) assert not draft_registration.affiliated_institutions.exists() + def test_affiliated_institutions_are_copied_from_node(self, app, user, url_draft_registrations, payload): + """ + Draft registrations that are based on projects get those project's user institutional affiliation, + those "no-project" registrations inherit the user's institutional affiliation. + + This tests a scenario where a user bases their registration on a project that has a current institutional + affiliation which is copied over to the draft registrations. 
+ """ + institution = InstitutionFactory() + project = ProjectFactory(is_public=True, creator=user) + project.affiliated_institutions.add(institution) payload['data']['relationships']['branched_from']['data']['id'] = project._id - user.add_multiple_institutions_non_sso(Institution.objects.filter(id__lt=3)) res = app.post_json_api( - url_draft_registrations, payload, - auth=user.auth, expect_errors=True) + url_draft_registrations, + payload, + auth=user.auth, + ) + assert res.status_code == 201 + draft_registration = DraftRegistration.load(res.json['data']['id']) + assert list(draft_registration.affiliated_institutions.all()) == list(project.affiliated_institutions.all()) + + def test_affiliated_institutions_are_copied_from_user(self, app, user, url_draft_registrations, payload): + """ + Draft registrations that are based on projects get those project's user institutional affiliation, + those "no-project" registrations inherit the user's institutional affiliation. + """ + institution = InstitutionFactory() + user.add_or_update_affiliated_institution(institution) + + del payload['data']['relationships']['branched_from'] + res = app.post_json_api( + url_draft_registrations, + payload, + auth=user.auth, + ) assert res.status_code == 201 draft_registration = DraftRegistration.load(res.json['data']['id']) - assert not draft_registration.affiliated_institutions.all() == user.get_affiliated_institutions() + assert list(draft_registration.affiliated_institutions.all()) == list(user.get_affiliated_institutions()) class TestDraftRegistrationCreateWithoutNode(TestDraftRegistrationCreate): diff --git a/osf/models/registrations.py b/osf/models/registrations.py index ceff62c98a2..92e1f32bcfb 100644 --- a/osf/models/registrations.py +++ b/osf/models/registrations.py @@ -1262,31 +1262,31 @@ def create_from_node(cls, user, schema, node=None, data=None, provider=None): provider.validate_schema(schema) excluded_attributes = [] - if not node: - # If no node provided, a DraftNode is 
created for you - node = DraftNode.objects.create(creator=user, title=settings.DEFAULT_DRAFT_NODE_TITLE) - # Force the user to add their own title for no-project + if node: + branched_from = node + else: + branched_from = DraftNode.objects.create(creator=user, title=settings.DEFAULT_DRAFT_NODE_TITLE) excluded_attributes.append('title') - if not (isinstance(node, Node) or isinstance(node, DraftNode)): + if not isinstance(branched_from, (Node, DraftNode)): raise DraftRegistrationStateError() draft = cls( initiator=user, - branched_from=node, + branched_from=branched_from, registration_schema=schema, registration_metadata=data or {}, provider=provider, ) draft.save() draft.copy_editable_fields( - node, - save=True, + branched_from, excluded_attributes=excluded_attributes ) draft.update(data, auth=Auth(user)) - if node.type == 'osf.draftnode': + if not node: + draft.affiliated_institutions.add(*draft.creator.get_affiliated_institutions()) initiator_permissions = draft.contributor_set.get(user=user).permission signals.contributor_added.send( draft, From 314105711230cdb66de1be9dbc6f3da697681249 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Wed, 15 Nov 2023 09:06:08 -0500 Subject: [PATCH 03/23] [ENG-1058] Update GDPR for draftregistrations and draftnodes (#10462) * update gdpr to delete draftregistrations and draftnode --------- Co-authored-by: John Tordoff <> --- osf/models/user.py | 149 ++++++++++++++++++++++++++++++----------- osf_tests/test_user.py | 46 ++++++++++++- 2 files changed, 152 insertions(+), 43 deletions(-) diff --git a/osf/models/user.py b/osf/models/user.py index ba8a6fb59f9..a16a617cca0 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -1896,69 +1896,138 @@ def check_spam(self, saved_fields, request_headers): return is_spam + def _validate_admin_status_for_gdpr_delete(self, resource): + """ + Ensure that deleting the user won't leave the node without an admin. 
+ + Args: + - resource: An instance of a resource, probably AbstractNode or DraftRegistration. + """ + alternate_admins = OSFUser.objects.filter( + groups__name=resource.format_group(ADMIN), + is_active=True + ).exclude(id=self.id).exists() + + if not alternate_admins: + raise UserStateError( + f'You cannot delete {resource.__class__.__name__} {resource._id} because it would be ' + f'a {resource.__class__.__name__} with contributors, but with no admin.' + ) + + def _validate_addons_for_gdpr_delete(self, resource): + """ + Ensure that the user's external accounts on the node won't cause issues upon deletion. + + Args: + - resource: An instance of a resource, probably AbstractNode or DraftRegistration. + """ + for addon in resource.get_addons(): + if addon.short_name not in ('osfstorage', 'wiki') and \ + addon.user_settings and addon.user_settings.owner.id == self.id: + raise UserStateError( + f'You cannot delete this user because they have an external account for {addon.short_name} ' + f'attached to {resource.__class__.__name__} {resource._id}, which has other contributors.' + ) + def gdpr_delete(self): """ - This function does not remove the user object reference from our database, but it does disable the account and - remove identifying in a manner compliant with GDPR guidelines. + Complies with GDPR guidelines by disabling the account and removing identifying information. 
+ """ + + # Check if user has something intentionally public, like preprints or registrations + self._validate_no_public_entities() + + # Check if user has any non-registration AbstractNodes or DraftRegistrations that they might still share with + # other contributors + self._validate_and_remove_resource_for_gdpr_delete( + self.nodes.exclude(type='osf.registration'), # Includes DraftNodes and other typed nodes + hard_delete=False + ) + self._validate_and_remove_resource_for_gdpr_delete( + self.draft_registrations.all(), + hard_delete=True + ) - Follows the protocol described in - https://openscience.atlassian.net/wiki/spaces/PRODUC/pages/482803755/GDPR-Related+protocols + # A Potentially out of date check that user isn't a member of a OSFGroup + self._validate_osf_groups() + # Finally delete the user's info. + self._clear_identifying_information() + + def _validate_no_public_entities(self): + """ + Ensure that the user doesn't have any public facing resources like Registrations or Preprints """ - from osf.models import Preprint, AbstractNode + from osf.models import Preprint - user_nodes = self.nodes.exclude(is_deleted=True) - # Validates the user isn't trying to delete things they deliberately made public. - if user_nodes.filter(type='osf.registration').exists(): + if self.nodes.filter(deleted__isnull=True, type='osf.registration').exists(): raise UserStateError('You cannot delete this user because they have one or more registrations.') if Preprint.objects.filter(_contributors=self, ever_public=True, deleted__isnull=True).exists(): raise UserStateError('You cannot delete this user because they have one or more preprints.') - # Validates that the user isn't trying to delete things nodes they are the only admin on. 
- personal_nodes = ( - AbstractNode.objects.annotate(contrib_count=Count('_contributors')) - .filter(contrib_count__lte=1) - .filter(contributor__user=self) - .exclude(is_deleted=True) - ) - shared_nodes = user_nodes.exclude(id__in=personal_nodes.values_list('id')) + def _validate_and_remove_resource_for_gdpr_delete(self, resources, hard_delete): + """ + This method ensures a user's resources are properly deleted of using during GDPR delete request. - for node in shared_nodes.exclude(type__in=['osf.quickfilesnode', 'osf.draftnode']): - alternate_admins = OSFUser.objects.filter(groups__name=node.format_group(ADMIN)).filter(is_active=True).exclude(id=self.id) - if not alternate_admins: - raise UserStateError( - 'You cannot delete node {} because it would be a node with contributors, but with no admin.'.format( - node._id)) + Args: + - resources: A queryset of resources probably of AbstractNode or DraftRegistration. + - hard_delete: A boolean indicating whether the resource should be permentently deleted or just marked as such. 
+ """ + model = resources.query.model - for addon in node.get_addons(): - if addon.short_name not in ('osfstorage', 'wiki') and addon.user_settings and addon.user_settings.owner.id == self.id: - raise UserStateError('You cannot delete this user because they ' - 'have an external account for {} attached to Node {}, ' - 'which has other contributors.'.format(addon.short_name, node._id)) + filter_deleted = {} + if not hard_delete: + filter_deleted = {'deleted__isnull': True} - for group in self.osf_groups: - if not group.managers.exclude(id=self.id).filter(is_registered=True).exists() and group.members.exclude(id=self.id).exists(): - raise UserStateError('You cannot delete this user because they are the only registered manager of OSFGroup {} that contains other members.'.format(group._id)) + personal_resources = model.objects.annotate( + contrib_count=Count('_contributors') + ).filter( + contrib_count__lte=1, + _contributors=self + ).filter( + **filter_deleted + ) - for node in shared_nodes.all(): - logger.info('Removing {self._id} as a contributor to node (pk:{node_id})...'.format(self=self, node_id=node.pk)) - node.remove_contributor(self, auth=Auth(self), log=False) + shared_resources = resources.exclude(id__in=personal_resources.values_list('id')) + for node in shared_resources: + self._validate_admin_status_for_gdpr_delete(node) + self._validate_addons_for_gdpr_delete(node) - # This is doesn't to remove identifying info, but ensures other users can't see the deleted user's profile etc. - self.deactivate_account() + for resource in shared_resources.all(): + logger.info(f'Removing {self._id} as a contributor to {resource.__class__.__name__} (pk:{resource.pk})...') + resource.remove_contributor(self, auth=Auth(self), log=False) - # delete all personal nodes (one contributor), bookmarks, quickfiles etc. 
- for node in personal_nodes.all(): - logger.info('Soft-deleting node (pk: {node_id})...'.format(node_id=node.pk)) - node.remove_node(auth=Auth(self)) + # Delete all personal entities + for entity in personal_resources.all(): + if hard_delete: + logger.info(f'Hard-deleting {entity.__class__.__name__} (pk: {entity.pk})...') + entity.delete() + else: + logger.info(f'Soft-deleting {entity.__class__.__name__} (pk: {entity.pk})...') + entity.remove_node(auth=Auth(self)) + def _validate_osf_groups(self): + """ + This method ensures a user isn't in an OSFGroup before deleting them.. + """ for group in self.osf_groups: - if len(group.managers) == 1 and group.managers[0] == self: + if not group.managers.exclude(id=self.id).filter(is_registered=True).exists() and group.members.exclude( + id=self.id).exists(): + raise UserStateError( + f'You cannot delete this user because they are the only registered manager of OSFGroup {group._id} that contains other members.') + elif len(group.managers) == 1 and group.managers[0] == self: group.remove_group() else: group.remove_member(self) + def _clear_identifying_information(self): + ''' + This method ensures a user's info is deleted during a GDPR delete + ''' + # This doesn't remove identifying info, but ensures other users can't see the deleted user's profile etc. 
+ self.deactivate_account() + logger.info('Clearing identifying information...') # This removes identifying info # hard-delete all emails associated with the user diff --git a/osf_tests/test_user.py b/osf_tests/test_user.py index 0c67bc4ed43..53e717df2d1 100644 --- a/osf_tests/test_user.py +++ b/osf_tests/test_user.py @@ -37,6 +37,8 @@ NotableDomain, PreprintContributor, DraftRegistrationContributor, + DraftRegistration, + DraftNode, UserSessionMap, ) from osf.models.institution_affiliation import get_user_by_institution_identity @@ -66,7 +68,8 @@ UnregUserFactory, UserFactory, RegistrationFactory, - PreprintFactory + PreprintFactory, + DraftNodeFactory ) from tests.base import OsfTestCase from tests.utils import run_celery_tasks @@ -2387,6 +2390,12 @@ def registration(self, user): registration.save() return registration + @pytest.fixture() + def registration_with_draft_node(self, user, registration): + registration.branched_from = DraftNodeFactory(creator=user) + registration.save() + return registration + @pytest.fixture() def project(self, user): project = ProjectFactory(creator=user) @@ -2433,11 +2442,42 @@ def test_can_gdpr_delete_personal_nodes(self, user): user.gdpr_delete() assert user.nodes.exclude(is_deleted=True).count() == 0 + def test_can_gdpr_delete_personal_registrations(self, user, registration_with_draft_node): + assert DraftRegistration.objects.all().count() == 1 + assert DraftNode.objects.all().count() == 1 + + with pytest.raises(UserStateError) as exc_info: + user.gdpr_delete() + + assert exc_info.value.args[0] == 'You cannot delete this user because they have one or more registrations.' 
+ assert DraftRegistration.objects.all().count() == 1 + assert DraftNode.objects.all().count() == 1 + + registration_with_draft_node.remove_node(Auth(user)) + assert DraftRegistration.objects.all().count() == 1 + assert DraftNode.objects.all().count() == 1 + user.gdpr_delete() + + # DraftNodes soft-deleted, DraftRegistions hard-deleted + assert user.nodes.exclude(is_deleted=True).count() == 0 + assert DraftRegistration.objects.all().count() == 0 + def test_can_gdpr_delete_shared_nodes_with_multiple_admins(self, user, project_with_two_admins): user.gdpr_delete() assert user.nodes.all().count() == 0 + def test_can_gdpr_delete_shared_draft_registration_with_multiple_admins(self, user, registration): + other_admin = AuthUserFactory() + draft_registrations = user.draft_registrations.get() + draft_registrations.add_contributor(other_admin, permissions='admin') + assert draft_registrations.contributors.all().count() == 2 + registration.delete_registration_tree(save=True) + + user.gdpr_delete() + assert draft_registrations.contributors.get() == other_admin + assert user.nodes.filter(deleted__isnull=True).count() == 0 + def test_cant_gdpr_delete_registrations(self, user, registration): with pytest.raises(UserStateError) as exc_info: @@ -2457,8 +2497,8 @@ def test_cant_gdpr_delete_shared_node_if_only_admin(self, user, project_user_is_ with pytest.raises(UserStateError) as exc_info: user.gdpr_delete() - assert exc_info.value.args[0] == 'You cannot delete node {} because it would' \ - ' be a node with contributors, but with no admin.'.format(project_user_is_only_admin._id) + assert exc_info.value.args[0] == 'You cannot delete Node {} because it would' \ + ' be a Node with contributors, but with no admin.'.format(project_user_is_only_admin._id) def test_cant_gdpr_delete_osf_group_if_only_manager(self, user): group = OSFGroupFactory(name='My Group', creator=user) From df34b8c6b1a792cd9c570b92948df9a47ad3c4ac Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Wed, 22 Nov 2023 
15:50:49 -0500 Subject: [PATCH 04/23] Update ReviewActionListCreate write scopes --- api/actions/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/actions/views.py b/api/actions/views.py index 2c7276873fa..be0f47a0a8d 100644 --- a/api/actions/views.py +++ b/api/actions/views.py @@ -159,7 +159,7 @@ class ReviewActionListCreate(JSONAPIBaseView, generics.ListCreateAPIView, ListFi ) required_read_scopes = [CoreScopes.ACTIONS_READ] - required_write_scopes = [CoreScopes.NULL] + required_write_scopes = [CoreScopes.ACTIONS_WRITE] parser_classes = (JSONAPIMultipleRelationshipsParser, JSONAPIMultipleRelationshipsParserForRegularJSON,) serializer_class = ReviewActionSerializer From 207de84f9b8a3361f62f2662ac7f5755f732793a Mon Sep 17 00:00:00 2001 From: Yuhuai Liu Date: Wed, 1 Nov 2023 10:46:01 -0400 Subject: [PATCH 05/23] Routing changes for Preprints Modernization - Phase 1 --- api/base/serializers.py | 3 +++ website/routes.py | 13 ++++++++++--- website/views.py | 4 +++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/api/base/serializers.py b/api/base/serializers.py index c21c76dc394..c8cdff04c7e 100644 --- a/api/base/serializers.py +++ b/api/base/serializers.py @@ -915,6 +915,9 @@ def to_representation(self, value): or related_class.view_name == 'registration-citation': related_id = resolved_url.kwargs['node_id'] related_type = 'citation' + elif related_class.view_name == 'preprint-citation': + related_id = resolved_url.kwargs['preprint_id'] + related_type = 'citation' elif related_type in ('preprint_providers', 'preprint-providers', 'registration-providers'): related_id = resolved_url.kwargs['provider_id'] elif related_type in ('registrations', 'draft_nodes'): diff --git a/website/routes.py b/website/routes.py index 6634c6a97f5..787fe2e367b 100644 --- a/website/routes.py +++ b/website/routes.py @@ -260,9 +260,16 @@ def ember_app(path=None): for k in EXTERNAL_EMBER_APPS.keys(): if request.path.strip('/').startswith(k): ember_app 
= EXTERNAL_EMBER_APPS[k] - if k == 'preprints' and request.path.rstrip('/').endswith('discover'): - # Route preprint discover pages to new search page in EOW - ember_app = EXTERNAL_EMBER_APPS.get('ember_osf_web', False) or ember_app + if k == 'preprints': + if request.path.rstrip('/').endswith('edit'): + # Route preprint edit pages to old preprint app + ember_app = EXTERNAL_EMBER_APPS.get('preprints', False) or ember_app + elif request.path.rstrip('/').endswith('submit'): + # Route preprint submit pages to old preprint app + ember_app = EXTERNAL_EMBER_APPS.get('preprints', False) or ember_app + else: + # Route other preprint pages to EOW + ember_app = EXTERNAL_EMBER_APPS.get('ember_osf_web', False) or ember_app break if not ember_app: diff --git a/website/views.py b/website/views.py index c3051861791..a8f70421069 100644 --- a/website/views.py +++ b/website/views.py @@ -332,7 +332,9 @@ def resolve_guid(guid, suffix=None): if isinstance(resource, Preprint): if resource.provider.domain_redirect_enabled: return redirect(resource.absolute_url, http_status.HTTP_301_MOVED_PERMANENTLY) - return stream_emberapp(EXTERNAL_EMBER_APPS['preprints']['server'], preprints_dir) + if clean_suffix.endswith('edit'): + return stream_emberapp(EXTERNAL_EMBER_APPS['preprints']['server'], preprints_dir) + return use_ember_app() elif isinstance(resource, Registration) and (clean_suffix in ('', 'comments', 'links', 'components', 'resources',)) and waffle.flag_is_active(request, features.EMBER_REGISTRIES_DETAIL_PAGE): return use_ember_app() From b69ab6528810de5afe47aebafd53aed7a941b4f5 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Thu, 30 Nov 2023 13:03:17 -0500 Subject: [PATCH 06/23] Fix ResolveGuid tests --- tests/test_views.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tests/test_views.py b/tests/test_views.py index 920d46a0d07..f1a8c1966ce 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -5018,28 +5018,22 @@ class 
TestResolveGuid(OsfTestCase): def setUp(self): super(TestResolveGuid, self).setUp() - def test_preprint_provider_without_domain(self): + @mock.patch('website.views.use_ember_app') + def test_preprint_provider_without_domain(self, mock_use_ember_app): provider = PreprintProviderFactory(domain='') preprint = PreprintFactory(provider=provider) url = web_url_for('resolve_guid', _guid=True, guid=preprint._id) res = self.app.get(url) - assert_equal(res.status_code, 200) - assert_equal( - res.request.path, - '/{}/'.format(preprint._id) - ) + mock_use_ember_app.assert_called_with() - def test_preprint_provider_with_domain_without_redirect(self): + @mock.patch('website.views.use_ember_app') + def test_preprint_provider_with_domain_without_redirect(self, mock_use_ember_app): domain = 'https://test.com/' provider = PreprintProviderFactory(_id='test', domain=domain, domain_redirect_enabled=False) preprint = PreprintFactory(provider=provider) url = web_url_for('resolve_guid', _guid=True, guid=preprint._id) res = self.app.get(url) - assert_equal(res.status_code, 200) - assert_equal( - res.request.path, - '/{}/'.format(preprint._id) - ) + mock_use_ember_app.assert_called_with() def test_preprint_provider_with_domain_with_redirect(self): domain = 'https://test.com/' @@ -5062,16 +5056,13 @@ def test_preprint_provider_with_domain_with_redirect(self): - def test_preprint_provider_with_osf_domain(self): + @mock.patch('website.views.use_ember_app') + def test_preprint_provider_with_osf_domain(self, mock_use_ember_app): provider = PreprintProviderFactory(_id='osf', domain='https://osf.io/') preprint = PreprintFactory(provider=provider) url = web_url_for('resolve_guid', _guid=True, guid=preprint._id) res = self.app.get(url) - assert_equal(res.status_code, 200) - assert_equal( - res.request.path, - '/{}/'.format(preprint._id) - ) + mock_use_ember_app.assert_called_with() class TestConfirmationViewBlockBingPreview(OsfTestCase): From 7810a1e86ac520d5ea1ca54a0b91ab58040f69ad Mon Sep 17 
00:00:00 2001 From: John Tordoff <> Date: Mon, 20 Nov 2023 15:06:06 -0500 Subject: [PATCH 07/23] Add AGU Conference campaign --- framework/auth/campaigns.py | 9 +++++++ tests/test_campaigns.py | 1 + website/mails/mails.py | 4 +++ .../confirm_agu_conference_2024.html.mako | 25 +++++++++++++++++++ website/util/metrics.py | 1 + 5 files changed, 40 insertions(+) create mode 100644 website/templates/emails/confirm_agu_conference_2024.html.mako diff --git a/framework/auth/campaigns.py b/framework/auth/campaigns.py index 64552a8f5ef..9d418e863e0 100644 --- a/framework/auth/campaigns.py +++ b/framework/auth/campaigns.py @@ -91,6 +91,15 @@ def get_campaigns(): } }) + newest_campaigns.update({ + 'agu_conference_2023': { + 'system_tag': CampaignSourceTags.AguConference2023.value, + 'redirect_url': '', + 'confirmation_email_template': mails.CONFIRM_EMAIL_AGU_CONFERENCE_2023, + 'login_type': 'native', + } + }) + CAMPAIGNS = newest_campaigns CAMPAIGNS_LAST_REFRESHED = timezone.now() diff --git a/tests/test_campaigns.py b/tests/test_campaigns.py index 442d1d1f931..66cb7f348db 100644 --- a/tests/test_campaigns.py +++ b/tests/test_campaigns.py @@ -44,6 +44,7 @@ def setUp(self): 'psyarxiv-preprints', 'osf-registries', 'osf-registered-reports', + 'agu_conference_2023', ] self.refresh = timezone.now() campaigns.CAMPAIGNS = None # force campaign refresh now that preprint providers are populated diff --git a/website/mails/mails.py b/website/mails/mails.py index d0263c59f95..4ecb438a7e8 100644 --- a/website/mails/mails.py +++ b/website/mails/mails.py @@ -188,6 +188,10 @@ def get_english_article(word): 'confirm_erpc', subject='OSF Account Verification, Election Research Preacceptance Competition' ) +CONFIRM_EMAIL_AGU_CONFERENCE_2023 = Mail( + 'confirm_agu_conference_2023', + subject='OSF Account Verification, from the American Geophysical Union Conference' +) CONFIRM_EMAIL_PREPRINTS = lambda name, provider: Mail( 'confirm_preprints_{}'.format(name), subject='OSF Account Verification, 
{}'.format(provider) diff --git a/website/templates/emails/confirm_agu_conference_2024.html.mako b/website/templates/emails/confirm_agu_conference_2024.html.mako new file mode 100644 index 00000000000..6d61636068c --- /dev/null +++ b/website/templates/emails/confirm_agu_conference_2024.html.mako @@ -0,0 +1,25 @@ +<%inherit file="notify_base.mako" /> + +<%def name="content()"> + + + Hello ${user.fullname},
+
+ + Thank you for joining us at the AGU Open Science Pavilion, and welcome to the Open Science Framework. + + We are pleased to offer a special AGU attendees exclusive community call to continue our conversation and to help + you get oriented on the OSF. This is an opportunity for us to show you useful OSF features, talk about + open science in your domains, and for you to ask any questions you may have. + You can register for this free event here: +
+ https://cos-io.zoom.us/meeting/register/tZAuceCvrjotHNG3n6XzLFDv1Rnn2hkjczHr +
+ To continue, please verify your email address by visiting this link:
+
+ ${confirmation_url}
+
+ From the team at the Center for Open Science
+ + + diff --git a/website/util/metrics.py b/website/util/metrics.py index 4416b4f5cd4..19c9773e935 100644 --- a/website/util/metrics.py +++ b/website/util/metrics.py @@ -49,6 +49,7 @@ class CampaignSourceTags(Enum): ErpChallenge = campaign_source_tag('erp_challenge') OsfRegisteredReports = campaign_source_tag('osf_registered_reports') Osf4m = campaign_source_tag('osf4m') + AguConference2023 = campaign_source_tag('agu_conference_2023') class OsfClaimedTags(Enum): From 9e0322d9ba888900a9fdc04b794af9abd50b0ad0 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Tue, 28 Nov 2023 18:18:35 -0500 Subject: [PATCH 08/23] Rename, reword template - Fix redirect --- framework/auth/campaigns.py | 2 +- ...e_2024.html.mako => confirm_agu_conference_2023.html.mako} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename website/templates/emails/{confirm_agu_conference_2024.html.mako => confirm_agu_conference_2023.html.mako} (88%) diff --git a/framework/auth/campaigns.py b/framework/auth/campaigns.py index 9d418e863e0..95203e058ca 100644 --- a/framework/auth/campaigns.py +++ b/framework/auth/campaigns.py @@ -94,7 +94,7 @@ def get_campaigns(): newest_campaigns.update({ 'agu_conference_2023': { 'system_tag': CampaignSourceTags.AguConference2023.value, - 'redirect_url': '', + 'redirect_url': furl.furl(DOMAIN).add(path='dashboard/').url, 'confirmation_email_template': mails.CONFIRM_EMAIL_AGU_CONFERENCE_2023, 'login_type': 'native', } diff --git a/website/templates/emails/confirm_agu_conference_2024.html.mako b/website/templates/emails/confirm_agu_conference_2023.html.mako similarity index 88% rename from website/templates/emails/confirm_agu_conference_2024.html.mako rename to website/templates/emails/confirm_agu_conference_2023.html.mako index 6d61636068c..429ec911410 100644 --- a/website/templates/emails/confirm_agu_conference_2024.html.mako +++ b/website/templates/emails/confirm_agu_conference_2023.html.mako @@ -14,8 +14,8 @@ You can register for this free event here:
https://cos-io.zoom.us/meeting/register/tZAuceCvrjotHNG3n6XzLFDv1Rnn2hkjczHr -
- To continue, please verify your email address by visiting this link:
+

+ To confirm your OSF account, please verify your email address by visiting this link:

${confirmation_url}

From 561d81ec46ae3bd05a7159387f4e10ef12e092e2 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Thu, 14 Dec 2023 11:55:25 -0500 Subject: [PATCH 09/23] [ENG-4823] Add Collection Metadata Options (#10499) * add collection metadata options for ibdgc --------- Co-authored-by: John Tordoff <> --- admin/collection_providers/forms.py | 62 +++++++++++++++++++ admin/collection_providers/views.py | 59 +++++++++--------- .../js/pages/collection-provider-page.js | 26 ++++++++ .../collection_providers/detail.html | 8 +++ .../update_collection_provider_form.html | 12 ++++ api/collections/serializers.py | 20 ++++++ api_tests/search/views/test_views.py | 20 ++++++ osf/migrations/0017_auto_20231212_1843.py | 34 ++++++++++ osf/models/collection.py | 20 +++++- osf/models/collection_submission.py | 10 +++ website/project/views/node.py | 2 + website/search/elastic_search.py | 2 + website/templates/project/project.mako | 27 ++++++++ 13 files changed, 273 insertions(+), 29 deletions(-) create mode 100644 osf/migrations/0017_auto_20231212_1843.py diff --git a/admin/collection_providers/forms.py b/admin/collection_providers/forms.py index ca0358a126d..4b8af62bb82 100644 --- a/admin/collection_providers/forms.py +++ b/admin/collection_providers/forms.py @@ -15,6 +15,8 @@ class CollectionProviderForm(forms.ModelForm): program_area_choices = forms.CharField(widget=forms.HiddenInput(), required=False) school_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False) study_design_choices = forms.CharField(widget=forms.HiddenInput(), required=False) + data_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False) + disease_choices = forms.CharField(widget=forms.HiddenInput(), required=False) _id = forms.SlugField( required=True, help_text='URL Slug', @@ -268,3 +270,63 @@ def clean_study_design_choices(self): if choices: added_choices = json.loads(choices) return {'added': added_choices, 'removed': removed_choices} + + def clean_disease_choices(self): + if not 
self.data.get('disease_choices'): + return {'added': [], 'removed': []} + + collection_provider = self.instance + primary_collection = collection_provider.primary_collection + if primary_collection: # Modifying an existing CollectionProvider + old_choices = {c.strip(' ') for c in primary_collection.disease_choices} + updated_choices = {c.strip(' ') for c in json.loads(self.data.get('disease_choices'))} + added_choices = updated_choices - old_choices + removed_choices = old_choices - updated_choices + + active_removed_choices = set( + primary_collection.collectionsubmission_set.filter( + disease__in=removed_choices + ).values_list('disease', flat=True) + ) + if active_removed_choices: + raise forms.ValidationError( + 'Cannot remove the following choices for "disease", as they are ' + f'currently in use: {active_removed_choices}' + ) + else: # Creating a new CollectionProvider + added_choices = set() + removed_choices = set() + choices = self.data.get('disease_choices') + if choices: + added_choices = json.loads(choices) + return {'added': added_choices, 'removed': removed_choices} + + def clean_data_type_choices(self): + if not self.data.get('data_type_choices'): + return {'added': [], 'removed': []} + + collection_provider = self.instance + primary_collection = collection_provider.primary_collection + if primary_collection: # Modifying an existing CollectionProvider + old_choices = {c.strip(' ') for c in primary_collection.data_type_choices} + updated_choices = {c.strip(' ') for c in json.loads(self.data.get('data_type_choices'))} + added_choices = updated_choices - old_choices + removed_choices = old_choices - updated_choices + + active_removed_choices = set( + primary_collection.collectionsubmission_set.filter( + data_type__in=removed_choices + ).values_list('data_type', flat=True) + ) + if active_removed_choices: + raise forms.ValidationError( + 'Cannot remove the following choices for "data_type", as they are ' + f'currently in use: {active_removed_choices}' + 
) + else: # Creating a new CollectionProvider + added_choices = set() + removed_choices = set() + choices = self.data.get('data_type_choices') + if choices: + added_choices = json.loads(choices) + return {'added': added_choices, 'removed': removed_choices} diff --git a/admin/collection_providers/views.py b/admin/collection_providers/views.py index d5c950ed4fd..699d82cf533 100644 --- a/admin/collection_providers/views.py +++ b/admin/collection_providers/views.py @@ -21,6 +21,17 @@ from admin.providers.views import AddAdminOrModerator, RemoveAdminsAndModerators +def _process_collection_choices(provider, choices_name, form): + collection = provider.primary_collection + choices_name_attr = f'{choices_name}_choices' + choices_added = form.cleaned_data[choices_name_attr]['added'] + choices_removed = form.cleaned_data[choices_name_attr]['removed'] + + getattr(collection, choices_name_attr).extend(choices_added) + for item in choices_removed: + getattr(collection, choices_name_attr).remove(item) + + class CreateCollectionProvider(PermissionRequiredMixin, CreateView): raise_exception = True permission_required = 'osf.change_collectionprovider' @@ -47,6 +58,10 @@ def form_valid(self, form): self.object.primary_collection.school_type_choices.append(item) for item in form.cleaned_data['study_design_choices']['added']: self.object.primary_collection.study_design_choices.append(item) + for item in form.cleaned_data['data_type_choices']['added']: + self.object.primary_collection.data_type_choices.append(item) + for item in form.cleaned_data['disease_choices']['added']: + self.object.primary_collection.disease_choices.append(item) self.object.primary_collection.save() return super().form_valid(form) @@ -163,6 +178,16 @@ def get_context_data(self, *args, **kwargs): )) kwargs['study_design_choices'] = study_design_choices_html + disease_choices_html = '
    {choices}
'.format(choices=''.join( + f'
  • {choice}
  • ' for choice in primary_collection.disease_choices + )) + kwargs['disease_choices'] = disease_choices_html + + data_type_choices_html = '
      {choices}
    '.format(choices=''.join( + f'
  • {choice}
  • ' for choice in primary_collection.data_type_choices + )) + kwargs['data_type_choices'] = data_type_choices_html + # get a dict of model fields so that we can set the initial value for the update form fields = model_to_dict(collection_provider) fields['collected_type_choices'] = json.dumps(primary_collection.collected_type_choices) @@ -175,6 +200,8 @@ def get_context_data(self, *args, **kwargs): fields['school_type_choices'] = json.dumps(primary_collection.school_type_choices) fields['study_design_choices'] = json.dumps(primary_collection.study_design_choices) + fields['data_type_choices'] = json.dumps(primary_collection.data_type_choices) + fields['disease_choices'] = json.dumps(primary_collection.disease_choices) # compile html list of collected_type_choices if collection_provider.primary_collection: @@ -235,34 +262,8 @@ class CollectionProviderChangeForm(PermissionRequiredMixin, UpdateView): def form_valid(self, form): if self.object.primary_collection: - self.object.primary_collection.collected_type_choices.extend(form.cleaned_data['collected_type_choices']['added']) - for item in form.cleaned_data['collected_type_choices']['removed']: - self.object.primary_collection.collected_type_choices.remove(item) - - self.object.primary_collection.status_choices.extend(form.cleaned_data['status_choices']['added']) - for item in form.cleaned_data['status_choices']['removed']: - self.object.primary_collection.status_choices.remove(item) - - self.object.primary_collection.issue_choices.extend(form.cleaned_data['issue_choices']['added']) - for item in form.cleaned_data['issue_choices']['removed']: - self.object.primary_collection.issue_choices.remove(item) - - self.object.primary_collection.volume_choices.extend(form.cleaned_data['volume_choices']['added']) - for item in form.cleaned_data['volume_choices']['removed']: - self.object.primary_collection.volume_choices.remove(item) - - 
self.object.primary_collection.program_area_choices.extend(form.cleaned_data['program_area_choices']['added']) - for item in form.cleaned_data['program_area_choices']['removed']: - self.object.primary_collection.program_area_choices.remove(item) - - self.object.primary_collection.school_type_choices.extend(form.cleaned_data['school_type_choices']['added']) - for item in form.cleaned_data['school_type_choices']['removed']: - self.object.primary_collection.school_type_choices.remove(item) - - self.object.primary_collection.study_design_choices.extend(form.cleaned_data['study_design_choices']['added']) - for item in form.cleaned_data['study_design_choices']['removed']: - self.object.primary_collection.study_design_choices.remove(item) - + for choices_name in ['collected_type', 'status', 'issue', 'volume', 'program_area', 'school_type', 'study_design', 'data_type', 'disease']: + _process_collection_choices(self.object, choices_name, form) self.object.primary_collection.save() return super().form_valid(form) @@ -399,6 +400,8 @@ def create_or_update_provider(self, provider_data): provider.primary_collection.program_area_choices = primary_collection['fields']['program_area_choices'] provider.primary_collection.school_type_choices = primary_collection['fields']['school_type_choices'] provider.primary_collection.study_design_choices = primary_collection['fields']['study_design_choices'] + provider.primary_collection.disease_choices = primary_collection['fields']['disease_choices'] + provider.primary_collection.data_type_choices = primary_collection['fields']['data_type_choices'] provider.primary_collection.save() if licenses: provider.licenses_acceptable.set(licenses) diff --git a/admin/static/js/pages/collection-provider-page.js b/admin/static/js/pages/collection-provider-page.js index 2964fdc5a56..11e6e2302b4 100644 --- a/admin/static/js/pages/collection-provider-page.js +++ b/admin/static/js/pages/collection-provider-page.js @@ -57,6 +57,22 @@ 
$('#tags-input-study-design').on('itemRemoved', function(event) { $('#id_study_design_choices').val(JSON.stringify($('#tags-input-study-design').tagsinput('items'))); }); +$('#tags-input-data-type').on('itemAdded', function(event) { + $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items'))); +}); + +$('#tags-input-data-type').on('itemRemoved', function(event) { + $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items'))); +}); + +$('#tags-input-disease').on('itemAdded', function(event) { + $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items'))); +}); + +$('#tags-input-disease').on('itemRemoved', function(event) { + $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items'))); +}); + $(document).ready(function() { var collectedTypeItems = JSON.parse($('#id_collected_type_choices').val()); @@ -93,4 +109,14 @@ $(document).ready(function() { studyDesignItems.forEach(function(element){ $('#tags-input-study-design').tagsinput('add', element) }); + + var diseaseItems = JSON.parse($('#id_disease_choices').val()); + diseaseItems.forEach(function(element){ + $('#tags-input-disease').tagsinput('add', element) + }); + + var dataTypeItems = JSON.parse($('#id_data_type_choices').val()); + dataTypeItems.forEach(function(element){ + $('#tags-input-data-type').tagsinput('add', element) + }); }); diff --git a/admin/templates/collection_providers/detail.html b/admin/templates/collection_providers/detail.html index 7d488dd974a..c015a90fe8b 100644 --- a/admin/templates/collection_providers/detail.html +++ b/admin/templates/collection_providers/detail.html @@ -66,6 +66,14 @@

    {{ collection_provider.name }}

    study_design_choices {{ study_design_choices | safe}} + + disease_choices + {{ disease_choices | safe}} + + + data_type_choices + {{ data_type_choices | safe}} + diff --git a/admin/templates/collection_providers/update_collection_provider_form.html b/admin/templates/collection_providers/update_collection_provider_form.html index 05422db6d9a..c64198c2e95 100644 --- a/admin/templates/collection_providers/update_collection_provider_form.html +++ b/admin/templates/collection_providers/update_collection_provider_form.html @@ -98,6 +98,18 @@ +
    + +
    + +
    +
    +
    + +
    + +
    +
    diff --git a/api/collections/serializers.py b/api/collections/serializers.py index 3b5a10c7ec6..7499015aaa0 100644 --- a/api/collections/serializers.py +++ b/api/collections/serializers.py @@ -68,6 +68,14 @@ class CollectionSerializer(JSONAPISerializer): child=ser.CharField(max_length=127), default=list(), ) + data_type_choices = ser.ListField( + child=ser.CharField(max_length=127), + default=list(), + ) + disease_choices = ser.ListField( + child=ser.CharField(max_length=127), + default=list(), + ) links = LinksField({}) @@ -241,6 +249,8 @@ def subjects_view_kwargs(self): program_area = ser.CharField(required=False) school_type = ser.CharField(required=False) study_design = ser.CharField(required=False) + data_type = ser.CharField(required=False) + disease = ser.CharField(required=False) def get_absolute_url(self, obj): return absolute_reverse( @@ -272,6 +282,10 @@ def update(self, obj, validated_data): obj.school_Type = validated_data.pop('school_type') if 'study_design' in validated_data: obj.study_design = validated_data.pop('study_design') + if 'data_type' in validated_data: + obj.data_type = validated_data.pop('data_type') + if 'disease' in validated_data: + obj.disease = validated_data.pop('disease') obj.save() return obj @@ -337,6 +351,8 @@ def subjects_view_kwargs(self): program_area = ser.CharField(required=False) school_type = ser.CharField(required=False) study_design = ser.CharField(required=False) + date_type = ser.CharField(required=False) + disease = ser.CharField(required=False) def get_absolute_url(self, obj): return absolute_reverse( @@ -368,6 +384,10 @@ def update(self, obj, validated_data): obj.school_Type = validated_data.pop('school_type') if 'study_design' in validated_data: obj.study_design = validated_data.pop('study_design') + if 'data_type' in validated_data: + obj.data_type = validated_data.pop('data_type') + if 'disease' in validated_data: + obj.disease = validated_data.pop('disease') obj.save() return obj diff --git 
a/api_tests/search/views/test_views.py b/api_tests/search/views/test_views.py index 4fc5e0e1196..7ebf6e769d0 100644 --- a/api_tests/search/views/test_views.py +++ b/api_tests/search/views/test_views.py @@ -50,6 +50,7 @@ def collection_public(self, user): return CollectionFactory(creator=user, provider=CollectionProviderFactory(), is_public=True, status_choices=['', 'asdf', 'lkjh'], collected_type_choices=['', 'asdf', 'lkjh'], issue_choices=['', '0', '1', '2'], volume_choices=['', '0', '1', '2'], + disease_choices=['illness'], data_type_choices=['realness'], program_area_choices=['', 'asdf', 'lkjh']) @pytest.fixture() @@ -1000,3 +1001,22 @@ def test_POST_search_collections( assert res.json['links']['meta']['total'] == 1 assert len(res.json['data']) == 1 assert res.json['data'][0]['id'] == node_with_abstract._id + + def test_POST_search_collections_disease_data_type( + self, app, url_collection_search, user, node_one, node_two, collection_public, + node_with_abstract, node_private, registration_collection, registration_one, + registration_two, registration_private, reg_with_abstract): + + collection_public.collect_object(node_one, user, disease='illness', data_type='realness') + collection_public.collect_object(node_two, user, data_type='realness') + + payload = self.post_payload(disease='illness') + res = app.post_json_api(url_collection_search, payload) + assert res.status_code == 200 + assert res.json['links']['meta']['total'] == 1 + + payload = self.post_payload(dataType='realness') + res = app.post_json_api(url_collection_search, payload) + assert res.status_code == 200 + assert res.json['links']['meta']['total'] == 2 + assert len(res.json['data']) == 2 diff --git a/osf/migrations/0017_auto_20231212_1843.py b/osf/migrations/0017_auto_20231212_1843.py new file mode 100644 index 00000000000..5c13864dfc7 --- /dev/null +++ b/osf/migrations/0017_auto_20231212_1843.py @@ -0,0 +1,34 @@ +# Generated by Django 3.2.17 on 2023-12-12 18:43 + +import 
django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0016_auto_20230828_1810'), + ] + + operations = [ + migrations.AddField( + model_name='collection', + name='data_type_choices', + field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None), + ), + migrations.AddField( + model_name='collection', + name='disease_choices', + field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None), + ), + migrations.AddField( + model_name='collectionsubmission', + name='data_type', + field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127), + ), + migrations.AddField( + model_name='collectionsubmission', + name='disease', + field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127), + ), + ] diff --git a/osf/models/collection.py b/osf/models/collection.py index 36fabff27af..ca2620a5e7b 100644 --- a/osf/models/collection.py +++ b/osf/models/collection.py @@ -55,6 +55,8 @@ class Meta: program_area_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) school_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) study_design_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) + disease_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) + data_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) is_public = models.BooleanField(default=False, db_index=True) is_promoted = models.BooleanField(default=False, db_index=True) is_bookmark_collection = models.BooleanField(default=False, db_index=True) @@ -160,7 +162,7 @@ def has_permission(self, 
user, perm): def collect_object( self, obj, collector, collected_type=None, status=None, volume=None, issue=None, - program_area=None, school_type=None, study_design=None): + program_area=None, school_type=None, study_design=None, data_type=None, disease=None): """ Adds object to collection, creates CollectionSubmission reference Performs type / metadata validation. User permissions checked in view. @@ -177,6 +179,8 @@ def collect_object( program_area = program_area or '' school_type = school_type or '' study_design = study_design or '' + data_type = data_type or '' + disease = disease or '' if not self.collected_type_choices and collected_type: raise ValidationError('May not specify "type" for this collection') @@ -220,6 +224,18 @@ def collect_object( elif study_design not in self.study_design_choices: raise ValidationError(f'"{study_design}" is not an acceptable "study_design" for this collection') + if disease: + if not self.disease_choices: + raise ValidationError('May not specify "disease" for this collection') + elif disease not in self.disease_choices: + raise ValidationError(f'"{disease}" is not an acceptable "disease" for this collection') + + if data_type: + if not self.data_type_choices: + raise ValidationError('May not specify "data_type" for this collection') + elif data_type not in self.data_type_choices: + raise ValidationError(f'"{data_type}" is not an acceptable "data_type" for this collection') + if not any([isinstance(obj, t.model_class()) for t in self.collected_types.all()]): # Not all objects have a content_type_pk, have to look the other way. 
# Ideally, all objects would, and we could do: @@ -248,6 +264,8 @@ def collect_object( collection_submission.program_area = program_area collection_submission.school_type = school_type collection_submission.study_design = study_design + collection_submission.data_type = data_type + collection_submission.disease = disease collection_submission.save() return collection_submission diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py index d4819255991..963d38b116b 100644 --- a/osf/models/collection_submission.py +++ b/osf/models/collection_submission.py @@ -39,6 +39,16 @@ class Meta: program_area = models.CharField(blank=True, max_length=127) school_type = models.CharField(blank=True, max_length=127) study_design = models.CharField(blank=True, max_length=127) + disease = models.CharField( + help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', + blank=True, + max_length=127 + ) + data_type = models.CharField( + help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', + blank=True, + max_length=127 + ) machine_state = models.IntegerField( choices=CollectionSubmissionStates.int_field_choices(), default=CollectionSubmissionStates.IN_PROGRESS, diff --git a/website/project/views/node.py b/website/project/views/node.py index c2ee444ab88..70a47c1b4d8 100644 --- a/website/project/views/node.py +++ b/website/project/views/node.py @@ -911,6 +911,8 @@ def serialize_collections(collection_submissions, auth): 'node_id': collection_submission.guid._id, 'study_design': collection_submission.study_design, 'program_area': collection_submission.program_area, + 'disease': collection_submission.disease, + 'data_type': collection_submission.data_type, 'state': collection_submission.state.db_name, 'subjects': list(collection_submission.subjects.values_list('text', flat=True)), 'is_public': collection_submission.collection.is_public, diff --git a/website/search/elastic_search.py 
b/website/search/elastic_search.py index bc418d3c940..6b479c29de5 100644 --- a/website/search/elastic_search.py +++ b/website/search/elastic_search.py @@ -618,6 +618,8 @@ def serialize_collection_submission(collection_submission): 'programArea': collection_submission.program_area, 'schoolType': collection_submission.school_type, 'studyDesign': collection_submission.study_design, + 'disease': collection_submission.disease, + 'dataType': collection_submission.data_type, 'subjects': list(collection_submission.subjects.values_list('text', flat=True)), 'title': getattr(obj, 'title', ''), 'url': getattr(obj, 'url', ''), diff --git a/website/templates/project/project.mako b/website/templates/project/project.mako index d220853e1b9..356fa93a562 100644 --- a/website/templates/project/project.mako +++ b/website/templates/project/project.mako @@ -401,6 +401,20 @@ % endif
    + % if collection['disease'] and collection['data_type']: +
    + Disease: ${collection['disease']} |  Data Type: ${collection['data_type']} +
    + % elif collection['disease']: +
    + Disease: ${collection['disease']} +
    + % elif collection['data_type']: +
    + Data Type: ${collection['data_type']} +
    + % endif +
    % elif collection['state'] == 'pending' and user['is_contributor_or_group_member']: % if user['is_admin']: @@ -448,6 +462,19 @@ Program Area: ${collection['program_area']} % endif + % if collection['disease'] and collection['data_type']: +
    + Disease: ${collection['disease']} |  Data Type: ${collection['data_type']} +
    + % elif collection['disease']: +
    + Disease: ${collection['disease']} +
    + % elif collection['data_type']: +
    + Data Type: ${collection['data_type']} +
    + % endif
    % elif collection['state'] == 'rejected' and user['is_contributor_or_group_member']: % if user['is_admin']: From af35c28a493c29161424fc4f9c7df9d662c0e3c1 Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Fri, 15 Dec 2023 09:59:09 -0500 Subject: [PATCH 10/23] Add UNVERIFIED Domain classification Squashed commit of the following: commit 5615a3adf65b941f3929f4f8201a0eb0d1fe1c88 Author: John Tordoff <> Date: Tue Dec 12 16:42:54 2023 -0500 update is_triaged behavior commit a9a49f281c993fef425fa0db24ec2c33924e34c8 Author: John Tordoff <> Date: Tue Dec 12 15:53:49 2023 -0500 change test case to account for new exception handing for domain sniffer commit 096e1ab68b5e7d5c2615bdd0f064bcddaad85d16 Author: John Tordoff <> Date: Tue Dec 12 14:03:25 2023 -0500 redo exception handling and add migration file commit 89b37f32ce7c22076e0ef7fa7c2feceb81e24b86 Author: John Tordoff <> Date: Mon Dec 11 12:13:31 2023 -0500 make timeouts classify notable domains as unverified --- osf/external/spam/tasks.py | 21 +++++++++---- .../0017_alter_notabledomain_note.py | 19 ++++++++++++ osf/models/notable_domain.py | 1 + osf_tests/test_notable_domains.py | 31 ++++++++++--------- 4 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 osf/migrations/0017_alter_notabledomain_note.py diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py index cc3f9e16a16..fabb7dfb935 100644 --- a/osf/external/spam/tasks.py +++ b/osf/external/spam/tasks.py @@ -46,15 +46,20 @@ def _check_resource_for_domains(guid, content): resource = guid.referent spammy_domains = [] referrer_content_type = ContentType.objects.get_for_model(resource) - for domain in _extract_domains(content): - notable_domain, _ = NotableDomain.objects.get_or_create(domain=domain) + for domain, note in _extract_domains(content): + notable_domain, _ = NotableDomain.objects.get_or_create( + domain=domain, + defaults={'note': note} + ) if notable_domain.note == 
NotableDomain.Note.EXCLUDE_FROM_ACCOUNT_CREATION_AND_CONTENT: spammy_domains.append(notable_domain.domain) DomainReference.objects.get_or_create( domain=notable_domain, referrer_object_id=resource.id, referrer_content_type=referrer_content_type, - defaults={'is_triaged': notable_domain.note != NotableDomain.Note.UNKNOWN} + defaults={ + 'is_triaged': notable_domain.note not in (NotableDomain.Note.UNKNOWN, NotableDomain.Note.UNVERIFIED) + } ) if spammy_domains: resource.confirm_spam(save=True, domains=list(spammy_domains)) @@ -72,8 +77,11 @@ def check_resource_for_domains_async(guid, content): def _extract_domains(content): + from osf.models import NotableDomain + extracted_domains = set() for match in DOMAIN_REGEX.finditer(content): + note = NotableDomain.Note.UNKNOWN domain = match.group('domain') if not domain or domain in extracted_domains: continue @@ -85,10 +93,11 @@ def _extract_domains(content): try: response = requests.head(constructed_url, timeout=settings.DOMAIN_EXTRACTION_TIMEOUT) - except (requests.exceptions.ConnectionError, requests.exceptions.InvalidURL): + except requests.exceptions.InvalidURL: + # Likely false-positive from a filename.ext continue except requests.exceptions.RequestException: - pass + note = NotableDomain.Note.UNVERIFIED else: # Store the redirect location (to help catch link shorteners) if response.status_code in REDIRECT_CODES and 'location' in response.headers: @@ -99,7 +108,7 @@ def _extract_domains(content): # Avoid returning a duplicate domain discovered via redirect if domain not in extracted_domains: extracted_domains.add(domain) - yield domain + yield domain, note @run_postcommit(once_per_request=False, celery=True) diff --git a/osf/migrations/0017_alter_notabledomain_note.py b/osf/migrations/0017_alter_notabledomain_note.py new file mode 100644 index 00000000000..056568cffbe --- /dev/null +++ b/osf/migrations/0017_alter_notabledomain_note.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.17 on 2023-12-12 19:02 + +from 
django.db import migrations, models +import osf.models.notable_domain + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0016_auto_20230828_1810'), + ] + + operations = [ + migrations.AlterField( + model_name='notabledomain', + name='note', + field=models.IntegerField(choices=[(0, 'EXCLUDE_FROM_ACCOUNT_CREATION_AND_CONTENT'), (1, 'ASSUME_HAM_UNTIL_REPORTED'), (2, 'UNKNOWN'), (3, 'IGNORED'), (4, 'UNVERIFIED')], default=osf.models.notable_domain.NotableDomain.Note['UNKNOWN']), + ), + ] diff --git a/osf/models/notable_domain.py b/osf/models/notable_domain.py index 5b960718ed9..03ebcfd6e40 100644 --- a/osf/models/notable_domain.py +++ b/osf/models/notable_domain.py @@ -14,6 +14,7 @@ class Note(IntEnum): ASSUME_HAM_UNTIL_REPORTED = 1 UNKNOWN = 2 IGNORED = 3 + UNVERIFIED = 4 # Timedout couldn't determine @classmethod def choices(cls): diff --git a/osf_tests/test_notable_domains.py b/osf_tests/test_notable_domains.py index 4c9e39908dd..78edd11e967 100644 --- a/osf_tests/test_notable_domains.py +++ b/osf_tests/test_notable_domains.py @@ -30,31 +30,34 @@ def test_extract_domains__optional_components(self, protocol_component, www_comp sample_text = f'This is a link: {test_url}' with mock.patch.object(spam_tasks.requests, 'head'): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['osf.io'] + assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)] def test_extract_domains__url_in_quotes(self): sample_text = '"osf.io"' with mock.patch.object(spam_tasks.requests, 'head'): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['osf.io'] + assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)] def test_extract_domains__url_in_parens(self): sample_text = '(osf.io)' with mock.patch.object(spam_tasks.requests, 'head'): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['osf.io'] + assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)] def 
test_extract_domains__captures_domain_with_multiple_subdomains(self): sample_text = 'This is a link: https://api.test.osf.io' with mock.patch.object(spam_tasks.requests, 'head'): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['api.test.osf.io'] + assert domains == [('api.test.osf.io', NotableDomain.Note.UNKNOWN)] def test_extract_domains__captures_multiple_domains(self): sample_text = 'This is a domain: http://osf.io. This is another domain: www.cos.io' with mock.patch.object(spam_tasks.requests, 'head'): domains = set(spam_tasks._extract_domains(sample_text)) - assert domains == {'osf.io', 'cos.io'} + assert domains == { + ('osf.io', NotableDomain.Note.UNKNOWN), + ('cos.io', NotableDomain.Note.UNKNOWN), + } def test_extract_domains__no_domains(self): sample_text = 'http://fakeout!' @@ -63,19 +66,19 @@ def test_extract_domains__no_domains(self): assert not domains mock_head.assert_not_called() - def test_extract_domains__ignored_if_does_not_resolve(self): + def test_extract_domains__unverfied_if_does_not_resolve(self): sample_text = 'This.will.not.connect' with mock.patch.object(spam_tasks.requests, 'head') as mock_head: mock_head.side_effect = spam_tasks.requests.exceptions.ConnectionError domains = set(spam_tasks._extract_domains(sample_text)) - assert not domains + assert domains == {('This.will.not.connect', NotableDomain.Note.UNVERIFIED)} def test_actract_domains__returned_on_error(self): sample_text = 'This.will.timeout' with mock.patch.object(spam_tasks.requests, 'head') as mock_head: mock_head.side_effect = spam_tasks.requests.exceptions.Timeout domains = set(spam_tasks._extract_domains(sample_text)) - assert domains == {sample_text} + assert domains == {(sample_text, NotableDomain.Note.UNVERIFIED)} @pytest.mark.parametrize('status_code', [301, 302, 303, 307, 308]) def test_extract_domains__follows_redirect(self, status_code): @@ -85,7 +88,7 @@ def test_extract_domains__follows_redirect(self, status_code): sample_text = 
'redirect.me' with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['redirected.com'] + assert domains == [('redirected.com', NotableDomain.Note.UNKNOWN)] def test_extract_domains__redirect_code_no_location(self): mock_response = SimpleNamespace() @@ -94,7 +97,7 @@ def test_extract_domains__redirect_code_no_location(self): sample_text = 'redirect.me' with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['redirect.me'] + assert domains == [('redirect.me', NotableDomain.Note.UNKNOWN)] def test_extract_domains__redirect_code_bad_location(self): mock_response = SimpleNamespace() @@ -103,7 +106,7 @@ def test_extract_domains__redirect_code_bad_location(self): sample_text = 'redirect.me' with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['redirect.me'] + assert domains == [('redirect.me', NotableDomain.Note.UNKNOWN)] def test_extract_domains__redirect_with_full_url_no_protocol(self): mock_response = SimpleNamespace() @@ -114,7 +117,7 @@ def test_extract_domains__redirect_with_full_url_no_protocol(self): with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response) as mock_object: domains = list(spam_tasks._extract_domains(sample_text)) mock_object.assert_called_once_with(f'https://{target_url}', timeout=60) - assert domains == ['osf.io'] + assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)] def test_extract_domains__redirect_with_full_url_and_protocol(self): mock_response = SimpleNamespace() @@ -125,13 +128,13 @@ def test_extract_domains__redirect_with_full_url_and_protocol(self): with mock.patch.object(spam_tasks.requests, 'head', return_value=mock_response) as mock_object: domains = list(spam_tasks._extract_domains(sample_text)) 
mock_object.assert_called_once_with(target_url, timeout=60) - assert domains == ['osf.io'] + assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)] def test_extract_domains__deduplicates(self): sample_text = 'osf.io osf.io osf.io and, oh, yeah, osf.io' with mock.patch.object(spam_tasks.requests, 'head'): domains = list(spam_tasks._extract_domains(sample_text)) - assert domains == ['osf.io'] + assert domains == [('osf.io', NotableDomain.Note.UNKNOWN)] def test_extract_domains__ignores_floats(self): sample_text = 'this is a number 3.1415 not a domain' From 1722bbd83cf2777d8f936b274df54bdcca384ea9 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 18 Dec 2023 09:46:20 -0500 Subject: [PATCH 11/23] Add merge migration --- osf/migrations/0018_merge_20231218_1446.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 osf/migrations/0018_merge_20231218_1446.py diff --git a/osf/migrations/0018_merge_20231218_1446.py b/osf/migrations/0018_merge_20231218_1446.py new file mode 100644 index 00000000000..f76317978f7 --- /dev/null +++ b/osf/migrations/0018_merge_20231218_1446.py @@ -0,0 +1,14 @@ +# Generated by Django 3.2.17 on 2023-12-18 14:46 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0017_alter_notabledomain_note'), + ('osf', '0017_auto_20231212_1843'), + ] + + operations = [ + ] From 1d626fade18230ada50ece3dd4159a6bc36ade12 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Thu, 14 Dec 2023 11:55:25 -0500 Subject: [PATCH 12/23] [ENG-4823] Add Collection Metadata Options (#10499) * add collection metadata options for ibdgc --------- Co-authored-by: John Tordoff <> --- admin/collection_providers/forms.py | 62 +++++++++++++++++++ admin/collection_providers/views.py | 59 +++++++++--------- .../js/pages/collection-provider-page.js | 26 ++++++++ .../collection_providers/detail.html | 8 +++ .../update_collection_provider_form.html | 12 ++++ api/collections/serializers.py | 20 ++++++ 
api_tests/search/views/test_views.py | 20 ++++++ osf/migrations/0017_auto_20231212_1843.py | 34 ++++++++++ osf/models/collection.py | 20 +++++- osf/models/collection_submission.py | 10 +++ website/project/views/node.py | 2 + website/search/elastic_search.py | 2 + website/templates/project/project.mako | 27 ++++++++ 13 files changed, 273 insertions(+), 29 deletions(-) create mode 100644 osf/migrations/0017_auto_20231212_1843.py diff --git a/admin/collection_providers/forms.py b/admin/collection_providers/forms.py index ca0358a126d..4b8af62bb82 100644 --- a/admin/collection_providers/forms.py +++ b/admin/collection_providers/forms.py @@ -15,6 +15,8 @@ class CollectionProviderForm(forms.ModelForm): program_area_choices = forms.CharField(widget=forms.HiddenInput(), required=False) school_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False) study_design_choices = forms.CharField(widget=forms.HiddenInput(), required=False) + data_type_choices = forms.CharField(widget=forms.HiddenInput(), required=False) + disease_choices = forms.CharField(widget=forms.HiddenInput(), required=False) _id = forms.SlugField( required=True, help_text='URL Slug', @@ -268,3 +270,63 @@ def clean_study_design_choices(self): if choices: added_choices = json.loads(choices) return {'added': added_choices, 'removed': removed_choices} + + def clean_disease_choices(self): + if not self.data.get('disease_choices'): + return {'added': [], 'removed': []} + + collection_provider = self.instance + primary_collection = collection_provider.primary_collection + if primary_collection: # Modifying an existing CollectionProvider + old_choices = {c.strip(' ') for c in primary_collection.disease_choices} + updated_choices = {c.strip(' ') for c in json.loads(self.data.get('disease_choices'))} + added_choices = updated_choices - old_choices + removed_choices = old_choices - updated_choices + + active_removed_choices = set( + primary_collection.collectionsubmission_set.filter( + 
disease__in=removed_choices + ).values_list('disease', flat=True) + ) + if active_removed_choices: + raise forms.ValidationError( + 'Cannot remove the following choices for "disease", as they are ' + f'currently in use: {active_removed_choices}' + ) + else: # Creating a new CollectionProvider + added_choices = set() + removed_choices = set() + choices = self.data.get('disease_choices') + if choices: + added_choices = json.loads(choices) + return {'added': added_choices, 'removed': removed_choices} + + def clean_data_type_choices(self): + if not self.data.get('data_type_choices'): + return {'added': [], 'removed': []} + + collection_provider = self.instance + primary_collection = collection_provider.primary_collection + if primary_collection: # Modifying an existing CollectionProvider + old_choices = {c.strip(' ') for c in primary_collection.data_type_choices} + updated_choices = {c.strip(' ') for c in json.loads(self.data.get('data_type_choices'))} + added_choices = updated_choices - old_choices + removed_choices = old_choices - updated_choices + + active_removed_choices = set( + primary_collection.collectionsubmission_set.filter( + data_type__in=removed_choices + ).values_list('data_type', flat=True) + ) + if active_removed_choices: + raise forms.ValidationError( + 'Cannot remove the following choices for "data_type", as they are ' + f'currently in use: {active_removed_choices}' + ) + else: # Creating a new CollectionProvider + added_choices = set() + removed_choices = set() + choices = self.data.get('data_type_choices') + if choices: + added_choices = json.loads(choices) + return {'added': added_choices, 'removed': removed_choices} diff --git a/admin/collection_providers/views.py b/admin/collection_providers/views.py index d5c950ed4fd..699d82cf533 100644 --- a/admin/collection_providers/views.py +++ b/admin/collection_providers/views.py @@ -21,6 +21,17 @@ from admin.providers.views import AddAdminOrModerator, RemoveAdminsAndModerators +def 
_process_collection_choices(provider, choices_name, form): + collection = provider.primary_collection + choices_name_attr = f'{choices_name}_choices' + choices_added = form.cleaned_data[choices_name_attr]['added'] + choices_removed = form.cleaned_data[choices_name_attr]['removed'] + + getattr(collection, choices_name_attr).extend(choices_added) + for item in choices_removed: + getattr(collection, choices_name_attr).remove(item) + + class CreateCollectionProvider(PermissionRequiredMixin, CreateView): raise_exception = True permission_required = 'osf.change_collectionprovider' @@ -47,6 +58,10 @@ def form_valid(self, form): self.object.primary_collection.school_type_choices.append(item) for item in form.cleaned_data['study_design_choices']['added']: self.object.primary_collection.study_design_choices.append(item) + for item in form.cleaned_data['data_type_choices']['added']: + self.object.primary_collection.data_type_choices.append(item) + for item in form.cleaned_data['disease_choices']['added']: + self.object.primary_collection.disease_choices.append(item) self.object.primary_collection.save() return super().form_valid(form) @@ -163,6 +178,16 @@ def get_context_data(self, *args, **kwargs): )) kwargs['study_design_choices'] = study_design_choices_html + disease_choices_html = '
<ul>{choices}</ul>'.format(choices=''.join( + f'<li>{choice}</li>' for choice in primary_collection.disease_choices + )) + kwargs['disease_choices'] = disease_choices_html + + data_type_choices_html = '
<ul>{choices}</ul>'.format(choices=''.join( + f'<li>{choice}</li>' for choice in primary_collection.data_type_choices + )) + kwargs['data_type_choices'] = data_type_choices_html + # get a dict of model fields so that we can set the initial value for the update form fields = model_to_dict(collection_provider) fields['collected_type_choices'] = json.dumps(primary_collection.collected_type_choices) @@ -175,6 +200,8 @@ def get_context_data(self, *args, **kwargs): fields['school_type_choices'] = json.dumps(primary_collection.school_type_choices) fields['study_design_choices'] = json.dumps(primary_collection.study_design_choices) + fields['data_type_choices'] = json.dumps(primary_collection.data_type_choices) + fields['disease_choices'] = json.dumps(primary_collection.disease_choices) # compile html list of collected_type_choices if collection_provider.primary_collection: @@ -235,34 +262,8 @@ class CollectionProviderChangeForm(PermissionRequiredMixin, UpdateView): def form_valid(self, form): if self.object.primary_collection: - self.object.primary_collection.collected_type_choices.extend(form.cleaned_data['collected_type_choices']['added']) - for item in form.cleaned_data['collected_type_choices']['removed']: - self.object.primary_collection.collected_type_choices.remove(item) - - self.object.primary_collection.status_choices.extend(form.cleaned_data['status_choices']['added']) - for item in form.cleaned_data['status_choices']['removed']: - self.object.primary_collection.status_choices.remove(item) - - self.object.primary_collection.issue_choices.extend(form.cleaned_data['issue_choices']['added']) - for item in form.cleaned_data['issue_choices']['removed']: - self.object.primary_collection.issue_choices.remove(item) - - self.object.primary_collection.volume_choices.extend(form.cleaned_data['volume_choices']['added']) - for item in form.cleaned_data['volume_choices']['removed']: - self.object.primary_collection.volume_choices.remove(item) - -
self.object.primary_collection.program_area_choices.extend(form.cleaned_data['program_area_choices']['added']) - for item in form.cleaned_data['program_area_choices']['removed']: - self.object.primary_collection.program_area_choices.remove(item) - - self.object.primary_collection.school_type_choices.extend(form.cleaned_data['school_type_choices']['added']) - for item in form.cleaned_data['school_type_choices']['removed']: - self.object.primary_collection.school_type_choices.remove(item) - - self.object.primary_collection.study_design_choices.extend(form.cleaned_data['study_design_choices']['added']) - for item in form.cleaned_data['study_design_choices']['removed']: - self.object.primary_collection.study_design_choices.remove(item) - + for choices_name in ['collected_type', 'status', 'issue', 'volume', 'program_area', 'school_type', 'study_design', 'data_type', 'disease']: + _process_collection_choices(self.object, choices_name, form) self.object.primary_collection.save() return super().form_valid(form) @@ -399,6 +400,8 @@ def create_or_update_provider(self, provider_data): provider.primary_collection.program_area_choices = primary_collection['fields']['program_area_choices'] provider.primary_collection.school_type_choices = primary_collection['fields']['school_type_choices'] provider.primary_collection.study_design_choices = primary_collection['fields']['study_design_choices'] + provider.primary_collection.disease_choices = primary_collection['fields']['disease_choices'] + provider.primary_collection.data_type_choices = primary_collection['fields']['data_type_choices'] provider.primary_collection.save() if licenses: provider.licenses_acceptable.set(licenses) diff --git a/admin/static/js/pages/collection-provider-page.js b/admin/static/js/pages/collection-provider-page.js index 2964fdc5a56..11e6e2302b4 100644 --- a/admin/static/js/pages/collection-provider-page.js +++ b/admin/static/js/pages/collection-provider-page.js @@ -57,6 +57,22 @@ 
$('#tags-input-study-design').on('itemRemoved', function(event) { $('#id_study_design_choices').val(JSON.stringify($('#tags-input-study-design').tagsinput('items'))); }); +$('#tags-input-data-type').on('itemAdded', function(event) { + $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items'))); +}); + +$('#tags-input-data-type').on('itemRemoved', function(event) { + $('#id_data_type_choices').val(JSON.stringify($('#tags-input-data-type').tagsinput('items'))); +}); + +$('#tags-input-disease').on('itemAdded', function(event) { + $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items'))); +}); + +$('#tags-input-disease').on('itemRemoved', function(event) { + $('#id_disease_choices').val(JSON.stringify($('#tags-input-disease').tagsinput('items'))); +}); + $(document).ready(function() { var collectedTypeItems = JSON.parse($('#id_collected_type_choices').val()); @@ -93,4 +109,14 @@ $(document).ready(function() { studyDesignItems.forEach(function(element){ $('#tags-input-study-design').tagsinput('add', element) }); + + var diseaseItems = JSON.parse($('#id_disease_choices').val()); + diseaseItems.forEach(function(element){ + $('#tags-input-disease').tagsinput('add', element) + }); + + var dataTypeItems = JSON.parse($('#id_data_type_choices').val()); + dataTypeItems.forEach(function(element){ + $('#tags-input-data-type').tagsinput('add', element) + }); }); diff --git a/admin/templates/collection_providers/detail.html b/admin/templates/collection_providers/detail.html index 7d488dd974a..c015a90fe8b 100644 --- a/admin/templates/collection_providers/detail.html +++ b/admin/templates/collection_providers/detail.html @@ -66,6 +66,14 @@

    {{ collection_provider.name }}

    study_design_choices {{ study_design_choices | safe}} + + disease_choices + {{ disease_choices | safe}} + + + data_type_choices + {{ data_type_choices | safe}} + diff --git a/admin/templates/collection_providers/update_collection_provider_form.html b/admin/templates/collection_providers/update_collection_provider_form.html index 05422db6d9a..c64198c2e95 100644 --- a/admin/templates/collection_providers/update_collection_provider_form.html +++ b/admin/templates/collection_providers/update_collection_provider_form.html @@ -98,6 +98,18 @@ +
    + +
    + +
    +
    +
    + +
    + +
    +
    diff --git a/api/collections/serializers.py b/api/collections/serializers.py index 3b5a10c7ec6..7499015aaa0 100644 --- a/api/collections/serializers.py +++ b/api/collections/serializers.py @@ -68,6 +68,14 @@ class CollectionSerializer(JSONAPISerializer): child=ser.CharField(max_length=127), default=list(), ) + data_type_choices = ser.ListField( + child=ser.CharField(max_length=127), + default=list(), + ) + disease_choices = ser.ListField( + child=ser.CharField(max_length=127), + default=list(), + ) links = LinksField({}) @@ -241,6 +249,8 @@ def subjects_view_kwargs(self): program_area = ser.CharField(required=False) school_type = ser.CharField(required=False) study_design = ser.CharField(required=False) + data_type = ser.CharField(required=False) + disease = ser.CharField(required=False) def get_absolute_url(self, obj): return absolute_reverse( @@ -272,6 +282,10 @@ def update(self, obj, validated_data): obj.school_Type = validated_data.pop('school_type') if 'study_design' in validated_data: obj.study_design = validated_data.pop('study_design') + if 'data_type' in validated_data: + obj.data_type = validated_data.pop('data_type') + if 'disease' in validated_data: + obj.disease = validated_data.pop('disease') obj.save() return obj @@ -337,6 +351,8 @@ def subjects_view_kwargs(self): program_area = ser.CharField(required=False) school_type = ser.CharField(required=False) study_design = ser.CharField(required=False) + data_type = ser.CharField(required=False) + disease = ser.CharField(required=False) def get_absolute_url(self, obj): return absolute_reverse( @@ -368,6 +384,10 @@ def update(self, obj, validated_data): obj.school_Type = validated_data.pop('school_type') if 'study_design' in validated_data: obj.study_design = validated_data.pop('study_design') + if 'data_type' in validated_data: + obj.data_type = validated_data.pop('data_type') + if 'disease' in validated_data: + obj.disease = validated_data.pop('disease') obj.save() return obj diff --git
a/api_tests/search/views/test_views.py b/api_tests/search/views/test_views.py index 4fc5e0e1196..7ebf6e769d0 100644 --- a/api_tests/search/views/test_views.py +++ b/api_tests/search/views/test_views.py @@ -50,6 +50,7 @@ def collection_public(self, user): return CollectionFactory(creator=user, provider=CollectionProviderFactory(), is_public=True, status_choices=['', 'asdf', 'lkjh'], collected_type_choices=['', 'asdf', 'lkjh'], issue_choices=['', '0', '1', '2'], volume_choices=['', '0', '1', '2'], + disease_choices=['illness'], data_type_choices=['realness'], program_area_choices=['', 'asdf', 'lkjh']) @pytest.fixture() @@ -1000,3 +1001,22 @@ def test_POST_search_collections( assert res.json['links']['meta']['total'] == 1 assert len(res.json['data']) == 1 assert res.json['data'][0]['id'] == node_with_abstract._id + + def test_POST_search_collections_disease_data_type( + self, app, url_collection_search, user, node_one, node_two, collection_public, + node_with_abstract, node_private, registration_collection, registration_one, + registration_two, registration_private, reg_with_abstract): + + collection_public.collect_object(node_one, user, disease='illness', data_type='realness') + collection_public.collect_object(node_two, user, data_type='realness') + + payload = self.post_payload(disease='illness') + res = app.post_json_api(url_collection_search, payload) + assert res.status_code == 200 + assert res.json['links']['meta']['total'] == 1 + + payload = self.post_payload(dataType='realness') + res = app.post_json_api(url_collection_search, payload) + assert res.status_code == 200 + assert res.json['links']['meta']['total'] == 2 + assert len(res.json['data']) == 2 diff --git a/osf/migrations/0017_auto_20231212_1843.py b/osf/migrations/0017_auto_20231212_1843.py new file mode 100644 index 00000000000..5c13864dfc7 --- /dev/null +++ b/osf/migrations/0017_auto_20231212_1843.py @@ -0,0 +1,34 @@ +# Generated by Django 3.2.17 on 2023-12-12 18:43 + +import 
django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0016_auto_20230828_1810'), + ] + + operations = [ + migrations.AddField( + model_name='collection', + name='data_type_choices', + field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None), + ), + migrations.AddField( + model_name='collection', + name='disease_choices', + field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=127), blank=True, default=list, size=None), + ), + migrations.AddField( + model_name='collectionsubmission', + name='data_type', + field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127), + ), + migrations.AddField( + model_name='collectionsubmission', + name='disease', + field=models.CharField(blank=True, help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', max_length=127), + ), + ] diff --git a/osf/models/collection.py b/osf/models/collection.py index 36fabff27af..ca2620a5e7b 100644 --- a/osf/models/collection.py +++ b/osf/models/collection.py @@ -55,6 +55,8 @@ class Meta: program_area_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) school_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) study_design_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) + disease_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) + data_type_choices = ArrayField(models.CharField(max_length=127), blank=True, default=list) is_public = models.BooleanField(default=False, db_index=True) is_promoted = models.BooleanField(default=False, db_index=True) is_bookmark_collection = models.BooleanField(default=False, db_index=True) @@ -160,7 +162,7 @@ def has_permission(self, 
user, perm): def collect_object( self, obj, collector, collected_type=None, status=None, volume=None, issue=None, - program_area=None, school_type=None, study_design=None): + program_area=None, school_type=None, study_design=None, data_type=None, disease=None): """ Adds object to collection, creates CollectionSubmission reference Performs type / metadata validation. User permissions checked in view. @@ -177,6 +179,8 @@ def collect_object( program_area = program_area or '' school_type = school_type or '' study_design = study_design or '' + data_type = data_type or '' + disease = disease or '' if not self.collected_type_choices and collected_type: raise ValidationError('May not specify "type" for this collection') @@ -220,6 +224,18 @@ def collect_object( elif study_design not in self.study_design_choices: raise ValidationError(f'"{study_design}" is not an acceptable "study_design" for this collection') + if disease: + if not self.disease_choices: + raise ValidationError('May not specify "disease" for this collection') + elif disease not in self.disease_choices: + raise ValidationError(f'"{disease}" is not an acceptable "disease" for this collection') + + if data_type: + if not self.data_type_choices: + raise ValidationError('May not specify "data_type" for this collection') + elif data_type not in self.data_type_choices: + raise ValidationError(f'"{data_type}" is not an acceptable "data_type" for this collection') + if not any([isinstance(obj, t.model_class()) for t in self.collected_types.all()]): # Not all objects have a content_type_pk, have to look the other way. 
# Ideally, all objects would, and we could do: @@ -248,6 +264,8 @@ def collect_object( collection_submission.program_area = program_area collection_submission.school_type = school_type collection_submission.study_design = study_design + collection_submission.data_type = data_type + collection_submission.disease = disease collection_submission.save() return collection_submission diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py index d4819255991..963d38b116b 100644 --- a/osf/models/collection_submission.py +++ b/osf/models/collection_submission.py @@ -39,6 +39,16 @@ class Meta: program_area = models.CharField(blank=True, max_length=127) school_type = models.CharField(blank=True, max_length=127) study_design = models.CharField(blank=True, max_length=127) + disease = models.CharField( + help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', + blank=True, + max_length=127 + ) + data_type = models.CharField( + help_text='This field was added for use by Inflammatory Bowel Disease Genetics Consortium', + blank=True, + max_length=127 + ) machine_state = models.IntegerField( choices=CollectionSubmissionStates.int_field_choices(), default=CollectionSubmissionStates.IN_PROGRESS, diff --git a/website/project/views/node.py b/website/project/views/node.py index c2ee444ab88..70a47c1b4d8 100644 --- a/website/project/views/node.py +++ b/website/project/views/node.py @@ -911,6 +911,8 @@ def serialize_collections(collection_submissions, auth): 'node_id': collection_submission.guid._id, 'study_design': collection_submission.study_design, 'program_area': collection_submission.program_area, + 'disease': collection_submission.disease, + 'data_type': collection_submission.data_type, 'state': collection_submission.state.db_name, 'subjects': list(collection_submission.subjects.values_list('text', flat=True)), 'is_public': collection_submission.collection.is_public, diff --git a/website/search/elastic_search.py 
b/website/search/elastic_search.py index bc418d3c940..6b479c29de5 100644 --- a/website/search/elastic_search.py +++ b/website/search/elastic_search.py @@ -618,6 +618,8 @@ def serialize_collection_submission(collection_submission): 'programArea': collection_submission.program_area, 'schoolType': collection_submission.school_type, 'studyDesign': collection_submission.study_design, + 'disease': collection_submission.disease, + 'dataType': collection_submission.data_type, 'subjects': list(collection_submission.subjects.values_list('text', flat=True)), 'title': getattr(obj, 'title', ''), 'url': getattr(obj, 'url', ''), diff --git a/website/templates/project/project.mako b/website/templates/project/project.mako index d220853e1b9..356fa93a562 100644 --- a/website/templates/project/project.mako +++ b/website/templates/project/project.mako @@ -401,6 +401,20 @@ % endif
    + % if collection['disease'] and collection['data_type']: +
    + Disease: ${collection['disease']} |  Data Type: ${collection['data_type']} +
    + % elif collection['disease']: +
    + Disease: ${collection['disease']} +
    + % elif collection['data_type']: +
    + Data Type: ${collection['data_type']} +
    + % endif +
    % elif collection['state'] == 'pending' and user['is_contributor_or_group_member']: % if user['is_admin']: @@ -448,6 +462,19 @@ Program Area: ${collection['program_area']} % endif + % if collection['disease'] and collection['data_type']: +
    + Disease: ${collection['disease']} |  Data Type: ${collection['data_type']} +
    + % elif collection['disease']: +
    + Disease: ${collection['disease']} +
    + % elif collection['data_type']: +
    + Data Type: ${collection['data_type']} +
    + % endif
    % elif collection['state'] == 'rejected' and user['is_contributor_or_group_member']: % if user['is_admin']: From 54ad5d85d61b32326c1065f4fc61005248608df2 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 18 Dec 2023 09:46:20 -0500 Subject: [PATCH 13/23] Add merge migration --- osf/migrations/0018_merge_20231218_1446.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 osf/migrations/0018_merge_20231218_1446.py diff --git a/osf/migrations/0018_merge_20231218_1446.py b/osf/migrations/0018_merge_20231218_1446.py new file mode 100644 index 00000000000..f76317978f7 --- /dev/null +++ b/osf/migrations/0018_merge_20231218_1446.py @@ -0,0 +1,14 @@ +# Generated by Django 3.2.17 on 2023-12-18 14:46 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0017_alter_notabledomain_note'), + ('osf', '0017_auto_20231212_1843'), + ] + + operations = [ + ] From f9b2d2828031569db386a37e414b9144eb2e8ef3 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 8 Jan 2024 10:59:13 -0500 Subject: [PATCH 14/23] Update sitemap for preprint routes, file downloads [ENG-4919] --- osf_tests/test_generate_sitemap.py | 6 ++++-- scripts/generate_sitemap.py | 20 +++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/osf_tests/test_generate_sitemap.py b/osf_tests/test_generate_sitemap.py index 2b821945fd1..f8f8ab4a3ef 100644 --- a/osf_tests/test_generate_sitemap.py +++ b/osf_tests/test_generate_sitemap.py @@ -118,8 +118,10 @@ def all_included_links(self, user_admin_project_public, user_admin_project_priva project_preprint_osf.url, project_preprint_other.url, registration_active.url, - '/{}/'.format(preprint_osf._id), - '/preprints/{}/{}/'.format(provider_other._id, preprint_other._id), + '/preprints/{}/{}'.format(preprint_osf.provider._id, preprint_osf._id), + '/preprints/{}/{}'.format(provider_other._id, preprint_other._id), + '/{}/download/?format=pdf'.format(preprint_osf._id), + 
'/{}/download/?format=pdf'.format(preprint_other._id) ]) urls_to_include = [urljoin(settings.DOMAIN, item) for item in urls_to_include] diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py index 5249b4fb8a3..9db08928807 100644 --- a/scripts/generate_sitemap.py +++ b/scripts/generate_sitemap.py @@ -203,17 +203,31 @@ def generate(self): objs = (Preprint.objects.can_view() .select_related('node', 'provider', 'primary_file')) progress.start(objs.count() * 2, 'PREP: ') - osf = PreprintProvider.objects.get(_id='osf') for obj in objs: try: preprint_date = obj.modified.strftime('%Y-%m-%d') config = settings.SITEMAP_PREPRINT_CONFIG - preprint_url = obj.url - provider = obj.provider + preprint_url = os.path.join('preprints', obj.provider._id, obj._id) config['loc'] = urljoin(settings.DOMAIN, preprint_url) config['lastmod'] = preprint_date self.add_url(config) + # Preprint file urls + try: + file_config = settings.SITEMAP_PREPRINT_FILE_CONFIG + file_config['loc'] = urljoin( + settings.DOMAIN, + os.path.join( + obj._id, + 'download', + '?format=pdf' + ) + ) + file_config['lastmod'] = preprint_date + self.add_url(file_config) + except Exception as e: + self.log_errors(obj.primary_file, obj.primary_file._id, e) + except Exception as e: self.log_errors(obj, obj._id, e) progress.increment(2) From 900b33a3f3819d795f69948d845989eadc15bb8c Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Fri, 5 Jan 2024 12:20:48 -0500 Subject: [PATCH 15/23] use content instead of sanitized text for spam filter. 
--- addons/wiki/models.py | 6 +----- osf_tests/test_notable_domains.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/addons/wiki/models.py b/addons/wiki/models.py index 0c057342fd8..f2da7b41d9a 100644 --- a/addons/wiki/models.py +++ b/addons/wiki/models.py @@ -203,11 +203,7 @@ def check_spam(self): ) def _get_spam_content(self, node): - content = [] - content.append(self.raw_text(node)) - if not content: - return None - return ' '.join(content) + return self.content or None def clone_version(self, wiki_page, user): """Clone a node wiki page. diff --git a/osf_tests/test_notable_domains.py b/osf_tests/test_notable_domains.py index 78edd11e967..dde9970e4b7 100644 --- a/osf_tests/test_notable_domains.py +++ b/osf_tests/test_notable_domains.py @@ -268,7 +268,7 @@ def test_extract_domains_from_wiki__public_project_extracts_domains_on_wiki_save project = wiki_version.wiki_page.node project.is_public = True project.save() - wiki_version.content = 'This has a domain: https://cos.io' + wiki_version.content = '[EXTREME VIDEO] WATCH VIDEO' request_context.g.current_session = {'auth_user_id': project.creator._id} with mock.patch.object(spam_tasks.requests, 'head'): From a8ace8ec28a2a7bcebfd8730df7dc82f2510e2a9 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Wed, 10 Jan 2024 09:29:32 -0500 Subject: [PATCH 16/23] Avoid 401 when indexing withdrawn preprints --- osf_tests/test_generate_sitemap.py | 17 ++++++++++++++--- scripts/generate_sitemap.py | 28 +++++++++++++++------------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/osf_tests/test_generate_sitemap.py b/osf_tests/test_generate_sitemap.py index f8f8ab4a3ef..b8429cca77a 100644 --- a/osf_tests/test_generate_sitemap.py +++ b/osf_tests/test_generate_sitemap.py @@ -6,6 +6,7 @@ import tempfile import xml from future.moves.urllib.parse import urljoin +from django.utils import timezone from scripts import generate_sitemap from osf_tests.factories import (AuthUserFactory, ProjectFactory, 
RegistrationFactory, CollectionFactory, @@ -98,6 +99,15 @@ def preprint_osf(self, project_preprint_osf, user_admin_project_public, provider creator=user_admin_project_public, provider=provider_osf) + @pytest.fixture(autouse=True) + def preprint_withdrawn(self, project_preprint_osf, user_admin_project_public, provider_osf): + preprint = PreprintFactory(project=project_preprint_osf, + creator=user_admin_project_public, + provider=provider_osf) + preprint.date_withdrawn = timezone.now() + preprint.save() + return preprint + @pytest.fixture(autouse=True) def preprint_other(self, project_preprint_other, user_admin_project_public, provider_other): return PreprintFactory(project=project_preprint_other, @@ -107,8 +117,8 @@ def preprint_other(self, project_preprint_other, user_admin_project_public, prov @pytest.fixture(autouse=True) def all_included_links(self, user_admin_project_public, user_admin_project_private, project_registration_public, project_preprint_osf, project_preprint_other, - registration_active, provider_other, preprint_osf, - preprint_other): + registration_active, provider_other, provider_osf, + preprint_osf, preprint_other, preprint_withdrawn): # Return urls of all fixtures urls_to_include = [item['loc'] for item in settings.SITEMAP_STATIC_URLS] urls_to_include.extend([ @@ -118,8 +128,9 @@ def all_included_links(self, user_admin_project_public, user_admin_project_priva project_preprint_osf.url, project_preprint_other.url, registration_active.url, - '/preprints/{}/{}'.format(preprint_osf.provider._id, preprint_osf._id), + '/preprints/{}/{}'.format(provider_osf._id, preprint_osf._id), '/preprints/{}/{}'.format(provider_other._id, preprint_other._id), + '/preprints/{}/{}'.format(provider_osf._id, preprint_withdrawn._id), '/{}/download/?format=pdf'.format(preprint_osf._id), '/{}/download/?format=pdf'.format(preprint_other._id) ]) diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py index 9db08928807..a31a9febe1a 100644 --- 
a/scripts/generate_sitemap.py +++ b/scripts/generate_sitemap.py @@ -213,20 +213,22 @@ def generate(self): self.add_url(config) # Preprint file urls - try: - file_config = settings.SITEMAP_PREPRINT_FILE_CONFIG - file_config['loc'] = urljoin( - settings.DOMAIN, - os.path.join( - obj._id, - 'download', - '?format=pdf' + if not obj.is_retracted: + # Withdrawn preprints may be viewed but not downloaded + try: + file_config = settings.SITEMAP_PREPRINT_FILE_CONFIG + file_config['loc'] = urljoin( + settings.DOMAIN, + os.path.join( + obj._id, + 'download', + '?format=pdf' + ) ) - ) - file_config['lastmod'] = preprint_date - self.add_url(file_config) - except Exception as e: - self.log_errors(obj.primary_file, obj.primary_file._id, e) + file_config['lastmod'] = preprint_date + self.add_url(file_config) + except Exception as e: + self.log_errors(obj.primary_file, obj.primary_file._id, e) except Exception as e: self.log_errors(obj, obj._id, e) From e48061310064835028295ed76a2433ba1927f887 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Wed, 10 Jan 2024 10:29:08 -0500 Subject: [PATCH 17/23] Add actions to update domain notes --- osf/admin.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/osf/admin.py b/osf/admin.py index 40434c36b71..7b8257389c5 100644 --- a/osf/admin.py +++ b/osf/admin.py @@ -7,6 +7,7 @@ from django.http import HttpResponseRedirect from django.urls import reverse +from osf.external.spam.tasks import reclassify_domain_references from osf.models import OSFUser, Node, NotableDomain, NodeLicense from osf.models.notable_domain import DomainReference @@ -50,11 +51,40 @@ class NotableDomainAdmin(admin.ModelAdmin): list_display = ('domain', 'note', 'number_of_references') list_filter = ('note',) search_fields = ('domain',) + actions = ['make_ignored', 'make_excluded'] @admin.display(ordering='number_of_references') def number_of_references(self, obj): return obj.number_of_references + @admin.action(description='Mark selected as 
IGNORED') + def make_ignored(self, request, queryset): + signatures = [] + target_note = 3 # IGNORED + for obj in queryset: + signatures.append({ + 'notable_domain_id': obj.pk, + 'current_note': target_note, + 'previous_note': obj.note + }) + queryset.update(note=target_note) + for sig in signatures: + reclassify_domain_references.apply_async(kwargs=sig) + + @admin.action(description='Mark selected as EXCLUDED') + def make_excluded(self, request, queryset): + signatures = [] + target_note = 0 # EXCLUDE_FROM_ACCOUNT_CREATION_AND_CONTENT + for obj in queryset: + signatures.append({ + 'notable_domain_id': obj.pk, + 'current_note': target_note, + 'previous_note': obj.note + }) + queryset.update(note=target_note) + for sig in signatures: + reclassify_domain_references.apply_async(kwargs=sig) + def get_urls(self): urls = super().get_urls() return [ From 45029e47ff98d8ba57b8f3ab9ebf7be04c191cd0 Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Wed, 17 Jan 2024 16:00:45 -0500 Subject: [PATCH 18/23] fix: multiple funding awards from the same funder (#10512) would previously generate invalid xml for datacite, now should not. 
[ENG-5044] --- .../datacite/datacite_tree_walker.py | 53 +++++++++++-------- .../expected_metadata_files/file_full.turtle | 16 +++++- .../preprint_full.turtle | 16 +++++- .../project_full.datacite.json | 19 +++++++ .../project_full.datacite.xml | 10 ++++ .../project_full.turtle | 16 +++++- .../registration_full.turtle | 16 +++++- .../metadata/test_serialized_metadata.py | 16 +++++- 8 files changed, 131 insertions(+), 31 deletions(-) diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index 08990d466a9..bddc64fac6a 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -268,33 +268,42 @@ def _visit_dates(self, parent_el): def _visit_funding_references(self, parent_el): fundrefs_el = self.visit(parent_el, 'fundingReferences', is_list=True) + _visited_funders = set() + for _funding_award in sorted(self.basket[OSF.hasFunding]): + # datacite allows at most one funder per funding reference + _funder = next(self.basket[_funding_award:DCTERMS.contributor]) + self._funding_reference(fundrefs_el, _funder, _funding_award) + _visited_funders.add(_funder) for _funder in self.basket[OSF.funder]: - fundref_el = self.visit(fundrefs_el, 'fundingReference') - self.visit(fundref_el, 'funderName', text=next(self.basket[_funder:FOAF.name], '')) - funder_identifier = next(self.basket[_funder:DCTERMS.identifier], '') + if _funder not in _visited_funders: + self._funding_reference(fundrefs_el, _funder) + + def _funding_reference(self, fundrefs_el, funder, funding_award=None): + _fundref_el = self.visit(fundrefs_el, 'fundingReference') + self.visit(_fundref_el, 'funderName', text=next(self.basket[funder:FOAF.name], '')) + _funder_identifier = next(self.basket[funder:DCTERMS.identifier], '') + self.visit( + _fundref_el, + 'funderIdentifier', + text=_funder_identifier, + attrib={ + 'funderIdentifierType': 
self._funder_identifier_type(_funder_identifier), + }, + ) + if funding_award is not None: self.visit( - fundref_el, - 'funderIdentifier', - text=funder_identifier, + _fundref_el, + 'awardNumber', + text=next(self.basket[funding_award:OSF.awardNumber], ''), attrib={ - 'funderIdentifierType': self._funder_identifier_type(funder_identifier), + 'awardURI': ( + str(funding_award) + if isinstance(funding_award, rdflib.URIRef) + else '' + ) }, ) - for _funding_award in self.basket[OSF.hasFunding]: - if _funder in self.basket[_funding_award:DCTERMS.contributor]: - self.visit( - fundref_el, - 'awardNumber', - text=next(self.basket[_funding_award:OSF.awardNumber], ''), - attrib={ - 'awardURI': ( - str(_funding_award) - if isinstance(_funding_award, rdflib.URIRef) - else '' - ) - }, - ) - self.visit(fundref_el, 'awardTitle', text=next(self.basket[_funding_award:DCTERMS.title], '')) + self.visit(_fundref_el, 'awardTitle', text=next(self.basket[funding_award:DCTERMS.title], '')) def _visit_publication_year(self, parent_el, focus_iri): year_copyrighted = next(self.basket[focus_iri:DCTERMS.dateCopyrighted], None) diff --git a/osf_tests/metadata/expected_metadata_files/file_full.turtle b/osf_tests/metadata/expected_metadata_files/file_full.turtle index d04eca39e8e..4859b2bf84a 100644 --- a/osf_tests/metadata/expected_metadata_files/file_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/file_full.turtle @@ -28,8 +28,10 @@ dcterms:title "this is a project title!"@en ; dcterms:type ; owl:sameAs ; - osf:funder ; - osf:hasFunding . + osf:funder , + ; + osf:hasFunding , + . a osf:FileVersion ; dcterms:created "2123-05-04" ; @@ -46,6 +48,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . 
+ dcterms:identifier "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode" ; foaf:name "CC-By Attribution-NonCommercial-NoDerivatives 4.0 International" . @@ -53,6 +61,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a dcterms:Agent, foaf:Person ; dcterms:identifier "http://localhost:5000/w1ibb" ; diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle index 59943430882..10ae10a7741 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle @@ -50,8 +50,10 @@ dcterms:title "this is a project title!"@en ; dcterms:type ; owl:sameAs ; - osf:funder ; - osf:hasFunding . + osf:funder , + ; + osf:hasFunding , + . a skos:Concept ; skos:broader ; @@ -77,6 +79,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + rdfs:label "Dataset"@en . rdfs:label "Preprint"@en . @@ -106,6 +114,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a skos:Concept ; skos:inScheme ; skos:prefLabel "wibbble" . diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json index d77541c609e..43d3373c9f1 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json @@ -59,6 +59,25 @@ "funderIdentifierType": "Crossref Funder ID" }, "funderName": "Mx. 
Moneypockets" + }, + { + "awardNumber": { + "awardNumber": "2000000", + "awardURI": "https://moneypockets.example/millions-more" + }, + "awardTitle": "because reasons!", + "funderIdentifier": { + "funderIdentifier": "https://doi.org/10.$$$$", + "funderIdentifierType": "Crossref Funder ID" + }, + "funderName": "Mx. Moneypockets" + }, + { + "funderIdentifier": { + "funderIdentifier": "https://doi.org/10.$", + "funderIdentifierType": "Crossref Funder ID" + }, + "funderName": "Caring Fan" } ], "identifier": { diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml index 95f15129b3f..8cf7efb1221 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml @@ -42,6 +42,16 @@ 10000000 because reasons + + Mx. Moneypockets + https://doi.org/10.$$$$ + 2000000 + because reasons! + + + Caring Fan + https://doi.org/10.$ + http://localhost:5000/w5ibb diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle index 5fa0dad1229..4a601897f11 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle @@ -24,8 +24,10 @@ owl:sameAs ; dcat:accessService ; osf:contains ; - osf:funder ; - osf:hasFunding ; + osf:funder , + ; + osf:hasFunding , + ; osf:hostingInstitution ; osf:supplements . @@ -64,6 +66,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + a dcterms:Agent, foaf:Organization ; dcterms:identifier "https://cos.io/", @@ -88,6 +96,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . 
+ a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a dcterms:Agent, foaf:Person ; dcterms:identifier "http://localhost:5000/w1ibb" ; diff --git a/osf_tests/metadata/expected_metadata_files/registration_full.turtle b/osf_tests/metadata/expected_metadata_files/registration_full.turtle index 9101e9f64b3..2fe48ce7fae 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_full.turtle @@ -35,8 +35,10 @@ dcterms:title "this is a project title!"@en ; dcterms:type ; owl:sameAs ; - osf:funder ; - osf:hasFunding . + osf:funder , + ; + osf:hasFunding , + . a osf:FundingAward ; dcterms:contributor ; @@ -44,6 +46,12 @@ dcterms:title "because reasons" ; osf:awardNumber "10000000" . + a osf:FundingAward ; + dcterms:contributor ; + dcterms:identifier "https://moneypockets.example/millions-more" ; + dcterms:title "because reasons!" ; + osf:awardNumber "2000000" . + a dcterms:Agent, foaf:Organization ; dcterms:identifier "https://cos.io/", @@ -63,6 +71,10 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . + a dcterms:Agent ; + dcterms:identifier "https://doi.org/10.$" ; + foaf:name "Caring Fan" . + a dcterms:Agent, foaf:Person ; dcterms:identifier "http://localhost:5000/w1ibb" ; diff --git a/osf_tests/metadata/test_serialized_metadata.py b/osf_tests/metadata/test_serialized_metadata.py index ec9eb6b4af2..bc6b1387c60 100644 --- a/osf_tests/metadata/test_serialized_metadata.py +++ b/osf_tests/metadata/test_serialized_metadata.py @@ -222,13 +222,27 @@ def _setUp_full(self): 'language': 'en', 'resource_type_general': 'Dataset', 'funding_info': [ - { + { # full funding reference: 'funder_name': 'Mx. 
Moneypockets', 'funder_identifier': 'https://doi.org/10.$$$$', 'funder_identifier_type': 'Crossref Funder ID', 'award_number': '10000000', 'award_uri': 'https://moneypockets.example/millions', 'award_title': 'because reasons', + }, { # second funding award from the same funder: + 'funder_name': 'Mx. Moneypockets', + 'funder_identifier': 'https://doi.org/10.$$$$', + 'funder_identifier_type': 'Crossref Funder ID', + 'award_number': '2000000', + 'award_uri': 'https://moneypockets.example/millions-more', + 'award_title': 'because reasons!', + }, { # no award info, just a funder: + 'funder_name': 'Caring Fan', + 'funder_identifier': 'https://doi.org/10.$', + 'funder_identifier_type': 'Crossref Funder ID', + 'award_number': '', + 'award_uri': '', + 'award_title': '', }, ], }, auth=self.user) From 0c1ec2514e031c3b8bfd5da24e898f46c295a235 Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Thu, 18 Jan 2024 11:23:10 -0500 Subject: [PATCH 19/23] [ENG-4335] subjects on projects (#10324) * add root-level subjects list * add NodeSerializer.subjects_acceptable * simplify query --- api/nodes/serializers.py | 6 ++++++ api/subjects/urls.py | 1 + api/subjects/views.py | 4 +++- api_tests/base/test_serializers.py | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/api/nodes/serializers.py b/api/nodes/serializers.py index 918f156ce3d..b4b9e27a50b 100644 --- a/api/nodes/serializers.py +++ b/api/nodes/serializers.py @@ -537,6 +537,12 @@ class NodeSerializer(TaxonomizableSerializerMixin, JSONAPISerializer): related_view_kwargs={'node_id': '<_id>'}, ) + subjects_acceptable = HideIfRegistration(RelationshipField( + related_view='subjects:subject-list', + related_view_kwargs={}, + read_only=True, + )) + @property def subjects_related_view(self): # Overrides TaxonomizableSerializerMixin diff --git a/api/subjects/urls.py b/api/subjects/urls.py index e80581e179d..31b6d8d4505 100644 --- a/api/subjects/urls.py +++ b/api/subjects/urls.py @@ -5,6 +5,7 @@ app_name = 'osf' urlpatterns 
= [ + re_path(r'^$', views.SubjectList.as_view(), name=views.SubjectList.view_name), re_path(r'^(?P\w+)/$', views.SubjectDetail.as_view(), name=views.SubjectDetail.view_name), re_path(r'^(?P\w+)/children/$', views.SubjectChildrenList.as_view(), name=views.SubjectChildrenList.view_name), ] diff --git a/api/subjects/views.py b/api/subjects/views.py index 2b5ff3deae8..281f5e18019 100644 --- a/api/subjects/views.py +++ b/api/subjects/views.py @@ -108,7 +108,9 @@ class SubjectList(JSONAPIBaseView, generics.ListAPIView, ListFilterMixin): ordering = ('is_other', '-id',) def get_default_queryset(self): - return optimize_subject_query(Subject.objects.all()) + return optimize_subject_query( + Subject.objects.filter(bepress_subject__isnull=True), + ) def get_queryset(self): return self.get_queryset_from_request() diff --git a/api_tests/base/test_serializers.py b/api_tests/base/test_serializers.py index d523263b582..701bab085bd 100644 --- a/api_tests/base/test_serializers.py +++ b/api_tests/base/test_serializers.py @@ -196,7 +196,7 @@ def test_registration_serializer(self): 'subjects', 'wiki_enabled'] # fields that do not appear on registrations - non_registration_fields = ['registrations', 'draft_registrations', 'templated_by_count', 'settings', 'storage', 'children', 'groups'] + non_registration_fields = ['registrations', 'draft_registrations', 'templated_by_count', 'settings', 'storage', 'children', 'groups', 'subjects_acceptable'] for field in NodeSerializer._declared_fields: assert_in(field, RegistrationSerializer._declared_fields) From 754e0627c6d903314f13c3852b029c8e7fdf0ab0 Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Thu, 18 Jan 2024 11:25:35 -0500 Subject: [PATCH 20/23] [ENG-5011] Subject.get_semantic_iri use the iri for a subject's bepress synonym only when it has the same text -- it was instead doing the opposite --- .../subjects/views/test_subject_detail.py | 1 + osf/metadata/osf_gathering.py | 2 +- osf/models/subject.py | 2 +- .../preprint_basic.turtle | 24 
+++++++++---------- .../preprint_full.turtle | 24 +++++++++---------- osf_tests/metadata/test_osf_gathering.py | 10 ++++---- tests/test_subjects.py | 16 +++++++++++++ 7 files changed, 48 insertions(+), 31 deletions(-) diff --git a/api_tests/subjects/views/test_subject_detail.py b/api_tests/subjects/views/test_subject_detail.py index ca7bd592666..20a96c758ed 100644 --- a/api_tests/subjects/views/test_subject_detail.py +++ b/api_tests/subjects/views/test_subject_detail.py @@ -44,6 +44,7 @@ def test_get_subject_detail(self, app, url_subject_detail, subject, subject_chil assert 'parent' in data['relationships'] assert data['relationships']['parent']['data'] is None assert data['relationships']['children']['links']['related']['meta']['count'] == 2 + assert data['links']['iri'] == subject.get_semantic_iri() # Follow children link children_link = data['relationships']['children']['links']['related']['href'] diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py index 268fb6cc733..fb8dc55b8f5 100644 --- a/osf/metadata/osf_gathering.py +++ b/osf/metadata/osf_gathering.py @@ -539,7 +539,7 @@ def _subject_triples(dbsubject, *, child_ref=None, related_ref=None): _is_bepress = (not dbsubject.bepress_subject) _is_distinct_from_bepress = (dbsubject.text != dbsubject.bepress_text) if _is_bepress or _is_distinct_from_bepress: - _subject_ref = rdflib.URIRef(dbsubject.absolute_api_v2_subject_url) + _subject_ref = rdflib.URIRef(dbsubject.get_semantic_iri()) yield (DCTERMS.subject, _subject_ref) yield (_subject_ref, RDF.type, SKOS.Concept) yield (_subject_ref, SKOS.prefLabel, dbsubject.text) diff --git a/osf/models/subject.py b/osf/models/subject.py index f2b5bdeaa48..ff96d0bbb3e 100644 --- a/osf/models/subject.py +++ b/osf/models/subject.py @@ -57,7 +57,7 @@ def get_absolute_url(self): def get_semantic_iri(self) -> str: _identified_subject = ( self.bepress_subject - if self.bepress_subject and (self.text != self.bepress_subject.text) + if self.bepress_subject and 
(self.text == self.bepress_subject.text) else self ) return _identified_subject.absolute_api_v2_subject_url.rstrip('/') diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle index a7e0e717a26..1218c1054a7 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle @@ -15,10 +15,10 @@ "https://doi.org/11.pp/FK2osf.io/w4ibb" ; dcterms:modified "2123-05-04" ; dcterms:publisher ; - dcterms:subject , - , - , - ; + dcterms:subject , + , + , + ; dcterms:title "this is a preprint title!" ; dcterms:type ; owl:sameAs ; @@ -50,11 +50,11 @@ dcterms:title "this is a project title!" ; owl:sameAs . - a skos:Concept ; - skos:broader ; + a skos:Concept ; + skos:broader ; skos:inScheme ; skos:prefLabel "wobble" ; - skos:related . + skos:related . a dcterms:Agent, foaf:Organization ; @@ -75,20 +75,20 @@ a skos:ConceptScheme ; dcterms:title "preprovi" . - a skos:Concept ; + a skos:Concept ; skos:inScheme ; skos:prefLabel "wibble" ; - skos:related . + skos:related . - a skos:Concept ; - skos:broader ; + a skos:Concept ; + skos:broader ; skos:inScheme ; skos:prefLabel "wobbble" . a skos:ConceptScheme ; dcterms:title "bepress Digital Commons Three-Tiered Taxonomy" . - a skos:Concept ; + a skos:Concept ; skos:inScheme ; skos:prefLabel "wibbble" . diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle index 59943430882..9f23bd22895 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle @@ -15,10 +15,10 @@ "https://doi.org/11.pp/FK2osf.io/w4ibb" ; dcterms:modified "2123-05-04" ; dcterms:publisher ; - dcterms:subject , - , - , - ; + dcterms:subject , + , + , + ; dcterms:title "this is a preprint title!" 
; dcterms:type ; owl:sameAs ; @@ -53,11 +53,11 @@ osf:funder ; osf:hasFunding . - a skos:Concept ; - skos:broader ; + a skos:Concept ; + skos:broader ; skos:inScheme ; skos:prefLabel "wobble" ; - skos:related . + skos:related . a dcterms:Agent, foaf:Organization ; @@ -89,13 +89,13 @@ a skos:ConceptScheme ; dcterms:title "preprovi" . - a skos:Concept ; + a skos:Concept ; skos:inScheme ; skos:prefLabel "wibble" ; - skos:related . + skos:related . - a skos:Concept ; - skos:broader ; + a skos:Concept ; + skos:broader ; skos:inScheme ; skos:prefLabel "wobbble" . @@ -106,7 +106,7 @@ dcterms:identifier "https://doi.org/10.$$$$" ; foaf:name "Mx. Moneypockets" . - a skos:Concept ; + a skos:Concept ; skos:inScheme ; skos:prefLabel "wibbble" . diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py index a0b95d51d94..54fe1fdc302 100644 --- a/osf_tests/metadata/test_osf_gathering.py +++ b/osf_tests/metadata/test_osf_gathering.py @@ -348,7 +348,7 @@ def test_gather_subjects(self): assert_triples(osf_gathering.gather_subjects(self.projectfocus), set()) _bloo_subject = factories.SubjectFactory(text='Bloomy', provider=_osf_provider) self.project.set_subjects([[_bloo_subject._id]], auth=Auth(self.user__admin)) - _bloo_iri = URIRef(_bloo_subject.absolute_api_v2_subject_url) + _bloo_iri = URIRef(_bloo_subject.get_semantic_iri()) _bepress_iri = rdflib.URIRef('https://bepress.com/reference_guide_dc/disciplines/') assert_triples(osf_gathering.gather_subjects(self.projectfocus), { (self.projectfocus.iri, DCTERMS.subject, _bloo_iri), @@ -368,10 +368,10 @@ def test_gather_subjects(self): [_customchild_subj._id, _customparent_subj._id], [_bloo_subject._id], ], auth=Auth(self.user__admin)) - _parent_iri = URIRef(_parent_subj.absolute_api_v2_subject_url) - _child_iri = URIRef(_child_subj.absolute_api_v2_subject_url) - _customparent_iri = URIRef(_customparent_subj.absolute_api_v2_subject_url) - _customchild_iri = 
URIRef(_customchild_subj.absolute_api_v2_subject_url) + _parent_iri = URIRef(_parent_subj.get_semantic_iri()) + _child_iri = URIRef(_child_subj.get_semantic_iri()) + _customparent_iri = URIRef(_customparent_subj.get_semantic_iri()) + _customchild_iri = URIRef(_customchild_subj.get_semantic_iri()) _customtax_iri = URIRef(f'{self.registration.provider.absolute_api_v2_url}subjects/') assert_triples(osf_gathering.gather_subjects(self.registrationfocus), { (self.registrationfocus.iri, DCTERMS.subject, _bloo_iri), diff --git a/tests/test_subjects.py b/tests/test_subjects.py index 68a39c2c31d..f3f246a7f7a 100644 --- a/tests/test_subjects.py +++ b/tests/test_subjects.py @@ -175,3 +175,19 @@ def test_path(self): assert self.bepress_child.path == 'bepress|BePress Text|BePress Child' assert self.other_subj.path == 'asdf|Other Text' assert self.other_child.path == 'asdf|Other Text|Other Child' + + def test_get_semantic_iri(self): + _bepress_iri = self.bepress_subj.get_semantic_iri() + _other_iri = self.other_subj.get_semantic_iri() + assert _bepress_iri != _other_iri + assert _bepress_iri.endswith(self.bepress_subj._id) + assert _other_iri.endswith(self.other_subj._id) + + # if a subject has the exact same text as its bepress synonym, expect the bepress subject iri + _sametext_subj = SubjectFactory( + text=self.bepress_subj.text, + bepress_subject=self.bepress_subj, + provider=self.asdf_provider, + ) + _sametext_iri = _sametext_subj.get_semantic_iri() + assert _bepress_iri == _sametext_iri From e6b0e29a7582d795695726ad386b9c46d101f2bb Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Tue, 23 Jan 2024 12:58:14 -0500 Subject: [PATCH 21/23] [ENG-3696] Make gotoFileEvents always open in new tab (#10482) Co-authored-by: John Tordoff <> --- website/static/js/fangorn.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/website/static/js/fangorn.js b/website/static/js/fangorn.js index f346789e938..ba4e159d716 100644 --- a/website/static/js/fangorn.js +++ 
b/website/static/js/fangorn.js @@ -85,8 +85,6 @@ var OPERATIONS = { } }; -// Cross browser key codes for the Command key -var COMMAND_KEYS = [224, 17, 91, 93]; var ESCAPE_KEY = 27; var ENTER_KEY = 13; @@ -1573,12 +1571,7 @@ function gotoFileEvent (item, toUrl) { } } } - - if (COMMAND_KEYS.indexOf(tb.pressedKey) !== -1) { - window.open(fileurl, '_blank'); - } else { - window.open(fileurl, '_self'); - } + window.open(fileurl, '_blank'); } /** From 57547043c3ef530572f8ae9553d4151630996313 Mon Sep 17 00:00:00 2001 From: Mariia Lychko <95318818+ly-mariia@users.noreply.github.com> Date: Thu, 25 Jan 2024 20:49:38 +0200 Subject: [PATCH 22/23] [ENG-5208]: Fix python bootstrapping in docker build (#10518) ## Purpose Fixed issue with Python installation. ## Changes Updated docker file step. --- Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index e3ef594b54a..ace0492965a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,10 +26,8 @@ RUN apk add --no-cache --virtual .run-deps \ libevent \ && yarn global add bower -RUN apk add curl -RUN curl https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py -RUN python3 get-pip.py --force-reinstall pip==21.0 -RUN apk del curl +RUN python3 -m ensurepip && \ + pip3 install --upgrade pip==21.0 WORKDIR /code From 13f633db88ea7403ccd8ff0ec83f800f0b992360 Mon Sep 17 00:00:00 2001 From: Mariia Lychko <95318818+ly-mariia@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:45:14 +0200 Subject: [PATCH 23/23] ENG-5208 (#10522) ## Purpose Error during command run. ## Changes Updated the instructions to add a new command in case of an error during 'Populate preprint, registration, and collection providers' step. 
--- README-docker-compose.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README-docker-compose.md b/README-docker-compose.md index 7cb95efd75f..2bf9df3d26f 100644 --- a/README-docker-compose.md +++ b/README-docker-compose.md @@ -229,6 +229,8 @@ - Populate preprint, registration, and collection providers: - After resetting your database or with a new install, the required providers and subjects will be created automatically **when you run migrations.** To create more: - `docker-compose run --rm web python3 manage.py populate_fake_providers` + - _NOTE: If you encounter an error about missing data when running the `populate_fake_providers` command, fix it by running the `update_taxonomies` command:_ + - `docker-compose run --rm web python3 -m scripts.update_taxonomies` - Populate citation styles - Needed for api v2 citation style rendering. - `docker-compose run --rm web python3 -m scripts.parse_citation_styles`