From f0323f9207b6e059b9360eb0e45c45f17922d18c Mon Sep 17 00:00:00 2001 From: Longze Chen Date: Thu, 17 Aug 2023 19:21:35 -0400 Subject: [PATCH] [ENG-4624] S3 Improvements (#10416) * Allow s3 to use subfolders as the root folder of a files addon * use colon delineation for s3 buckets, remove bucket_name query_param * fix issue with over mocking Gitlab messing with serializer method * revert serializer for correct id * de-ordinaryize the s3 calling format this allows non us-east-1 regions to work for s3 * add migration for s3 folder_ids and names * change migration to management command * fix tests for management command * flake8 fixes * reintroduce OrdinaryCallingFormat and remove from get_bucket * make migration allow for already converted folder_ids and folder_names * make get bucket names use the correct calling format * clean-up and revert old changes * use boto3 to access bucket regions using V4 s3 auth * move add delim command to management command dir for s3 improvment --------- Co-authored-by: John Tordoff <> Co-authored-by: John Tordoff --- addons/s3/models.py | 75 ++++++++++++------ addons/s3/requirements.txt | 1 + addons/s3/static/s3NodeConfig.js | 8 +- addons/s3/tests/test_model.py | 15 +++- addons/s3/utils.py | 31 ++++++++ addons/s3/views.py | 4 +- api/addons/serializers.py | 2 +- api_tests/nodes/views/test_node_addons.py | 4 +- .../commands/add_colon_delim_to_s3_buckets.py | 77 +++++++++++++++++++ osf_tests/test_s3_folder_migration.py | 41 ++++++++++ 10 files changed, 222 insertions(+), 36 deletions(-) create mode 100644 osf/management/commands/add_colon_delim_to_s3_buckets.py create mode 100644 osf_tests/test_s3_folder_migration.py diff --git a/addons/s3/models.py b/addons/s3/models.py index b42a1552696..3d640b6a861 100644 --- a/addons/s3/models.py +++ b/addons/s3/models.py @@ -8,11 +8,18 @@ from addons.base import exceptions from addons.s3.provider import S3Provider from addons.s3.serializer import S3Serializer -from addons.s3.settings import (BUCKET_LOCATIONS, - ENCRYPT_UPLOADS_DEFAULT) -from addons.s3.utils import (bucket_exists, - get_bucket_location_or_error, - get_bucket_names) +from addons.s3.settings import ( + BUCKET_LOCATIONS, + ENCRYPT_UPLOADS_DEFAULT +) +from addons.s3.utils import ( + bucket_exists, + get_bucket_location_or_error, + get_bucket_names, + get_bucket_prefixes +) +from website.util import api_v2_url + class S3FileNode(BaseFileNode): _provider = 's3' @@ -56,7 +63,9 @@ def display_name(self): return u'{0}: {1}'.format(self.config.full_name, self.folder_id) def set_folder(self, folder_id, auth): - if not bucket_exists(self.external_account.oauth_key, self.external_account.oauth_secret, folder_id): + bucket_name = folder_id.split(':')[0] + + if not bucket_exists(self.external_account.oauth_key, self.external_account.oauth_secret, bucket_name): error_message = ('We are having trouble connecting to that bucket. ' 'Try a different one.') raise exceptions.InvalidFolderError(error_message) @@ -66,7 +75,7 @@ def set_folder(self, folder_id, auth): bucket_location = get_bucket_location_or_error( self.external_account.oauth_key, self.external_account.oauth_secret, - folder_id + bucket_name ) try: bucket_location = BUCKET_LOCATIONS[bucket_location] @@ -78,29 +87,43 @@ def set_folder(self, folder_id, auth): self.folder_name = '{} ({})'.format(folder_id, bucket_location) self.save() - self.nodelogger.log(action='bucket_linked', extra={'bucket': str(folder_id)}, save=True) + self.nodelogger.log(action='bucket_linked', extra={'bucket': bucket_name, 'path': self.folder_id}, save=True) - def get_folders(self, **kwargs): - # This really gets only buckets, not subfolders, - # as that's all we want to be linkable on a node. - try: + def get_folders(self, path, folder_id): + """ + Our S3 implementation allows for folder_id to be a bucket's root, or a subfolder in that bucket. + """ + # This is the root, so list all buckets. + if not folder_id: buckets = get_bucket_names(self) - except Exception: - raise exceptions.InvalidAuthError() - return [ - { + return [{ 'addon': 's3', 'kind': 'folder', - 'id': bucket, + 'id': f'{bucket}:/', 'name': bucket, - 'path': bucket, + 'bucket_name': bucket, + 'path': '/', 'urls': { - 'folders': '' + 'folders': api_v2_url( + f'nodes/{self.owner._id}/addons/s3/folders/', + params={ + 'id': bucket, + 'bucket_name': bucket, + } + ), } - } - for bucket in buckets - ] + } for bucket in buckets] + # This is for a directory for a specific bucket, folders (Prefixes), but not files (Keys) returned, because + # these we can only set folders as our base folder_id + else: + bucket_name, _, path = folder_id.partition(':/') + return get_bucket_prefixes( + self.external_account.oauth_key, + self.external_account.oauth_secret, + prefix=path, + bucket_name=bucket_name + ) @property def complete(self): @@ -135,10 +158,16 @@ def serialize_waterbutler_credentials(self): } def serialize_waterbutler_settings(self): + """ + We use the folder id to hold the bucket location + """ if not self.folder_id: raise exceptions.AddonError('Cannot serialize settings for S3 addon') + + bucket_name = self.folder_id.split(':')[0] return { - 'bucket': self.folder_id, + 'bucket': bucket_name, + 'id': self.folder_id, 'encrypt_uploads': self.encrypt_uploads } diff --git a/addons/s3/requirements.txt b/addons/s3/requirements.txt index 8e6ed87adf9..ed12f60676c 100644 --- a/addons/s3/requirements.txt +++ b/addons/s3/requirements.txt @@ -1 +1,2 @@ boto==2.38.0 +boto3==1.4.7 diff --git a/addons/s3/static/s3NodeConfig.js b/addons/s3/static/s3NodeConfig.js index f5369691dfb..521e4749db9 100644 --- a/addons/s3/static/s3NodeConfig.js +++ b/addons/s3/static/s3NodeConfig.js @@ -28,7 +28,7 @@ var s3FolderPickerViewModel = oop.extend(OauthAddonFolderPicker, { { // TreeBeard Options columnTitles: function() { return [{ - title: 'Buckets', + title: 'Buckets/Folders', width: '75%', sort: false }, { @@ -37,12 +37,6 @@ var s3FolderPickerViewModel = oop.extend(OauthAddonFolderPicker, { sort: false }]; }, - resolveToggle: function(item) { - return ''; - }, - resolveIcon: function(item) { - return m('i.fa.fa-folder-o', ' '); - }, }, tbOpts ); diff --git a/addons/s3/tests/test_model.py b/addons/s3/tests/test_model.py index b616a66c652..6d90760b87b 100644 --- a/addons/s3/tests/test_model.py +++ b/addons/s3/tests/test_model.py @@ -38,6 +38,14 @@ class TestNodeSettings(OAuthAddonNodeSettingsTestSuiteMixin, unittest.TestCase): NodeSettingsClass = NodeSettings UserSettingsFactory = S3UserSettingsFactory + def _node_settings_class_kwargs(self, node, user_settings): + return { + 'user_settings': self.user_settings, + 'folder_id': 'bucket_name:/path_goes_here/with_folder_id', + 'owner': self.node + } + + def test_registration_settings(self): registration = ProjectFactory() clone, message = self.node_settings.after_register( @@ -96,6 +104,9 @@ def test_set_folder(self, mock_location, mock_exists): def test_serialize_settings(self): settings = self.node_settings.serialize_waterbutler_settings() - expected = {'bucket': self.node_settings.folder_id, - 'encrypt_uploads': self.node_settings.encrypt_uploads} + expected = { + 'bucket': 'bucket_name', + 'encrypt_uploads': self.node_settings.encrypt_uploads, + 'id': 'bucket_name:/path_goes_here/with_folder_id' + } assert_equal(settings, expected) diff --git a/addons/s3/utils.py b/addons/s3/utils.py index 0738b5bf397..f07acae213b 100644 --- a/addons/s3/utils.py +++ b/addons/s3/utils.py @@ -1,6 +1,8 @@ import re +import logging from rest_framework import status as http_status +import boto3 from boto import exception from boto.s3.connection import S3Connection from boto.s3.connection import OrdinaryCallingFormat @@ -9,6 +11,8 @@ from addons.base.exceptions import InvalidAuthError, InvalidFolderError from addons.s3.settings import BUCKET_LOCATIONS +logger = logging.getLogger(__name__) + def connect_s3(access_key=None, secret_key=None, node_settings=None): """Helper to build an S3Connection object @@ -122,3 +126,30 @@ def get_bucket_location_or_error(access_key, secret_key, bucket_name): return connect_s3(access_key, secret_key).get_bucket(bucket_name, validate=False).get_location() except exception.S3ResponseError: raise InvalidFolderError() + + +def get_bucket_prefixes(access_key, secret_key, prefix, bucket_name): + s3 = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key + ) + + result = s3.list_objects(Bucket=bucket_name, Prefix=prefix, Delimiter='/') + folders = [] + for common_prefixes in result.get('CommonPrefixes', []): + key_name = common_prefixes.get('Prefix') + if key_name != prefix: + folders.append( + { + 'path': key_name, + 'id': f'{bucket_name}:/{key_name}', + 'folder_id': key_name, + 'kind': 'folder', + 'bucket_name': bucket_name, + 'name': key_name.split('/')[-2], + 'addon': 's3', + } + ) + + return folders diff --git a/addons/s3/views.py b/addons/s3/views.py index aba5ec8f63c..14255ab0de1 100644 --- a/addons/s3/views.py +++ b/addons/s3/views.py @@ -56,7 +56,9 @@ def _set_folder(node_addon, folder, auth): def s3_folder_list(node_addon, **kwargs): """ Returns all the subsequent folders under the folder id passed. """ - return node_addon.get_folders() + path = request.args.get('path', '') + folder_id = request.args.get('id', '') + return node_addon.get_folders(path=path, folder_id=folder_id) @must_be_logged_in def s3_add_user_account(auth, **kwargs): diff --git a/api/addons/serializers.py b/api/addons/serializers.py index bbdd175e72c..9ac7ac2bc12 100644 --- a/api/addons/serializers.py +++ b/api/addons/serializers.py @@ -22,7 +22,7 @@ def get_type(request): }) def get_absolute_url(self, obj): - if obj['addon'] in ('s3', 'figshare', 'github', 'mendeley'): + if obj['addon'] in ('figshare', 'github', 'mendeley'): # These addons don't currently support linking anything other # than top-level objects. return diff --git a/api_tests/nodes/views/test_node_addons.py b/api_tests/nodes/views/test_node_addons.py index 5163b33be13..cebd3f322e7 100644 --- a/api_tests/nodes/views/test_node_addons.py +++ b/api_tests/nodes/views/test_node_addons.py @@ -1013,8 +1013,8 @@ def _settings_kwargs(self, node, user_settings): def _mock_folder_result(self): return { 'name': 'a.bucket', - 'path': 'a.bucket', - 'id': 'a.bucket' + 'path': '/', + 'id': 'a.bucket:/' } @mock.patch('addons.s3.models.get_bucket_names') diff --git a/osf/management/commands/add_colon_delim_to_s3_buckets.py b/osf/management/commands/add_colon_delim_to_s3_buckets.py new file mode 100644 index 00000000000..be3a0c381e5 --- /dev/null +++ b/osf/management/commands/add_colon_delim_to_s3_buckets.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +import logging + +from django.core.management.base import BaseCommand +from django.apps import apps +from django.db.models import F, Value +from django.db.models.functions import Concat, Replace + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + """ + Adds Colon (':') delineators to s3 buckets to separate them from them from their subfolder, so `` + becomes `:/` , the root path. Folder names will also be updated to maintain consistency. + + """ + + def add_arguments(self, parser): + super().add_arguments(parser) + parser.add_argument( + '--reverse', + action='store_true', + dest='reverse', + help='Unsets date_retraction' + ) + + def handle(self, *args, **options): + reverse = options.get('reverse', False) + if reverse: + reverse_update_folder_names() + else: + update_folder_names() + + +def update_folder_names(): + NodeSettings = apps.get_model('addons_s3', 'NodeSettings') + + # Update folder_id for all records + NodeSettings.objects.exclude( + folder_name__contains=':/' + ).update( + folder_id=Concat(F('folder_id'), Value(':/')) + ) + + # Update folder_name for records containing '(' + NodeSettings.objects.filter( + folder_name__contains=' (' + ).exclude( + folder_name__contains=':/' + ).update( + folder_name=Replace(F('folder_name'), Value(' ('), Value(':/ (')) + ) + NodeSettings.objects.exclude( + folder_name__contains=':/' + ).exclude( + folder_name__contains=' (' + ).update( + folder_name=Concat(F('folder_name'), Value(':/')) + ) + logger.info('Update Folder Names/IDs complete') + + +def reverse_update_folder_names(): + NodeSettings = apps.get_model('addons_s3', 'NodeSettings') + + # Reverse update folder_id for all records + NodeSettings.objects.update(folder_id=Replace(F('folder_id'), Value(':/'), Value(''))) + + # Reverse update folder_name for records containing ':/ (' + NodeSettings.objects.filter(folder_name__contains=':/ (').update( + folder_name=Replace(F('folder_name'), Value(':/ ('), Value(' (')) + ) + NodeSettings.objects.filter(folder_name__contains=':/').update( + folder_name=Replace(F('folder_name'), Value(':/'), Value('')) + ) + logger.info('Reverse Update Folder Names/IDs complete') diff --git a/osf_tests/test_s3_folder_migration.py b/osf_tests/test_s3_folder_migration.py new file mode 100644 index 00000000000..067e63c34a3 --- /dev/null +++ b/osf_tests/test_s3_folder_migration.py @@ -0,0 +1,41 @@ +import pytest +from osf.management.commands.add_colon_delim_to_s3_buckets import update_folder_names, reverse_update_folder_names + +@pytest.mark.django_db +class TestUpdateFolderNamesMigration: + + def test_update_folder_names_migration(self): + from addons.s3.models import NodeSettings + from addons.s3.tests.factories import S3NodeSettingsFactory + # Create sample folder names and IDs + S3NodeSettingsFactory(folder_name='Folder 1 (Location 1)', folder_id='folder1') + S3NodeSettingsFactory(folder_name='Folder 2', folder_id='folder2') + S3NodeSettingsFactory(folder_name='Folder 3 (Location 3)', folder_id='folder3') + S3NodeSettingsFactory(folder_name='Folder 4:/ (Location 4)', folder_id='folder4:/') + + update_folder_names() + + # Verify updated folder names and IDs + updated_folder_names_ids = NodeSettings.objects.values_list('folder_name', 'folder_id') + expected_updated_folder_names_ids = { + ('Folder 1:/ (Location 1)', 'folder1:/'), + ('Folder 2:/', 'folder2:/'), + ('Folder 3:/ (Location 3)', 'folder3:/'), + ('Folder 3:/ (Location 3)', 'folder3:/'), + ('Folder 4:/ (Location 4)', 'folder4:/'), + + } + assert set(updated_folder_names_ids) == expected_updated_folder_names_ids + + # Reverse the migration + reverse_update_folder_names() + + # Verify the folder names and IDs after the reverse migration + reverted_folder_names_ids = NodeSettings.objects.values_list('folder_name', 'folder_id') + expected_reverted_folder_names_ids = { + ('Folder 1 (Location 1)', 'folder1'), + ('Folder 2', 'folder2'), + ('Folder 3 (Location 3)', 'folder3'), + ('Folder 4 (Location 4)', 'folder4'), + } + assert set(reverted_folder_names_ids) == expected_reverted_folder_names_ids