[ENG-4624] S3 Improvements (#10416)
* Allow s3 to use subfolders as the root folder of a files addon
* use colon delineation for s3 buckets, remove bucket_name query_param
* fix issue with over-mocking Gitlab messing with serializer method
* revert serializer for correct id
* de-ordinaryize the s3 calling format; this allows non-us-east-1 regions to work for s3
* add migration for s3 folder_ids and names
* change migration to management command
* fix tests for management command
* flake8 fixes
* reintroduce OrdinaryCallingFormat and remove from get_bucket
* make migration allow for already converted folder_ids and folder_names
* make get bucket names use the correct calling format
* clean-up and revert old changes
* use boto3 to access bucket regions using V4 s3 auth
* move add delim command to management command dir for s3 improvement

---------
Co-authored-by: John Tordoff <>
Co-authored-by: John Tordoff <[email protected]>
cslzchen authored Aug 17, 2023
1 parent 59f4037 commit f0323f9
Showing 10 changed files with 222 additions and 36 deletions.
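
The core convention this commit introduces: an S3 folder_id is now `<bucket_name>:/<prefix>` instead of a bare bucket name, which is what lets a subfolder act as the root of the files addon. A minimal sketch of how the new code takes that identifier apart (sample values are illustrative, not taken from the diff):

    # The new folder_id format: bucket name, a ':/' delimiter, then an optional prefix.
    folder_id = 'my-bucket:/photos/2023/'

    # Bucket name only, as in serialize_waterbutler_settings() and set_folder().
    bucket_name = folder_id.split(':')[0]             # 'my-bucket'

    # Bucket plus in-bucket path, as in get_folders().
    bucket, _, prefix = folder_id.partition(':/')     # ('my-bucket', ':/', 'photos/2023/')

    # A bucket root is simply '<bucket>:/', i.e. an empty prefix.
    assert 'my-bucket:/'.partition(':/')[2] == ''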
75 changes: 52 additions & 23 deletions addons/s3/models.py
@@ -8,11 +8,18 @@
from addons.base import exceptions
from addons.s3.provider import S3Provider
from addons.s3.serializer import S3Serializer
from addons.s3.settings import (BUCKET_LOCATIONS,
ENCRYPT_UPLOADS_DEFAULT)
from addons.s3.utils import (bucket_exists,
get_bucket_location_or_error,
get_bucket_names)
from addons.s3.settings import (
BUCKET_LOCATIONS,
ENCRYPT_UPLOADS_DEFAULT
)
from addons.s3.utils import (
bucket_exists,
get_bucket_location_or_error,
get_bucket_names,
get_bucket_prefixes
)
from website.util import api_v2_url


class S3FileNode(BaseFileNode):
_provider = 's3'
@@ -56,7 +63,9 @@ def display_name(self):
return u'{0}: {1}'.format(self.config.full_name, self.folder_id)

def set_folder(self, folder_id, auth):
if not bucket_exists(self.external_account.oauth_key, self.external_account.oauth_secret, folder_id):
bucket_name = folder_id.split(':')[0]

if not bucket_exists(self.external_account.oauth_key, self.external_account.oauth_secret, bucket_name):
error_message = ('We are having trouble connecting to that bucket. '
'Try a different one.')
raise exceptions.InvalidFolderError(error_message)
@@ -66,7 +75,7 @@ def set_folder(self, folder_id, auth):
bucket_location = get_bucket_location_or_error(
self.external_account.oauth_key,
self.external_account.oauth_secret,
folder_id
bucket_name
)
try:
bucket_location = BUCKET_LOCATIONS[bucket_location]
@@ -78,29 +87,43 @@ def set_folder(self, folder_id, auth):
self.folder_name = '{} ({})'.format(folder_id, bucket_location)
self.save()

self.nodelogger.log(action='bucket_linked', extra={'bucket': str(folder_id)}, save=True)
self.nodelogger.log(action='bucket_linked', extra={'bucket': bucket_name, 'path': self.folder_id}, save=True)

def get_folders(self, **kwargs):
# This really gets only buckets, not subfolders,
# as that's all we want to be linkable on a node.
try:
def get_folders(self, path, folder_id):
"""
Our S3 implementation allows folder_id to be a bucket's root or a subfolder within that bucket.
"""
# This is the root, so list all buckets.
if not folder_id:
buckets = get_bucket_names(self)
except Exception:
raise exceptions.InvalidAuthError()

return [
{
return [{
'addon': 's3',
'kind': 'folder',
'id': bucket,
'id': f'{bucket}:/',
'name': bucket,
'path': bucket,
'bucket_name': bucket,
'path': '/',
'urls': {
'folders': ''
'folders': api_v2_url(
f'nodes/{self.owner._id}/addons/s3/folders/',
params={
'id': bucket,
'bucket_name': bucket,
}
),
}
}
for bucket in buckets
]
} for bucket in buckets]
# This lists a directory inside a specific bucket: only folders (Prefixes) are returned, not files (Keys),
# because only folders can be set as the base folder_id.
else:
bucket_name, _, path = folder_id.partition(':/')
return get_bucket_prefixes(
self.external_account.oauth_key,
self.external_account.oauth_secret,
prefix=path,
bucket_name=bucket_name
)

@property
def complete(self):
@@ -135,10 +158,16 @@ def serialize_waterbutler_credentials(self):
}

def serialize_waterbutler_settings(self):
"""
We use the folder_id to hold both the bucket name and the path within that bucket
"""
if not self.folder_id:
raise exceptions.AddonError('Cannot serialize settings for S3 addon')

bucket_name = self.folder_id.split(':')[0]
return {
'bucket': self.folder_id,
'bucket': bucket_name,
'id': self.folder_id,
'encrypt_uploads': self.encrypt_uploads
}

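For reference, a hedged sketch of the WaterButler settings payload this now yields, assuming a NodeSettings whose folder_id is 'my-bucket:/photos/2023/' (names and values are illustrative):

    # Hypothetical example of the dict returned by serialize_waterbutler_settings().
    settings = node_settings.serialize_waterbutler_settings()
    assert settings == {
        'bucket': 'my-bucket',                    # bare bucket name, taken from before the colon
        'id': 'my-bucket:/photos/2023/',          # full folder_id: bucket plus subfolder path
        'encrypt_uploads': node_settings.encrypt_uploads,
    }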
1 change: 1 addition & 0 deletions addons/s3/requirements.txt
@@ -1 +1,2 @@
boto==2.38.0
boto3==1.4.7
8 changes: 1 addition & 7 deletions addons/s3/static/s3NodeConfig.js
@@ -28,7 +28,7 @@ var s3FolderPickerViewModel = oop.extend(OauthAddonFolderPicker, {
{ // TreeBeard Options
columnTitles: function() {
return [{
title: 'Buckets',
title: 'Buckets/Folders',
width: '75%',
sort: false
}, {
@@ -37,12 +37,6 @@ var s3FolderPickerViewModel = oop.extend(OauthAddonFolderPicker, {
sort: false
}];
},
resolveToggle: function(item) {
return '';
},
resolveIcon: function(item) {
return m('i.fa.fa-folder-o', ' ');
},
},
tbOpts
);
15 changes: 13 additions & 2 deletions addons/s3/tests/test_model.py
@@ -38,6 +38,14 @@ class TestNodeSettings(OAuthAddonNodeSettingsTestSuiteMixin, unittest.TestCase):
NodeSettingsClass = NodeSettings
UserSettingsFactory = S3UserSettingsFactory

def _node_settings_class_kwargs(self, node, user_settings):
return {
'user_settings': self.user_settings,
'folder_id': 'bucket_name:/path_goes_here/with_folder_id',
'owner': self.node
}


def test_registration_settings(self):
registration = ProjectFactory()
clone, message = self.node_settings.after_register(
@@ -96,6 +104,9 @@ def test_set_folder(self, mock_location, mock_exists):

def test_serialize_settings(self):
settings = self.node_settings.serialize_waterbutler_settings()
expected = {'bucket': self.node_settings.folder_id,
'encrypt_uploads': self.node_settings.encrypt_uploads}
expected = {
'bucket': 'bucket_name',
'encrypt_uploads': self.node_settings.encrypt_uploads,
'id': 'bucket_name:/path_goes_here/with_folder_id'
}
assert_equal(settings, expected)
31 changes: 31 additions & 0 deletions addons/s3/utils.py
@@ -1,6 +1,8 @@
import re
import logging
from rest_framework import status as http_status

import boto3
from boto import exception
from boto.s3.connection import S3Connection
from boto.s3.connection import OrdinaryCallingFormat
@@ -9,6 +11,8 @@
from addons.base.exceptions import InvalidAuthError, InvalidFolderError
from addons.s3.settings import BUCKET_LOCATIONS

logger = logging.getLogger(__name__)


def connect_s3(access_key=None, secret_key=None, node_settings=None):
"""Helper to build an S3Connection object
@@ -122,3 +126,30 @@ def get_bucket_location_or_error(access_key, secret_key, bucket_name):
return connect_s3(access_key, secret_key).get_bucket(bucket_name, validate=False).get_location()
except exception.S3ResponseError:
raise InvalidFolderError()


def get_bucket_prefixes(access_key, secret_key, prefix, bucket_name):
s3 = boto3.client(
's3',
aws_access_key_id=access_key,
aws_secret_access_key=secret_key
)

result = s3.list_objects(Bucket=bucket_name, Prefix=prefix, Delimiter='/')
folders = []
for common_prefixes in result.get('CommonPrefixes', []):
key_name = common_prefixes.get('Prefix')
if key_name != prefix:
folders.append(
{
'path': key_name,
'id': f'{bucket_name}:/{key_name}',
'folder_id': key_name,
'kind': 'folder',
'bucket_name': bucket_name,
'name': key_name.split('/')[-2],
'addon': 's3',
}
)

return folders
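
A short usage sketch for get_bucket_prefixes, with placeholder credentials and bucket layout; the prefix is the part of folder_id after ':/', as produced in get_folders():

    # Hypothetical call: list the sub-prefixes directly under 'photos/' in 'my-bucket'.
    folders = get_bucket_prefixes(
        access_key='AKIA...',        # placeholder credentials
        secret_key='...',
        prefix='photos/',            # trailing '/' so the Delimiter groups only the next level
        bucket_name='my-bucket',
    )
    # Each entry mirrors the folder dicts built by get_folders(), e.g.:
    # {'path': 'photos/2023/', 'id': 'my-bucket:/photos/2023/', 'folder_id': 'photos/2023/',
    #  'kind': 'folder', 'bucket_name': 'my-bucket', 'name': '2023', 'addon': 's3'}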
4 changes: 3 additions & 1 deletion addons/s3/views.py
@@ -56,7 +56,9 @@ def _set_folder(node_addon, folder, auth):
def s3_folder_list(node_addon, **kwargs):
""" Returns all the subsequent folders under the folder id passed.
"""
return node_addon.get_folders()
path = request.args.get('path', '')
folder_id = request.args.get('id', '')
return node_addon.get_folders(path=path, folder_id=folder_id)

@must_be_logged_in
def s3_add_user_account(auth, **kwargs):
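Putting the two get_folders branches together, a hypothetical pair of calls mirroring what s3_folder_list now forwards (node_addon is an S3 NodeSettings instance; bucket and prefix names are placeholders):

    # Root level: folder_id is empty, so one entry per bucket is returned.
    node_addon.get_folders(path='', folder_id='')
    # -> [{'id': 'my-bucket:/', 'name': 'my-bucket', 'path': '/', ...}, ...]

    # Inside a bucket: folder_id carries the bucket and path, and get_bucket_prefixes() does the listing.
    node_addon.get_folders(path='/', folder_id='my-bucket:/')
    # -> [{'id': 'my-bucket:/photos/', 'name': 'photos', ...}, ...]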
2 changes: 1 addition & 1 deletion api/addons/serializers.py
@@ -22,7 +22,7 @@ def get_type(request):
})

def get_absolute_url(self, obj):
if obj['addon'] in ('s3', 'figshare', 'github', 'mendeley'):
if obj['addon'] in ('figshare', 'github', 'mendeley'):
# These addons don't currently support linking anything other
# than top-level objects.
return
4 changes: 2 additions & 2 deletions api_tests/nodes/views/test_node_addons.py
@@ -1013,8 +1013,8 @@ def _settings_kwargs(self, node, user_settings):
def _mock_folder_result(self):
return {
'name': 'a.bucket',
'path': 'a.bucket',
'id': 'a.bucket'
'path': '/',
'id': 'a.bucket:/'
}

@mock.patch('addons.s3.models.get_bucket_names')
77 changes: 77 additions & 0 deletions osf/management/commands/add_colon_delim_to_s3_buckets.py
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
import logging

from django.core.management.base import BaseCommand
from django.apps import apps
from django.db.models import F, Value
from django.db.models.functions import Concat, Replace

logger = logging.getLogger(__name__)


class Command(BaseCommand):
"""
Adds colon (':') delimiters to s3 buckets to separate them from their subfolders, so `<bucket_name>`
becomes `<bucket_name>:/`, the root path. Folder names will also be updated to maintain consistency.
"""

def add_arguments(self, parser):
super().add_arguments(parser)
parser.add_argument(
'--reverse',
action='store_true',
dest='reverse',
help='Reverses the update, removing the colon delimiters from folder_ids and folder_names'
)

def handle(self, *args, **options):
reverse = options.get('reverse', False)
if reverse:
reverse_update_folder_names()
else:
update_folder_names()


def update_folder_names():
NodeSettings = apps.get_model('addons_s3', 'NodeSettings')

# Update folder_id for all records
NodeSettings.objects.exclude(
folder_name__contains=':/'
).update(
folder_id=Concat(F('folder_id'), Value(':/'))
)

# Update folder_name for records containing '('
NodeSettings.objects.filter(
folder_name__contains=' ('
).exclude(
folder_name__contains=':/'
).update(
folder_name=Replace(F('folder_name'), Value(' ('), Value(':/ ('))
)
NodeSettings.objects.exclude(
folder_name__contains=':/'
).exclude(
folder_name__contains=' ('
).update(
folder_name=Concat(F('folder_name'), Value(':/'))
)
logger.info('Update Folder Names/IDs complete')


def reverse_update_folder_names():
NodeSettings = apps.get_model('addons_s3', 'NodeSettings')

# Reverse update folder_id for all records
NodeSettings.objects.update(folder_id=Replace(F('folder_id'), Value(':/'), Value('')))

# Reverse update folder_name for records containing ':/ ('
NodeSettings.objects.filter(folder_name__contains=':/ (').update(
folder_name=Replace(F('folder_name'), Value(':/ ('), Value(' ('))
)
NodeSettings.objects.filter(folder_name__contains=':/').update(
folder_name=Replace(F('folder_name'), Value(':/'), Value(''))
)
logger.info('Reverse Update Folder Names/IDs complete')
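
A hedged usage note: as a regular Django management command this would be invoked through manage.py or call_command, with the reverse flag undoing the rewrite; the record values in the comments are illustrative:

    from django.core.management import call_command

    # Equivalent to `python manage.py add_colon_delim_to_s3_buckets`.
    call_command('add_colon_delim_to_s3_buckets')                  # 'my-bucket' -> 'my-bucket:/'
    call_command('add_colon_delim_to_s3_buckets', reverse=True)    # 'my-bucket:/' -> 'my-bucket'

    # Rows whose folder_name already contains ':/' are excluded by the forward update,
    # so it is safe to re-run on partially converted data.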
41 changes: 41 additions & 0 deletions osf_tests/test_s3_folder_migration.py
@@ -0,0 +1,41 @@
import pytest
from osf.management.commands.add_colon_delim_to_s3_buckets import update_folder_names, reverse_update_folder_names

@pytest.mark.django_db
class TestUpdateFolderNamesMigration:

def test_update_folder_names_migration(self):
from addons.s3.models import NodeSettings
from addons.s3.tests.factories import S3NodeSettingsFactory
# Create sample folder names and IDs
S3NodeSettingsFactory(folder_name='Folder 1 (Location 1)', folder_id='folder1')
S3NodeSettingsFactory(folder_name='Folder 2', folder_id='folder2')
S3NodeSettingsFactory(folder_name='Folder 3 (Location 3)', folder_id='folder3')
S3NodeSettingsFactory(folder_name='Folder 4:/ (Location 4)', folder_id='folder4:/')

update_folder_names()

# Verify updated folder names and IDs
updated_folder_names_ids = NodeSettings.objects.values_list('folder_name', 'folder_id')
expected_updated_folder_names_ids = {
('Folder 1:/ (Location 1)', 'folder1:/'),
('Folder 2:/', 'folder2:/'),
('Folder 3:/ (Location 3)', 'folder3:/'),
('Folder 4:/ (Location 4)', 'folder4:/'),

}
assert set(updated_folder_names_ids) == expected_updated_folder_names_ids

# Reverse the migration
reverse_update_folder_names()

# Verify the folder names and IDs after the reverse migration
reverted_folder_names_ids = NodeSettings.objects.values_list('folder_name', 'folder_id')
expected_reverted_folder_names_ids = {
('Folder 1 (Location 1)', 'folder1'),
('Folder 2', 'folder2'),
('Folder 3 (Location 3)', 'folder3'),
('Folder 4 (Location 4)', 'folder4'),
}
assert set(reverted_folder_names_ids) == expected_reverted_folder_names_ids
