Skip to content

Commit

Permalink
Merge pull request #10418 from Johnetordoff/s3-improvement-migration
Browse files Browse the repository at this point in the history
[S3 Improvements] Delimiter Migration
  • Loading branch information
Johnetordoff authored Jul 18, 2023
2 parents bb078be + 669abe5 commit 92201f9
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 2 deletions.
54 changes: 52 additions & 2 deletions addons/s3/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import logging
from rest_framework import status as http_status

from boto import exception
Expand All @@ -8,6 +9,11 @@
from framework.exceptions import HTTPError
from addons.base.exceptions import InvalidAuthError, InvalidFolderError
from addons.s3.settings import BUCKET_LOCATIONS
from django.apps import apps
from django.db.models import F, Value
from django.db.models.functions import Concat, Replace

logger = logging.getLogger(__name__)


def connect_s3(access_key=None, secret_key=None, node_settings=None):
Expand All @@ -18,7 +24,7 @@ def connect_s3(access_key=None, secret_key=None, node_settings=None):
if node_settings is not None:
if node_settings.external_account is not None:
access_key, secret_key = node_settings.external_account.oauth_key, node_settings.external_account.oauth_secret
connection = S3Connection(access_key, secret_key)
connection = S3Connection(access_key, secret_key, calling_format=OrdinaryCallingFormat())
return connection


Expand Down Expand Up @@ -125,7 +131,7 @@ def get_bucket_location_or_error(access_key, secret_key, bucket_name):


def get_bucket_prefixes(access_key, secret_key, prefix, bucket_name):
bucket = connect_s3(access_key, secret_key).get_bucket(bucket_name)
bucket = S3Connection(access_key, secret_key).get_bucket(bucket_name) # Don't use OrdinaryCallingFormat

folders = []
for key in bucket.list(delimiter='/', prefix=prefix):
Expand All @@ -143,3 +149,47 @@ def get_bucket_prefixes(access_key, secret_key, prefix, bucket_name):
)

return folders


def update_folder_names():
    """Append the ``:/`` root delimiter to S3 ``folder_id`` and ``folder_name``.

    ``folder_id`` values gain a trailing ``:/``. ``folder_name`` values that
    contain ``' ('`` get the delimiter inserted in front of it
    (``'name (loc)'`` -> ``'name:/ (loc)'``); all other names gain a
    trailing ``:/``.

    Each column is filtered on its own current value, so a row is only
    rewritten when that column does not already contain ``:/``. This keeps
    the function safe to re-run after a partial run. Rows whose value is
    NULL or empty are skipped: ``Concat`` treats NULL as an empty string, so
    they would otherwise be turned into the bare string ``':/'``.
    """
    NodeSettings = apps.get_model('addons_s3', 'NodeSettings')

    # folder_id: decide per row from folder_id itself, not from folder_name,
    # so an already-delimited id never gets a second ':/' appended.
    NodeSettings.objects.exclude(
        folder_id__isnull=True
    ).exclude(
        folder_id=''
    ).exclude(
        folder_id__contains=':/'
    ).update(
        folder_id=Concat(F('folder_id'), Value(':/'))
    )

    # folder_name: only rows that hold a real name without the delimiter.
    # The queryset is lazy, so each update below re-evaluates the filter.
    names_without_delimiter = NodeSettings.objects.exclude(
        folder_name__isnull=True
    ).exclude(
        folder_name=''
    ).exclude(
        folder_name__contains=':/'
    )

    # Names with a ' (' part: insert the delimiter in front of it.
    names_without_delimiter.filter(
        folder_name__contains=' ('
    ).update(
        folder_name=Replace(F('folder_name'), Value(' ('), Value(':/ ('))
    )

    # Remaining names have no ' (' part: append the delimiter at the end.
    # Rows handled just above now contain ':/' and are filtered out.
    names_without_delimiter.exclude(
        folder_name__contains=' ('
    ).update(
        folder_name=Concat(F('folder_name'), Value(':/'))
    )
    logger.info('Update Folder Names/IDs complete')


def reverse_update_folder_names():
    """Strip the ``:/`` delimiter from S3 ``folder_id`` and ``folder_name``.

    Runs three bulk updates in order: remove ``:/`` from every
    ``folder_id``, turn ``':/ ('`` back into ``' ('`` in matching folder
    names, then remove any ``:/`` still left in a folder name.
    """
    def _swap(column, old, new):
        # Build a fresh SQL REPLACE expression for each update.
        return Replace(F(column), Value(old), Value(new))

    settings_manager = apps.get_model('addons_s3', 'NodeSettings').objects

    # Every row: drop the delimiter from the folder id.
    settings_manager.update(folder_id=_swap('folder_id', ':/', ''))

    # Names where the delimiter sits in front of ' (': restore ' ('.
    with_parenthesised_part = settings_manager.filter(folder_name__contains=':/ (')
    with_parenthesised_part.update(folder_name=_swap('folder_name', ':/ (', ' ('))

    # Names that still carry the delimiter: remove it.
    still_delimited = settings_manager.filter(folder_name__contains=':/')
    still_delimited.update(folder_name=_swap('folder_name', ':/', ''))

    logger.info('Reverse Update Folder Names/IDs complete')
31 changes: 31 additions & 0 deletions osf/management/commands/add_colon_delim_to_s3_buckets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
import logging

from django.core.management.base import BaseCommand
from addons.s3.utils import update_folder_names, reverse_update_folder_names

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Adds the ``:/`` delimiter to S3 bucket ids to separate them from their
    subfolder, so `<bucket_name>` becomes `<bucket_name>:/`, the root path.
    Folder names are also updated to maintain consistency.

    Pass ``--reverse`` to remove the delimiter again.
    """

    def add_arguments(self, parser):
        """Register the optional ``--reverse`` flag."""
        super().add_arguments(parser)
        parser.add_argument(
            '--reverse',
            action='store_true',
            dest='reverse',
            help="Removes the ':/' delimiter from S3 folder ids and names instead of adding it"
        )

    def handle(self, *args, **options):
        """Run the forward update, or the reverse one when ``--reverse`` is set."""
        reverse = options.get('reverse', False)
        if reverse:
            reverse_update_folder_names()
        else:
            update_folder_names()
41 changes: 41 additions & 0 deletions osf_tests/test_s3_folder_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import pytest
from addons.s3.utils import update_folder_names, reverse_update_folder_names

@pytest.mark.django_db
class TestUpdateFolderNamesMigration:
    """Round-trip test for ``update_folder_names`` and its reverse."""

    def test_update_folder_names_migration(self):
        """Apply the update, check names/ids, reverse it, and check again."""
        from addons.s3.models import NodeSettings
        from addons.s3.tests.factories import S3NodeSettingsFactory
        # Create sample folder names and IDs; the last row already carries
        # the ':/' delimiter in both fields.
        S3NodeSettingsFactory(folder_name='Folder 1 (Location 1)', folder_id='folder1')
        S3NodeSettingsFactory(folder_name='Folder 2', folder_id='folder2')
        S3NodeSettingsFactory(folder_name='Folder 3 (Location 3)', folder_id='folder3')
        S3NodeSettingsFactory(folder_name='Folder 4:/ (Location 4)', folder_id='folder4:/')

        update_folder_names()

        # Verify updated folder names and IDs
        updated_folder_names_ids = NodeSettings.objects.values_list('folder_name', 'folder_id')
        expected_updated_folder_names_ids = {
            ('Folder 1:/ (Location 1)', 'folder1:/'),
            ('Folder 2:/', 'folder2:/'),
            ('Folder 3:/ (Location 3)', 'folder3:/'),
            ('Folder 4:/ (Location 4)', 'folder4:/'),
        }
        assert set(updated_folder_names_ids) == expected_updated_folder_names_ids

        # Reverse the migration
        reverse_update_folder_names()

        # Verify the folder names and IDs after the reverse migration
        reverted_folder_names_ids = NodeSettings.objects.values_list('folder_name', 'folder_id')
        expected_reverted_folder_names_ids = {
            ('Folder 1 (Location 1)', 'folder1'),
            ('Folder 2', 'folder2'),
            ('Folder 3 (Location 3)', 'folder3'),
            ('Folder 4 (Location 4)', 'folder4'),
        }
        assert set(reverted_folder_names_ids) == expected_reverted_folder_names_ids

0 comments on commit 92201f9

Please sign in to comment.