diff --git a/apps/filebrowser/src/filebrowser/api.py b/apps/filebrowser/src/filebrowser/api.py index dd8a205838..660c69c1b9 100644 --- a/apps/filebrowser/src/filebrowser/api.py +++ b/apps/filebrowser/src/filebrowser/api.py @@ -15,22 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import logging import posixpath -import os from django.http import HttpResponse from django.utils.translation import gettext as _ -from desktop.lib.django_util import JsonResponse +from aws.s3.s3fs import get_s3_home_directory +from azure.abfs.__init__ import get_abfs_home_directory from desktop.lib import fsmanager -from desktop.lib.i18n import smart_unicode -from desktop.lib.fs.ozone.ofs import get_ofs_home_directory +from desktop.lib.django_util import JsonResponse from desktop.lib.fs.gc.gs import get_gs_home_directory - -from azure.abfs.__init__ import get_home_dir_for_abfs -from aws.s3.s3fs import get_s3_home_directory - +from desktop.lib.fs.ozone.ofs import get_ofs_home_directory +from desktop.lib.i18n import smart_unicode from filebrowser.views import _normalize_path LOG = logging.getLogger() @@ -64,7 +62,7 @@ def get_filesystems(request): @error_handler -def get_filesystems_with_home_dirs(request): # Using as a public API only for now +def get_filesystems_with_home_dirs(request): # Using as a public API only for now filesystems = [] user_home_dir = '' @@ -76,7 +74,7 @@ def get_filesystems_with_home_dirs(request): # Using as a public API only for no elif fs == 'gs': user_home_dir = get_gs_home_directory(request.user) elif fs == 'abfs': - user_home_dir = get_home_dir_for_abfs(request.user) + user_home_dir = get_abfs_home_directory(request.user) elif fs == 'ofs': user_home_dir = get_ofs_home_directory() @@ -107,10 +105,11 @@ def touch(request): if name and (posixpath.sep in name): raise Exception(_("Error creating %s file. Slashes are not allowed in filename." % name)) - + request.fs.create(request.fs.join(path, name)) return HttpResponse(status=200) + @error_handler def rename(request): src_path = request.POST.get('src_path') @@ -132,6 +131,7 @@ def rename(request): request.fs.rename(src_path, dest_path) return HttpResponse(status=200) + @error_handler def content_summary(request, path): path = _normalize_path(path) diff --git a/apps/filebrowser/src/filebrowser/conf.py b/apps/filebrowser/src/filebrowser/conf.py index 2af0757f7f..15a25fb88d 100644 --- a/apps/filebrowser/src/filebrowser/conf.py +++ b/apps/filebrowser/src/filebrowser/conf.py @@ -84,6 +84,7 @@ def get_desktop_enable_download(): type=coerce_bool, default=False) +# DEPRECATED in favor of DEFAULT_HOME_PATH per FS config level. 
REMOTE_STORAGE_HOME = Config( key="remote_storage_home", type=str, diff --git a/apps/filebrowser/src/filebrowser/views.py b/apps/filebrowser/src/filebrowser/views.py index fa5e18ff7e..b466d96d2f 100644 --- a/apps/filebrowser/src/filebrowser/views.py +++ b/apps/filebrowser/src/filebrowser/views.py @@ -284,8 +284,8 @@ def view(request, path): # default_abfs_home is set in jquery.filechooser.js if 'default_abfs_home' in request.GET: - from azure.abfs.__init__ import get_home_dir_for_abfs - home_dir_path = get_home_dir_for_abfs(request.user) + from azure.abfs.__init__ import get_abfs_home_directory + home_dir_path = get_abfs_home_directory(request.user) if request.fs.isdir(home_dir_path): return format_preserving_redirect( request, diff --git a/desktop/conf.dist/hue.ini b/desktop/conf.dist/hue.ini index 596f8c617d..8b7181aca2 100644 --- a/desktop/conf.dist/hue.ini +++ b/desktop/conf.dist/hue.ini @@ -1002,6 +1002,9 @@ tls=no # The JSON credentials to authenticate to Google Cloud e.g. '{ "type": "service_account", "project_id": .... }' ## json_credentials=None +# Optionally set this for a different home directory path. e.g. gs://gethue-bucket/user +## default_home_path=gs:/// + ## Configuration for Ozone File System # ------------------------------------------------------------------------ [[ozone]] @@ -1712,6 +1715,7 @@ submit_to=True # Redirect client to WebHdfs or S3 for file download. Note: Turning this on will override notebook/redirect_whitelist for user selected file downloads on WebHdfs & S3. ## redirect_download=false +# DEPRECATED in favor of default_home_path per FS config level. # Optionally set this if you want a different home directory path. e.g. s3a://gethue. ## remote_storage_home=s3a://gethue @@ -2026,6 +2030,8 @@ submit_to=True # The time in seconds before a delegate key is expired. Used when filebrowser/redirect_download is used. Default to 4 Hours. ## key_expiry=14400 +# Optionally set this for a different home directory path. e.g. s3a://gethue-bucket/user +## default_home_path=s3a:/// ########################################################################### # Settings for the Azure lib @@ -2058,6 +2064,9 @@ submit_to=True ## fs_defaultfs=abfs://@.dfs.core.windows.net ## webhdfs_url=https://.dfs.core.windows.net +# Optionally set this for a different home directory path. e.g. abfs://gethue-container/user +## default_home_path=abfs:/// + ########################################################################### # Settings for the Sentry lib ########################################################################### diff --git a/desktop/conf/pseudo-distributed.ini.tmpl b/desktop/conf/pseudo-distributed.ini.tmpl index f616c6b00e..9b148947f4 100644 --- a/desktop/conf/pseudo-distributed.ini.tmpl +++ b/desktop/conf/pseudo-distributed.ini.tmpl @@ -986,6 +986,9 @@ # The JSON credentials to authenticate to Google Cloud e.g. '{ "type": "service_account", "project_id": .... }' ## json_credentials=None + # Optionally set this for a different home directory path. e.g. gs://gethue-bucket/user + ## default_home_path=gs:/// + ## Configuration for Ozone File System # ------------------------------------------------------------------------ [[ozone]] @@ -1695,6 +1698,7 @@ # Redirect client to WebHdfs or S3 for file download. Note: Turning this on will override notebook/redirect_whitelist for user selected file downloads on WebHdfs & S3. ## redirect_download=false + # DEPRECATED in favor of default_home_path per FS config level. 
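For orientation: the per-filesystem default_home_path keys added in these ini sections are read through the account config blocks (GC_ACCOUNTS for GS; the AWS and Azure sections gain the same key), not through the global filebrowser remote_storage_home. A condensed sketch of the lookup order this patch implements for the GS case — the helper name is illustrative, but the config objects and checks mirror the get_gs_home_directory change later in the diff:

from desktop.conf import GC_ACCOUNTS
from filebrowser.conf import REMOTE_STORAGE_HOME


def resolve_gs_home_path():
    # Deprecated global remote_storage_home still wins, for backward compatibility.
    if hasattr(REMOTE_STORAGE_HOME, 'get') and REMOTE_STORAGE_HOME.get() and REMOTE_STORAGE_HOME.get().startswith('gs://'):
        return REMOTE_STORAGE_HOME.get()
    # Otherwise fall back to the per-FS default_home_path from the gc_accounts block.
    default_home_path = GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get() if 'default' in GC_ACCOUNTS else None
    if default_home_path and default_home_path.startswith('gs://'):
        return default_home_path
    # Neither is set (or the values target another scheme): default to the GS root.
    return 'gs://'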
# Optionally set this if you want a different home directory path. e.g. s3a://gethue. ## remote_storage_home=s3a://gethue @@ -2010,6 +2014,9 @@ # The time in seconds before a delegate key is expired. Used when filebrowser/redirect_download is used. Default to 4 Hours. ## key_expiry=14400 + # Optionally set this for a different home directory path. e.g. s3a://gethue-bucket/user + ## default_home_path=s3a:/// + ########################################################################### # Settings for the Azure lib @@ -2042,6 +2049,9 @@ ## fs_defaultfs=abfs://@.dfs.core.windows.net ## webhdfs_url=https://.dfs.core.windows.net + # Optionally set this for a different home directory path. e.g. abfs://gethue-container/user + ## default_home_path=abfs:/// + ########################################################################### # Settings for the Sentry lib ########################################################################### diff --git a/desktop/core/src/desktop/conf.py b/desktop/core/src/desktop/conf.py index 2bd53e5a35..9a41c28195 100644 --- a/desktop/core/src/desktop/conf.py +++ b/desktop/core/src/desktop/conf.py @@ -2804,7 +2804,13 @@ def get_ldap_bind_password(ldap_config): key='json_credentials', type=str, default=None, - ) + ), + DEFAULT_HOME_PATH=Config( + key="default_home_path", + type=str, + default=None, + help="Optionally set this for a different home directory path. e.g. gs://gethue" + ), ) ) ) diff --git a/desktop/core/src/desktop/lib/fs/gc/gs.py b/desktop/core/src/desktop/lib/fs/gc/gs.py index 8cf3cc948c..75cd2cc1dd 100644 --- a/desktop/core/src/desktop/lib/fs/gc/gs.py +++ b/desktop/core/src/desktop/lib/fs/gc/gs.py @@ -16,34 +16,31 @@ # limitations under the License. import os import re +import time import logging import posixpath -import time from boto.exception import BotoClientError, GSResponseError from boto.gs.connection import Location from boto.gs.key import Key - from boto.s3.prefix import Prefix from django.utils.translation import gettext as _ -from desktop.conf import PERMISSION_ACTION_GS, is_raz_gs -from desktop.lib.fs.gc import GS_ROOT, abspath, parse_uri, translate_gs_error, normpath, join as gs_join -from desktop.lib.fs.gc.gsstat import GSStat +from aws.s3.s3fs import S3FileSystem +from desktop.conf import GC_ACCOUNTS, PERMISSION_ACTION_GS, is_raz_gs +from desktop.lib.fs.gc import GS_ROOT, abspath, join as gs_join, normpath, parse_uri, translate_gs_error from desktop.lib.fs.gc.gsfile import open as gsfile_open - +from desktop.lib.fs.gc.gsstat import GSStat from filebrowser.conf import REMOTE_STORAGE_HOME -from aws.s3.s3fs import S3FileSystem - - DEFAULT_READ_SIZE = 1024 * 1024 # 1MB BUCKET_NAME_PATTERN = re.compile( - "^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$") +r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$") LOG = logging.getLogger() + class GSFileSystemException(IOError): def __init__(self, *args, **kwargs): super(GSFileSystemException, self).__init__(*args, **kwargs) @@ -81,9 +78,16 @@ def decorator(*args, **kwargs): def get_gs_home_directory(user=None): from desktop.models import _handle_user_dir_raz - remote_home_gs = 'gs://' + # REMOTE_STORAGE_HOME is deprecated in favor of DEFAULT_HOME_PATH per FS config level. + # But for backward compatibility, we are still giving preference to REMOTE_STORAGE_HOME path first and if it's not set, + # then check for DEFAULT_HOME_PATH which is set per FS config block. 
This helps in setting diff DEFAULT_HOME_PATH for diff FS in Hue. + if hasattr(REMOTE_STORAGE_HOME, 'get') and REMOTE_STORAGE_HOME.get() and REMOTE_STORAGE_HOME.get().startswith('gs://'): remote_home_gs = REMOTE_STORAGE_HOME.get() + elif 'default' in GC_ACCOUNTS and GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get() and GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get().startswith('gs://'): + remote_home_gs = GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get() + else: + remote_home_gs = 'gs://' remote_home_gs = _handle_user_dir_raz(user, remote_home_gs) @@ -100,7 +104,7 @@ def __init__(self, gs_connection, expiration=None, fs='gs', headers=None, filebr headers=headers, filebrowser_action=filebrowser_action ) - + @staticmethod def join(*comp_list): return gs_join(*comp_list) @@ -156,7 +160,7 @@ def stats(self, path): Returns: GSStat: An object representing the stats of the file or directory. - + Raises: GSFileSystemException: If the file or directory does not exist. """ @@ -347,7 +351,7 @@ def mkdir(self, path, *args, **kwargs): def _stats(self, path): if GSFileSystem.isroot(path): return GSStat.for_gs_root() - + try: key = self._get_key(path) except BotoClientError as e: @@ -359,7 +363,7 @@ def _stats(self, path): raise GSFileSystemException(_('User is not authorized to access path: "%s"') % path) else: raise GSFileSystemException(_('Failed to access path "%s": %s') % (path, e.reason)) - except Exception as e: # SSL errors show up here, because they've been remapped in boto + except Exception as e: # SSL errors show up here, because they've been remapped in boto raise GSFileSystemException(_('Failed to access path "%s": %s') % (path, str(e))) if key is None: @@ -367,9 +371,9 @@ def _stats(self, path): bucket = self._get_bucket(bucket_name) key = Key(bucket, key_name) - + return self._stats_key(key, self.fs) - + @staticmethod def _stats_key(key, fs='gs'): if key.size is not None: @@ -402,7 +406,7 @@ def _copy(self, src, dst, recursive, use_src_basename): """ src_st = self.stats(src) if src_st.isDir and not recursive: - return None # omitting directory + return None # omitting directory # Check if the source is a directory and destination is not a directory dst = abspath(src, dst) @@ -421,7 +425,7 @@ def _copy(self, src, dst, recursive, use_src_basename): src_bucket = self._get_bucket(src_bucket) dst_bucket = self._get_bucket(dst_bucket) - # Determine whether to keep the source basename when copying directories and + # Determine whether to keep the source basename when copying directories and # calculate the cut-off length for key names accordingly. if keep_src_basename: cut = len(posixpath.dirname(src_key)) # cut of the parent directory name diff --git a/desktop/core/src/desktop/lib/fs/gc/gs_test.py b/desktop/core/src/desktop/lib/fs/gc/gs_test.py new file mode 100644 index 0000000000..bfed17f76f --- /dev/null +++ b/desktop/core/src/desktop/lib/fs/gc/gs_test.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# Licensed to Cloudera, Inc. under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. Cloudera, Inc. licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import pytest +from django.test import TestCase + +from desktop.conf import GC_ACCOUNTS, RAZ, is_gs_enabled +from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.fs.gc.gs import get_gs_home_directory +from desktop.lib.fsmanager import get_client +from filebrowser.conf import REMOTE_STORAGE_HOME +from useradmin.models import User + +LOG = logging.getLogger() + + +@pytest.mark.django_db +def test_get_gs_home_directory(): + client = make_logged_in_client(username="test", groupname="test", recreate=True, is_superuser=False) + user = User.objects.get(username="test") + + client_not_me = make_logged_in_client(username="test_not_me", groupname="test_not_me", recreate=True, is_superuser=False) + user_not_me = User.objects.get(username="test_not_me") + + # When REMOTE_STORAGE_HOME ends with /user in RAZ GS environment. + resets = [RAZ.IS_ENABLED.set_for_testing(True), REMOTE_STORAGE_HOME.set_for_testing('gs://gethue-bucket/user')] + + try: + default_gs_home_path = get_gs_home_directory(user) + assert default_gs_home_path == 'gs://gethue-bucket/user/test' + + default_gs_home_path = get_gs_home_directory(user_not_me) + assert default_gs_home_path == 'gs://gethue-bucket/user/test_not_me' + finally: + for reset in resets: + reset() + + # When GS filesystem's DEFAULT_HOME_PATH ends with /user in RAZ GS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + GC_ACCOUNTS.set_for_testing({'default': {'default_home_path': 'gs://gethue-other-bucket/user'}}), + ] + + try: + default_gs_home_path = get_gs_home_directory(user) + assert default_gs_home_path == 'gs://gethue-other-bucket/user/test' + + default_gs_home_path = get_gs_home_directory(user_not_me) + assert default_gs_home_path == 'gs://gethue-other-bucket/user/test_not_me' + finally: + for reset in resets: + reset() + + # When GS filesystem's DEFAULT_HOME_PATH is set in non-RAZ GS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(False), + GC_ACCOUNTS.set_for_testing({'default': {'default_home_path': 'gs://gethue-other-bucket/test-dir'}}), + ] + + try: + default_gs_home_path = get_gs_home_directory(user) + assert default_gs_home_path == 'gs://gethue-other-bucket/test-dir' + + default_gs_home_path = get_gs_home_directory(user_not_me) + assert default_gs_home_path == 'gs://gethue-other-bucket/test-dir' + finally: + for reset in resets: + reset() + + # When both REMOTE_STORAGE_HOME and GS filesystem's DEFAULT_HOME_PATH are set in RAZ GS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + REMOTE_STORAGE_HOME.set_for_testing('gs://gethue-bucket/user'), + GC_ACCOUNTS.set_for_testing({'default': {'default_home_path': 'gs://gethue-other-bucket/user'}}), + ] + + try: + # Gives preference to REMOTE_STORAGE_HOME for of backward compatibility. 
+ default_gs_home_path = get_gs_home_directory(user) + assert default_gs_home_path == 'gs://gethue-bucket/user/test' + + default_gs_home_path = get_gs_home_directory(user_not_me) + assert default_gs_home_path == 'gs://gethue-bucket/user/test_not_me' + finally: + for reset in resets: + reset() + + # When GS filesystem's DEFAULT_HOME_PATH is set but path does not end with ../user or ../user/ in RAZ GS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + GC_ACCOUNTS.set_for_testing({'default': {'default_home_path': 'gs://gethue-other-bucket/dir'}}), + ] + + try: + default_gs_home_path = get_gs_home_directory(user) + assert default_gs_home_path == 'gs://gethue-other-bucket/dir' + + default_gs_home_path = get_gs_home_directory(user_not_me) + assert default_gs_home_path == 'gs://gethue-other-bucket/dir' + finally: + for reset in resets: + reset() + + # When some different path is set in both RAZ and non-RAZ GS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container/user'), + GC_ACCOUNTS.set_for_testing({'default': {'default_home_path': 'abfs://gethue-other-container/dir'}}), + ] + + try: + default_gs_home_path = get_gs_home_directory(user) + assert default_gs_home_path == 'gs://' + + default_gs_home_path = get_gs_home_directory(user_not_me) + assert default_gs_home_path == 'gs://' + finally: + for reset in resets: + reset() + + +class TestGCS(TestCase): + def setup_method(self, method): + if not is_gs_enabled(): + pytest.skip('gs not enabled') + + def test_with_credentials(self): + # Simple test that makes sure no errors are thrown. + client = get_client(fs='gs') + buckets = client.listdir_stats('gs://') + LOG.info(len(buckets)) diff --git a/desktop/core/src/desktop/lib/fs/gc/tests.py b/desktop/core/src/desktop/lib/fs/gc/tests.py deleted file mode 100644 index a0024c49d2..0000000000 --- a/desktop/core/src/desktop/lib/fs/gc/tests.py +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to Cloudera, Inc. under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. Cloudera, Inc. licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import - -import logging -import pytest -import unittest -from django.test import TestCase -from desktop.conf import is_gs_enabled -from desktop.lib.fsmanager import get_client - -LOG = logging.getLogger() - - -class TestGCS(TestCase): - def setup_method(self, method): - if not is_gs_enabled(): - pytest.skip('gs not enabled') - - def test_with_credentials(self): - # Simple test that makes sure no errors are thrown. 
- client = get_client(fs='gs') - buckets = client.listdir_stats('gs://') - LOG.info(len(buckets)) diff --git a/desktop/core/src/desktop/lib/fs/proxyfs.py b/desktop/core/src/desktop/lib/fs/proxyfs.py index ae4621754e..3137c2d603 100644 --- a/desktop/core/src/desktop/lib/fs/proxyfs.py +++ b/desktop/core/src/desktop/lib/fs/proxyfs.py @@ -13,26 +13,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object import logging - +from builtins import object from urllib.parse import urlparse as lib_urlparse from crequest.middleware import CrequestMiddleware -from useradmin.models import User - -from desktop.auth.backend import is_admin -from desktop.conf import DEFAULT_USER, ENABLE_ORGANIZATIONS, is_ofs_enabled, is_raz_gs -from desktop.lib.fs.ozone import OFS_ROOT - -from desktop.lib.fs.gc.gs import get_gs_home_directory from aws.conf import is_raz_s3 from aws.s3.s3fs import get_s3_home_directory - +from azure.abfs.__init__ import get_abfs_home_directory from azure.conf import is_raz_abfs -from azure.abfs.__init__ import get_home_dir_for_abfs - +from desktop.auth.backend import is_admin +from desktop.conf import DEFAULT_USER, ENABLE_ORGANIZATIONS, is_ofs_enabled, is_raz_gs +from desktop.lib.fs.gc.gs import get_gs_home_directory +from desktop.lib.fs.ozone import OFS_ROOT +from useradmin.models import User LOG = logging.getLogger() DEFAULT_USER = DEFAULT_USER.get() @@ -207,7 +202,7 @@ def create_home_dir(self, home_path=None): Initially home_path will have path value for HDFS, try creating the user home dir for it first. Then, we check if S3/ABFS is configured via RAZ. If yes, try creating user home dir for them next. """ - from desktop.conf import RAZ # Imported dynamically in order to have proper value. + from desktop.conf import RAZ # Imported dynamically in order to have proper value. 
try: self._get_fs(home_path).create_home_dir(home_path) @@ -222,7 +217,7 @@ def create_home_dir(self, home_path=None): if is_raz_s3(): home_path = get_s3_home_directory(User.objects.get(username=self.getuser())) elif is_raz_abfs(): - home_path = get_home_dir_for_abfs(User.objects.get(username=self.getuser())) + home_path = get_abfs_home_directory(User.objects.get(username=self.getuser())) elif is_raz_gs(): home_path = get_gs_home_directory(User.objects.get(username=self.getuser())) diff --git a/desktop/core/src/desktop/models.py b/desktop/core/src/desktop/models.py index 9bb2bf1c67..10cc624db2 100644 --- a/desktop/core/src/desktop/models.py +++ b/desktop/core/src/desktop/models.py @@ -1751,20 +1751,13 @@ def get_cluster_config(user): def get_remote_home_storage(user=None): remote_home_storage = REMOTE_STORAGE_HOME.get() if hasattr(REMOTE_STORAGE_HOME, 'get') and REMOTE_STORAGE_HOME.get() else None - - if not remote_home_storage: - if get_raz_api_url() and get_raz_s3_default_bucket(): - remote_home_storage = 's3a://%(bucket)s' % get_raz_s3_default_bucket() - - remote_home_storage = _handle_user_dir_raz(user, remote_home_storage) - - return remote_home_storage + return _handle_user_dir_raz(user, remote_home_storage) def _handle_user_dir_raz(user, remote_home_storage): - # In RAZ env, apppend username so that it defaults to user's dir and doesn't give 403 error - if user and remote_home_storage and RAZ.IS_ENABLED.get() and remote_home_storage.endswith('/user'): - remote_home_storage += '/' + user.username + # In RAZ environment, apppend username so that it defaults to user's directory and does not give 403 error + if user and remote_home_storage and RAZ.IS_ENABLED.get() and remote_home_storage.endswith(('/user', '/user/')): + remote_home_storage = remote_home_storage.rstrip('/') + '/' + user.username return remote_home_storage @@ -2017,7 +2010,7 @@ def _get_browser(self): for hdfs_connector in hdfs_connectors: force_home = remote_home_storage and not remote_home_storage.startswith('/') - home_path = self.user.get_home_directory(force_home=force_home).encode('utf-8') + home_path = self.user.get_home_directory(force_home=force_home) interpreters.append({ 'type': 'hdfs', 'displayName': hdfs_connector, @@ -2030,7 +2023,8 @@ def _get_browser(self): }) if 'filebrowser' in self.apps and fsmanager.is_enabled_and_has_access('s3a', self.user): - home_path = remote_home_storage if remote_home_storage else 's3a://'.encode('utf-8') + from aws.s3.s3fs import get_s3_home_directory + home_path = get_s3_home_directory(self.user) interpreters.append({ 'type': 's3', 'displayName': _('S3'), @@ -2040,7 +2034,8 @@ def _get_browser(self): }) if 'filebrowser' in self.apps and fsmanager.is_enabled_and_has_access('gs', self.user): - home_path = remote_home_storage if remote_home_storage else 'gs://'.encode('utf-8') + from desktop.lib.fs.gc.gs import get_gs_home_directory + home_path = get_gs_home_directory(self.user) interpreters.append({ 'type': 'gs', 'displayName': _('GS'), @@ -2050,7 +2045,8 @@ def _get_browser(self): }) if 'filebrowser' in self.apps and fsmanager.is_enabled_and_has_access('adl', self.user): - home_path = remote_home_storage if remote_home_storage else 'adl:/'.encode('utf-8') + # ADLS does not have a dedicated get_home_directory method + home_path = remote_home_storage if remote_home_storage else 'adl:/' interpreters.append({ 'type': 'adls', 'displayName': _('ADLS'), @@ -2060,8 +2056,8 @@ def _get_browser(self): }) if 'filebrowser' in self.apps and fsmanager.is_enabled_and_has_access('abfs', 
self.user): - from azure.abfs.__init__ import get_home_dir_for_abfs - home_path = remote_home_storage if remote_home_storage else get_home_dir_for_abfs(self.user).encode('utf-8') + from azure.abfs.__init__ import get_abfs_home_directory + home_path = get_abfs_home_directory(self.user) interpreters.append({ 'type': 'abfs', 'displayName': _('ABFS'), @@ -2072,7 +2068,7 @@ def _get_browser(self): if 'filebrowser' in self.apps and fsmanager.is_enabled_and_has_access('ofs', self.user): from desktop.lib.fs.ozone.ofs import get_ofs_home_directory - home_path = get_ofs_home_directory().encode('utf-8') + home_path = get_ofs_home_directory() interpreters.append({ 'type': 'ofs', 'displayName': _('Ozone'), diff --git a/desktop/core/src/desktop/models_tests.py b/desktop/core/src/desktop/models_tests.py index 01d9bc9176..c763651653 100644 --- a/desktop/core/src/desktop/models_tests.py +++ b/desktop/core/src/desktop/models_tests.py @@ -16,41 +16,35 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object import json -import pytest -import sys +from builtins import object from datetime import datetime +from unittest.mock import patch +import pytest from django.core import management from django.db.utils import OperationalError -from beeswax.models import SavedQuery from beeswax.design import hql_query -from notebook.models import import_saved_beeswax_query -from useradmin.models import get_default_user_group, User - -from filebrowser.conf import REMOTE_STORAGE_HOME - -from desktop.conf import has_connectors, RAZ +from beeswax.models import SavedQuery +from desktop.conf import RAZ, has_connectors from desktop.converters import DocumentConverter from desktop.lib.connectors.models import Connector from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.fs import ProxyFS from desktop.lib.test_utils import grant_access -from desktop.models import Directory, Document2, Document, Document2Permission, ClusterConfig, get_remote_home_storage +from desktop.models import ClusterConfig, Directory, Document, Document2, Document2Permission, get_remote_home_storage +from filebrowser.conf import REMOTE_STORAGE_HOME +from notebook.models import import_saved_beeswax_query +from useradmin.models import User, get_default_user_group try: from oozie.models2 import Workflow + has_oozie = True except RuntimeError: has_oozie = False -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - class MockFs(object): def __init__(self): @@ -59,7 +53,6 @@ def __init__(self): @pytest.mark.django_db class TestClusterConfig(object): - def setup_method(self): self.client = make_logged_in_client(username="test", groupname="test", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") @@ -77,7 +70,6 @@ def test_get_fs(self): ClusterConfig(user=self.user) - def test_get_main_quick_action(self): with patch('desktop.models.get_user_preferences') as get_user_preferences: get_user_preferences.return_value = json.dumps({'app': 'editor', 'interpreter': 1}) @@ -87,52 +79,42 @@ def test_get_main_quick_action(self): assert {'type': 1, 'name': 'SQL'}, main_app - def test_get_remote_storage_home(self): - # When default home ends with /user in RAZ ADLS env. - resets = [ - RAZ.IS_ENABLED.set_for_testing(True), - REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container/user') - ] + # When REMOTE_STORAGE_HOME is set. 
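The RAZ scenarios in this rewritten test lean on the _handle_user_dir_raz change shown earlier in the diff: when RAZ is enabled and the configured home path ends with /user (with or without a trailing slash), the username is appended so each user resolves to their own directory. A standalone sketch of that rule, with a plain username string standing in for the Django user object:

def handle_user_dir_raz(username, remote_home_storage, raz_enabled=True):
    # Mirrors the updated desktop.models._handle_user_dir_raz: append the username only in
    # RAZ environments and only for a '/user' home, tolerating a trailing slash.
    if username and remote_home_storage and raz_enabled and remote_home_storage.endswith(('/user', '/user/')):
        remote_home_storage = remote_home_storage.rstrip('/') + '/' + username
    return remote_home_storage


assert handle_user_dir_raz('test', 'abfs://gethue-container/user') == 'abfs://gethue-container/user/test'
assert handle_user_dir_raz('test', 'abfs://gethue-container/user/') == 'abfs://gethue-container/user/test'
assert handle_user_dir_raz('test', 'hdfs://gethue/dir') == 'hdfs://gethue/dir'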
+ resets = [REMOTE_STORAGE_HOME.set_for_testing('hdfs://gethue/dir')] try: remote_home_storage = get_remote_home_storage(self.user) - assert remote_home_storage == 'abfs://gethue-container/user/test' + assert remote_home_storage == 'hdfs://gethue/dir' remote_home_storage = get_remote_home_storage(self.user_not_me) - assert remote_home_storage == 'abfs://gethue-container/user/test_not_me' + assert remote_home_storage == 'hdfs://gethue/dir' finally: for reset in resets: reset() - # When default home ends with /user in RAZ S3 env. - resets = [ - RAZ.IS_ENABLED.set_for_testing(True), - REMOTE_STORAGE_HOME.set_for_testing('s3a://gethue-bucket/user') - ] + # When REMOTE_STORAGE_HOME is set and ends with /user in RAZ environment. + resets = [RAZ.IS_ENABLED.set_for_testing(True), REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container/user')] try: remote_home_storage = get_remote_home_storage(self.user) - assert remote_home_storage == 's3a://gethue-bucket/user/test' + assert remote_home_storage == 'abfs://gethue-container/user/test' remote_home_storage = get_remote_home_storage(self.user_not_me) - assert remote_home_storage == 's3a://gethue-bucket/user/test_not_me' + assert remote_home_storage == 'abfs://gethue-container/user/test_not_me' finally: for reset in resets: reset() - # When default home does not ends with /user in RAZ env - resets = [ - RAZ.IS_ENABLED.set_for_testing(True), - REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container') - ] + # When REMOTE_STORAGE_HOME is not set. + resets = [REMOTE_STORAGE_HOME.set_for_testing(None)] try: remote_home_storage = get_remote_home_storage(self.user) - assert remote_home_storage == 'abfs://gethue-container' + assert remote_home_storage is None remote_home_storage = get_remote_home_storage(self.user_not_me) - assert remote_home_storage == 'abfs://gethue-container' + assert remote_home_storage is None finally: for reset in resets: reset() @@ -140,7 +122,6 @@ def test_get_remote_storage_home(self): @pytest.mark.django_db class TestDocument2(object): - def setup_method(self): self.client = make_logged_in_client(username="doc2", groupname="doc2", recreate=True, is_superuser=False) self.user = User.objects.get(username="doc2") @@ -152,11 +133,9 @@ def setup_method(self): self.home_dir = Document2.objects.get_home_directory(user=self.user) - def test_trash_directory(self): assert Directory.objects.filter(owner=self.user, name=Document2.TRASH_DIR, type='directory').exists() - def test_document_create(self): sql = 'SELECT * FROM sample_07' @@ -166,11 +145,7 @@ def test_document_create(self): # is_trashed # is_redacted old_query = SavedQuery.objects.create( - type=SavedQuery.TYPES_MAPPING['hql'], - owner=self.user, - data=design.dumps(), - name='See examples', - desc='Example of old format' + type=SavedQuery.TYPES_MAPPING['hql'], owner=self.user, data=design.dumps(), name='See examples', desc='Example of old format' ) try: @@ -191,7 +166,6 @@ def test_document_create(self): finally: old_query.delete() - def test_get_document(self): doc = Document2.objects.create(name='test_doc', type='query-hive', owner=self.user, data={}) self.home_dir.children.add(doc) @@ -216,11 +190,9 @@ def test_get_document(self): data = json.loads(response.content) assert doc.uuid == data['document']['uuid'] - def test_directory_create_and_rename(self): response = self.client.post( - '/desktop/api2/doc/mkdir', - {'parent_uuid': json.dumps(self.home_dir.uuid), 'name': json.dumps('test_mkdir')} + '/desktop/api2/doc/mkdir', {'parent_uuid': json.dumps(self.home_dir.uuid), 
'name': json.dumps('test_mkdir')} ) data = json.loads(response.content) @@ -229,14 +201,12 @@ def test_directory_create_and_rename(self): assert data['directory']['name'] == 'test_mkdir', data assert data['directory']['type'] == 'directory', data - response = self.client.post('/desktop/api2/doc/update', {'uuid': json.dumps(data['directory']['uuid']), - 'name': 'updated'}) + response = self.client.post('/desktop/api2/doc/update', {'uuid': json.dumps(data['directory']['uuid']), 'name': 'updated'}) data = json.loads(response.content) assert 0 == data['status'] assert 'updated' == data['document']['name'], data - def test_file_move(self): source_dir = Directory.objects.create(name='test_mv_file_src', owner=self.user, parent_directory=self.home_dir) target_dir = Directory.objects.create(name='test_mv_file_dst', owner=self.user, parent_directory=self.home_dir) @@ -252,10 +222,9 @@ def test_file_move(self): data = json.loads(response.content) assert '/test_mv_file_src/query1.sql' == data['document']['path'] - response = self.client.post('/desktop/api2/doc/move', { - 'source_doc_uuid': json.dumps(doc.uuid), - 'destination_doc_uuid': json.dumps(target_dir.uuid) - }) + response = self.client.post( + '/desktop/api2/doc/move', {'source_doc_uuid': json.dumps(doc.uuid), 'destination_doc_uuid': json.dumps(target_dir.uuid)} + ) data = json.loads(response.content) assert 0 == data['status'], data @@ -277,18 +246,10 @@ def test_file_copy(self): pytest.skip("Skipping Test") workflow_doc = Document2.objects.create( - name='Copy Test', - type='oozie-workflow2', - owner=self.user, - data={}, - parent_directory=self.home_dir + name='Copy Test', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir ) Document.objects.link( - workflow_doc, - owner=workflow_doc.owner, - name=workflow_doc.name, - description=workflow_doc.description, - extra='workflow2' + workflow_doc, owner=workflow_doc.owner, name=workflow_doc.name, description=workflow_doc.description, extra='workflow2' ) workflow = Workflow(user=self.user) @@ -313,9 +274,7 @@ def copy_remote_dir(self, src, dst, *args, **kwargs): ProxyFS.real_copy_remote_dir = ProxyFS.copy_remote_dir ProxyFS.copy_remote_dir = copy_remote_dir - response = self.client.post('/desktop/api2/doc/copy', { - 'uuid': json.dumps(workflow_doc.uuid) - }) + response = self.client.post('/desktop/api2/doc/copy', {'uuid': json.dumps(workflow_doc.uuid)}) finally: Workflow.check_workspace = Workflow.real_check_workspace ProxyFS.copy_remote_dir = ProxyFS.real_copy_remote_dir @@ -333,7 +292,6 @@ def copy_remote_dir(self, src, dst, *args, **kwargs): assert copy_doc.uuid != workflow_doc.uuid assert copy_workflow.get_data()['workflow']['uuid'] != workflow.get_data()['workflow']['uuid'] - def test_directory_move(self): source_dir = Directory.objects.create(name='test_mv', owner=self.user, parent_directory=self.home_dir) target_dir = Directory.objects.create(name='test_mv_dst', owner=self.user, parent_directory=self.home_dir) @@ -348,10 +306,13 @@ def test_directory_move(self): data = json.loads(response.content) assert '/test_mv/query1.sql' == data['document']['path'] - response = self.client.post('/desktop/api2/doc/move', { + response = self.client.post( + '/desktop/api2/doc/move', + { 'source_doc_uuid': json.dumps(Directory.objects.get(owner=self.user, name='test_mv').uuid), - 'destination_doc_uuid': json.dumps(Directory.objects.get(owner=self.user, name='test_mv_dst').uuid) - }) + 'destination_doc_uuid': json.dumps(Directory.objects.get(owner=self.user, 
name='test_mv_dst').uuid), + }, + ) data = json.loads(response.content) assert 0 == data['status'], data @@ -364,7 +325,6 @@ def test_directory_move(self): data = json.loads(response.content) assert '/test_mv_dst/test_mv/query1.sql' == data['document']['path'] - def test_directory_children(self): # Creates 2 directories and 2 queries and saves to home directory dir1 = Directory.objects.create(name='test_dir1', owner=self.user) @@ -404,15 +364,9 @@ def test_directory_children(self): assert 5 == data['count'] assert 2 == len(data['children']) - def test_update_document(self): doc = Document2.objects.create( - name='initial', - description='initial desc', - type='query-hive', - owner=self.user, - data={}, - parent_directory=self.home_dir + name='initial', description='initial desc', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir ) response = self.client.get('/desktop/api2/doc/', {'uuid': doc.uuid}) @@ -422,10 +376,9 @@ def test_update_document(self): assert 'query-hive' == data['document']['type'] # Update document's name and description - response = self.client.post('/desktop/api2/doc/update', {'uuid': json.dumps(doc.uuid), - 'name': 'updated', - 'description': 'updated desc', - 'type': 'bogus-type'}) + response = self.client.post( + '/desktop/api2/doc/update', {'uuid': json.dumps(doc.uuid), 'name': 'updated', 'description': 'updated desc', 'type': 'bogus-type'} + ) data = json.loads(response.content) assert 0 == data['status'] assert 'document' in data, data @@ -434,7 +387,6 @@ def test_update_document(self): # Non-whitelisted attributes should remain unchanged assert 'query-hive' == data['document']['type'], data - def test_document_trash(self): # Create document under home and directory under home with child document # / @@ -492,17 +444,11 @@ def test_document_trash(self): assert 1 == data['count'] assert Document2.TRASH_DIR in [f['name'] for f in data['children']] - def test_get_history(self): history = Document2.objects.get_history(user=self.user, doc_type='query-hive') assert not history.filter(name='test_get_history').exists() - query = Document2.objects.create( - name='test_get_history', - type='query-hive', - owner=self.user, - is_history=True - ) + query = Document2.objects.create(name='test_get_history', type='query-hive', owner=self.user, is_history=True) try: history = Document2.objects.get_history(user=self.user, doc_type='query-hive') @@ -510,20 +456,10 @@ def test_get_history(self): finally: query.delete() - def test_get_history_with_connector(self): - connector = Connector.objects.create( - name='MySql', - dialect='mysql' - ) + connector = Connector.objects.create(name='MySql', dialect='mysql') - query = Document2.objects.create( - name='test_get_history', - type='query-hive', - owner=self.user, - is_history=False, - connector=connector - ) + query = Document2.objects.create(name='test_get_history', type='query-hive', owner=self.user, is_history=False, connector=connector) try: history = Document2.objects.get_history(user=self.user, doc_type='query-hive', connector_id=connector.id) @@ -538,36 +474,31 @@ def test_get_history_with_connector(self): query.delete() connector.delete() - def test_validate_immutable_user_directories(self): # Test that home and Trash directories cannot be recreated or modified test_dir = Directory.objects.create(name='test_dir', owner=self.user, parent_directory=self.home_dir) response = self.client.post( - '/desktop/api2/doc/mkdir', - {'parent_uuid': json.dumps(test_dir.uuid), 'name': json.dumps(Document2.TRASH_DIR)} + 
'/desktop/api2/doc/mkdir', {'parent_uuid': json.dumps(test_dir.uuid), 'name': json.dumps(Document2.TRASH_DIR)} ) data = json.loads(response.content) assert -1 == data['status'], data assert 'Cannot create or modify directory with name: .Trash' == data['message'] - response = self.client.post('/desktop/api2/doc/move', { - 'source_doc_uuid': json.dumps(self.home_dir.uuid), - 'destination_doc_uuid': json.dumps(test_dir.uuid) - }) + response = self.client.post( + '/desktop/api2/doc/move', {'source_doc_uuid': json.dumps(self.home_dir.uuid), 'destination_doc_uuid': json.dumps(test_dir.uuid)} + ) data = json.loads(response.content) assert -1 == data['status'], data assert 'Cannot create or modify directory with name: ' == data['message'] trash_dir = Directory.objects.get(name=Document2.TRASH_DIR, owner=self.user) - response = self.client.post('/desktop/api2/doc/move', { - 'source_doc_uuid': json.dumps(trash_dir.uuid), - 'destination_doc_uuid': json.dumps(test_dir.uuid) - }) + response = self.client.post( + '/desktop/api2/doc/move', {'source_doc_uuid': json.dumps(trash_dir.uuid), 'destination_doc_uuid': json.dumps(test_dir.uuid)} + ) data = json.loads(response.content) assert -1 == data['status'], data assert 'Cannot create or modify directory with name: .Trash' == data['message'] - def test_validate_circular_directory(self): # Test that saving a document with cycle raises an error, i.e. - This should fail: # a.parent_directory = b @@ -576,50 +507,45 @@ def test_validate_circular_directory(self): c_dir = Directory.objects.create(name='c', owner=self.user) b_dir = Directory.objects.create(name='b', owner=self.user, parent_directory=c_dir) a_dir = Directory.objects.create(name='a', owner=self.user, parent_directory=b_dir) - response = self.client.post('/desktop/api2/doc/move', { - 'source_doc_uuid': json.dumps(c_dir.uuid), - 'destination_doc_uuid': json.dumps(a_dir.uuid) - }) + response = self.client.post( + '/desktop/api2/doc/move', {'source_doc_uuid': json.dumps(c_dir.uuid), 'destination_doc_uuid': json.dumps(a_dir.uuid)} + ) data = json.loads(response.content) assert -1 == data['status'], data assert 'circular dependency' in data['message'], data # Test simple case where directory is saved to self as parent dir = Directory.objects.create(name='dir', owner=self.user) - response = self.client.post('/desktop/api2/doc/move', { - 'source_doc_uuid': json.dumps(dir.uuid), - 'destination_doc_uuid': json.dumps(dir.uuid) - }) + response = self.client.post( + '/desktop/api2/doc/move', {'source_doc_uuid': json.dumps(dir.uuid), 'destination_doc_uuid': json.dumps(dir.uuid)} + ) data = json.loads(response.content) assert -1 == data['status'], data assert 'circular dependency' in data['message'], data - def test_api_get_data(self): doc_data = {'info': 'hello', 'is_history': False} doc = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data=json.dumps(doc_data)) doc_data.update({'id': doc.id, 'uuid': doc.uuid}) - response = self.client.get('/desktop/api2/doc/', { + response = self.client.get( + '/desktop/api2/doc/', + { 'uuid': doc.uuid, - }) + }, + ) data = json.loads(response.content) assert 'document' in data, data assert not data['data'], data - response = self.client.get('/desktop/api2/doc/', { - 'uuid': doc.uuid, - 'data': 'true' - }) + response = self.client.get('/desktop/api2/doc/', {'uuid': doc.uuid, 'data': 'true'}) data = json.loads(response.content) assert 'data' in data, data assert data['data'] == doc_data - def test_is_trashed_migration(self): - # Skipping to prevent 
failing tests in TestOozieSubmissions pytest.skip("Skipping Test") @@ -720,13 +646,12 @@ def test_is_trashed_migration(self): @pytest.mark.django_db class TestDocument2Permissions(object): - def setup_method(self): self.default_group = get_default_user_group() self.client = make_logged_in_client(username="perm_user", groupname=self.default_group.name, recreate=True, is_superuser=False) self.client_not_me = make_logged_in_client( - username="not_perm_user", groupname=self.default_group.name, recreate=True, is_superuser=False + username="not_perm_user", groupname=self.default_group.name, recreate=True, is_superuser=False ) self.user = User.objects.get(username="perm_user") @@ -742,7 +667,6 @@ def setup_method(self): self.home_dir = Document2.objects.get_home_directory(user=self.user) - def test_default_permissions(self): # Tests that for a new doc by default, read/write perms are set to no users and no groups new_doc = Document2.objects.create(name='new_doc', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) @@ -751,16 +675,13 @@ def test_default_permissions(self): data = json.loads(response.content) assert new_doc.uuid == data['document']['uuid'], data assert 'perms' in data['document'] - assert ( - { - 'read': {'users': [], 'groups': []}, - 'write': {'users': [], 'groups': []}, - 'link_read': False, - 'link_sharing_on': False, - 'link_write': False, - } == - data['document']['perms']) - + assert { + 'read': {'users': [], 'groups': []}, + 'write': {'users': [], 'groups': []}, + 'link_read': False, + 'link_sharing_on': False, + 'link_write': False, + } == data['document']['perms'] def test_share_document_read_by_user(self): doc = Document2.objects.create(name='new_doc', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) @@ -776,22 +697,24 @@ def test_share_document_read_by_user(self): assert -1 == data['status'] # Share read perm by users - response = self.client.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(doc.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [ - self.user.id, - self.user_not_me.id - ], - 'group_ids': [], - }, - 'write': { - 'user_ids': [], - 'group_ids': [], - } - }) - }) + response = self.client.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(doc.uuid), + 'data': json.dumps( + { + 'read': { + 'user_ids': [self.user.id, self.user_not_me.id], + 'group_ids': [], + }, + 'write': { + 'user_ids': [], + 'group_ids': [], + }, + } + ), + }, + ) assert 0 == json.loads(response.content)['status'], response.content assert doc.can_read(self.user) @@ -805,42 +728,47 @@ def test_share_document_read_by_user(self): assert doc.uuid == data['document']['uuid'], data # other user can share document with read permissions - response = self.client_not_me.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(doc.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [], - 'group_ids': [ - self.default_group.id - ], - }, - 'write': { - 'user_ids': [], - 'group_ids': [], - } - }) - }) + response = self.client_not_me.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(doc.uuid), + 'data': json.dumps( + { + 'read': { + 'user_ids': [], + 'group_ids': [self.default_group.id], + }, + 'write': { + 'user_ids': [], + 'group_ids': [], + }, + } + ), + }, + ) assert 0 == json.loads(response.content)['status'], response.content # other user cannot share document with write permissions - response = self.client_not_me.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(doc.uuid), - 'data': json.dumps({ - 'read': { - 
'user_ids': [], - 'group_ids': [], - }, - 'write': { - 'user_ids': [], - 'group_ids': [ - self.default_group.id - ], - } - }) - }) + response = self.client_not_me.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(doc.uuid), + 'data': json.dumps( + { + 'read': { + 'user_ids': [], + 'group_ids': [], + }, + 'write': { + 'user_ids': [], + 'group_ids': [self.default_group.id], + }, + } + ), + }, + ) assert -1 == json.loads(response.content)['status'], response.content - def test_share_document_read_by_group(self): doc = Document2.objects.create(name='new_doc', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) @@ -854,23 +782,15 @@ def test_share_document_read_by_group(self): data = json.loads(response.content) assert -1 == data['status'] - response = self.client.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(doc.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [ - self.user.id - ], - 'group_ids': [ - self.default_group.id - ] - }, - 'write': { - 'user_ids': [], - 'group_ids': [] - } - }) - }) + response = self.client.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(doc.uuid), + 'data': json.dumps( + {'read': {'user_ids': [self.user.id], 'group_ids': [self.default_group.id]}, 'write': {'user_ids': [], 'group_ids': []}} + ), + }, + ) assert 0 == json.loads(response.content)['status'], response.content assert doc.can_read(self.user) @@ -883,7 +803,6 @@ def test_share_document_read_by_group(self): data = json.loads(response.content) assert doc.uuid == data['document']['uuid'], data - def test_share_document_write_by_user(self): doc = Document2.objects.create(name='new_doc', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) @@ -893,23 +812,15 @@ def test_share_document_write_by_user(self): assert -1 == data['status'] # Share write perm by user - response = self.client.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(doc.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [ - self.user.id - ], - 'group_ids': [] - }, - 'write': { - 'user_ids': [ - self.user_not_me.id - ], - 'group_ids': [] - } - }) - }) + response = self.client.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(doc.uuid), + 'data': json.dumps( + {'read': {'user_ids': [self.user.id], 'group_ids': []}, 'write': {'user_ids': [self.user_not_me.id], 'group_ids': []}} + ), + }, + ) assert 0 == json.loads(response.content)['status'], response.content assert doc.can_read(self.user) @@ -922,7 +833,6 @@ def test_share_document_write_by_user(self): data = json.loads(response.content) assert 0 == data['status'] - def test_share_document_write_by_group(self): doc = Document2.objects.create(name='new_doc', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) @@ -932,23 +842,15 @@ def test_share_document_write_by_group(self): assert -1 == data['status'] # Share write perm by group - response = self.client.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(doc.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [ - self.user.id - ], - 'group_ids': [] - }, - 'write': { - 'user_ids': [], - 'group_ids': [ - self.default_group.id - ] - } - }) - }) + response = self.client.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(doc.uuid), + 'data': json.dumps( + {'read': {'user_ids': [self.user.id], 'group_ids': []}, 'write': {'user_ids': [], 'group_ids': [self.default_group.id]}} + ), + }, + ) assert 0 == json.loads(response.content)['status'], response.content assert doc.can_read(self.user) @@ -961,7 +863,6 @@ 
def test_share_document_write_by_group(self): data = json.loads(response.content) assert 0 == data['status'] - def test_share_directory(self): # Test that updating the permissions for a directory updates all nested documents accordingly, with file structure: # / @@ -983,21 +884,13 @@ def test_share_directory(self): assert not doc.can_write(self.user_not_me) # Update parent_dir permissions to grant write permissions to default group - response = self.client.post("/desktop/api2/doc/share", { + response = self.client.post( + "/desktop/api2/doc/share", + { 'uuid': json.dumps(parent_dir.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [], - 'group_ids': [] - }, - 'write': { - 'user_ids': [], - 'group_ids': [ - self.default_group.id - ] - } - }) - }) + 'data': json.dumps({'read': {'user_ids': [], 'group_ids': []}, 'write': {'user_ids': [], 'group_ids': [self.default_group.id]}}), + }, + ) assert 0 == json.loads(response.content)['status'], response.content for doc in [parent_dir, child_doc, nested_dir, nested_doc]: @@ -1006,7 +899,6 @@ def test_share_directory(self): assert doc.can_read(self.user_not_me) assert doc.can_write(self.user_not_me) - def test_get_shared_documents(self): not_shared = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) shared_1 = Document2.objects.create(name='query2.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) @@ -1023,7 +915,7 @@ def test_get_shared_documents(self): doc_names = [doc['name'] for doc in data['documents']] assert 'query2.sql' in doc_names assert 'query3.sql' in doc_names - assert not 'query1.sql' in doc_names + assert 'query1.sql' not in doc_names # they should also appear in user's home directory get_documents response response = self.client_not_me.get('/desktop/api2/doc/') @@ -1032,7 +924,6 @@ def test_get_shared_documents(self): assert 'query2.sql' in doc_names assert 'query3.sql' in doc_names - def test_get_shared_directories(self): # Tests that when fetching the shared documents for a user, they are grouped by top-level directory when possible # / @@ -1063,11 +954,11 @@ def test_get_shared_directories(self): assert 'dir1' in doc_names assert 'dir3' in doc_names assert 'query3.sql' in doc_names - assert not 'dir2' in doc_names + assert 'dir2' not in doc_names # nested documents should not appear - assert not 'query1.sql' in doc_names - assert not 'query2.sql' in doc_names + assert 'query1.sql' not in doc_names + assert 'query2.sql' not in doc_names # but nested documents should still be shared/viewable by group response = self.client_not_me.get('/desktop/api2/doc/', {'uuid': doc1.uuid}) @@ -1078,7 +969,6 @@ def test_get_shared_directories(self): data = json.loads(response.content) assert doc2.uuid == data['document']['uuid'], data - def test_inherit_parent_permissions(self): # Tests that when saving a document to a shared directory, the doc/dir inherits same permissions @@ -1091,35 +981,26 @@ def test_inherit_parent_permissions(self): response = self.client.get('/desktop/api2/doc/', {'uuid': doc1.uuid}) data = json.loads(response.content) - assert ( - [{'id': self.default_group.id, 'name': self.default_group.name}] == - data['document']['perms']['read']['groups']), data - assert ( - [{'id': self.user_not_me.id, 'username': self.user_not_me.username}] == - data['document']['perms']['write']['users']), data - + assert [{'id': self.default_group.id, 'name': self.default_group.name}] == data['document']['perms']['read']['groups'], data + 
assert [{'id': self.user_not_me.id, 'username': self.user_not_me.username}] == data['document']['perms']['write']['users'], data def test_search_documents(self): owned_dir = Directory.objects.create(name='test_dir', owner=self.user, parent_directory=self.home_dir) - owned_query = Document2.objects.create( - name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=owned_dir - ) + owned_query = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=owned_dir) owned_history = Document2.objects.create( - name='history.sql', type='query-hive', owner=self.user, data={}, is_history=True, parent_directory=owned_dir - ) - owned_workflow = Document2.objects.create( - name='test.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=owned_dir + name='history.sql', type='query-hive', owner=self.user, data={}, is_history=True, parent_directory=owned_dir ) + owned_workflow = Document2.objects.create(name='test.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=owned_dir) other_home_dir = Document2.objects.get_home_directory(user=self.user_not_me) not_shared = Document2.objects.create( - name='other_query1.sql', type='query-hive', owner=self.user_not_me, data={}, parent_directory=other_home_dir + name='other_query1.sql', type='query-hive', owner=self.user_not_me, data={}, parent_directory=other_home_dir ) shared_1 = Document2.objects.create( - name='other_query2.sql', type='query-hive', owner=self.user_not_me, data={}, parent_directory=other_home_dir + name='other_query2.sql', type='query-hive', owner=self.user_not_me, data={}, parent_directory=other_home_dir ) shared_2 = Document2.objects.create( - name='other_query3.sql', type='query-hive', owner=self.user_not_me, data={}, parent_directory=other_home_dir + name='other_query3.sql', type='query-hive', owner=self.user_not_me, data={}, parent_directory=other_home_dir ) shared_1.share(user=self.user_not_me, name='read', users=[self.user], groups=[]) @@ -1143,7 +1024,6 @@ def test_search_documents(self): doc_names = [doc['name'] for doc in data['documents']] assert 'history.sql' in doc_names - def test_x_share_directory_y_add_file_x_share(self): # Test that when another User, Y, adds a doc to dir shared by User X, User X doesn't fail to share the dir next time: # / @@ -1157,44 +1037,28 @@ def test_x_share_directory_y_add_file_x_share(self): user_y = User.objects.create(username='user_y', password="user_y") # Share the dir with user_not_me - response = self.client.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(parent_dir.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [], - 'group_ids': [] - }, - 'write': { - 'user_ids': [user_y.id], - 'group_ids': [] - } - }) - }) + response = self.client.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(parent_dir.uuid), + 'data': json.dumps({'read': {'user_ids': [], 'group_ids': []}, 'write': {'user_ids': [user_y.id], 'group_ids': []}}), + }, + ) user_y_child_doc = Document2.objects.create( - name='other_query1.sql', - type='query-hive', - owner=user_y, - data={}, - parent_directory=parent_dir + name='other_query1.sql', type='query-hive', owner=user_y, data={}, parent_directory=parent_dir ) share_test_user = User.objects.create(username='share_test_user', password="share_test_user") # Share the dir with another user - share_test_user - response = self.client.post("/desktop/api2/doc/share", { - 'uuid': json.dumps(parent_dir.uuid), - 'data': json.dumps({ - 'read': { - 'user_ids': [], - 
'group_ids': [] - }, - 'write': { - 'user_ids': [share_test_user.id], - 'group_ids': [] - } - }) - }) + response = self.client.post( + "/desktop/api2/doc/share", + { + 'uuid': json.dumps(parent_dir.uuid), + 'data': json.dumps({'read': {'user_ids': [], 'group_ids': []}, 'write': {'user_ids': [share_test_user.id], 'group_ids': []}}), + }, + ) assert 0 == json.loads(response.content)['status'], response.content for doc in [parent_dir, child_doc, user_y_child_doc]: @@ -1203,14 +1067,9 @@ def test_x_share_directory_y_add_file_x_share(self): assert doc.can_read(share_test_user) assert doc.can_write(share_test_user) - def test_unicode_name(self): doc = Document2.objects.create( - name='My Bundle a voté « non » à l’accord', - type='oozie-workflow2', - owner=self.user, - data={}, - parent_directory=self.home_dir + name='My Bundle a voté « non » à l’accord', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir ) # Verify that home directory contents return correctly @@ -1225,14 +1084,9 @@ def test_unicode_name(self): path = data['document']['path'] assert '/My%20Bundle%20a%20vot%C3%A9%20%C2%AB%20non%20%C2%BB%20%C3%A0%20l%E2%80%99accord' == path - def test_link_permissions(self): doc = Document2.objects.create( - name='test_link_permissions.sql', - type='query-hive', - owner=self.user, - data={}, - parent_directory=self.home_dir + name='test_link_permissions.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir ) try: @@ -1280,11 +1134,7 @@ def test_link_permissions(self): def test_combined_permissions(self): doc = Document2.objects.create( - name='test_combined_permissions.sql', - type='query-hive', - owner=self.user, - data={}, - parent_directory=self.home_dir + name='test_combined_permissions.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir ) try: @@ -1363,7 +1213,6 @@ def test_combined_permissions(self): @pytest.mark.django_db class TestDocument2ImportExport(object): - def setup_method(self): self.client = make_logged_in_client(username="perm_user", groupname="default", recreate=True, is_superuser=False) self.client_not_me = make_logged_in_client(username="not_perm_user", groupname="default", recreate=True, is_superuser=False) @@ -1388,7 +1237,7 @@ def test_export_documents_with_dependencies(self): query1 = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) query2 = Document2.objects.create(name='query2.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) query3 = Document2.objects.create( - name='query3.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir, is_history=True + name='query3.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir, is_history=True ) workflow = Document2.objects.create(name='test.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir) workflow.dependencies.add(query1) @@ -1404,7 +1253,7 @@ def test_export_documents_with_dependencies(self): assert 'test.wf' in [doc['fields']['name'] for doc in documents] assert 'query1.sql' in [doc['fields']['name'] for doc in documents] assert 'query2.sql' in [doc['fields']['name'] for doc in documents] - assert not 'query3.sql' in [doc['fields']['name'] for doc in documents] + assert 'query3.sql' not in [doc['fields']['name'] for doc in documents] # Test that exporting multiple workflows with overlapping dependencies works workflow2 = 
Document2.objects.create(name='test2.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir) @@ -1420,35 +1269,29 @@ def test_export_documents_with_dependencies(self): assert 'query1.sql' in [doc['fields']['name'] for doc in documents] assert 'query2.sql' in [doc['fields']['name'] for doc in documents] - def test_export_documents_file_name(self): - query1 = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, - parent_directory=self.home_dir) - query2 = Document2.objects.create(name='query2.sql', type='query-hive', owner=self.user, data={}, - parent_directory=self.home_dir) - query3 = Document2.objects.create(name='query3.sql', type='query-hive', owner=self.user, data={}, - parent_directory=self.home_dir, is_history=True) - workflow = Document2.objects.create(name='test.wf', type='oozie-workflow2', owner=self.user, data={}, - parent_directory=self.home_dir) + query1 = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) + query2 = Document2.objects.create(name='query2.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) + query3 = Document2.objects.create( + name='query3.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir, is_history=True + ) + workflow = Document2.objects.create(name='test.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir) workflow.dependencies.add(query1) workflow.dependencies.add(query2) workflow.dependencies.add(query3) # Test that exporting multiple workflows with overlapping dependencies works - workflow2 = Document2.objects.create(name='test2.wf', type='oozie-workflow2', owner=self.user, data={}, - parent_directory=self.home_dir) + workflow2 = Document2.objects.create(name='test2.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir) workflow2.dependencies.add(query1) # Test that exporting to a file includes the date and number of documents in the filename response = self.client.get('/desktop/api2/doc/export/', {'documents': json.dumps([workflow.id, workflow2.id])}) - assert ( - response['Content-Disposition'] == 'attachment; filename="hue-documents-%s-(4).json"' % datetime.today().strftime('%Y-%m-%d')) + assert response['Content-Disposition'] == 'attachment; filename="hue-documents-%s-(4).json"' % datetime.today().strftime('%Y-%m-%d') # Test that exporting single file gets the name of the document in the filename response = self.client.get('/desktop/api2/doc/export/', {'documents': json.dumps([workflow.id])}) assert response['Content-Disposition'] == 'attachment; filename="' + workflow.name + '.json"' - def test_export_directories_with_children(self): # Test that exporting a directory exports children docs # / @@ -1478,14 +1321,13 @@ def test_export_directories_with_children(self): assert 'query2.sql' in [doc['fields']['name'] for doc in documents] assert 'query3.sql' in [doc['fields']['name'] for doc in documents] - def test_import_owned_document(self): owned_query = Document2.objects.create( name='query.sql', type='query-hive', owner=self.user, data=json.dumps({'description': 'original_query'}), - parent_directory=self.home_dir + parent_directory=self.home_dir, ) # Test that importing existing doc updates it and retains owner, UUID @@ -1537,7 +1379,7 @@ def test_import_nonowned_document(self): type='query-hive', owner=self.user, data=json.dumps({'description': 'original_query'}), - 
parent_directory=self.home_dir + parent_directory=self.home_dir, ) response = self.client.get('/desktop/api2/doc/export/', {'documents': json.dumps([owned_query.id]), 'format': 'json'}) @@ -1564,12 +1406,11 @@ def test_import_nonowned_document(self): assert 0 == data['updated_count'] def test_import_with_history_dependencies(self): - query1 = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, - parent_directory=self.home_dir) - query2 = Document2.objects.create(name='query2.sql', type='query-hive', owner=self.user, data={}, - parent_directory=self.home_dir, is_history=True) - workflow = Document2.objects.create(name='test.wf', type='oozie-workflow2', owner=self.user, data={}, - parent_directory=self.home_dir) + query1 = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir) + query2 = Document2.objects.create( + name='query2.sql', type='query-hive', owner=self.user, data={}, parent_directory=self.home_dir, is_history=True + ) + workflow = Document2.objects.create(name='test.wf', type='oozie-workflow2', owner=self.user, data={}, parent_directory=self.home_dir) workflow.dependencies.add(query1) workflow.dependencies.add(query2) diff --git a/desktop/libs/aws/src/aws/conf.py b/desktop/libs/aws/src/aws/conf.py index 26d5512af4..f72158e3e9 100644 --- a/desktop/libs/aws/src/aws/conf.py +++ b/desktop/libs/aws/src/aws/conf.py @@ -13,23 +13,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -import logging import os import re import sys +import logging import requests -from desktop.lib.conf import Config, UnspecifiedConfigSection, ConfigSection, coerce_bool, coerce_password_from_script +from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool, coerce_password_from_script from desktop.lib.idbroker import conf as conf_idbroker -from hadoop.core_site import get_s3a_access_key, get_s3a_secret_key, get_s3a_session_token, get_raz_api_url, get_raz_s3_default_bucket +from hadoop.core_site import get_raz_api_url, get_raz_s3_default_bucket, get_s3a_access_key, get_s3a_secret_key, get_s3a_session_token if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _, gettext as _t + from django.utils.translation import gettext as _t, gettext_lazy as _ else: - from django.utils.translation import ugettext_lazy as _, ugettext as _t + from django.utils.translation import ugettext as _t, ugettext_lazy as _ LOG = logging.getLogger() @@ -39,18 +38,20 @@ SUBDOMAIN_ENDPOINT_RE = 's3.(?P[a-z0-9-]+).amazonaws.com' HYPHEN_ENDPOINT_RE = 's3-(?P[a-z0-9-]+).amazonaws.com' DUALSTACK_ENDPOINT_RE = 's3.dualstack.(?P[a-z0-9-]+).amazonaws.com' -AWS_ACCOUNT_REGION_DEFAULT = 'us-east-1' # Location.USEast +AWS_ACCOUNT_REGION_DEFAULT = 'us-east-1' # Location.USEast PERMISSION_ACTION_S3 = "s3_access" REGION_CACHED = None IS_IAM_CACHED = None IS_EC2_CACHED = None + def clear_cache(): global REGION_CACHED, IS_IAM_CACHED, IS_EC2_CACHED REGION_CACHED = None IS_IAM_CACHED = None IS_EC2_CACHED = None + def get_locations(): return ('EU', # Ireland 'af-south-1', @@ -265,6 +266,12 @@ def get_default_get_environment_credentials(): default=14400, type=int ), + DEFAULT_HOME_PATH=Config( + key="default_home_path", + type=str, + default=None, + help="Optionally set this for a different home directory path. e.g. 
s3a://gethue" + ), ) ) ) @@ -292,11 +299,10 @@ def is_ec2_instance(): try: # Low chance of false positive - IS_EC2_CACHED = (os.path.exists('/sys/hypervisor/uuid') and open('/sys/hypervisor/uuid', 'r').read()[:3].lower() == 'ec2') or \ - ( - os.path.exists('/sys/devices/virtual/dmi/id/product_uuid') and \ - open('/sys/devices/virtual/dmi/id/product_uuid', 'r').read()[:3].lower() == 'ec2' - ) + IS_EC2_CACHED = (os.path.exists('/sys/hypervisor/uuid') and open('/sys/hypervisor/uuid', 'r').read()[:3].lower() == 'ec2') or ( + os.path.exists('/sys/devices/virtual/dmi/id/product_uuid') + and open('/sys/devices/virtual/dmi/id/product_uuid', 'r').read()[:3].lower() == 'ec2' + ) except Exception as e: LOG.info("Detecting if Hue on an EC2 host, error might be expected: %s" % e) @@ -324,7 +330,7 @@ def has_iam_metadata(): IS_IAM_CACHED = 'iam' in metadata else: IS_IAM_CACHED = False - except: + except Exception: IS_IAM_CACHED = False LOG.exception("Encountered error when checking IAM metadata") return IS_IAM_CACHED @@ -340,13 +346,17 @@ def has_s3_access(user): def is_raz_s3(): from desktop.conf import RAZ # Must be imported dynamically in order to have proper value - return (RAZ.IS_ENABLED.get() and 'default' in list(AWS_ACCOUNTS.keys()) and \ - AWS_ACCOUNTS['default'].HOST.get() and AWS_ACCOUNTS['default'].get_raw()) + return ( + RAZ.IS_ENABLED.get() + and 'default' in list(AWS_ACCOUNTS.keys()) + and AWS_ACCOUNTS['default'].HOST.get() + and AWS_ACCOUNTS['default'].get_raw() + ) def config_validator(user): res = [] - import desktop.lib.fsmanager # Circular dependecy + import desktop.lib.fsmanager # Circular dependecy if is_enabled(): try: diff --git a/desktop/libs/aws/src/aws/s3/s3fs.py b/desktop/libs/aws/src/aws/s3/s3fs.py index eee4c4ca4f..9d9199ad4c 100644 --- a/desktop/libs/aws/src/aws/s3/s3fs.py +++ b/desktop/libs/aws/src/aws/s3/s3fs.py @@ -16,15 +16,14 @@ from __future__ import absolute_import -from builtins import str -from builtins import object -import itertools -import logging import os -import posixpath import re import sys import time +import logging +import itertools +import posixpath +from builtins import object, str from boto.exception import BotoClientError, S3ResponseError from boto.s3.connection import Location @@ -32,24 +31,26 @@ from boto.s3.prefix import Prefix from aws import s3 -from aws.conf import get_default_region, get_locations, PERMISSION_ACTION_S3, is_raz_s3 -from aws.s3 import normpath, s3file, translate_s3_error, S3A_ROOT +from aws.conf import AWS_ACCOUNTS, PERMISSION_ACTION_S3, get_default_region, get_locations, is_raz_s3 +from aws.s3 import S3A_ROOT, normpath, s3file, translate_s3_error from aws.s3.s3stat import S3Stat - from filebrowser.conf import REMOTE_STORAGE_HOME if sys.version_info[0] > 2: - import urllib.request, urllib.error + import urllib.error + import urllib.request from urllib.parse import quote as urllib_quote, urlparse as lib_urlparse + from django.utils.translation import gettext as _ else: from urllib import quote as urllib_quote - from urlparse import urlparse as lib_urlparse + from django.utils.translation import ugettext as _ + from urlparse import urlparse as lib_urlparse DEFAULT_READ_SIZE = 1024 * 1024 # 1MB BUCKET_NAME_PATTERN = re.compile( - "^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$") + r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$") LOG = logging.getLogger() @@ -58,10 +59,12 @@ class 
S3FileSystemException(IOError): def __init__(self, *args, **kwargs): super(S3FileSystemException, self).__init__(*args, **kwargs) + class S3ListAllBucketsException(S3FileSystemException): def __init__(self, *args, **kwargs): super(S3FileSystemException, self).__init__(*args, **kwargs) + def auth_error_handler(view_fn): def decorator(*args, **kwargs): try: @@ -89,9 +92,20 @@ def decorator(*args, **kwargs): def get_s3_home_directory(user=None): from desktop.models import _handle_user_dir_raz - remote_home_s3 = 's3a://' + # REMOTE_STORAGE_HOME is deprecated in favor of DEFAULT_HOME_PATH per FS config level. + # But for backward compatibility, we are still giving preference to REMOTE_STORAGE_HOME path first and if it's not set, + # then check for DEFAULT_HOME_PATH which is set per FS config block. This helps in setting diff DEFAULT_HOME_PATH for diff FS in Hue. + if hasattr(REMOTE_STORAGE_HOME, 'get') and REMOTE_STORAGE_HOME.get() and REMOTE_STORAGE_HOME.get().startswith('s3a://'): remote_home_s3 = REMOTE_STORAGE_HOME.get() + elif ( + 'default' in AWS_ACCOUNTS + and AWS_ACCOUNTS['default'].DEFAULT_HOME_PATH.get() + and AWS_ACCOUNTS['default'].DEFAULT_HOME_PATH.get().startswith('s3a://') + ): + remote_home_s3 = AWS_ACCOUNTS['default'].DEFAULT_HOME_PATH.get() + else: + remote_home_s3 = 's3a://' remote_home_s3 = _handle_user_dir_raz(user, remote_home_s3) @@ -116,7 +130,7 @@ def _get_bucket(self, name): except S3ResponseError as e: if e.status == 301 or e.status == 400: raise S3FileSystemException( - _('Failed to retrieve bucket "%s" in region "%s" with "%s". Your bucket is in region "%s"') % + _('Failed to retrieve bucket "%s" in region "%s" with "%s". Your bucket is in region "%s"') % (name, self._get_location(), e.message or e.reason, self.get_bucket_location(name))) else: raise e @@ -213,7 +227,7 @@ def _stats(self, path): raise S3FileSystemException(_('User is not authorized to access path: "%s"') % path) else: raise S3FileSystemException(_('Failed to access path "%s": %s') % (path, e.reason)) - except Exception as e: # SSL errors show up here, because they've been remapped in boto + except Exception as e: # SSL errors show up here, because they've been remapped in boto raise S3FileSystemException(_('Failed to access path "%s": %s') % (path, str(e))) if key is None: key = self._get_key(path, validate=False) @@ -382,7 +396,6 @@ def rmtree(self, path, skipTrash=True): LOG.error(msg) raise S3FileSystemException(msg) - @translate_s3_error @auth_error_handler def remove(self, path, skip_trash=True): @@ -455,7 +468,7 @@ def copy_remote_dir(self, src, dst, *args, **kwargs): def _copy(self, src, dst, recursive, use_src_basename): src_st = self.stats(src) if src_st.isDir and not recursive: - return # omitting directory + return # omitting directory dst = s3.abspath(src, dst) dst_st = self._stats(dst) @@ -513,7 +526,7 @@ def rename(self, old, new): self.rmtree(old, skipTrash=True) else: raise S3FileSystemException('Destination path is same as source path, skipping the operation.') - + @translate_s3_error @auth_error_handler def _check_key_parent_path(self, src, dst): @@ -600,5 +613,5 @@ def setuser(self, user): self.user = user # Only used in Cluster middleware request.fs def get_upload_chuck_size(self): - from hadoop.conf import UPLOAD_CHUNK_SIZE # circular dependency + from hadoop.conf import UPLOAD_CHUNK_SIZE # circular dependency return UPLOAD_CHUNK_SIZE.get() diff --git a/desktop/libs/aws/src/aws/s3/s3fs_test.py b/desktop/libs/aws/src/aws/s3/s3fs_test.py index 01df54e408..24a7e1f649 100644 --- 
a/desktop/libs/aws/src/aws/s3/s3fs_test.py +++ b/desktop/libs/aws/src/aws/s3/s3fs_test.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -13,36 +14,145 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -import json import os -import pytest -import tempfile +import json import string -import sys +import tempfile +from unittest.mock import Mock, patch -from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.test_utils import grant_access, add_to_group, add_permission, remove_from_group -from useradmin.models import User +import pytest +from aws.conf import AWS_ACCOUNTS from aws.s3 import join, parse_uri -from aws.s3.s3fs import S3FileSystem, S3FileSystemException +from aws.s3.s3fs import S3FileSystem, S3FileSystemException, get_s3_home_directory from aws.s3.s3test_utils import S3TestBase, generate_id from aws.s3.upload import DEFAULT_WRITE_SIZE +from desktop.conf import RAZ +from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.test_utils import add_permission, add_to_group, grant_access, remove_from_group +from filebrowser.conf import REMOTE_STORAGE_HOME +from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - - -class TestS3FileSystem(): - +@pytest.mark.django_db +def test_get_s3_home_directory(): + client = make_logged_in_client(username="test", groupname="test", recreate=True, is_superuser=False) + user = User.objects.get(username="test") + + client_not_me = make_logged_in_client(username="test_not_me", groupname="test_not_me", recreate=True, is_superuser=False) + user_not_me = User.objects.get(username="test_not_me") + + # When REMOTE_STORAGE_HOME ends with /user in RAZ S3 environment. + resets = [RAZ.IS_ENABLED.set_for_testing(True), REMOTE_STORAGE_HOME.set_for_testing('s3a://gethue-bucket/user')] + + try: + default_s3_home_path = get_s3_home_directory(user) + assert default_s3_home_path == 's3a://gethue-bucket/user/test' + + default_s3_home_path = get_s3_home_directory(user_not_me) + assert default_s3_home_path == 's3a://gethue-bucket/user/test_not_me' + finally: + for reset in resets: + reset() + + # When S3 filesystem's DEFAULT_HOME_PATH ends with /user in RAZ S3 environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + AWS_ACCOUNTS.set_for_testing( + {'default': {'region': 'us-west-2', 'host': 's3-us-west-2.amazonaws.com', 'default_home_path': 's3a://gethue-other-bucket/user'}} + ), + ] + + try: + default_s3_home_path = get_s3_home_directory(user) + assert default_s3_home_path == 's3a://gethue-other-bucket/user/test' + + default_s3_home_path = get_s3_home_directory(user_not_me) + assert default_s3_home_path == 's3a://gethue-other-bucket/user/test_not_me' + finally: + for reset in resets: + reset() + + # When S3 filesystem's DEFAULT_HOME_PATH is set in non-RAZ S3 environment. 
+ resets = [ + RAZ.IS_ENABLED.set_for_testing(False), + AWS_ACCOUNTS.set_for_testing( + {'default': {'region': 'us-west-2', 'host': 's3-us-west-2.amazonaws.com', 'default_home_path': 's3a://gethue-other-bucket/test-dir'}} + ), + ] + + try: + default_s3_home_path = get_s3_home_directory(user) + assert default_s3_home_path == 's3a://gethue-other-bucket/test-dir' + + default_s3_home_path = get_s3_home_directory(user_not_me) + assert default_s3_home_path == 's3a://gethue-other-bucket/test-dir' + finally: + for reset in resets: + reset() + + # When both REMOTE_STORAGE_HOME and S3 filesystem's DEFAULT_HOME_PATH are set in RAZ S3 environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + REMOTE_STORAGE_HOME.set_for_testing('s3a://gethue-bucket/user'), + AWS_ACCOUNTS.set_for_testing( + {'default': {'region': 'us-west-2', 'host': 's3-us-west-2.amazonaws.com', 'default_home_path': 's3a://gethue-other-bucket/user'}} + ), + ] + + try: + # Gives preference to REMOTE_STORAGE_HOME for backward compatibility. + default_s3_home_path = get_s3_home_directory(user) + assert default_s3_home_path == 's3a://gethue-bucket/user/test' + + default_s3_home_path = get_s3_home_directory(user_not_me) + assert default_s3_home_path == 's3a://gethue-bucket/user/test_not_me' + finally: + for reset in resets: + reset() + + # When S3 filesystem's DEFAULT_HOME_PATH is set but path does not end with ../user or ../user/ in RAZ S3 environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + AWS_ACCOUNTS.set_for_testing( + {'default': {'region': 'us-west-2', 'host': 's3-us-west-2.amazonaws.com', 'default_home_path': 's3a://gethue-other-bucket/dir'}} + ), + ] + + try: + default_s3_home_path = get_s3_home_directory(user) + assert default_s3_home_path == 's3a://gethue-other-bucket/dir' + + default_s3_home_path = get_s3_home_directory(user_not_me) + assert default_s3_home_path == 's3a://gethue-other-bucket/dir' + finally: + for reset in resets: + reset() + + # When some different path is set in both RAZ and non-RAZ S3 environment. 
+ resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container/user'), + AWS_ACCOUNTS.set_for_testing( + {'default': {'region': 'us-west-2', 'host': 's3-us-west-2.amazonaws.com', 'default_home_path': 'abfs://gethue-other-container/dir'}} + ), + ] + + try: + default_s3_home_path = get_s3_home_directory(user) + assert default_s3_home_path == 's3a://' + + default_s3_home_path = get_s3_home_directory(user_not_me) + assert default_s3_home_path == 's3a://' + finally: + for reset in resets: + reset() + + +class TestS3FileSystem: def test_rmtree_bucket(self): with patch('aws.s3.s3fs.S3FileSystem._delete_bucket') as _delete_bucket: - fs = S3FileSystem(s3_connection=Mock()) fs.rmtree(path='s3a://gethue') @@ -52,19 +162,11 @@ def test_rmtree_bucket(self): def test_rmtree_key(self): with patch('aws.s3.s3fs.S3FileSystem._get_key') as _get_key: with patch('aws.s3.s3fs.S3FileSystem.isdir') as isdir: - key = Mock( name='data', exists=Mock(return_value=True), - bucket=Mock( - list=Mock(return_value=[]), - delete_key=Mock() - ), - delete=Mock( - return_value=Mock( - exists=Mock(return_value=False) - ) - ) + bucket=Mock(list=Mock(return_value=[]), delete_key=Mock()), + delete=Mock(return_value=Mock(exists=Mock(return_value=False))), ) _get_key.return_value = key isdir.return_value = False @@ -79,19 +181,11 @@ def test_rmtree_key(self): def test_rmtree_empty_dir(self): with patch('aws.s3.s3fs.S3FileSystem._get_key') as _get_key: with patch('aws.s3.s3fs.S3FileSystem.isdir') as isdir: - key = Mock( name='data', exists=Mock(return_value=True), - bucket=Mock( - list=Mock(return_value=[]), - delete_key=Mock() - ), - delete=Mock( - return_value=Mock( - exists=Mock(return_value=False) - ) - ) + bucket=Mock(list=Mock(return_value=[]), delete_key=Mock()), + delete=Mock(return_value=Mock(exists=Mock(return_value=False))), ) _get_key.return_value = key isdir.return_value = True @@ -107,23 +201,11 @@ def test_rmtree_empty_dir(self): def test_rmtree_non_empty_dir(self): with patch('aws.s3.s3fs.S3FileSystem._get_key') as _get_key: with patch('aws.s3.s3fs.S3FileSystem.isdir') as isdir: - key = Mock( name='data', exists=Mock(return_value=True), - bucket=Mock( - list=Mock(return_value=['data/1', 'data/2']), - delete_keys=Mock( - return_value=Mock( - errors=[] - ) - ) - ), - delete=Mock( - return_value=Mock( - exists=Mock(return_value=False) - ) - ) + bucket=Mock(list=Mock(return_value=['data/1', 'data/2']), delete_keys=Mock(return_value=Mock(errors=[]))), + delete=Mock(return_value=Mock(exists=Mock(return_value=False))), ) _get_key.return_value = key isdir.return_value = True @@ -138,7 +220,6 @@ def test_rmtree_non_empty_dir(self): class S3FSTest(S3TestBase): - @classmethod def setup_class(cls): S3TestBase.setup_class() @@ -150,7 +231,6 @@ def setup_class(cls): add_to_group('test') cls.user = User.objects.get(username="test") - def test_open(self): path = self.get_test_path('test_open.txt') @@ -174,7 +254,6 @@ def test_open(self): with pytest.raises(Exception): self.fs.open(path, mode='?r') - def test_read(self): path = self.get_test_path('test_read.txt') with self.cleaning(path): @@ -184,15 +263,12 @@ def test_read(self): assert 'Hel' == self.fs.read(path, 0, 3) assert 'ell' == self.fs.read(path, 1, 3) - def test_isfile(self): pass - def test_isdir(self): pass - def test_exists(self): dir_path = self.get_test_path('test_exists') file_path = join(dir_path, 'file') @@ -210,7 +286,6 @@ def test_exists(self): fake_bucket = 'fake%s' % generate_id(8, string.ascii_lowercase + 
string.digits) assert not self.fs.exists('s3a://%s' % fake_bucket) - def test_stats(self): with pytest.raises(ValueError): self.fs.stats('ftp://archive') @@ -219,14 +294,13 @@ def test_stats(self): self.fs.stats(not_exists) root_stat = self.fs.stats('s3a://') - assert True == root_stat.isDir + assert True is root_stat.isDir assert 's3a://' == root_stat.path bucket_stat = self.fs.stats('s3a://%s' % self.bucket_name) - assert True == bucket_stat.isDir + assert True is bucket_stat.isDir assert 's3a://%s' % self.bucket_name == bucket_stat.path - def test_copyfile(self): src_path = self.get_test_path('test_copy_file_src') dst_path = self.get_test_path('test_copy_file_dst') @@ -239,7 +313,6 @@ def test_copyfile(self): actual = self.fs.read(dst_path, 0, len(data) + 100) assert data == actual - def test_full_copy(self): src_path = self.get_test_path('test_full_copy_src') dst_path = self.get_test_path('test_full_copy_dst') @@ -269,7 +342,6 @@ def test_full_copy(self): with pytest.raises(S3FileSystemException): self.fs.copy(src_path, dst_file_path, True) - def test_copy_remote_dir(self): src_dir = self.get_test_path('test_copy_remote_dir_src') dst_dir = self.get_test_path('test_copy_remote_dir_dst') @@ -291,7 +363,6 @@ def test_copy_remote_dir(self): assert src_names assert src_names == dst_names - def test_copy_from_local(self): src_name = 'test_copy_from_local_src' src_path = os.path.join(tempfile.gettempdir(), src_name) @@ -307,7 +378,6 @@ def test_copy_from_local(self): actual = self.fs.read(dst_path, 0, len(data) + 100) assert data == actual - def test_rename_dir(self): src_dir = self.get_test_path('test_rename_dir_src') dst_dir = self.get_test_path('test_rename_dir_dst') @@ -337,13 +407,12 @@ def test_rename_dir(self): # Assert that the children files are not duplicated at top-level destination bucket_ls = self.bucket.list() - assert not 'file_one.txt' in bucket_ls - assert not 'file_two.txt' in bucket_ls + assert 'file_one.txt' not in bucket_ls + assert 'file_two.txt' not in bucket_ls # Assert that only the renamed directory, and not an empty file, exists assert 1 == len([key for key in bucket_ls if key.name.strip('/') == self.get_key(dst_dir).name.strip('/')]) - def test_rename_star(self): src_dir = self.get_test_path('test_rename_star_src') dst_dir = self.get_test_path('test_rename_star_dst') @@ -370,7 +439,6 @@ def test_rename_star(self): assert src_names assert src_names == dst_names - def test_rmtree(self): with pytest.raises(NotImplementedError): self.fs.rmtree('universe', skipTrash=False) @@ -390,12 +458,10 @@ def test_rmtree(self): assert not self.fs.exists(nested_dir) assert not self.fs.exists(directory) - def test_listing_buckets(self): buckets = self.fs.listdir('s3a://') assert len(buckets) > 0 - def test_mkdir(self): dir_path = self.get_test_path('test_mkdir') assert not self.fs.exists(dir_path) @@ -403,7 +469,6 @@ def test_mkdir(self): self.fs.mkdir(dir_path) assert self.fs.exists(dir_path) - def test_upload_file(self): with tempfile.NamedTemporaryFile() as local_file: # Make sure we can upload larger than the UPLOAD chunk size @@ -431,7 +496,6 @@ def test_upload_file(self): expected = file(local_file).read() assert actual == expected, 'files do not match: %s != %s' % (len(actual), len(expected)) - def test_check_access(self): dir_path = self.get_test_path('test_check_access') self.fs.mkdir(dir_path) diff --git a/desktop/libs/azure/src/azure/abfs/__init__.py b/desktop/libs/azure/src/azure/abfs/__init__.py index 8f6f49e22f..bd08723799 100644 --- 
a/desktop/libs/azure/src/azure/abfs/__init__.py +++ b/desktop/libs/azure/src/azure/abfs/__init__.py @@ -15,29 +15,29 @@ # limitations under the License. from __future__ import absolute_import -import calendar -import errno import re +import time +import errno import logging +import calendar import tempfile import posixpath -import time - -from hadoop.fs import normpath as fs_normpath -from azure.conf import get_default_abfs_fs +from azure.conf import ABFS_CLUSTERS, get_default_abfs_fs from desktop.conf import RAZ from filebrowser.conf import REMOTE_STORAGE_HOME +from hadoop.fs import normpath as fs_normpath LOG = logging.getLogger() -#check this first for problems +# check this first for problems ABFS_PATH_RE = re.compile( - '^/*[aA][bB][fF][sS]{1,2}://([$a-z0-9](?!.*--)[-a-z0-9]{1,61}[a-z0-9])(@[^.]*?\.dfs\.core\.windows\.net)?(/(.*?)/?)?$') + r'^/*[aA][bB][fF][sS]{1,2}://([$a-z0-9](?!.*--)[-a-z0-9]{1,61}[a-z0-9])(@[^.]*?\.dfs\.core\.windows\.net)?(/(.*?)/?)?$') ABFS_ROOT_S = 'abfss://' ABFS_ROOT = 'abfs://' ABFSACCOUNT_NAME = re.compile('^/*[aA][bB][fF][sS]{1,2}://[$a-z0-9](?!.*--)[-a-z0-9]{1,61}[a-z0-9](@.*?)$') + def parse_uri(uri): """ Returns filesystem_name, direct_name, base_direct_name @@ -50,6 +50,7 @@ def parse_uri(uri): account_name_and_path = match.group(2) or '' return match.group(1), direct_name, account_name_and_path + def only_filesystem_and_account_name(uri): """ Given a path returns only the filesystem and account name, @@ -60,6 +61,7 @@ def only_filesystem_and_account_name(uri): return match.group(1) + match.group(2) return uri + def is_root(uri): """ Checks if Uri is the Root Directory @@ -76,10 +78,11 @@ def strip_scheme(path): if filesystem == '': raise ValueError('File System must be Specified') path = filesystem + '/' + file_path - except: + except Exception: return path return path + def strip_path(path): """ Return only the end of a path given another path @@ -89,6 +92,7 @@ def strip_path(path): split_path = path.split('/') return split_path[len(split_path) - 1] + def normpath(path): """ Return the normlized path, but ignore leading prefix if it exists @@ -103,6 +107,7 @@ def normpath(path): normalized = fs_normpath(path) return normalized + def parent_path(path): """ Returns the parent of the specified folder @@ -124,6 +129,7 @@ def parent_path(path): return normpath(ABFS_ROOT + filesystem + '/' + parent) return normpath(ABFS_ROOT_S + filesystem + '/' + parent) + def join(first, *complist): """ Join a path on to another path @@ -151,13 +157,13 @@ def abfspath(path, fs_defaultfs=None): if fs_defaultfs is None: try: fs_defaultfs = get_default_abfs_fs() - except: + except Exception: LOG.warning("Configuration for ABFS is not set, may run into errors") return path filesystem, dir_name = ("", "") try: filesystem, dir_name = parse_uri(path)[:2] - except: + except Exception: return path account_name = ABFSACCOUNT_NAME.match(fs_defaultfs) LOG.debug("%s" % fs_defaultfs) @@ -170,7 +176,7 @@ def abfspath(path, fs_defaultfs=None): return path -def get_home_dir_for_abfs(user=None): +def get_abfs_home_directory(user=None): """ Attempts to go to the directory set in the config file or core-site.xml else defaults to abfs:// """ @@ -179,11 +185,17 @@ def get_home_dir_for_abfs(user=None): try: filesystem = parse_uri(get_default_abfs_fs())[0] remote_home_abfs = "abfs://" + filesystem - except: + except Exception: remote_home_abfs = 'abfs://' + # REMOTE_STORAGE_HOME is deprecated in favor of DEFAULT_HOME_PATH per FS config level. 
+ # But for backward compatibility, we are still giving preference to REMOTE_STORAGE_HOME path first and if it's not set, + # then check for DEFAULT_HOME_PATH which is set per FS config block. This helps in setting diff DEFAULT_HOME_PATH for diff FS in Hue. + if hasattr(REMOTE_STORAGE_HOME, 'get') and REMOTE_STORAGE_HOME.get() and REMOTE_STORAGE_HOME.get().startswith('abfs://'): remote_home_abfs = REMOTE_STORAGE_HOME.get() + elif 'default' in ABFS_CLUSTERS and ABFS_CLUSTERS['default'].DEFAULT_HOME_PATH.get() and ABFS_CLUSTERS['default'].DEFAULT_HOME_PATH.get().startswith('abfs://'): + remote_home_abfs = ABFS_CLUSTERS['default'].DEFAULT_HOME_PATH.get() remote_home_abfs = _handle_user_dir_raz(user, remote_home_abfs) @@ -199,7 +211,7 @@ def abfsdatetime_to_timestamp(datetime): """ # There is chance (depends on platform) to get # `'z' is a bad directive in format ...` error (see https://bugs.python.org/issue6641), - #LOG.debug("%s" %datetime) + # LOG.debug("%s" %datetime) stripped = time.strptime(datetime[:-4], '%a, %d %b %Y %H:%M:%S') if datetime[-4:] != ' GMT': raise ValueError('Time [%s] is not in GMT.' % datetime) diff --git a/desktop/libs/azure/src/azure/abfs/abfs.py b/desktop/libs/azure/src/azure/abfs/abfs.py index d4c69c0c49..1157d49b6e 100644 --- a/desktop/libs/azure/src/azure/abfs/abfs.py +++ b/desktop/libs/azure/src/azure/abfs/abfs.py @@ -18,43 +18,35 @@ """ Interfaces for ABFS """ -from future import standard_library -standard_library.install_aliases() -from builtins import object -import logging + import os +import re import sys +import logging import threading -import re - +import urllib.error +import urllib.request +from builtins import object from math import ceil from posixpath import join - -from hadoop.hdfs_site import get_umask_mode -from hadoop.fs.exceptions import WebHdfsException - -from desktop.conf import RAZ -from desktop.lib.rest import http_client, resource -from desktop.lib.rest.raz_http_client import RazHttpClient +from urllib.parse import quote as urllib_quote, urlparse as lib_urlparse import azure.abfs.__init__ as Init_ABFS from azure.abfs.abfsfile import ABFSFile from azure.abfs.abfsstats import ABFSStat from azure.conf import PERMISSION_ACTION_ABFS, is_raz_abfs - -if sys.version_info[0] > 2: - import urllib.request, urllib.error - from urllib.parse import quote as urllib_quote - from urllib.parse import urlparse as lib_urlparse -else: - from urlparse import urlparse as lib_urlparse - from urllib import quote as urllib_quote +from desktop.conf import RAZ +from desktop.lib.rest import http_client, resource +from desktop.lib.rest.raz_http_client import RazHttpClient +from hadoop.fs.exceptions import WebHdfsException +from hadoop.hdfs_site import get_umask_mode LOG = logging.getLogger() # Azure has a 30MB block limit on upload. 
UPLOAD_CHUCK_SIZE = 30 * 1000 * 1000 + class ABFSFileSystemException(IOError): def __init__(self, *args, **kwargs): @@ -134,7 +126,7 @@ def get_client(self, url): def _getheaders(self): headers = { - "x-ms-version": "2019-12-12" # For latest SAS support + "x-ms-version": "2019-12-12" # For latest SAS support } if self._token_type and self._access_token: @@ -190,7 +182,7 @@ def stats(self, path, params=None, **kwargs): return ABFSStat.for_root(path) try: file_system, dir_name = Init_ABFS.parse_uri(path)[:2] - except: + except Exception: raise IOError if dir_name == '': @@ -294,7 +286,6 @@ def listdir(self, path, params=None, glob=None, **kwargs): return [x.name for x in listofDir] - def listfilesystems(self, root=Init_ABFS.ABFS_ROOT, params=None, **kwargs): """ Lists the names of the File Systems, limited arguements @@ -307,7 +298,7 @@ def get_home_dir(): """ Attempts to go to the directory set by the user in the configuration file. If not defaults to abfs:// """ - return Init_ABFS.get_home_dir_for_abfs() + return Init_ABFS.get_abfs_home_directory() # Find or alter information about the URI path # -------------------------------- @@ -682,14 +673,6 @@ def check_access(self, path, *args, **kwargs): Check access of a file/directory (Work in Progress/Not Ready) """ raise NotImplementedError("") - try: - status = self.stats(path) - if 'x-ms-permissions' not in status.keys(): - raise b - except b: - LOG.debug("Permisions have not been set") - except: - Exception def mkswap(self, filename, subdir='', suffix='swp', basedir=None): """ @@ -718,9 +701,9 @@ def filebrowser_action(self): return self._filebrowser_action # Other Methods to condense stuff - #---------------------------- + # ---------------------------- # Write Files on creation - #---------------------------- + # ---------------------------- def _writedata(self, path, data, size): """ Adds text to a given file @@ -733,11 +716,11 @@ def _writedata(self, path, data, size): length = chunk_size else: length = chunk - self._append(path, data[i*chunk_size:i*chunk_size + length], length) + self._append(path, data[i * chunk_size:i * chunk_size + length], length) self.flush(path, {'position': int(size)}) # Use Patch HTTP request - #---------------------------- + # ---------------------------- def _patching_sl(self, schemeless_path, param, data=None, header=None, **kwargs): """ A wraper function for patch diff --git a/desktop/libs/azure/src/azure/abfs/abfs_test.py b/desktop/libs/azure/src/azure/abfs/abfs_test.py index 27c2e496e1..7646cb28da 100644 --- a/desktop/libs/azure/src/azure/abfs/abfs_test.py +++ b/desktop/libs/azure/src/azure/abfs/abfs_test.py @@ -14,37 +14,137 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import absolute_import -import logging -import json import os -import pytest -import unittest -import tempfile +import json import time +import logging +import tempfile +import pytest from django.contrib.auth.models import User from django.test import TestCase -from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.test_utils import grant_access, add_to_group, add_permission, remove_from_group - -from azure.abfs.__init__ import abfspath +from azure.abfs.__init__ import abfspath, get_abfs_home_directory from azure.abfs.abfs import ABFS -from azure.active_directory import ActiveDirectory -from azure.conf import ABFS_CLUSTERS,AZURE_ACCOUNTS, is_abfs_enabled - from azure.abfs.upload import DEFAULT_WRITE_SIZE +from azure.active_directory import ActiveDirectory +from azure.conf import ABFS_CLUSTERS, AZURE_ACCOUNTS, is_abfs_enabled +from desktop.conf import RAZ +from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.test_utils import add_permission, add_to_group, grant_access, remove_from_group +from filebrowser.conf import REMOTE_STORAGE_HOME LOG = logging.getLogger() -""" -Interfaces for ADLS via HttpFs/WebHDFS -""" + +@pytest.mark.django_db +def test_get_abfs_home_directory(): + client = make_logged_in_client(username="test", groupname="test", recreate=True, is_superuser=False) + user = User.objects.get(username="test") + + client_not_me = make_logged_in_client(username="test_not_me", groupname="test_not_me", recreate=True, is_superuser=False) + user_not_me = User.objects.get(username="test_not_me") + + # When REMOTE_STORAGE_HOME ends with /user in RAZ ABFS environment. + resets = [RAZ.IS_ENABLED.set_for_testing(True), REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container/user')] + + try: + default_abfs_home_path = get_abfs_home_directory(user) + assert default_abfs_home_path == 'abfs://gethue-container/user/test' + + default_abfs_home_path = get_abfs_home_directory(user_not_me) + assert default_abfs_home_path == 'abfs://gethue-container/user/test_not_me' + finally: + for reset in resets: + reset() + + # When ABFS filesystem's DEFAULT_HOME_PATH ends with /user in RAZ ABFS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + ABFS_CLUSTERS.set_for_testing({'default': {'default_home_path': 'abfs://gethue-other-container/user'}}), + ] + + try: + default_abfs_home_path = get_abfs_home_directory(user) + assert default_abfs_home_path == 'abfs://gethue-other-container/user/test' + + default_abfs_home_path = get_abfs_home_directory(user_not_me) + assert default_abfs_home_path == 'abfs://gethue-other-container/user/test_not_me' + finally: + for reset in resets: + reset() + + # When ABFS filesystem's DEFAULT_HOME_PATH is set in non-RAZ ABFS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(False), + ABFS_CLUSTERS.set_for_testing({'default': {'default_home_path': 'abfs://gethue-other-container/test-dir'}}), + ] + + try: + default_abfs_home_path = get_abfs_home_directory(user) + assert default_abfs_home_path == 'abfs://gethue-other-container/test-dir' + + default_abfs_home_path = get_abfs_home_directory(user_not_me) + assert default_abfs_home_path == 'abfs://gethue-other-container/test-dir' + finally: + for reset in resets: + reset() + + # When both REMOTE_STORAGE_HOME and ABFS filesystem's DEFAULT_HOME_PATH are set in RAZ ABFS environment. 
+ resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + REMOTE_STORAGE_HOME.set_for_testing('abfs://gethue-container/user'), + ABFS_CLUSTERS.set_for_testing({'default': {'default_home_path': 'abfs://gethue-other-container/user'}}), + ] + + try: + # Gives preference to REMOTE_STORAGE_HOME for backward compatibility. + default_abfs_home_path = get_abfs_home_directory(user) + assert default_abfs_home_path == 'abfs://gethue-container/user/test' + + default_abfs_home_path = get_abfs_home_directory(user_not_me) + assert default_abfs_home_path == 'abfs://gethue-container/user/test_not_me' + finally: + for reset in resets: + reset() + + # When ABFS filesystem's DEFAULT_HOME_PATH is set but path does not end with ../user or ../user/ in RAZ ABFS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + ABFS_CLUSTERS.set_for_testing({'default': {'default_home_path': 'abfs://gethue-other-container/dir'}}), + ] + + try: + default_abfs_home_path = get_abfs_home_directory(user) + assert default_abfs_home_path == 'abfs://gethue-other-container/dir' + + default_abfs_home_path = get_abfs_home_directory(user_not_me) + assert default_abfs_home_path == 'abfs://gethue-other-container/dir' + finally: + for reset in resets: + reset() + + # When some different path is set in both RAZ and non-RAZ ABFS environment. + resets = [ + RAZ.IS_ENABLED.set_for_testing(True), + REMOTE_STORAGE_HOME.set_for_testing('s3a://gethue-bucket/user'), + ABFS_CLUSTERS.set_for_testing({'default': {'default_home_path': 's3a://gethue-other-bucket/dir'}}), + ] + + try: + default_abfs_home_path = get_abfs_home_directory(user) + assert default_abfs_home_path == 'abfs://' + + default_abfs_home_path = get_abfs_home_directory(user_not_me) + assert default_abfs_home_path == 'abfs://' + finally: + for reset in resets: + reset() + + + @pytest.mark.integration class ABFSTestBase(TestCase): - def setup_method(self, method): if not is_abfs_enabled(): pytest.skip("Skipping Test") @@ -53,61 +153,60 @@ def setup_method(self, method): grant_access('test', 'test', 'filebrowser') add_to_group('test') self.user = User.objects.get(username="test") - - self.test_fs = 'abfs://test' + (str(int(time.time()) )) + + self.test_fs = 'abfs://test' + (str(int(time.time()))) LOG.debug("%s" % self.test_fs) self.client.mkdir(self.test_fs) def teardown_method(self, method): self.client.rmtree(self.test_fs) - + def test_list(self): testfile = 'abfs://' filesystems = self.client.listdir(testfile) LOG.debug("%s" % filesystems) assert filesystems is not None, filesystems - - pathing = self.client.listdir(testfile + filesystems[0], {"recursive" : "true"} ) + + pathing = self.client.listdir(testfile + filesystems[0], {"recursive": "true"}) LOG.debug("%s" % pathing) assert pathing is not None, pathing - + directory = self.client.listdir(testfile + filesystems[0] + '/' + pathing[0]) LOG.debug("%s" % directory) assert directory is not None, directory - + directory = self.client.listdir(self.test_fs) LOG.debug("%s" % directory) assert directory is not None, directory - + directory = self.client.listdir(abfspath(self.test_fs)) LOG.debug("%s" % directory) assert directory is not None, directory - + pathing = self.client._statsf(filesystems[276]) LOG.debug("%s" % pathing) assert pathing is not None, pathing - + pathing = self.client._statsf(filesystems[277]) LOG.debug("%s" % pathing) assert pathing is not None, pathing - - def test_existence(self): test_fs = self.test_fs test_dir = test_fs + '/test_existence' test_file = test_dir + '/test.txt' self.client.mkdir(test_dir) 
self.client.create(test_file) - - #Testing root and filesystems + + # Testing root and filesystems assert self.client.exists('abfs://') assert self.client.exists(test_fs) - - #testing created directories and files + + # testing created directories and files assert self.client.exists(test_dir) assert self.client.exists(test_file) assert not self.client.exists(test_dir + 'a') - + def test_stat_output(self): """ Only tests if the stat outputs something @@ -119,59 +218,58 @@ def test_stat_output(self): self.client.mkdir(test_dir) self.client.mkdir(test_dir2) self.client.mkdir(test_dir3) - - #testing filesystems + + # testing filesystems result = self.client.stats(test_fs) LOG.debug("%s" % result) assert result is not None, result result = self.client.listdir_stats(test_fs) LOG.debug("%s" % result) - - #testing directories + + # testing directories result = self.client.stats(test_dir) LOG.debug("%s" % result) result = self.client.listdir_stats(test_dir) LOG.debug("%s" % result) - + result = self.client.stats(test_dir2) LOG.debug("%s" % result) result = self.client.listdir_stats(test_dir2) LOG.debug("%s" % result) - + result = self.client.stats(test_dir3) LOG.debug("%s" % result) result = self.client.listdir_stats(test_dir3) LOG.debug("%s" % result) - + def test_mkdir(self): test_dir = self.test_fs + '/test_mkdir' assert not self.client.exists(test_dir) - + self.client.mkdir(test_dir) assert self.client.exists(test_dir) self.client.isdir(test_dir) - - + def test_append_and_flush(self): test_fs = self.test_fs test_file = test_fs + '/test.txt' self.client.create(test_file) - + test_string = "This is a test." test_len = len(test_string) - resp = self.client._append(test_file, test_string) #only works with strings + resp = self.client._append(test_file, test_string) # only works with strings LOG.debug("%s" % self.client.stats(test_file)) try: LOG.debug("%s" % resp) - resp = self.client.read(test_file, length = test_len) - except: + resp = self.client.read(test_file, length=test_len) + except Exception: LOG.debug("Not written yet") - - self.client.flush(test_file, {"position" : test_len} ) + + self.client.flush(test_file, {"position": test_len}) resp = self.client.read(test_file) assert resp == test_string self.client.remove(test_file) - + def test_rename(self): test_fs = self.test_fs test_dir = test_fs + '/test' @@ -180,67 +278,66 @@ def test_rename(self): test_file = test_fs + '/test.txt' test_file2 = test_fs + '/test2.txt' test_file3 = test_fs + '/test 3.txt' - + self.client.mkdir(test_dir) assert self.client.exists(test_dir) assert not self.client.exists(test_dir2) - + self.client.rename(test_dir, test_dir2) assert not self.client.exists(test_dir) assert self.client.exists(test_dir2) - + self.client.create(test_file) assert self.client.exists(test_file) assert not self.client.exists(test_file2) - + self.client.rename(test_file, test_file2) assert not self.client.exists(test_file) assert self.client.exists(test_file2) - + self.client.rename(test_dir2, test_dir3) assert not self.client.exists(test_dir2) assert self.client.exists(test_dir3) - + self.client.rename(test_dir3, test_dir2) assert not self.client.exists(test_dir3) assert self.client.exists(test_dir2) - - + def test_chmod(self): test_dir = self.test_fs + '/test_chmod' self.client.mkdir(test_dir) - test_dir_permission = test_dir +'/test' - test_file_permission = test_dir +'/test.txt' - + test_dir_permission = test_dir + '/test' + test_file_permission = test_dir + '/test.txt' + self.client.create(test_file_permission) 
self.client.chmod(test_file_permission, '0777') self.client.stats(test_file_permission) - + self.client.mkdir(test_dir_permission) self.client.chmod(test_dir_permission, '0000') self.client.chmod(test_dir_permission, '0777') self.client.stats(test_dir_permission) - + def test_chown(self): test_dir = self.test_fs + '/test_chown' self.client.mkdir(test_dir) - test_dir_permission = test_dir +'/test' - test_file_permission = test_dir +'/test.txt' - + test_dir_permission = test_dir + '/test' + test_file_permission = test_dir + '/test.txt' + self.client.create(test_file_permission) - self.client.chown(test_file_permission, group = '$superuser' ) + self.client.chown(test_file_permission, group='$superuser') self.client.stats(test_file_permission) - + self.client.mkdir(test_dir_permission) - self.client.chown(test_dir_permission, group = '$superuser') + self.client.chown(test_dir_permission, group='$superuser') self.client.stats(test_dir_permission) - + def test_create_with_file_permissions(self): test_dir = self.test_fs + '/test_chown' test_file = test_dir + '/test.txt' self.client.mkdir(test_dir) - self.client.create(test_file, headers = {'x-ms-permissions' : '0777'}) - + self.client.create(test_file, headers={'x-ms-permissions': '0777'}) + def test_upload(self): with tempfile.NamedTemporaryFile() as local_file: # Make sure we can upload larger than the UPLOAD chunk size @@ -251,7 +348,7 @@ def test_upload(self): dest_dir = self.test_fs + '/test_upload' local_file = local_file.name dest_path = '%s/%s' % (dest_dir, os.path.basename(local_file)) - + add_permission(self.user.username, 'has_abfs', permname='abfs_access', appname='filebrowser') # Just upload the current python file try: @@ -259,15 +356,14 @@ def test_upload(self): response = json.loads(resp.content) finally: remove_from_group(self.user.username, 'has_abfs') - + assert 0 == response['status'], response stats = self.client.stats(dest_path) actual = self.client.read(dest_path) expected = file(local_file).read() assert actual == expected, 'files do not match: %s != %s' % (len(actual), len(expected)) - - + def test_copy_file(self): test_fs = self.test_fs testdir1 = test_fs + '/testcpy1' @@ -276,19 +372,18 @@ def test_copy_file(self): self.client.mkdir(testdir1) self.client.mkdir(testdir2) self.client.create(test_file) - + test_string = "This is a test." 
test_len = len(test_string) resp = self.client._append(test_file, test_string) - self.client.flush(test_file, {"position" : test_len} ) - + self.client.flush(test_file, {"position": test_len}) + self.client.copy(test_file, testdir2) self.client.stats(testdir2 + '/test.txt') resp = self.client.read(testdir2 + '/test.txt') resp2 = self.client.read(test_file) assert resp == resp2, "Files %s and %s are not equal" % (test_file, testdir2 + '/test.txt') - - + def test_copy_dir(self): test_fs = self.test_fs testdir1 = test_fs + '/testcpy1' @@ -299,12 +394,11 @@ def test_copy_dir(self): self.client.mkdir(testdir2) self.client.mkdir(test_dir3) self.client.mkdir(test_dir4) - - + self.client.copy(test_dir3, testdir2) self.client.stats(testdir2 + '/test') self.client.stats(testdir2 + '/test/test2') - + @staticmethod def test_static_methods(): test_dir = 'abfss://testfs/test_static/' @@ -315,5 +409,3 @@ def test_static_methods(): LOG.debug("%s" % parent) join_path = ABFS.join(test_dir, 'test1') LOG.debug("%s" % join_path) - - \ No newline at end of file diff --git a/desktop/libs/azure/src/azure/conf.py b/desktop/libs/azure/src/azure/conf.py index 1beb1b496a..5bcdf051b5 100644 --- a/desktop/libs/azure/src/azure/conf.py +++ b/desktop/libs/azure/src/azure/conf.py @@ -15,12 +15,11 @@ # limitations under the License. from __future__ import absolute_import -import logging import sys +import logging -from desktop.lib.conf import Config, UnspecifiedConfigSection, ConfigSection, coerce_password_from_script, coerce_bool +from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool, coerce_password_from_script from desktop.lib.idbroker import conf as conf_idbroker - from hadoop import core_site if sys.version_info[0] > 2: @@ -36,6 +35,7 @@ META_DATA_URL = 'http://169.254.169.254/metadata/instance' AZURE_METADATA = None + def get_default_client_id(): """ Attempt to set AWS client id from script, else core-site, else None @@ -43,6 +43,7 @@ def get_default_client_id(): client_id_script = AZURE_ACCOUNTS['default'].CLIENT_ID_SCRIPT.get() return client_id_script or core_site.get_adls_client_id() or core_site.get_azure_client_id() + def get_default_secret_key(): """ Attempt to set AWS secret key from script, else core-site, else None @@ -50,35 +51,46 @@ def get_default_secret_key(): client_secret_script = AZURE_ACCOUNTS['default'].CLIENT_SECRET_SCRIPT.get() return client_secret_script or core_site.get_adls_authentication_code() or core_site.get_azure_client_secret() + def get_default_tenant_id(): """ Attempt to set AWS tenant id from script, else core-site, else None """ return AZURE_ACCOUNTS['default'].TENANT_ID_SCRIPT.get() + def get_refresh_url(conf, version): refresh_url = core_site.get_adls_refresh_url() or core_site.get_azure_client_endpoint() if not refresh_url: refresh_url = REFRESH_URL.replace('', conf.TENANT_ID.get()).replace('', version + '/' if version else '') return refresh_url + def get_default_region(): return "" + def get_default_adls_url(): return ADLS_CLUSTERS['default'].WEBHDFS_URL.get() + def get_default_adls_fs(): return ADLS_CLUSTERS['default'].FS_DEFAULTFS.get() + def get_default_abfs_url(): return ABFS_CLUSTERS['default'].WEBHDFS_URL.get() + def get_default_abfs_fs(): default_fs = core_site.get_default_fs() - return default_fs if default_fs and default_fs.startswith('abfs://') and \ - ABFS_CLUSTERS['default'].ENABLE_DEFAULTFS_FROM_CORESITE.get() else ABFS_CLUSTERS['default'].FS_DEFAULTFS.get() + return ( + default_fs + if default_fs and 
default_fs.startswith('abfs://') and ABFS_CLUSTERS['default'].ENABLE_DEFAULTFS_FROM_CORESITE.get() + else ABFS_CLUSTERS['default'].FS_DEFAULTFS.get() + ) + ADLS_CLUSTERS = UnspecifiedConfigSection( "adls_clusters", @@ -148,44 +160,72 @@ def get_default_abfs_fs(): key="enable_defaultfs_from_coresite", type=coerce_bool, default=True, - help="Enable this param to use the defaultFS from core-site.xml"), - FS_DEFAULTFS=Config("fs_defaultfs", help="abfs://@.dfs.core.windows.net", type=str, default=None), - WEBHDFS_URL=Config("webhdfs_url", - help="https://.dfs.core.windows.net", - type=str, default=None), + help="Enable this to use the defaultFS value from core-site.xml" + ), + FS_DEFAULTFS=Config( + key="fs_defaultfs", + help="abfs://@.dfs.core.windows.net", + type=str, + default=None + ), + WEBHDFS_URL=Config( + key="webhdfs_url", + help="https://.dfs.core.windows.net", + type=str, + default=None + ), + DEFAULT_HOME_PATH=Config( + key="default_home_path", + type=str, + default=None, + help="Optionally set this for a different home directory path. e.g. abfs://gethue" + ), ) ) ) + def is_raz_abfs(): from desktop.conf import RAZ # Must be imported dynamically in order to have proper value return (RAZ.IS_ENABLED.get() and 'default' in list(ABFS_CLUSTERS.keys())) + def is_adls_enabled(): - return ('default' in list(AZURE_ACCOUNTS.keys()) and AZURE_ACCOUNTS['default'].get_raw() and AZURE_ACCOUNTS['default'].CLIENT_ID.get() \ + return ('default' in list(AZURE_ACCOUNTS.keys()) and AZURE_ACCOUNTS['default'].get_raw() and AZURE_ACCOUNTS['default'].CLIENT_ID.get() or (conf_idbroker.is_idbroker_enabled('azure') and has_azure_metadata())) and 'default' in list(ADLS_CLUSTERS.keys()) + def is_abfs_enabled(): - return is_raz_abfs() or \ - ('default' in list(AZURE_ACCOUNTS.keys()) and AZURE_ACCOUNTS['default'].get_raw() and AZURE_ACCOUNTS['default'].CLIENT_ID.get() or \ - (conf_idbroker.is_idbroker_enabled('azure') and has_azure_metadata())) and 'default' in list(ABFS_CLUSTERS.keys()) + return ( + is_raz_abfs() + or ( + 'default' in list(AZURE_ACCOUNTS.keys()) + and AZURE_ACCOUNTS['default'].get_raw() + and AZURE_ACCOUNTS['default'].CLIENT_ID.get() + or (conf_idbroker.is_idbroker_enabled('azure') and has_azure_metadata()) + ) + and 'default' in list(ABFS_CLUSTERS.keys()) + ) + def has_adls_access(user): - from desktop.conf import RAZ # Must be imported dynamically in order to have proper value from desktop.auth.backend import is_admin + from desktop.conf import RAZ # Must be imported dynamically in order to have proper value return user.is_authenticated and user.is_active and ( is_admin(user) or user.has_hue_permission(action="adls_access", app="filebrowser") or RAZ.IS_ENABLED.get() ) + def has_abfs_access(user): - from desktop.conf import RAZ # Must be imported dynamically in order to have proper value from desktop.auth.backend import is_admin + from desktop.conf import RAZ # Must be imported dynamically in order to have proper value return user.is_authenticated and user.is_active and ( is_admin(user) or user.has_hue_permission(action="abfs_access", app="filebrowser") or RAZ.IS_ENABLED.get() ) + def azure_metadata(): global AZURE_METADATA if AZURE_METADATA is None: @@ -198,13 +238,15 @@ def azure_metadata(): AZURE_METADATA = False return AZURE_METADATA + def has_azure_metadata(): return azure_metadata() is not None + def config_validator(user): res = [] - import desktop.lib.fsmanager # Avoid cyclic loop + import desktop.lib.fsmanager # Avoid cyclic loop if is_adls_enabled() or is_abfs_enabled(): try: diff 
--git a/desktop/libs/hadoop/src/hadoop/core_site_tests.py b/desktop/libs/hadoop/src/hadoop/core_site_tests.py index 84652e56ed..f7f0d82ac9 100644 --- a/desktop/libs/hadoop/src/hadoop/core_site_tests.py +++ b/desktop/libs/hadoop/src/hadoop/core_site_tests.py @@ -16,15 +16,14 @@ # limitations under the License. from __future__ import absolute_import -from hadoop import conf -import logging + import os import sys +import logging import tempfile from desktop.models import get_remote_home_storage - -from hadoop import core_site +from hadoop import conf, core_site if sys.version_info[0] > 2: open_file = open @@ -65,10 +64,10 @@ def test_core_site(): fs.s3a.bucket.gethue-dev.endpoint s3.us-west-2.amazonaws.com - - fs.azure.ext.raz.rest.host.url - https://gehue-adls-master:6082/ - + + fs.azure.ext.raz.rest.host.url + https://gehue-adls-master:6082/ + fs.azure.ext.raz.adls.access.cluster.name gehue-adls @@ -76,7 +75,7 @@ def test_core_site(): fs.defaultFS abfs://data@gethuedevstorage.dfs.core.windows.net/hue-adls - + """ open_file(os.path.join(hadoop_home, 'core-site.xml'), 'w').write(xml) @@ -93,7 +92,6 @@ def test_core_site(): assert core_site.get_default_fs() == 'abfs://data@gethuedevstorage.dfs.core.windows.net/hue-adls' - assert get_remote_home_storage() == 's3a://gethue-dev' finally: core_site.reset() for f in finish: