Skip to content

Commit

Permalink
[core] Use dedicated home_directory methods and introduce DEFAULT_HOM…
Browse files Browse the repository at this point in the history
…E_PATH config for supported FS (#3742)

## What changes were proposed in this pull request?

- If two or more filesystems were configured at the same time in Hue, like S3 and ABFS, and `REMOTE_STORAGE_HOME` was set to, say, `s3a://test_bucket/dir`, then previously the user was defaulted to the same `s3a://test_bucket/dir` path when clicking either the S3 or the ABFS left-nav icon.
- Using the dedicated home directory method resolves this issue: in the above scenario, the user will default to `s3a://test_bucket/dir` when the S3 icon is clicked but will default to `abfs://` when the ABFS icon is clicked.



- To improve this further, the user can now set `default_home_path` at the per-FS level, so the default can be `s3a://test_bucket/dir` for the S3 icon and `abfs://test_container/dir` for the ABFS icon.
- For backward compatibility, `REMOTE_STORAGE_HOME` will still hold priority over per FS level `DEFAULT_HOME_PATH`.



- This PR also removes unnecessary encoding and improves a few home_directory-related methods.
- For RAZ environments, if someone now sets `REMOTE_STORAGE_HOME` or `DEFAULT_HOME_PATH` to a path ending with `/user` or `/user/`, the username is appended in both cases to reduce misconfiguration.

## How was this patch tested?

- Tested E2E in live setup.
- Added new and updated existing unit tests.
  • Loading branch information
Harshg999 authored Jun 3, 2024
1 parent 81a8fad commit 7ee5a33
Show file tree
Hide file tree
Showing 20 changed files with 925 additions and 739 deletions.
22 changes: 11 additions & 11 deletions apps/filebrowser/src/filebrowser/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import logging
import posixpath
import os

from django.http import HttpResponse
from django.utils.translation import gettext as _

from desktop.lib.django_util import JsonResponse
from aws.s3.s3fs import get_s3_home_directory
from azure.abfs.__init__ import get_abfs_home_directory
from desktop.lib import fsmanager
from desktop.lib.i18n import smart_unicode
from desktop.lib.fs.ozone.ofs import get_ofs_home_directory
from desktop.lib.django_util import JsonResponse
from desktop.lib.fs.gc.gs import get_gs_home_directory

from azure.abfs.__init__ import get_home_dir_for_abfs
from aws.s3.s3fs import get_s3_home_directory

from desktop.lib.fs.ozone.ofs import get_ofs_home_directory
from desktop.lib.i18n import smart_unicode
from filebrowser.views import _normalize_path

LOG = logging.getLogger()
Expand Down Expand Up @@ -64,7 +62,7 @@ def get_filesystems(request):


@error_handler
def get_filesystems_with_home_dirs(request): # Using as a public API only for now
def get_filesystems_with_home_dirs(request): # Using as a public API only for now
filesystems = []
user_home_dir = ''

Expand All @@ -76,7 +74,7 @@ def get_filesystems_with_home_dirs(request): # Using as a public API only for no
elif fs == 'gs':
user_home_dir = get_gs_home_directory(request.user)
elif fs == 'abfs':
user_home_dir = get_home_dir_for_abfs(request.user)
user_home_dir = get_abfs_home_directory(request.user)
elif fs == 'ofs':
user_home_dir = get_ofs_home_directory()

Expand Down Expand Up @@ -107,10 +105,11 @@ def touch(request):

if name and (posixpath.sep in name):
raise Exception(_("Error creating %s file. Slashes are not allowed in filename." % name))

request.fs.create(request.fs.join(path, name))
return HttpResponse(status=200)


@error_handler
def rename(request):
src_path = request.POST.get('src_path')
Expand All @@ -132,6 +131,7 @@ def rename(request):
request.fs.rename(src_path, dest_path)
return HttpResponse(status=200)


@error_handler
def content_summary(request, path):
path = _normalize_path(path)
Expand Down
1 change: 1 addition & 0 deletions apps/filebrowser/src/filebrowser/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def get_desktop_enable_download():
type=coerce_bool,
default=False)

# DEPRECATED in favor of DEFAULT_HOME_PATH per FS config level.
REMOTE_STORAGE_HOME = Config(
key="remote_storage_home",
type=str,
Expand Down
4 changes: 2 additions & 2 deletions apps/filebrowser/src/filebrowser/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ def view(request, path):

# default_abfs_home is set in jquery.filechooser.js
if 'default_abfs_home' in request.GET:
from azure.abfs.__init__ import get_home_dir_for_abfs
home_dir_path = get_home_dir_for_abfs(request.user)
from azure.abfs.__init__ import get_abfs_home_directory
home_dir_path = get_abfs_home_directory(request.user)
if request.fs.isdir(home_dir_path):
return format_preserving_redirect(
request,
Expand Down
9 changes: 9 additions & 0 deletions desktop/conf.dist/hue.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,9 @@ tls=no
# The JSON credentials to authenticate to Google Cloud e.g. '{ "type": "service_account", "project_id": .... }'
## json_credentials=None

# Optionally set this for a different home directory path. e.g. gs://gethue-bucket/user
## default_home_path=gs://<bucket_name>/<relative_path>

## Configuration for Ozone File System
# ------------------------------------------------------------------------
[[ozone]]
Expand Down Expand Up @@ -1712,6 +1715,7 @@ submit_to=True
# Redirect client to WebHdfs or S3 for file download. Note: Turning this on will override notebook/redirect_whitelist for user selected file downloads on WebHdfs & S3.
## redirect_download=false

# DEPRECATED in favor of default_home_path per FS config level.
# Optionally set this if you want a different home directory path. e.g. s3a://gethue.
## remote_storage_home=s3a://gethue

Expand Down Expand Up @@ -2026,6 +2030,8 @@ submit_to=True
# The time in seconds before a delegate key is expired. Used when filebrowser/redirect_download is used. Default to 4 Hours.
## key_expiry=14400

# Optionally set this for a different home directory path. e.g. s3a://gethue-bucket/user
## default_home_path=s3a://<bucket_name>/<relative_path>

###########################################################################
# Settings for the Azure lib
Expand Down Expand Up @@ -2058,6 +2064,9 @@ submit_to=True
## fs_defaultfs=abfs://<container_name>@<account_name>.dfs.core.windows.net
## webhdfs_url=https://<account_name>.dfs.core.windows.net

# Optionally set this for a different home directory path. e.g. abfs://gethue-container/user
## default_home_path=abfs://<container_name>/<relative_path>

###########################################################################
# Settings for the Sentry lib
###########################################################################
Expand Down
10 changes: 10 additions & 0 deletions desktop/conf/pseudo-distributed.ini.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,9 @@
# The JSON credentials to authenticate to Google Cloud e.g. '{ "type": "service_account", "project_id": .... }'
## json_credentials=None

# Optionally set this for a different home directory path. e.g. gs://gethue-bucket/user
## default_home_path=gs://<bucket_name>/<relative_path>

## Configuration for Ozone File System
# ------------------------------------------------------------------------
[[ozone]]
Expand Down Expand Up @@ -1695,6 +1698,7 @@
# Redirect client to WebHdfs or S3 for file download. Note: Turning this on will override notebook/redirect_whitelist for user selected file downloads on WebHdfs & S3.
## redirect_download=false

# DEPRECATED in favor of default_home_path per FS config level.
# Optionally set this if you want a different home directory path. e.g. s3a://gethue.
## remote_storage_home=s3a://gethue

Expand Down Expand Up @@ -2010,6 +2014,9 @@
# The time in seconds before a delegate key is expired. Used when filebrowser/redirect_download is used. Default to 4 Hours.
## key_expiry=14400

# Optionally set this for a different home directory path. e.g. s3a://gethue-bucket/user
## default_home_path=s3a://<bucket_name>/<relative_path>


###########################################################################
# Settings for the Azure lib
Expand Down Expand Up @@ -2042,6 +2049,9 @@
## fs_defaultfs=abfs://<container_name>@<account_name>.dfs.core.windows.net
## webhdfs_url=https://<account_name>.dfs.core.windows.net

# Optionally set this for a different home directory path. e.g. abfs://gethue-container/user
## default_home_path=abfs://<container_name>/<relative_path>

###########################################################################
# Settings for the Sentry lib
###########################################################################
Expand Down
8 changes: 7 additions & 1 deletion desktop/core/src/desktop/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2804,7 +2804,13 @@ def get_ldap_bind_password(ldap_config):
key='json_credentials',
type=str,
default=None,
)
),
DEFAULT_HOME_PATH=Config(
key="default_home_path",
type=str,
default=None,
help="Optionally set this for a different home directory path. e.g. gs://gethue"
),
)
)
)
Expand Down
42 changes: 23 additions & 19 deletions desktop/core/src/desktop/lib/fs/gc/gs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,31 @@
# limitations under the License.
import os
import re
import time
import logging
import posixpath
import time

from boto.exception import BotoClientError, GSResponseError
from boto.gs.connection import Location
from boto.gs.key import Key

from boto.s3.prefix import Prefix
from django.utils.translation import gettext as _

from desktop.conf import PERMISSION_ACTION_GS, is_raz_gs
from desktop.lib.fs.gc import GS_ROOT, abspath, parse_uri, translate_gs_error, normpath, join as gs_join
from desktop.lib.fs.gc.gsstat import GSStat
from aws.s3.s3fs import S3FileSystem
from desktop.conf import GC_ACCOUNTS, PERMISSION_ACTION_GS, is_raz_gs
from desktop.lib.fs.gc import GS_ROOT, abspath, join as gs_join, normpath, parse_uri, translate_gs_error
from desktop.lib.fs.gc.gsfile import open as gsfile_open

from desktop.lib.fs.gc.gsstat import GSStat
from filebrowser.conf import REMOTE_STORAGE_HOME

from aws.s3.s3fs import S3FileSystem


DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
BUCKET_NAME_PATTERN = re.compile(
"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$")
r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$")


LOG = logging.getLogger()


class GSFileSystemException(IOError):
def __init__(self, *args, **kwargs):
super(GSFileSystemException, self).__init__(*args, **kwargs)
Expand Down Expand Up @@ -81,9 +78,16 @@ def decorator(*args, **kwargs):
def get_gs_home_directory(user=None):
from desktop.models import _handle_user_dir_raz

remote_home_gs = 'gs://'
# REMOTE_STORAGE_HOME is deprecated in favor of DEFAULT_HOME_PATH per FS config level.
# But for backward compatibility, we are still giving preference to REMOTE_STORAGE_HOME path first and if it's not set,
# then check for DEFAULT_HOME_PATH which is set per FS config block. This helps in setting diff DEFAULT_HOME_PATH for diff FS in Hue.

if hasattr(REMOTE_STORAGE_HOME, 'get') and REMOTE_STORAGE_HOME.get() and REMOTE_STORAGE_HOME.get().startswith('gs://'):
remote_home_gs = REMOTE_STORAGE_HOME.get()
elif 'default' in GC_ACCOUNTS and GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get() and GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get().startswith('gs://'):
remote_home_gs = GC_ACCOUNTS['default'].DEFAULT_HOME_PATH.get()
else:
remote_home_gs = 'gs://'

remote_home_gs = _handle_user_dir_raz(user, remote_home_gs)

Expand All @@ -100,7 +104,7 @@ def __init__(self, gs_connection, expiration=None, fs='gs', headers=None, filebr
headers=headers,
filebrowser_action=filebrowser_action
)

@staticmethod
def join(*comp_list):
return gs_join(*comp_list)
Expand Down Expand Up @@ -156,7 +160,7 @@ def stats(self, path):
Returns:
GSStat: An object representing the stats of the file or directory.
Raises:
GSFileSystemException: If the file or directory does not exist.
"""
Expand Down Expand Up @@ -347,7 +351,7 @@ def mkdir(self, path, *args, **kwargs):
def _stats(self, path):
if GSFileSystem.isroot(path):
return GSStat.for_gs_root()

try:
key = self._get_key(path)
except BotoClientError as e:
Expand All @@ -359,17 +363,17 @@ def _stats(self, path):
raise GSFileSystemException(_('User is not authorized to access path: "%s"') % path)
else:
raise GSFileSystemException(_('Failed to access path "%s": %s') % (path, e.reason))
except Exception as e: # SSL errors show up here, because they've been remapped in boto
except Exception as e: # SSL errors show up here, because they've been remapped in boto
raise GSFileSystemException(_('Failed to access path "%s": %s') % (path, str(e)))

if key is None:
bucket_name, key_name = parse_uri(path)[:2]
bucket = self._get_bucket(bucket_name)

key = Key(bucket, key_name)

return self._stats_key(key, self.fs)

@staticmethod
def _stats_key(key, fs='gs'):
if key.size is not None:
Expand Down Expand Up @@ -402,7 +406,7 @@ def _copy(self, src, dst, recursive, use_src_basename):
"""
src_st = self.stats(src)
if src_st.isDir and not recursive:
return None # omitting directory
return None # omitting directory

# Check if the source is a directory and destination is not a directory
dst = abspath(src, dst)
Expand All @@ -421,7 +425,7 @@ def _copy(self, src, dst, recursive, use_src_basename):
src_bucket = self._get_bucket(src_bucket)
dst_bucket = self._get_bucket(dst_bucket)

# Determine whether to keep the source basename when copying directories and
# Determine whether to keep the source basename when copying directories and
# calculate the cut-off length for key names accordingly.
if keep_src_basename:
cut = len(posixpath.dirname(src_key)) # cut of the parent directory name
Expand Down
Loading

0 comments on commit 7ee5a33

Please sign in to comment.