From 81a8fadb6cdcea8e5629356c22f753646c6e6f8b Mon Sep 17 00:00:00 2001 From: Athithyaa Selvam <33496652+athithyaaselvam@users.noreply.github.com> Date: Tue, 28 May 2024 17:28:46 -0700 Subject: [PATCH] Avoid initializing free disk space in redis for all hue shell commands (#3747) Change-Id: I23ec79a1e03cf93f8729b93ced14a6a54195fd3d Co-authored-by: Athithyaa Selvam --- apps/filebrowser/src/filebrowser/tasks.py | 4 +- apps/filebrowser/src/filebrowser/utils.py | 13 ++- desktop/core/src/desktop/api2.py | 2 +- .../management/commands/rungunicornserver.py | 79 +++++++++++++++---- desktop/core/src/desktop/settings.py | 28 ------- 5 files changed, 80 insertions(+), 46 deletions(-) diff --git a/apps/filebrowser/src/filebrowser/tasks.py b/apps/filebrowser/src/filebrowser/tasks.py index df6ba5aa85..ada00be647 100644 --- a/apps/filebrowser/src/filebrowser/tasks.py +++ b/apps/filebrowser/src/filebrowser/tasks.py @@ -34,7 +34,9 @@ from filebrowser.utils import release_reserved_space_for_file_uploads, reserve_space_for_file_uploads if hasattr(TASK_SERVER_V2, 'get') and TASK_SERVER_V2.ENABLED.get(): - from desktop.settings import TIME_ZONE, initialize_free_disk_space_in_redis, parse_broker_url + from desktop.management.commands.rungunicornserver import initialize_free_disk_space_in_redis + from desktop.settings import TIME_ZONE + from filebrowser.utils import parse_broker_url from filebrowser.views import UPLOAD_CLASSES from useradmin.models import User diff --git a/apps/filebrowser/src/filebrowser/utils.py b/apps/filebrowser/src/filebrowser/utils.py index b1b8ee1ae4..02fe79a3d6 100644 --- a/apps/filebrowser/src/filebrowser/utils.py +++ b/apps/filebrowser/src/filebrowser/utils.py @@ -17,13 +17,14 @@ import os import logging from datetime import datetime +from urllib.parse import urlparse + +import redis from desktop.conf import TASK_SERVER_V2 from desktop.lib.django_util import JsonResponse from filebrowser.conf import ARCHIVE_UPLOAD_TEMPDIR -if hasattr(TASK_SERVER_V2, 'get') and TASK_SERVER_V2.ENABLED.get(): - from desktop.settings import parse_broker_url LOG = logging.getLogger() @@ -74,6 +75,14 @@ def generate_chunks(uuid, totalparts, default_write_size=DEFAULT_WRITE_SIZE): os.remove(file_path) +def parse_broker_url(broker_url): + parsed_url = urlparse(broker_url) + host = parsed_url.hostname + port = parsed_url.port + db = int(parsed_url.path.lstrip('/')) + return redis.Redis(host=host, port=port, db=db) + + def get_available_space_for_file_uploads(request): redis_client = parse_broker_url(TASK_SERVER_V2.BROKER_URL.get()) try: diff --git a/desktop/core/src/desktop/api2.py b/desktop/core/src/desktop/api2.py index 9936bfbe57..7962b825c6 100644 --- a/desktop/core/src/desktop/api2.py +++ b/desktop/core/src/desktop/api2.py @@ -30,7 +30,7 @@ if hasattr(TASK_SERVER_V2, 'get') and TASK_SERVER_V2.ENABLED.get(): from desktop.celery import app as celery_app - from desktop.settings import parse_broker_url + from filebrowser.utils import parse_broker_url from collections import defaultdict from datetime import datetime diff --git a/desktop/core/src/desktop/management/commands/rungunicornserver.py b/desktop/core/src/desktop/management/commands/rungunicornserver.py index 3498879418..c34409ba3d 100644 --- a/desktop/core/src/desktop/management/commands/rungunicornserver.py +++ b/desktop/core/src/desktop/management/commands/rungunicornserver.py @@ -15,34 +15,41 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import unicode_literals -import atexit -import desktop.log -import gunicorn.app.base -import logging -import logging.config + import os -import pkg_resources import ssl import sys +import time +import atexit +import logging import tempfile - -from OpenSSL import crypto +import logging.config from multiprocessing.util import _exit_function -from desktop import conf -from desktop.lib.paths import get_desktop_root + +import redis +import psutil +import pkg_resources +import gunicorn.app.base from django.core.management.base import BaseCommand from django.core.wsgi import get_wsgi_application -from django.utils.translation import gettext as _ from django.db import connection +from django.utils.translation import gettext as _ from gunicorn import util +from OpenSSL import crypto from six import iteritems +import desktop.log +from desktop import conf +from desktop.lib.paths import get_desktop_root +from filebrowser.utils import parse_broker_url + GUNICORN_SERVER_HELP = r""" Run Hue using the Gunicorn WSGI server in asynchronous mode. """ PID_FILE = None + class Command(BaseCommand): help = _("Gunicorn Web server for Hue.") @@ -55,6 +62,7 @@ def handle(self, *args, **options): def usage(self, subcommand): return GUNICORN_SERVER_HELP + def activate_translation(): from django.conf import settings from django.utils import translation @@ -65,24 +73,30 @@ def activate_translation(): except AttributeError: pass + def number_of_workers(): return (multiprocessing.cpu_count() * 2) + 1 + def handler_app(environ, start_response): os.environ.setdefault("DJANGO_SETTINGS_MODULE", "desktop.settings") return get_wsgi_application() + def post_fork(server, worker): global PID_FILE with open(PID_FILE, "a") as f: - f.write("%s\n"%worker.pid) + f.write("%s\n" % worker.pid) + def post_worker_init(worker): connection.connect() + def worker_int(worker): connection.close() + def enable_logging(args, options): HUE_DESKTOP_VERSION = pkg_resources.get_distribution("desktop").version or "Unknown" # Start basic logging as soon as possible. @@ -93,6 +107,37 @@ def enable_logging(args, options): desktop.log.basic_logging(os.environ["HUE_PROCESS_NAME"]) logging.info("Welcome to Hue from Gunicorn server " + HUE_DESKTOP_VERSION) + +def initialize_free_disk_space_in_redis(): + conn_success = False + for retries in range(5): + try: + redis_client = parse_broker_url(desktop.conf.TASK_SERVER_V2.BROKER_URL.get()) + free_space = psutil.disk_usage('/tmp').free + available_space = redis_client.get('upload_available_space') + if available_space is None: + available_space = free_space + else: + available_space = int(available_space) + upload_keys_exist = any(redis_client.scan_iter('upload__*')) + redis_client.delete('upload_available_space') + if not upload_keys_exist: + redis_client.setnx('upload_available_space', free_space) + else: + redis_client.setnx('upload_available_space', min(free_space, available_space)) + logging.info("Successfully initialized free disk space in Redis.") + conn_success = True + break + except redis.ConnectionError as e: + logging.error(f"Redis connection error: {e}") + time.sleep(10) + except Exception as e: + logging.error(f"Error while initializing free disk space in Redis: {e}") + time.sleep(10) + if not conn_success: + logging.error("Failed to initialize free disk space in Redis after 5 retries.") + + class StandaloneApplication(gunicorn.app.base.BaseApplication): def __init__(self, app, options=None): self.options = options or {} @@ -122,6 +167,7 @@ def load_wsgiapp(self): def load(self): return self.load_wsgiapp() + def argprocessing(args=[], options={}): global PID_FILE if options['bind']: @@ -155,6 +201,7 @@ def argprocessing(args=[], options={}): ssl_keyfile = conf.SSL_PRIVATE_KEY.get() options['ssl_keyfile'] = ssl_keyfile + def rungunicornserver(args=[], options={}): gunicorn_options = { 'accesslog': "-", @@ -164,7 +211,7 @@ def rungunicornserver(args=[], options={}): 'ca_certs': conf.SSL_CACERTS.get(), # CA certificates file 'capture_output': True, 'cert_reqs': None, # Whether client certificate is required (see stdlib ssl module) - 'certfile': conf.SSL_CERTIFICATE.get(), # SSL certificate file + 'certfile': conf.SSL_CERTIFICATE.get(), # SSL certificate file 'chdir': None, 'check_config': None, 'ciphers': conf.SSL_CIPHER_LIST.get(), # Ciphers to use (see stdlib ssl module) @@ -220,6 +267,7 @@ def rungunicornserver(args=[], options={}): } StandaloneApplication(handler_app, gunicorn_options).run() + def start_server(args, options): global PID_FILE argprocessing(args, options) @@ -232,9 +280,12 @@ def start_server(args, options): activate_translation() enable_logging(args, options) atexit.unregister(_exit_function) + if desktop.conf.TASK_SERVER_V2.ENABLED.get(): + initialize_free_disk_space_in_redis() with open(PID_FILE, "a") as f: - f.write("%s\n"%os.getpid()) + f.write("%s\n" % os.getpid()) rungunicornserver(args, options) + if __name__ == '__main__': start_server(args=sys.argv[1:], options={}) diff --git a/desktop/core/src/desktop/settings.py b/desktop/core/src/desktop/settings.py index 607aa92f19..fcafff926f 100644 --- a/desktop/core/src/desktop/settings.py +++ b/desktop/core/src/desktop/settings.py @@ -28,10 +28,7 @@ import logging import datetime from builtins import map, zip -from urllib.parse import urlparse -import redis -import psutil import pkg_resources import desktop.redaction @@ -759,31 +756,6 @@ def disable_database_logging(): }, } - if desktop.conf.TASK_SERVER_V2.ENABLED.get(): - def initialize_free_disk_space_in_redis(): - redis_client = parse_broker_url(desktop.conf.TASK_SERVER_V2.BROKER_URL.get()) - free_space = psutil.disk_usage('/tmp').free - available_space = redis_client.get('upload_available_space') - if available_space is None: - available_space = free_space - else: - available_space = int(available_space) - upload_keys_exist = any(redis_client.scan_iter('upload__*')) - redis_client.delete('upload_available_space') - if not upload_keys_exist: - redis_client.setnx('upload_available_space', free_space) - else: - redis_client.setnx('upload_available_space', min(free_space, available_space)) - - def parse_broker_url(broker_url): - parsed_url = urlparse(broker_url) - host = parsed_url.hostname - port = parsed_url.port - db = int(parsed_url.path.lstrip('/')) - return redis.Redis(host=host, port=port, db=db) - - initialize_free_disk_space_in_redis() - # %n will be replaced with the first part of the nodename. # CELERYD_LOG_FILE="/var/log/celery/%n%I.log" # CELERYD_PID_FILE="/var/run/celery/%n.pid"