From ff346522d9a1052fd9bc70b6f0af6f3e37fec577 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 23 Aug 2024 13:34:08 -0400 Subject: [PATCH 1/7] schedule metrics reporters in their own tasks --- osf/management/commands/daily_reporters_go.py | 39 ++++++++++--------- .../commands/monthly_reporters_go.py | 31 +++++++++------ osf/metrics/reporters/__init__.py | 29 +++++++------- 3 files changed, 54 insertions(+), 45 deletions(-) diff --git a/osf/management/commands/daily_reporters_go.py b/osf/management/commands/daily_reporters_go.py index d45f02fe54b..b2f73ea5842 100644 --- a/osf/management/commands/daily_reporters_go.py +++ b/osf/management/commands/daily_reporters_go.py @@ -2,11 +2,11 @@ import logging from django.core.management.base import BaseCommand +from django.db.utils import OperationalError from django.utils import timezone -from framework import sentry from framework.celery_tasks import app as celery_app -from osf.metrics.reporters import DAILY_REPORTERS +from osf.metrics.reporters import AllDailyReporters from website.app import init_app @@ -20,25 +20,26 @@ def daily_reporters_go(also_send_to_keen=False, report_date=None, reporter_filte if report_date is None: # default to yesterday report_date = (timezone.now() - datetime.timedelta(days=1)).date() - errors = {} - for reporter_class in DAILY_REPORTERS: - if reporter_filter and (reporter_filter.lower() not in reporter_class.__name__.lower()): + for _reporter_key, _reporter_class in AllDailyReporters.__members__.items(): + if reporter_filter and (reporter_filter.lower() not in _reporter_class.__name__.lower()): continue - try: - reporter_class().run_and_record_for_date( - report_date=report_date, - also_send_to_keen=also_send_to_keen, - ) - except Exception as e: - errors[reporter_class.__name__] = repr(e) - logger.exception(e) - sentry.log_exception(e) - # continue with the next reporter - return errors + daily_reporter_go.apply_async(kwargs={ + 'reporter_key': _reporter_key, + 'report_date': 
report_date.isoformat(), + }) -def date_fromisoformat(date_str): - return datetime.datetime.strptime(date_str, '%Y-%m-%d').date() +@celery_app.task( + name='management.commands.daily_reporter_go', + autoretry_for=(OperationalError,), + max_retries=5, + retry_backoff=True, + bind=True, +) +def daily_reporter_go(task, reporter_key: str, report_date: str): + _reporter_class = AllDailyReporters[reporter_key].value + _parsed_date = datetime.date.fromisoformat(report_date) + _reporter_class().run_and_record_for_date(report_date=_parsed_date) class Command(BaseCommand): @@ -51,7 +52,7 @@ def add_arguments(self, parser): ) parser.add_argument( '--date', - type=date_fromisoformat, # in python 3.7+, could pass datetime.date.fromisoformat + type=datetime.date.fromisoformat, help='run for a specific date (default: yesterday)', ) parser.add_argument( diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py index 74bd69da6ab..8f9854a722b 100644 --- a/osf/management/commands/monthly_reporters_go.py +++ b/osf/management/commands/monthly_reporters_go.py @@ -1,11 +1,11 @@ import logging from django.core.management.base import BaseCommand +from django.db.utils import OperationalError from django.utils import timezone -from framework import sentry from framework.celery_tasks import app as celery_app -from osf.metrics.reporters import MONTHLY_REPORTERS +from osf.metrics.reporters import AllMonthlyReporters from osf.metrics.utils import YearMonth from website.app import init_app @@ -28,17 +28,24 @@ def monthly_reporters_go(report_year=None, report_month=None): year=today.year if today.month > 1 else today.year - 1, month=today.month - 1 or MAXMONTH, ) + for _reporter_key in AllMonthlyReporters.__members__.keys(): + monthly_reporter_go.apply_async(kwargs={ + 'reporter_key': _reporter_key, + 'yearmonth': str(report_yearmonth), + }) - errors = {} - for reporter_class in MONTHLY_REPORTERS: - try: - 
reporter_class().run_and_record_for_month(report_yearmonth) - except Exception as e: - errors[reporter_class.__name__] = str(e) - logger.exception(e) - sentry.log_exception(e) - # continue with the next reporter - return errors + +@celery_app.task( + name='management.commands.monthly_reporter_go', + autoretry_for=(OperationalError,), + max_retries=5, + retry_backoff=True, + bind=True, +) +def monthly_reporter_go(task, reporter_key: str, yearmonth: str): + _reporter_class = AllMonthlyReporters[reporter_key].value + _parsed_yearmonth = YearMonth.from_str(yearmonth) + _reporter_class().run_and_record_for_month(_parsed_yearmonth) class Command(BaseCommand): diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py index b7a0f5e5363..1f8e0fba862 100644 --- a/osf/metrics/reporters/__init__.py +++ b/osf/metrics/reporters/__init__.py @@ -1,3 +1,5 @@ +import enum + # from .active_users import ActiveUserReporter from .storage_addon_usage import StorageAddonUsageReporter from .download_count import DownloadCountReporter @@ -10,18 +12,17 @@ from .spam_count import SpamCountReporter -DAILY_REPORTERS = ( - # ActiveUserReporter, - DownloadCountReporter, - InstitutionSummaryReporter, - NewUserDomainReporter, - NodeCountReporter, - OsfstorageFileCountReporter, - PreprintCountReporter, - StorageAddonUsageReporter, - UserCountReporter, -) +class AllDailyReporters(enum.Enum): + # ACTIVE_USER = ActiveUserReporter + DOWNLOAD_COUNT = DownloadCountReporter + INSTITUTION_SUMMARY = InstitutionSummaryReporter + NEW_USER_DOMAIN = NewUserDomainReporter + NODE_COUNT = NodeCountReporter + OSFSTORAGE_FILE_COUNT = OsfstorageFileCountReporter + PREPRINT_COUNT = PreprintCountReporter + STORAGE_ADDON_USAGE = StorageAddonUsageReporter + USER_COUNT = UserCountReporter + -MONTHLY_REPORTERS = ( - SpamCountReporter, -) +class AllMonthlyReporters(enum.Enum): + SPAM_COUNT = SpamCountReporter From 647cf1b54a19060b2f0fcbc730ec9763347a4010 Mon Sep 17 00:00:00 2001 From: abram axel 
booth Date: Fri, 23 Aug 2024 13:35:55 -0400 Subject: [PATCH 2/7] stop trying to send metrics reports to keen --- admin/management/views.py | 2 - admin/templates/management/commands.html | 5 -- osf/management/commands/daily_reporters_go.py | 9 +--- osf/metrics/reporters/_base.py | 47 +------------------ osf/metrics/reporters/download_count.py | 8 ---- osf/metrics/reporters/institution_summary.py | 14 ------ osf/metrics/reporters/new_user_domain.py | 9 ---- osf/metrics/reporters/node_count.py | 9 ---- .../reporters/osfstorage_file_count.py | 6 --- osf/metrics/reporters/preprint_count.py | 9 ---- osf/metrics/reporters/storage_addon_usage.py | 20 -------- osf/metrics/reporters/user_count.py | 13 ----- website/settings/defaults.py | 1 - 13 files changed, 2 insertions(+), 150 deletions(-) diff --git a/admin/management/views.py b/admin/management/views.py index 4323d7fd429..3bd675790dd 100644 --- a/admin/management/views.py +++ b/admin/management/views.py @@ -100,7 +100,6 @@ def post(self, request, *args, **kwargs): class DailyReportersGo(ManagementCommandPermissionView): def post(self, request, *args, **kwargs): - also_keen = bool(request.POST.get('also_send_to_keen', False)) report_date = request.POST.get('report_date', None) if report_date: report_date = isoparse(report_date).date() @@ -109,7 +108,6 @@ def post(self, request, *args, **kwargs): daily_reporters_go.apply_async(kwargs={ 'report_date': report_date, - 'also_send_to_keen': also_keen }) messages.success(request, 'Daily reporters going!') return redirect(reverse('management:commands')) diff --git a/admin/templates/management/commands.html b/admin/templates/management/commands.html index 269ead1bd3d..91471394c71 100644 --- a/admin/templates/management/commands.html +++ b/admin/templates/management/commands.html @@ -89,11 +89,6 @@

Daily Reporters, Go!

(default: yesterday)
- - - (may result in duplicates) diff --git a/osf/management/commands/daily_reporters_go.py b/osf/management/commands/daily_reporters_go.py index b2f73ea5842..5c62e6fbaa6 100644 --- a/osf/management/commands/daily_reporters_go.py +++ b/osf/management/commands/daily_reporters_go.py @@ -14,7 +14,7 @@ @celery_app.task(name='management.commands.daily_reporters_go') -def daily_reporters_go(also_send_to_keen=False, report_date=None, reporter_filter=None): +def daily_reporters_go(report_date=None, reporter_filter=None, **kwargs): init_app() # OSF-specific setup if report_date is None: # default to yesterday @@ -44,12 +44,6 @@ def daily_reporter_go(task, reporter_key: str, report_date: str): class Command(BaseCommand): def add_arguments(self, parser): - parser.add_argument( - '--keen', - type=bool, - default=False, - help='also send reports to keen', - ) parser.add_argument( '--date', type=datetime.date.fromisoformat, @@ -63,7 +57,6 @@ def add_arguments(self, parser): def handle(self, *args, **options): errors = daily_reporters_go( report_date=options.get('date'), - also_send_to_keen=options['keen'], reporter_filter=options.get('filter'), ) for error_key, error_val in errors.items(): diff --git a/osf/metrics/reporters/_base.py b/osf/metrics/reporters/_base.py index 94d35bbaad2..d3bf1722523 100644 --- a/osf/metrics/reporters/_base.py +++ b/osf/metrics/reporters/_base.py @@ -1,12 +1,6 @@ -from collections import defaultdict -from datetime import datetime import logging -import pytz - -from keen.client import KeenClient from osf.metrics.utils import YearMonth -from website.settings import KEEN as keen_settings logger = logging.getLogger(__name__) @@ -33,49 +27,10 @@ def report(self, report_date): """ raise NotImplementedError(f'{self.__name__} must implement `report`') - def keen_events_from_report(self, report): - """given one of this reporter's own reports, build equivalent keen events - (for back-compat; to be deleted once we don't need keen anymore) - - return a mapping 
from keen collection name to iterable of events - e.g. {'my_keen_collection': [event1, event2, ...]} - """ - raise NotImplementedError(f'{self.__name__} should probably implement keen_events_from_report') - - def run_and_record_for_date(self, report_date, *, also_send_to_keen=False): + def run_and_record_for_date(self, report_date): reports = self.report(report_date) # expecting each reporter to spit out only a handful of reports per day; # not bothering with bulk-create for report in reports: report.save() - - if also_send_to_keen: - self.send_to_keen(reports) - - def send_to_keen(self, reports): - keen_project = keen_settings['private']['project_id'] - write_key = keen_settings['private']['write_key'] - if not (keen_project and write_key): - logger.warning(f'keen not configured; not sending events for {self.__class__.__name__}') - return - - keen_events_by_collection = defaultdict(list) - for report in reports: - keen_event_timestamp = datetime( - report.report_date.year, - report.report_date.month, - report.report_date.day, - tzinfo=pytz.utc, - ) - - for collection_name, keen_events in self.keen_events_from_report(report).items(): - for event in keen_events: - event['keen'] = {'timestamp': keen_event_timestamp.isoformat()} - keen_events_by_collection[collection_name].extend(keen_events) - - client = KeenClient( - project_id=keen_project, - write_key=write_key, - ) - client.add_events(keen_events_by_collection) diff --git a/osf/metrics/reporters/download_count.py b/osf/metrics/reporters/download_count.py index f6ed14df198..f772722dc31 100644 --- a/osf/metrics/reporters/download_count.py +++ b/osf/metrics/reporters/download_count.py @@ -12,11 +12,3 @@ def report(self, date): report_date=date, ), ] - - def keen_events_from_report(self, report): - event = { - 'files': { - 'total': report.daily_file_downloads, - }, - } - return {'download_count_summary': [event]} diff --git a/osf/metrics/reporters/institution_summary.py b/osf/metrics/reporters/institution_summary.py 
index d51657e83b6..892e337aec4 100644 --- a/osf/metrics/reporters/institution_summary.py +++ b/osf/metrics/reporters/institution_summary.py @@ -93,17 +93,3 @@ def report(self, date): reports.append(report) return reports - - def keen_events_from_report(self, report): - event = { - 'institution': { - 'id': report.institution_id, - 'name': report.institution_name, - }, - 'users': report.users.to_dict(), - 'nodes': report.nodes.to_dict(), - 'projects': report.projects.to_dict(), - 'registered_nodes': report.registered_nodes.to_dict(), - 'registered_projects': report.registered_projects.to_dict(), - } - return {'institution_summary': [event]} diff --git a/osf/metrics/reporters/new_user_domain.py b/osf/metrics/reporters/new_user_domain.py index be28079e331..ec13aad860f 100644 --- a/osf/metrics/reporters/new_user_domain.py +++ b/osf/metrics/reporters/new_user_domain.py @@ -28,12 +28,3 @@ def report(self, date): ) for domain_name, count in domain_names.items() ] - - def keen_events_from_report(self, report): - events = [ - {'domain': report.domain_name, 'date': str(report.report_date)} - for _ in range(report.new_user_count) - ] - return { - 'user_domain_events': events, - } diff --git a/osf/metrics/reporters/node_count.py b/osf/metrics/reporters/node_count.py index d90a23fda0b..0a4120ca1f9 100644 --- a/osf/metrics/reporters/node_count.py +++ b/osf/metrics/reporters/node_count.py @@ -90,12 +90,3 @@ def report(self, date): ) return [report] - - def keen_events_from_report(self, report): - event = { - 'nodes': report.nodes.to_dict(), - 'projects': report.projects.to_dict(), - 'registered_nodes': report.registered_nodes.to_dict(), - 'registered_projects': report.registered_projects.to_dict(), - } - return {'node_summary': [event]} diff --git a/osf/metrics/reporters/osfstorage_file_count.py b/osf/metrics/reporters/osfstorage_file_count.py index 339838dce78..2f35e1e81fd 100644 --- a/osf/metrics/reporters/osfstorage_file_count.py +++ 
b/osf/metrics/reporters/osfstorage_file_count.py @@ -45,9 +45,3 @@ def report(self, date): ) return [report] - - def keen_events_from_report(self, report): - event = { - 'osfstorage_files_including_quickfiles': report.files.to_dict(), - } - return {'file_summary': [event]} diff --git a/osf/metrics/reporters/preprint_count.py b/osf/metrics/reporters/preprint_count.py index 319f72ae319..23f68bc7736 100644 --- a/osf/metrics/reporters/preprint_count.py +++ b/osf/metrics/reporters/preprint_count.py @@ -58,12 +58,3 @@ def report(self, date): logger.info('{} Preprints counted for the provider {}'.format(resp['hits']['total'], preprint_provider.name)) return reports - - def keen_events_from_report(self, report): - event = { - 'provider': { - 'name': report.provider_key, - 'total': report.preprint_count, - }, - } - return {'preprint_summary': [event]} diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py index 242be243b57..704254795f0 100644 --- a/osf/metrics/reporters/storage_addon_usage.py +++ b/osf/metrics/reporters/storage_addon_usage.py @@ -167,23 +167,3 @@ def report(self, date): report_date=date, usage_by_addon=usage_by_addon, )] - - def keen_events_from_report(self, report): - events = [ - { - 'provider': { - 'name': addon_usage.addon_shortname, - }, - 'users': { - 'enabled': addon_usage.enabled_usersettings, - 'linked': addon_usage.linked_usersettings, - }, - 'nodes': { - 'connected': addon_usage.connected_nodesettings, - 'deleted': addon_usage.deleted_nodesettings, - 'disconnected': addon_usage.disconnected_nodesettings - }, - } - for addon_usage in report.usage_by_addon - ] - return {'addon_snapshot': events} diff --git a/osf/metrics/reporters/user_count.py b/osf/metrics/reporters/user_count.py index fc9f3d6df54..e0a61c7bb10 100644 --- a/osf/metrics/reporters/user_count.py +++ b/osf/metrics/reporters/user_count.py @@ -18,16 +18,3 @@ def report(self, report_date): ) return [report] - - def 
keen_events_from_report(self, report): - event = { - 'status': { - 'active': report.active, - 'deactivated': report.deactivated, - 'merged': report.merged, - 'new_users_daily': report.new_users_daily, - 'new_users_with_institution_daily': report.new_users_with_institution_daily, - 'unconfirmed': report.unconfirmed, - } - } - return {'user_summary': [event]} diff --git a/website/settings/defaults.py b/website/settings/defaults.py index 63535dac1dd..7d6df427336 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -646,7 +646,6 @@ class CeleryConfig: 'daily_reporters_go': { 'task': 'management.commands.daily_reporters_go', 'schedule': crontab(minute=0, hour=6), # Daily 1:00 a.m. - 'kwargs': {'also_send_to_keen': True}, }, 'monthly_reporters_go': { 'task': 'management.commands.monthly_reporters_go', From 979eded66a7e29c3677692452ca80e45549027aa Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Fri, 23 Aug 2024 14:22:39 -0400 Subject: [PATCH 3/7] Don't record preprint metrics from contributors --- addons/base/views.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/addons/base/views.py b/addons/base/views.py index 6fea2444421..6253f7bc91b 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -696,6 +696,10 @@ def osfstoragefile_mark_viewed(self, auth, fileversion, file_node): @file_signals.file_viewed.connect def osfstoragefile_update_view_analytics(self, auth, fileversion, file_node): resource = file_node.target + user = getattr(auth, 'user', None) + if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user): + # Don't record views by contributors + return enqueue_update_analytics( resource, file_node, @@ -707,6 +711,10 @@ def osfstoragefile_update_view_analytics(self, auth, fileversion, file_node): @file_signals.file_viewed.connect def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node): resource = file_node.target + user = getattr(auth, 'user', 
None) + if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user): + # Don't record views by contributors + return if waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and isinstance(resource, Preprint): try: PreprintView.record_for_preprint( @@ -730,6 +738,10 @@ def osfstoragefile_downloaded_update_analytics(self, auth, fileversion, file_nod @file_signals.file_downloaded.connect def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node): resource = file_node.target + user = getattr(auth, 'user', None) + if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user): + # Don't record downloads by contributors + return if waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and isinstance(resource, Preprint): try: PreprintDownload.record_for_preprint( From f09940a521f9aaee6c069944f082ff92f479b14d Mon Sep 17 00:00:00 2001 From: Jon Walz Date: Tue, 27 Aug 2024 10:53:16 -0400 Subject: [PATCH 4/7] Update README-docker-compose.md (#10705) * Replace references to `docker-compose` command with `docker compose` * Make all bash commands copyable * Remove incorrect/misleading steps * Skip ES setup steps in tests if local SEARCH_ENGINE is None --------- Co-authored-by: Jon Walz --- README-docker-compose.md | 267 ++++++++++++++++++++++++--------------- tests/base.py | 6 +- 2 files changed, 168 insertions(+), 105 deletions(-) diff --git a/README-docker-compose.md b/README-docker-compose.md index 9d8d6a2ba46..dab0834411a 100644 --- a/README-docker-compose.md +++ b/README-docker-compose.md @@ -3,9 +3,7 @@ 1. Install the Docker Client - OSX: https://www.docker.com/products/docker#/mac - - Ubuntu - - docker: https://docs.docker.com/engine/installation/linux/ubuntulinux - - docker-compose: https://docs.docker.com/compose/install/ + - Ubuntu: https://docs.docker.com/engine/installation/linux/ubuntulinux - Windows: https://www.docker.com/products/docker#/windows 2. 
Grant the docker client additional resources (recommended minimums of 1 CPU, 8GB memory, 2GB swap, and 32GB disk image size) - OSX: https://docs.docker.com/docker-for-mac/#/preferences @@ -26,16 +24,21 @@ - Ubuntu - Add loopback alias - `sudo ifconfig lo:0 192.168.168.167 netmask 255.255.255.255 up` + ```bash + sudo ifconfig lo:0 192.168.168.167 netmask 255.255.255.255 up + ``` - For persistance, add to /etc/network/interfaces... Add lo:0 to auto line... - ```auto lo lo:0``` + ```bash + auto lo lo:0 + ``` Add stanza for lo:0... - ```iface lo:0 inet static - address 192.168.168.167 - netmask 255.255.255.255 - network 192.168.168.167 + ```bash + iface lo:0 inet static + address 192.168.168.167 + netmask 255.255.255.255 + network 192.168.168.167 ``` - If UFW enabled. Enable UFW forwarding. - https://docs.docker.com/engine/installation/linux/linux-postinstall/#allow-access-to-the-remote-api-through-a-firewall @@ -49,8 +52,10 @@ `sudo usermod -aG docker $USER` - In order to run OSF Preprints, raise fs.inotify.max_user_watches from default value - `echo fs.inotify.max_user_watches=131072 | sudo tee -a /etc/sysctl.conf` - `sudo sysctl -p` + ```bash + echo fs.inotify.max_user_watches=131072 | sudo tee -a /etc/sysctl.conf + sudo sysctl -p + ``` - Windows - Install Microsoft Loopback Adapter (Windows 10 follow community comments as the driver was renamed) @@ -75,19 +80,22 @@ * _NOTE: After making changes to `Environment Variables` or `Volume Mounts` you will need to recreate the container(s)._ - - `$ docker-compose up --force-recreate --no-deps preprints` + ```bash + docker compose up --force-recreate --no-deps preprints + ``` 1. Application Settings - e.g.
OSF & OSF API local.py - - `$ cp ./website/settings/local-dist.py ./website/settings/local.py` - - `$ cp ./api/base/settings/local-dist.py ./api/base/settings/local.py` - - `$ cp ./docker-compose-dist.override.yml ./docker-compose.override.yml` + ```bash + cp ./website/settings/local-dist.py ./website/settings/local.py + cp ./api/base/settings/local-dist.py ./api/base/settings/local.py + cp ./docker-compose-dist.override.yml ./docker-compose.override.yml + ``` For local tasks, (dev only) - `$ cp ./tasks/local-dist.py ./tasks/local.py` + ```bash + cp ./tasks/local-dist.py ./tasks/local.py + ``` 2. OPTIONAL (uncomment the below lines if you will use remote debugging) Environment variables (incl. remote debugging) - e.g. .docker-compose.env @@ -104,16 +112,12 @@ * _NOTE: The `elasticsearch`, `elasticsearch6`, and `sharejs` containers are incompatible with ARM64._ - - Running containers with docker-compose - - - Copy an ARM64-compatible configuration to `docker-compose.override.yml`: - - `$ cp ./docker-compose-dist-arm64.override.yml ./docker-compose.override.yml` + - Running containers with docker compose - In `webite/settings/local.py`, disable `SEARCH_ENGINE` ```python - # SEARCH_ENGINE = 'elastic' - SEARCH_ENGINE = None + # SEARCH_ENGINE = 'elastic' + SEARCH_ENGINE = None ``` - Building the Docker image @@ -121,9 +125,9 @@ - If you wish to use an OSF image other than the latest `develop-arm64`: - Build the image ```bash - $ cd - $ git checkout - $ docker buildx build --platform linux/arm64 -t osf:-arm64 . + cd + git checkout + docker buildx build --platform linux/arm64 -t osf:-arm64 . 
``` - In `docker-compose.override.yml`, replace any `quay.io/centerforopenscience/osf:develop-arm64` with the locally-tagged image above: ```yml @@ -134,31 +138,45 @@ * _NOTE: Running docker containers detached (`-d`) will execute them in the background, if you would like to view/follow their console log output use the following command._ - - `$ docker-compose logs -f --tail 1000 web` + ```bash + docker compose logs -f --tail 1000 web + ``` 1. Application Environment - - `$ docker-compose up requirements mfr_requirements wb_requirements gv_requirements` + ```bash + docker compose up requirements mfr_requirements wb_requirements gv_requirements + ``` - _NOTE: When the various requirements installations are complete these containers will exit. You should only need to run these containers after pulling code that changes python requirements or if you update the python requirements._ + _NOTE: When the various requirements installations are complete these containers will exit. You should only need to run these containers after pulling code that changes python requirements or if you update the python requirements._ 2. Start Core Component Services (Detached) - - `$ docker-compose up -d elasticsearch postgres mongo rabbitmq` + + ```bash + docker compose up -d elasticsearch postgres mongo rabbitmq + ``` 3. Remove your existing node_modules and start the assets watcher (Detached) - - `$ rm -Rf ./node_modules` - - `$ docker-compose up -d assets` - - `$ docker-compose up -d admin_assets` + ```bash + rm -Rf ./node_modules + docker compose up -d assets + docker compose up -d admin_assets + ``` + + _NOTE: The first time the assets container is run it will take Webpack/NPM up to 15 minutes to compile resources. + When you see the BowerJS build occurring it is likely a safe time to move forward with starting the remaining + containers._ - _NOTE: The first time the assets container is run it will take Webpack/NPM up to 15 minutes to compile resources. 
- When you see the BowerJS build occurring it is likely a safe time to move forward with starting the remaining - containers._ 4. Start the Services (Detached) - - `$ docker-compose up -d mfr wb fakecas sharejs` + ```bash + docker compose up -d mfr wb fakecas sharejs + ``` 5. Run migrations and create preprint providers - When starting with an empty database you will need to run migrations and populate preprint providers. See the [Running arbitrary commands](#running-arbitrary-commands) section below for instructions. 6. Start the OSF Web, API Server, and Preprints (Detached) - - `$ docker-compose up -d worker web api admin preprints ember_osf_web gv` + ```bash + docker compose up -d worker web api admin preprints ember_osf_web gv + ``` 7. View the OSF at [http://localhost:5000](http://localhost:5000). @@ -167,52 +185,52 @@ - Once the requirements have all been installed, you can start the OSF in the background with ```bash - $ docker-compose up -d assets admin_assets mfr wb fakecas sharejs worker web api admin preprints ember_osf_web gv + docker compose up -d assets admin_assets mfr wb fakecas sharejs worker web api admin preprints ember_osf_web gv ``` - To view the logs for a given container: ```bash - $ docker-compose logs -f --tail 100 web + docker compose logs -f --tail 100 web ``` ### Helpful aliases - Start all containers ```bash - alias dcsa="docker-compose up -d assets admin_assets mfr wb fakecas sharejs worker elasticsearch elasticsearch6 web api admin preprints gv" + alias dcsa="docker compose up -d assets admin_assets mfr wb fakecas sharejs worker elasticsearch elasticsearch6 web api admin preprints gv" ``` - Shut down all containers ```bash - alias dchs="docker-compose down" + alias dchs="docker compose down" ``` - Attach to container logs - dcl . Ie. 
`dcl web` will log only the web container - ```bash - alias dcl="docker-compose logs -f --tail 100 " - ``` + ```bash + alias dcl="docker compose logs -f --tail 100 " + ``` - Run migrations (Starting a fresh database or changes to migrations) ```bash - alias dcm="docker-compose run --rm web python3 manage.py migrate" + alias dcm="docker compose run --rm web python3 manage.py migrate" ``` - Download requirements (Whenever the requirements change or first-time set-up) ```bash - alias dcreq="docker-compose up requirements mfr_requirements wb_requirements gv_requirements" + alias dcreq="docker compose up requirements mfr_requirements wb_requirements gv_requirements" ``` - Restart the containers - - `$ dcr `. Ie. `dcr web` will restart the web container + - `dcr `. Ie. `dcr web` will restart the web container ```bash - alias dcr="docker-compose restart -t 0 " + alias dcr="docker compose restart -t 0 " ``` - Start the OSF shell (Interactive python shell that allows working directly with the osf on a code level instead of a web level.) ```bash - alias dcosfs="docker-compose run --rm web python3 manage.py osf_shell" + alias dcosfs="docker compose run --rm web python3 manage.py osf_shell" ``` - List all these commands @@ -222,35 +240,60 @@ ## Running arbitrary commands -- View logs: `$ docker-compose logs -f --tail 100 ` +- View logs: + ```bash + docker compose logs -f --tail 100 + ``` - _NOTE: CTRL-c will exit_ - Run migrations: - After creating migrations, resetting your database, or starting on a fresh install you will need to run migrations to make the needed changes to database. This command looks at the migrations on disk and compares them to the list of migrations in the `django_migrations` database table and runs any migrations that have not been run. 
- - `docker-compose run --rm web python3 manage.py migrate` To run `osf` migrations - - `docker-compose run --rm gv python manage.py migrate` To run `gravyvalet(gv)` migrations + - To run `osf` migrations: + ```bash + docker compose run --rm web python3 manage.py migrate + ``` + - To run `gravyvalet(gv)` migrations: + ```bash + docker compose run --rm gv python manage.py migrate + ``` - Populate institutions: - After resetting your database or with a new install you will need to populate the table of institutions. **You must have run migrations first.** - - `docker-compose run --rm web python3 -m scripts.populate_institutions -e test -a` + ```bash + docker compose run --rm web python3 -m scripts.populate_institutions -e test -a + ``` - Populate preprint, registration, and collection providers: - After resetting your database or with a new install, the required providers and subjects will be created automatically **when you run migrations.** To create more: - - `docker-compose run --rm web python3 manage.py populate_fake_providers` + ```bash + docker compose run --rm web python3 manage.py populate_fake_providers + ``` - _NOTE: In case, you encounter error with missing data, when running the `'populate_fake_providers'` command. Fix this with 'update_taxonomies' command:_ - - `docker-compose run --rm web python3 -m scripts.update_taxonomies` + ```bash + docker compose run --rm web python3 -m scripts.update_taxonomies + ``` - Populate citation styles - Needed for api v2 citation style rendering. 
- - `docker-compose run --rm web python3 -m scripts.parse_citation_styles` + ```bash + docker compose run --rm web python3 -m scripts.parse_citation_styles + ``` - Start ember_osf_web - Needed for quickfiles feature: - - `docker-compose up -d ember_osf_web` + ```bash + docker compose up -d ember_osf_web + ``` - OPTIONAL: Register OAuth Scopes - Needed for things such as the ember-osf dummy app - - `docker-compose run --rm web python3 -m scripts.register_oauth_scopes` + ```bash + docker compose run --rm web python3 -m scripts.register_oauth_scopes + ``` - OPTIONAL: Create migrations: - After changing a model you will need to create migrations and apply them. Migrations are python code that changes either the structure or the data of a database. This will compare the django models on disk to the database, find the differences, and create migration code to change the database. If there are no changes this command is a noop. - - `docker-compose run --rm web python3 manage.py makemigrations` + ```bash + docker compose run --rm web python3 manage.py makemigrations + ``` - OPTIONAL: Destroy and recreate an empty database: - **WARNING**: This will delete all data in your database. - - `docker-compose run --rm web python3 manage.py reset_db --noinput` + ```bash + docker compose run --rm web python3 manage.py reset_db --noinput + ``` ## Application Debugging @@ -260,7 +303,7 @@ If you want to debug your changes by using print statements, you'll have to have 1. Edit your container configuration in docker-compose.mfr.env or docker-compose.mfr.env to include the new environment variable by uncommenting PYTHONUNBUFFERED=0 2. 
If you're using a container running Python 3 you can insert the following code prior to a print statement: - ``` + ```python import functools print = functools.partial(print, flush=True) ``` @@ -278,13 +321,13 @@ You should run the `web` and/or `api` container (depending on which codebase the ```bash # Kill the already-running web container -$ docker-compose kill web +docker compose kill web # Run a web container. App logs and breakpoints will show up here. -$ docker-compose run --rm --service-ports web +docker compose run --rm --service-ports web ``` -**IMPORTANT: While attached to the running app, CTRL-c will stop the container.** To detach from the container and leave it running, **use CTRL-p CTRL-q**. Use `docker attach` to re-attach to the container, passing the *container-name* (which you can get from `docker-compose ps`), e.g. `docker attach osf_web_run_1`. +**IMPORTANT: While attached to the running app, CTRL-c will stop the container.** To detach from the container and leave it running, **use CTRL-p CTRL-q**. Use `docker attach` to re-attach to the container, passing the *container-name* (which you can get from `docker compose ps`), e.g. `docker attach osf_web_run_1`. 
### Remote Debugging with PyCharm @@ -300,37 +343,47 @@ $ docker-compose run --rm --service-ports web ## Application Tests - Run All Tests - - `$ docker-compose run --rm web invoke test` - -- Run OSF Specific Tests - - `$ docker-compose run --rm web invoke test_osf` + ```bash + docker compose run --rm web python3 -m pytest + ``` - Test a Specific Module - - `$ docker-compose run --rm web invoke test_module -m tests/test_conferences.py` + ```bash + docker compose run --rm web python3 -m pytest tests/test_conferences.py + ``` - Test a Specific Class - - `docker-compose run --rm web invoke test_module -m tests/test_conferences.py::TestProvisionNode` + ```bash + docker compose run --rm web python3 -m pytest tests/test_conferences.py::TestProvisionNode + ``` - Test a Specific Method - - `$ docker-compose run --rm web invoke test_module -m tests/test_conferences.py::TestProvisionNode::test_upload` - -- Test with Specific Parameters (1 cpu, capture stdout) - - `$ docker-compose run --rm web invoke test_module -m tests/test_conferences.py::TestProvisionNode::test_upload -n 1 --params '--capture=sys'` + ```bash + docker compose run --rm web python3 -m pytest tests/test_conferences.py::TestProvisionNode::test_upload + ``` ## Managing Container State Restart a container: - - `$ docker-compose restart -t 0 assets` +```bash +docker compose restart -t 0 assets +``` Recreate a container _(useful to ensure all environment variables/volume changes are in order)_: - - `$ docker-compose up --force-recreate --no-deps assets` + ```bash + docker compose up --force-recreate --no-deps assets + ``` Delete a container _(does not remove volumes)_: - - `$ docker-compose stop -t 0 assets` - - `$ docker-compose rm assets` + ```bash + docker compose stop -t 0 assets + docker compose rm assets + ``` List containers and status: - - `$ docker-compose ps` +```bash +docker compose ps +``` ### Backing up your database In certain cases, you may wish to remove all docker container images, but preserve a 
copy of the database used by your @@ -342,11 +395,13 @@ resetting docker. To back up your database, follow the following sequence of com ([as of this writing](https://github.com/CenterForOpenScience/osf.io/blob/ce1702cbc95eb7777e5aaf650658a9966f0e6b0c/docker-compose.yml#L53), Postgres 15) 2. Start postgres locally. This must be on a different port than the one used by [docker postgres](https://github.com/CenterForOpenScience/osf.io/blob/ce1702cbc95eb7777e5aaf650658a9966f0e6b0c/docker-compose.yml#L61). Eg, `pg_ctl -D /usr/local/var/postgres start -o "-p 5433"` -3. Verify that the postgres docker container is running (`docker-compose up -d postgres`) +3. Verify that the postgres docker container is running (`docker compose up -d postgres`) 4. Tell your local (non-docker) version of postgres to connect to (and back up) data from the instance in docker - (defaults to port 5432): - `pg_dump --username postgres --compress 9 --create --clean --format d --jobs 4 --host localhost --file ~/Desktop/osf_backup osf` for osf -5. The same can be done for `grayvalet`, just replace `osf` with `gravyvalet` (this applies for all following commands related to backups) + (defaults to port 5432). For `osf` run: + ```bash + pg_dump --username postgres --compress 9 --create --clean --format d --jobs 4 --host localhost --file ~/Desktop/osf_backup osf + ``` +5. The same can be done for `gravyvalet`, just replace `osf` with `gravyvalet` (this applies for all following commands related to backups) (shorthand: `pg_dump -U postgres -Z 9 -C --c -Fd --j 4 -h localhost --f ~/Desktop/osf_backup osf`) @@ -357,12 +412,14 @@ resetting docker. To back up your database, follow the following sequence of com ``` 2. 
Delete a persistent storage volume: **WARNING: All postgres data will be destroyed.** - - `$ docker-compose stop -t 0 postgres` - - `$ docker-compose rm postgres` - - `$ docker volume rm osfio_postgres_data_vol` +```bash +docker compose stop -t 0 postgres +docker compose rm postgres +docker volume rm osfio_postgres_data_vol +``` 3. Starting a new postgres container. ```bash -docker-compose up -d postgres +docker compose up -d postgres ``` 4. Restoring the database from the dump file into the new postgres container. ```bash @@ -376,7 +433,7 @@ instructions on dropping postgres data volumes) When ready, run the restore command from a local terminal: ```bash -$ pg_restore --username postgres --clean --dbname osf --format d --jobs 4 --host localhost ~/Desktop/osf_backup +pg_restore --username postgres --clean --dbname osf --format d --jobs 4 --host localhost ~/Desktop/osf_backup ``` (shorthand) `pg_restore -U postgres -c -d osf -Fd -j 4 -h localhost ~/Desktop/osf_backup` @@ -386,29 +443,31 @@ $ pg_restore --username postgres --clean --dbname osf --format d --jobs 4 --host Resetting the Environment: **WARNING: All volumes and containers are destroyed** - - `$ docker-compose down -v` + - `docker compose down -v` Delete a persistent storage volume: **WARNING: All postgres data will be destroyed.** - - `$ docker-compose stop -t 0 postgres` - - `$ docker-compose rm postgres` - - `$ docker volume rm osfio_postgres_data_vol` + ```bash + docker compose stop -t 0 postgres + docker compose rm postgres + docker volume rm osfio_postgres_data_vol + ``` ## Updating ```bash -$ git stash # if you have any changes that need to be stashed -$ git pull upstream develop # (replace upstream with the name of your remote) -$ git stash pop # unstash changes +git stash # if you have any changes that need to be stashed +git pull upstream develop # (replace upstream with the name of your remote) +git stash pop # unstash changes # If you get an out of space error -$ docker image prune +docker 
image prune # Pull latest images -$ docker-compose pull +docker compose pull # It is recommended to run requirements only for services that require update, not to wear off local SSD more than needed -$ docker-compose up requirements mfr_requirements wb_requirements gv_requirements +docker compose up requirements mfr_requirements wb_requirements gv_requirements # Run db migrations -$ docker-compose run --rm web python3 manage.py migrate +docker compose run --rm web python3 manage.py migrate ``` ## Miscellaneous @@ -425,7 +484,7 @@ The issue is that docker containers run in unprivileged mode by default. For `docker run`, you can use `--privilege=true` to give the container extended privileges. You can also add or drop capabilities by using `cap-add` and `cap-drop`. Since Docker 1.12, there is no need to add `--security-opt seccomp=unconfined` because the seccomp profile will adjust to selected capabilities. ([Reference](https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities)) -When using `docker-compose`, set `privileged: true` for individual containers in the `docker-compose.yml`. ([Reference](https://docs.docker.com/compose/compose-file/#domainname-hostname-ipc-mac_address-privileged-read_only-shm_size-stdin_open-tty-user-working_dir)) Here is an example for WaterButler: +When using `docker compose`, set `privileged: true` for individual containers in the `docker-compose.yml`. 
([Reference](https://docs.docker.com/compose/compose-file/#domainname-hostname-ipc-mac_address-privileged-read_only-shm_size-stdin_open-tty-user-working_dir)) Here is an example for WaterButler: ```yml wb: diff --git a/tests/base.py b/tests/base.py index 1d2068189b2..2c36dd801eb 100644 --- a/tests/base.py +++ b/tests/base.py @@ -150,6 +150,9 @@ def setUp(self): class SearchTestCase(unittest.TestCase): def setUp(self): + if settings.SEARCH_ENGINE is None: + return + settings.ELASTIC_INDEX = uuid.uuid1().hex settings.ELASTIC_TIMEOUT = 60 @@ -163,7 +166,8 @@ def setUp(self): def tearDown(self): super().tearDown() - + if settings.SEARCH_ENGINE is None: + return from website.search import elastic_search elastic_search.delete_index(settings.ELASTIC_INDEX) From 5d83a420e8cf0b009aaefa4786059b714be78d1b Mon Sep 17 00:00:00 2001 From: Fitz Elliott Date: Mon, 26 Aug 2024 14:57:34 -0400 Subject: [PATCH 5/7] don't pass all headers to do_check_spam * Something in python-upgrade started including the request input object in the headers. Celery/kombu can't serialize this and dies screaming. Silence the screams. 
[ENG-6022] --- osf/models/spam.py | 1 - 1 file changed, 1 deletion(-) diff --git a/osf/models/spam.py b/osf/models/spam.py index c6d0a438f5c..993039b1fcf 100644 --- a/osf/models/spam.py +++ b/osf/models/spam.py @@ -200,7 +200,6 @@ def do_check_spam(self, author, author_email, content, request_headers): 'user_agent': request_headers.get('User-Agent'), 'referer': request_headers.get('Referer'), } - request_kwargs.update(request_headers) check_resource_for_domains_postcommit( self.guids.first()._id, From 2d458e9defeb01341a65b112c5bcd31cdcfac821 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Mon, 2 Sep 2024 12:04:53 -0400 Subject: [PATCH 6/7] [ENG-6197] Update Docker Compose files to run ES6 on ARM (#10728) * fix thanks to MC, add working docker image * use envvars consistently * clean-up README.md --------- Co-authored-by: John Tordoff <> --- README-docker-compose.md | 20 ++++--------- api/base/settings/defaults.py | 2 +- conftest.py | 2 +- docker-compose-dist-arm64.override.yml | 41 ++------------------------ website/settings/defaults.py | 1 + 5 files changed, 10 insertions(+), 56 deletions(-) diff --git a/README-docker-compose.md b/README-docker-compose.md index dab0834411a..8e2fb5098ef 100644 --- a/README-docker-compose.md +++ b/README-docker-compose.md @@ -110,7 +110,11 @@ #### Special Instructions for Apple Chipset (M1, M2, etc.) 
and other ARM64 architecture - * _NOTE: The `elasticsearch`, `elasticsearch6`, and `sharejs` containers are incompatible with ARM64._ + * _NOTE: The default `elasticsearch`, `elasticsearch6`, and `sharejs` containers are incompatible with ARM64._ + + - To run `elasticsearch6` on ARM64 architecture: + + - Copy `docker-compose-dist-arm64.override.yml` into your `docker-compose.override.yml` file - Running containers with docker compose @@ -120,20 +124,6 @@ SEARCH_ENGINE = None ``` - - Building the Docker image - - - If you wish to use an OSF image other than the latest `develop-arm64`: - - Build the image - ```bash - cd - git checkout - docker buildx build --platform linux/arm64 -t osf:-arm64 . - ``` - - In `docker-compose.override.yml`, replace any `quay.io/centerforopenscience/osf:develop-arm64` with the locally-tagged image above: - ```yml - image: osf:-arm64 - ``` - ## Application Runtime * _NOTE: Running docker containers detached (`-d`) will execute them in the background, if you would like to view/follow their console log output use the following command._ diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py index d74e744f787..a7d8838fe04 100644 --- a/api/base/settings/defaults.py +++ b/api/base/settings/defaults.py @@ -317,7 +317,7 @@ # django-elasticsearch-metrics ELASTICSEARCH_DSL = { 'default': { - 'hosts': os.environ.get('ELASTIC6_URI', '127.0.0.1:9201'), + 'hosts': osf_settings.ELASTIC6_URI, 'retry_on_timeout': True, }, } diff --git a/conftest.py b/conftest.py index 1acfabbdbb5..2eb51df076e 100644 --- a/conftest.py +++ b/conftest.py @@ -124,7 +124,7 @@ def _test_speedups_disable(request, settings, _test_speedups): @pytest.fixture(scope='session') def setup_connections(): - connections.create_connection(hosts=['http://localhost:9201']) + connections.create_connection(hosts=[website_settings.ELASTIC6_URI]) @pytest.fixture(scope='function') diff --git a/docker-compose-dist-arm64.override.yml b/docker-compose-dist-arm64.override.yml 
index aad331ae1a7..cffa4bd8982 100644 --- a/docker-compose-dist-arm64.override.yml +++ b/docker-compose-dist-arm64.override.yml @@ -6,43 +6,6 @@ services: # OSF # ####### - requirements: - image: quay.io/centerforopenscience/osf:develop-arm64 + elasticsearch6: + image: quay.io/centerforopenscience/elasticsearch:es6-arm-6.3.1 platform: linux/arm64 - - assets: - image: quay.io/centerforopenscience/osf:develop-arm64 - platform: linux/arm64 - # Need to allocate tty to be able to call invoke for requirements task - tty: true - - admin_assets: - image: quay.io/centerforopenscience/osf:develop-arm64 - platform: linux/arm64 - # Need to allocate tty to be able to call invoke for requirements task - tty: true - - worker: - image: quay.io/centerforopenscience/osf:develop-arm64 - platform: linux/arm64 - # Need to allocate tty to be able to call invoke for requirements task - tty: true - - admin: - image: quay.io/centerforopenscience/osf:develop-arm64 - platform: linux/arm64 - # Need to allocate tty to be able to call invoke for requirements task - tty: true - - api: - image: quay.io/centerforopenscience/osf:develop-arm64 - platform: linux/arm64 - # Need to allocate tty to be able to call invoke for requirements task - tty: true - - web: - image: quay.io/centerforopenscience/osf:develop-arm64 - platform: linux/arm64 - # Need to allocate tty to be able to call invoke for requirements task - tty: true - diff --git a/website/settings/defaults.py b/website/settings/defaults.py index 7d6df427336..b305fe0fd88 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -105,6 +105,7 @@ def parent_dir(path): SEARCH_ENGINE = 'elastic' # Can be 'elastic', or None ELASTIC_URI = '127.0.0.1:9200' +ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201') ELASTIC_TIMEOUT = 10 ELASTIC_INDEX = 'website' ELASTIC_KWARGS = { From 97cba74653559ecbf8f2b51f394a0c8362780d16 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 2 Sep 2024 14:05:46 -0400 Subject: [PATCH 7/7] 
Update force_archive.LOGS_WHITELIST --- osf/management/commands/force_archive.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/osf/management/commands/force_archive.py b/osf/management/commands/force_archive.py index b844f520526..725d53172fa 100644 --- a/osf/management/commands/force_archive.py +++ b/osf/management/commands/force_archive.py @@ -58,10 +58,13 @@ # Ignorable NodeLogs LOG_WHITELIST = { 'affiliated_institution_added', + 'category_updated', 'comment_added', 'comment_removed', 'comment_restored', 'comment_updated', + 'confirm_ham', + 'confirm_spam', 'contributor_added', 'contributor_removed', 'contributors_reordered', @@ -72,7 +75,10 @@ 'embargo_completed', 'embargo_initiated', 'embargo_terminated', + 'external_ids_added', 'file_tag_added', + 'flag_spam', + 'guid_metadata_updated', 'license_changed', 'made_contributor_invisible', 'made_private', @@ -80,6 +86,8 @@ 'made_wiki_private', 'made_wiki_public', 'node_removed', + 'node_access_requests_disabled', + 'node_access_requests_enabled', 'permissions_updated', 'pointer_created', 'pointer_removed', @@ -92,6 +100,7 @@ 'registration_initiated', 'retraction_approved', 'retraction_initiated', + 'subjects_updated', 'tag_added', 'tag_removed', 'wiki_deleted',