From 78f04ce16e4366de434331460eec2f7f84998fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sondre=20Lilleb=C3=B8=20Gundersen?= Date: Wed, 18 Nov 2020 23:57:34 +0100 Subject: [PATCH] Add celery integration docs (#52) --- CHANGELOG.rst | 11 ++ README.rst | 4 +- django_guid/__init__.py | 2 +- .../integrations/celery/integration.py | 2 +- docs/README_PYPI.rst | 4 +- docs/configuration.rst | 2 +- docs/index.rst | 2 +- docs/integrations.rst | 146 +++++++++++++++++- docs/troubleshooting.rst | 2 +- pyproject.toml | 8 +- 10 files changed, 167 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2500b0d..cb3ea0c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog ========= +`3.1.0`_ - 18.11.2020 +--------------------- + +**Features** + +* Added a new setting, ``UUID_LENGTH``, which lets you crop the UUIDs generated for log filters. +* Added a new integration for tracing with Celery_. + `3.0.1`_ - 12.11.2020 --------------------- @@ -213,3 +221,6 @@ see the `upgrading docs`_. .. _3.0.0: https://github.com/snok/django-guid/compare/2.2.0...3.0.0 .. _upgrading docs: https://django-guid.readthedocs.io/en/latest/upgrading.html .. _3.0.1: https://github.com/snok/django-guid/compare/3.0.0...3.0.1 +.. _3.1.0: https://github.com/snok/django-guid/compare/3.0.1...3.1.0 + +.. _Celery: https://docs.celeryproject.org/en/stable/ diff --git a/README.rst b/README.rst index 160cc70..02e985a 100644 --- a/README.rst +++ b/README.rst @@ -75,7 +75,7 @@ making it accessible by using the ID of the current thread. (Version 2 of Django * Free software: BSD License * Documentation: https://django-guid.readthedocs.io -* Homepage: https://github.com/JonasKs/django-guid +* Homepage: https://github.com/snok/django-guid -------------- @@ -262,7 +262,7 @@ And make sure to add the new ``correlation_id`` filter to one or all of your for If these settings were confusing, please have a look in the demo projects' -`settings.py `_ file for a complete example. 
+`settings.py `_ file for a complete example. 4. Django GUID Logger (Optional) ================================ diff --git a/django_guid/__init__.py b/django_guid/__init__.py index fa894fc..559662e 100644 --- a/django_guid/__init__.py +++ b/django_guid/__init__.py @@ -1,4 +1,4 @@ from django_guid.api import clear_guid, get_guid, set_guid # noqa F401 -__version__ = '3.0.1' +__version__ = '3.1.0' default_app_config = 'django_guid.apps.DjangoGuidConfig' diff --git a/django_guid/integrations/celery/integration.py b/django_guid/integrations/celery/integration.py index 9c26137..be93db1 100644 --- a/django_guid/integrations/celery/integration.py +++ b/django_guid/integrations/celery/integration.py @@ -19,7 +19,7 @@ class CeleryIntegration(Integration): def __init__(self, use_django_logging: bool = False, log_parent: bool = False, uuid_length: int = 32) -> None: """ :param use_django_logging: If true, configures Celery to use the logging settings defined in settings.py - :param log_parent: If true, traces the origin of a task. Should be True if you wish to use the CeleryParentId log filter. + :param log_parent: If true, traces the origin of a task. Should be True if you wish to use the CeleryTracing log filter. :param uuid_length: Optionally lets you set the length of the celery IDs generated for the log filter """ super().__init__() diff --git a/docs/README_PYPI.rst b/docs/README_PYPI.rst index 13ee7f2..f6f3c90 100644 --- a/docs/README_PYPI.rst +++ b/docs/README_PYPI.rst @@ -60,7 +60,7 @@ making it accessible by using the ID of the current thread. 
(Version 2 of Django * Free software: BSD License * Documentation: https://django-guid.readthedocs.io -* Homepage: https://github.com/JonasKs/django-guid +* Homepage: https://github.com/snok/django-guid -------------- @@ -247,7 +247,7 @@ And make sure to add the new ``correlation_id`` filter to one or all of your for If these settings were confusing, please have a look in the demo projects' -`settings.py `_ file for a complete example. +`settings.py `_ file for a complete example. 4. Django GUID Logger (Optional) ================================ diff --git a/docs/configuration.rst b/docs/configuration.rst index 8d756f2..b578d7c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -80,7 +80,7 @@ And make sure to add the new ``correlation_id`` filter to one or all of your for If these settings were confusing, please have a look in the demo projects' -`settings.py `_ file for a complete example. +`settings.py `_ file for a complete example. 4. Django GUID Logger (Optional) -------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 4c5ad08..d72b7ab 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -73,7 +73,7 @@ making it accessible by using the ID of the current thread. * Free software: BSD License * Documentation: https://django-guid.readthedocs.io -* Homepage: https://github.com/JonasKs/django-guid +* Homepage: https://github.com/snok/django-guid -------------- diff --git a/docs/integrations.rst b/docs/integrations.rst index 12febec..a98ca22 100644 --- a/docs/integrations.rst +++ b/docs/integrations.rst @@ -51,11 +51,155 @@ To add the integration, simply import ``SentryIntegration`` from the integration 'INTEGRATIONS': [SentryIntegration()], } +Celery +------ + +The Celery integration enables tracing for Celery workers. There's three possible scenarios: + +1. A task is published from a request within Django +2. A task is published from another task +3. 
A task is published from Celery Beat + +For scenarios 1 and 2 the existing correlation ID is transferred, and for scenario +3 a unique ID is generated. + +To enable this behavior, simply add it to your list of integrations: + +.. code-block:: python + + from django_guid.integrations import CeleryIntegration + + DJANGO_GUID = { + ... + 'INTEGRATIONS': [ + CeleryIntegration( + use_django_logging=True, + log_parent=True, + ) + ], + } + +Integration settings +^^^^^^^^^^^^^^^^^^^^ + +These are the settings you can pass when instantiating the ``CeleryIntegration``: + +* **use_django_logging**: Tells celery to use the Django logging configuration (formatter). +* **log_parent**: Enables the ``CeleryTracing`` log filter described below. +* **uuid_length**: Lets you optionally trim the length of the integration-generated UUIDs. + +Celery integration log filter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Out of the box, the CeleryIntegration will make sure a correlation ID is present for any Celery task; +but how do you make sense of duplicate logs in subprocesses? Given these example tasks, what happens if a worker +picks up ``debug_task`` as scheduled by Celery beat? + +.. code-block:: python + + @app.task() + def debug_task() -> None: + logger.info('Debug task 1') + second_debug_task.delay() + second_debug_task.delay() + + + @app.task() + def second_debug_task() -> None: + logger.info('Debug task 2') + third_debug_task.delay() + fourth_debug_task.delay() + + + @app.task() + def third_debug_task() -> None: + logger.info('Debug task 3') + fourth_debug_task.delay() + fourth_debug_task.delay() + + + @app.task() + def fourth_debug_task() -> None: + logger.info('Debug task 4') + + +It will be close to impossible to make sense of the logs generated, +simply because the correlation ID tells you nothing about how subprocesses are linked. 
For this, +the integration provides an additional log filter, ``CeleryTracing``, which logs the +ID of the current process and the ID of the parent process. Using the log filter, the log output of the example tasks becomes: + +.. code-block:: bbcode + + correlation-id current-id + | parent-id | + | | | + INFO [3b162382e1] [ None ] [93ddf3639c] demoproj.celery - Debug task 1 + INFO [3b162382e1] [93ddf3639c] [24046ab022] demoproj.celery - Debug task 2 + INFO [3b162382e1] [93ddf3639c] [cb5595a417] demoproj.celery - Debug task 2 + INFO [3b162382e1] [24046ab022] [08f5428a66] demoproj.celery - Debug task 3 + INFO [3b162382e1] [24046ab022] [32f40041c6] demoproj.celery - Debug task 4 + INFO [3b162382e1] [cb5595a417] [1c75a4ed2c] demoproj.celery - Debug task 3 + INFO [3b162382e1] [08f5428a66] [578ad2d141] demoproj.celery - Debug task 4 + INFO [3b162382e1] [cb5595a417] [21b2ef77ae] demoproj.celery - Debug task 4 + INFO [3b162382e1] [08f5428a66] [8cad7fc4d7] demoproj.celery - Debug task 4 + INFO [3b162382e1] [1c75a4ed2c] [72a43319f0] demoproj.celery - Debug task 4 + INFO [3b162382e1] [1c75a4ed2c] [ec3cf4113e] demoproj.celery - Debug task 4 + +At the very least, this should provide a mechanism for linking parent/children processes +in a meaningful way. + +To set up the filter, add :code:`django_guid.integrations.celery.log_filters.CeleryTracing` as a filter in your ``LOGGING`` configuration: + +.. code-block:: python + + LOGGING = { + ... + 'filters': { + 'celery_tracing': { + '()': 'django_guid.integrations.celery.log_filters.CeleryTracing' + } + } + } + +Put that filter in your handler: + +.. code-block:: python + + LOGGING = { + ... + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'medium', + 'filters': ['correlation_id', 'celery_tracing'], + } + } + } + +And then you can **optionally** add ``celery_parent_id`` and/or ``celery_current_id`` to your formatter: + +.. code-block:: python + + LOGGING = { + ... 
+ 'formatters': { + 'medium': { + 'format': '%(levelname)s [%(correlation_id)s] [%(celery_parent_id)s-%(celery_current_id)s] %(name)s - %(message)s' + } + } + } + +However, if you use a log management tool which lets you interact with ``log.extra`` values, leaving the filters +out of the formatter might be preferable. + +If these settings were confusing, please have a look in the demo projects' +`settings.py `_ file for a complete example. + Writing your own integration ============================ -Creating your own custom integration requires you to inherit the ``Integration`` base class (which is found `here `_). +Creating your own custom integration requires you to inherit the ``Integration`` base class (which is found `here `_). The class is quite simple and only contains four methods and a class attribute: diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index efcdb23..b12b95b 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -40,4 +40,4 @@ Read the `official docs ` Ask for help ------------ -Still no luck? Create an `issue on GitHub `_ and ask for help. +Still no luck? Create an `issue on GitHub `_ and ask for help. 
diff --git a/pyproject.toml b/pyproject.toml index 4d044a0..0730ac5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,9 @@ [tool.poetry] name = "django-guid" -version = "3.0.1" # Remember to also change __init__.py version +version = "3.1.0" # Remember to also change __init__.py version description = "Middleware that enables single request-response cycle tracing by injecting a unique ID into project logs" authors = ["Jonas Krüger Svensson "] +maintainers = ["Sondre Lillebø Gundersen "] license = "BSD-4-Clause" readme = "docs/README_PYPI.rst" homepage = "https://github.com/snok/django-guid" @@ -60,11 +61,6 @@ celery = "^5.0.2" redis = "^3.5.3" ipython = "^7.19.0" -[tool.poetry.extras] -celery = ["celery"] -sentry = ["sentry-sdk"] -all = ["celery,sentry-sdk"] - [tool.black] line-length = 120 skip-string-normalization = true