Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Correctly handle log level configuration #508

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 58 additions & 12 deletions src/crawlee/_log_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

import json
import logging
import sys
import textwrap
import traceback
from typing import Any
from typing import TYPE_CHECKING, Any

from colorama import Fore, Style, just_fix_windows_console
from typing_extensions import assert_never

if TYPE_CHECKING:
from crawlee.configuration import Configuration

just_fix_windows_console()

Expand All @@ -31,6 +35,46 @@
_LOG_MESSAGE_INDENT = ' ' * 6


def get_configured_log_level(configuration: Configuration) -> int:
verbose_logging_requested = 'verbose_log' in configuration.model_fields_set and configuration.verbose_log

if 'log_level' in configuration.model_fields_set:
if configuration.log_level == 'DEBUG':
return logging.DEBUG
if configuration.log_level == 'INFO':
return logging.INFO
if configuration.log_level == 'WARNING':
return logging.WARNING
if configuration.log_level == 'ERROR':
return logging.ERROR
vdusek marked this conversation as resolved.
Show resolved Hide resolved
if configuration.log_level == 'CRITICAL':
return logging.CRITICAL

assert_never(configuration.log_level)

if sys.flags.dev_mode or verbose_logging_requested:
return logging.DEBUG

return logging.INFO


def configure_logger(
    logger: logging.Logger,
    configuration: Configuration,
    *,
    remove_old_handlers: bool = False,
) -> None:
    """Attach a Crawlee-formatted stream handler to *logger* and set its level.

    When ``remove_old_handlers`` is true, all previously attached handlers are
    detached first so the new handler is the only one left.
    """
    if remove_old_handlers:
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(CrawleeLogFormatter())
    logger.addHandler(stream_handler)

    logger.setLevel(get_configured_log_level(configuration))


class CrawleeLogFormatter(logging.Formatter):
"""Log formatter that prints out the log message nicely formatted, with colored level and stringified extra fields.

Expand Down Expand Up @@ -87,15 +131,6 @@ def format(self, record: logging.LogRecord) -> str:
level_short_alias = _LOG_LEVEL_SHORT_ALIAS.get(record.levelno, record.levelname)
level_string = f'{level_color_code}{level_short_alias}{Style.RESET_ALL} '

# Format the exception, if there is some
# Basically just print the traceback and indent it a bit
exception_string = ''
if record.exc_info:
exc_info = record.exc_info
record.exc_info = None
exception_string = ''.join(traceback.format_exception(*exc_info)).rstrip()
exception_string = '\n' + textwrap.indent(exception_string, _LOG_MESSAGE_INDENT)

# Format the extra log record fields, if there were some
# Just stringify them to JSON and color them gray
extra_string = ''
Expand All @@ -105,8 +140,19 @@ def format(self, record: logging.LogRecord) -> str:
f' {Fore.LIGHTBLACK_EX}({json.dumps(extra, ensure_ascii=False, default=str)}){Style.RESET_ALL}'
)

# Call the parent method so that it populates missing fields in the record
super().format(record)

# Format the actual log message
log_string = super().format(record)
log_string = self.formatMessage(record)

# Format the exception, if there is some
# Basically just print the traceback and indent it a bit
exception_string = ''
if record.exc_text:
exception_string = '\n' + textwrap.indent(record.exc_text.rstrip(), _LOG_MESSAGE_INDENT)
else:
exception_string = ''

if self.include_logger_name:
# Include logger name at the beginning of the log line
Expand Down
16 changes: 5 additions & 11 deletions src/crawlee/basic_crawler/_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from crawlee._autoscaling import AutoscaledPool
from crawlee._autoscaling.snapshotter import Snapshotter
from crawlee._autoscaling.system_status import SystemStatus
from crawlee._log_config import CrawleeLogFormatter
from crawlee._log_config import configure_logger, get_configured_log_level
from crawlee._request import BaseRequestData, Request, RequestState
from crawlee._types import BasicCrawlingContext, HttpHeaders, RequestHandlerRunResult, SendRequestFunction
from crawlee._utils.byte_size import ByteSize
Expand Down Expand Up @@ -203,20 +203,14 @@ def __init__(
self._retry_on_blocked = retry_on_blocked

if configure_logging:
handler = logging.StreamHandler()
handler.setFormatter(CrawleeLogFormatter())

root_logger = logging.getLogger()

for old_handler in root_logger.handlers[:]:
root_logger.removeHandler(old_handler)

root_logger.addHandler(handler)
root_logger.setLevel(logging.INFO if not sys.flags.dev_mode else logging.DEBUG)
configure_logger(root_logger, self._configuration, remove_old_handlers=True)

# Silence HTTPX logger
httpx_logger = logging.getLogger('httpx')
httpx_logger.setLevel(logging.WARNING if not sys.flags.dev_mode else logging.INFO)
httpx_logger.setLevel(
logging.DEBUG if get_configured_log_level(self._configuration) <= logging.DEBUG else logging.WARNING
)
vdusek marked this conversation as resolved.
Show resolved Hide resolved

if not _logger:
_logger = logging.getLogger(__name__)
Expand Down
9 changes: 5 additions & 4 deletions src/crawlee/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from __future__ import annotations

from datetime import timedelta
from typing import Annotated
from typing import Annotated, Literal

from pydantic import AliasChoices, Field
from pydantic import AliasChoices, BeforeValidator, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing_extensions import Self

Expand Down Expand Up @@ -51,14 +51,15 @@ class Configuration(BaseSettings):
] = False

log_level: Annotated[
int,
Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
Field(
validation_alias=AliasChoices(
'apify_log_level',
'crawlee_log_level',
)
),
] = 4 # INFO
BeforeValidator(lambda value: str(value).upper()),
] = 'INFO'

default_dataset_id: Annotated[
str,
Expand Down
Loading