Skip to content

Commit

Permalink
Correctly handle log level configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
janbuchar committed Sep 9, 2024
1 parent 0afec94 commit c21ffa1
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 26 deletions.
68 changes: 56 additions & 12 deletions src/crawlee/_log_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

import json
import logging
import sys
import textwrap
import traceback
from typing import Any
from typing import TYPE_CHECKING, Any

from colorama import Fore, Style, just_fix_windows_console
from typing_extensions import assert_never

if TYPE_CHECKING:
from crawlee.configuration import Configuration

just_fix_windows_console()

Expand All @@ -31,6 +35,44 @@
_LOG_MESSAGE_INDENT = ' ' * 6


def get_configured_log_level(configuration: Configuration) -> int:
    """Resolve the numeric `logging` level requested by the given configuration.

    The resolution order is:

    1. An explicitly set `log_level` is translated to the matching `logging` constant.
    2. Otherwise `logging.DEBUG` is used when the interpreter runs in development mode
       (`sys.flags.dev_mode`) or when `verbose_log` was explicitly set to a truthy value.
    3. Otherwise `logging.INFO` is returned.

    Args:
        configuration: The configuration to read `log_level` and `verbose_log` from.

    Returns:
        One of `logging.DEBUG`, `logging.INFO`, `logging.WARNING` or `logging.ERROR`.
    """
    # Fields missing from `model_fields_set` are ignored here, so they cannot override the fallbacks below.
    explicitly_set = configuration.model_fields_set

    if 'log_level' in explicitly_set:
        log_level = configuration.log_level

        if log_level == 'DEBUG':
            return logging.DEBUG
        if log_level == 'INFO':
            return logging.INFO
        if log_level == 'WARNING':
            return logging.WARNING
        if log_level == 'ERROR':
            return logging.ERROR

        # Every supported level name was handled above - reaching this line means an unsupported value.
        assert_never(log_level)

    # No explicit log level - fall back to debug output in dev mode or when verbose logging was requested.
    if sys.flags.dev_mode:
        return logging.DEBUG

    if 'verbose_log' in explicitly_set and configuration.verbose_log:
        return logging.DEBUG

    return logging.INFO


def configure_logger(
    logger: logging.Logger,
    configuration: Configuration,
    *,
    remove_old_handlers: bool = False,
) -> None:
    """Attach a Crawlee-formatted stream handler to the logger and set its level from the configuration.

    Args:
        logger: The logger to configure.
        configuration: The configuration from which the log level is resolved.
        remove_old_handlers: If True, all handlers currently attached to the logger are detached
            before the new handler is added.
    """
    handler = logging.StreamHandler()
    handler.setFormatter(CrawleeLogFormatter())

    if remove_old_handlers:
        # Iterate over a copy, because `removeHandler` mutates `logger.handlers` while we loop.
        for old_handler in logger.handlers[:]:
            logger.removeHandler(old_handler)

    logger.addHandler(handler)
    logger.setLevel(get_configured_log_level(configuration))


class CrawleeLogFormatter(logging.Formatter):
"""Log formatter that prints out the log message nicely formatted, with colored level and stringified extra fields.
Expand Down Expand Up @@ -87,15 +129,6 @@ def format(self, record: logging.LogRecord) -> str:
level_short_alias = _LOG_LEVEL_SHORT_ALIAS.get(record.levelno, record.levelname)
level_string = f'{level_color_code}{level_short_alias}{Style.RESET_ALL} '

# Format the exception, if there is some
# Basically just print the traceback and indent it a bit
exception_string = ''
if record.exc_info:
exc_info = record.exc_info
record.exc_info = None
exception_string = ''.join(traceback.format_exception(*exc_info)).rstrip()
exception_string = '\n' + textwrap.indent(exception_string, _LOG_MESSAGE_INDENT)

# Format the extra log record fields, if there were some
# Just stringify them to JSON and color them gray
extra_string = ''
Expand All @@ -105,8 +138,19 @@ def format(self, record: logging.LogRecord) -> str:
f' {Fore.LIGHTBLACK_EX}({json.dumps(extra, ensure_ascii=False, default=str)}){Style.RESET_ALL}'
)

# Call the parent method so that it populates missing fields in the record
super().format(record)

# Format the actual log message
log_string = super().format(record)
log_string = self.formatMessage(record)

# Format the exception, if there is one
# Basically just print the traceback and indent it a bit
exception_string = ''
if record.exc_text:
exception_string = '\n' + textwrap.indent(record.exc_text.rstrip(), _LOG_MESSAGE_INDENT)
else:
exception_string = ''

if self.include_logger_name:
# Include logger name at the beginning of the log line
Expand Down
16 changes: 5 additions & 11 deletions src/crawlee/basic_crawler/_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from crawlee._autoscaling import AutoscaledPool
from crawlee._autoscaling.snapshotter import Snapshotter
from crawlee._autoscaling.system_status import SystemStatus
from crawlee._log_config import CrawleeLogFormatter
from crawlee._log_config import configure_logger, get_configured_log_level
from crawlee._request import BaseRequestData, Request, RequestState
from crawlee._types import BasicCrawlingContext, HttpHeaders, RequestHandlerRunResult, SendRequestFunction
from crawlee._utils.byte_size import ByteSize
Expand Down Expand Up @@ -203,20 +203,14 @@ def __init__(
self._retry_on_blocked = retry_on_blocked

if configure_logging:
handler = logging.StreamHandler()
handler.setFormatter(CrawleeLogFormatter())

root_logger = logging.getLogger()

for old_handler in root_logger.handlers[:]:
root_logger.removeHandler(old_handler)

root_logger.addHandler(handler)
root_logger.setLevel(logging.INFO if not sys.flags.dev_mode else logging.DEBUG)
configure_logger(root_logger, self._configuration, remove_old_handlers=True)

# Silence HTTPX logger
httpx_logger = logging.getLogger('httpx')
httpx_logger.setLevel(logging.WARNING if not sys.flags.dev_mode else logging.INFO)
httpx_logger.setLevel(
logging.DEBUG if get_configured_log_level(self._configuration) <= logging.DEBUG else logging.WARNING
)

if not _logger:
_logger = logging.getLogger(__name__)
Expand Down
6 changes: 3 additions & 3 deletions src/crawlee/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from datetime import timedelta
from typing import Annotated
from typing import Annotated, Literal

from pydantic import AliasChoices, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
Expand Down Expand Up @@ -51,14 +51,14 @@ class Configuration(BaseSettings):
] = False

log_level: Annotated[
int,
Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'],
Field(
validation_alias=AliasChoices(
'apify_log_level',
'crawlee_log_level',
)
),
] = 4 # INFO
] = 'INFO'

default_dataset_id: Annotated[
str,
Expand Down

0 comments on commit c21ffa1

Please sign in to comment.