diff --git a/src/crawlee/basic_crawler/_basic_crawler.py b/src/crawlee/basic_crawler/_basic_crawler.py index 212649883..95dcc660a 100644 --- a/src/crawlee/basic_crawler/_basic_crawler.py +++ b/src/crawlee/basic_crawler/_basic_crawler.py @@ -851,6 +851,9 @@ async def __run_task_function(self) -> None: if not crawling_context.session: raise RuntimeError('SessionError raised in a crawling context without a session') from session_error + if self._error_handler: + await self._error_handler(crawling_context, session_error) + if self._should_retry_request(crawling_context, session_error): self._logger.warning('Encountered a session error, rotating session and retrying') diff --git a/tests/unit/basic_crawler/test_basic_crawler.py b/tests/unit/basic_crawler/test_basic_crawler.py index 5bc9c68f3..32d9cf099 100644 --- a/tests/unit/basic_crawler/test_basic_crawler.py +++ b/tests/unit/basic_crawler/test_basic_crawler.py @@ -9,7 +9,7 @@ from datetime import timedelta from pathlib import Path from typing import TYPE_CHECKING, Any -from unittest.mock import Mock +from unittest.mock import AsyncMock, Mock import httpx import pytest @@ -189,6 +189,26 @@ async def error_handler(context: BasicCrawlingContext, error: Exception) -> Requ assert calls[1][2] == 1 +async def test_calls_error_handler_for_session_errors() -> None: + crawler = BasicCrawler( + max_session_rotations=1, + ) + + @crawler.router.default_handler + async def handler(context: BasicCrawlingContext) -> None: + raise SessionError('Arbitrary session error for testing purposes') + + error_handler_mock = AsyncMock() + + @crawler.error_handler + async def error_handler(context: BasicCrawlingContext, error: Exception) -> None: + await error_handler_mock(context, error) + + await crawler.run(['https://crawlee.dev']) + + assert error_handler_mock.call_count == 1 + + async def test_handles_error_in_error_handler() -> None: crawler = BasicCrawler( request_provider=RequestList(['http://a.com/', 'http://b.com/', 'http://c.com/'])