
fix: call error_handler for SessionError (#557)
Closes: #546
vdusek authored Oct 1, 2024
1 parent 734c797 commit e75ac4b
Showing 2 changed files with 24 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/crawlee/basic_crawler/_basic_crawler.py
@@ -851,6 +851,9 @@ async def __run_task_function(self) -> None:
             if not crawling_context.session:
                 raise RuntimeError('SessionError raised in a crawling context without a session') from session_error
 
+            if self._error_handler:
+                await self._error_handler(crawling_context, session_error)
+
             if self._should_retry_request(crawling_context, session_error):
                 self._logger.warning('Encountered a session error, rotating session and retrying')
 
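For context, here is a minimal sketch (not part of the commit) of the user-facing behavior this hunk changes, built from the public API exercised in the new unit test below; the `crawlee.basic_crawler` and `crawlee.errors` import paths are assumptions based on the file paths in this diff. Before this fix, a handler registered via `@crawler.error_handler` was skipped when the request handler raised a `SessionError`; now it is awaited before the retry decision.

```python
# Minimal sketch, assuming crawlee's public import paths at the time of this commit.
import asyncio

from crawlee.basic_crawler import BasicCrawler, BasicCrawlingContext
from crawlee.errors import SessionError


async def main() -> None:
    crawler = BasicCrawler(max_session_rotations=1)

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # Simulate a blocked session, as the new unit test does.
        raise SessionError('Arbitrary session error for testing purposes')

    @crawler.error_handler
    async def error_handler(context: BasicCrawlingContext, error: Exception) -> None:
        # With this fix, SessionError instances now reach this handler too.
        print(f'error_handler called with: {error!r}')

    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
```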
22 changes: 21 additions & 1 deletion tests/unit/basic_crawler/test_basic_crawler.py
@@ -9,7 +9,7 @@
 from datetime import timedelta
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
-from unittest.mock import Mock
+from unittest.mock import AsyncMock, Mock
 
 import httpx
 import pytest
@@ -189,6 +189,26 @@ async def error_handler(context: BasicCrawlingContext, error: Exception) -> Requ
     assert calls[1][2] == 1
 
 
+async def test_calls_error_handler_for_session_errors() -> None:
+    crawler = BasicCrawler(
+        max_session_rotations=1,
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: BasicCrawlingContext) -> None:
+        raise SessionError('Arbitrary session error for testing purposes')
+
+    error_handler_mock = AsyncMock()
+
+    @crawler.error_handler
+    async def error_handler(context: BasicCrawlingContext, error: Exception) -> None:
+        await error_handler_mock(context, error)
+
+    await crawler.run(['https://crawlee.dev'])
+
+    assert error_handler_mock.call_count == 1
+
+
 async def test_handles_error_in_error_handler() -> None:
     crawler = BasicCrawler(
         request_provider=RequestList(['http://a.com/', 'http://b.com/', 'http://c.com/']),
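A side note on the `AsyncMock` added to the imports above: unlike `Mock`, calling an `AsyncMock` returns an awaitable, which is what lets the test delegate to `error_handler_mock` from inside an async error handler. A standalone, stdlib-only sketch of that mechanism:

```python
import asyncio
from unittest.mock import AsyncMock


async def main() -> None:
    mock = AsyncMock()
    # Calling an AsyncMock yields a coroutine, so it can stand in for
    # async callbacks such as the crawler's error handler.
    await mock('context', ValueError('boom'))

    mock.assert_awaited_once()   # verifies it was actually awaited
    assert mock.call_count == 1  # the same check the test above performs


asyncio.run(main())
```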
