Skip to content

Commit

Permalink
Invoke page_init_callback after setting route (#205)
Browse files Browse the repository at this point in the history
  • Loading branch information
elacuesta authored Aug 7, 2023
1 parent f5d7fc2 commit af282e9
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -350,10 +350,11 @@ See [Handling page events](#handling-page-events).
### `playwright_page_init_callback`
Type `Optional[Union[Callable, str]]`, default `None`

A coroutine function (`async def`) to be invoked immediately after creating
a page for the request. It receives the page and the request as positional
arguments. Useful for initialization code. Invoked only for newly created
pages, ignored if the page for the request already exists (e.g. by passing
A coroutine function (`async def`) to be invoked for newly created pages.
It is called after attaching page event handlers and setting up internal route
handling, and before making any request. It receives the Playwright page and
the Scrapy request as positional arguments. Useful for initialization code.
Ignored if the page for the request already exists (e.g. by passing
`playwright_page`).

```python
Expand Down
33 changes: 33 additions & 0 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,10 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
),
)

await _maybe_execute_page_init_callback(
page=page, request=request, context_name=context_name, spider=spider
)

try:
result = await self._download_request_with_page(request, page, spider)
except Exception as ex:
Expand Down Expand Up @@ -618,6 +622,35 @@ async def _set_redirect_meta(request: Request, response: PlaywrightResponse) ->
request.meta["redirect_reasons"] = list(reversed(redirect_reasons))


async def _maybe_execute_page_init_callback(
    page: Page,
    request: Request,
    context_name: str,
    spider: Spider,
) -> None:
    """Run the request's page init callback, if one is set.

    The callback is read from the ``playwright_page_init_callback`` key of
    ``request.meta``, resolved through ``load_object`` and awaited with the
    page and the request as positional arguments.

    Any ``Exception`` raised while resolving or awaiting the callback is
    logged as a warning (with traceback) and is not propagated to the caller.
    """
    callback_ref = request.meta.get("playwright_page_init_callback")
    if not callback_ref:
        # No (or a falsy) callback configured for this request: nothing to do.
        return

    try:
        init_callback = load_object(callback_ref)
        await init_callback(page, request)
    except Exception as exc:
        # Best-effort: a failing init callback is reported, never re-raised.
        log_args = (context_name, repr(request), type(exc), str(exc))
        log_extra = {
            "spider": spider,
            "context_name": context_name,
            "scrapy_request_url": request.url,
            "scrapy_request_method": request.method,
            "exception": exc,
        }
        logger.warning(
            "[Context=%s] Page init callback exception for %s exc_type=%s exc_msg=%s",
            *log_args,
            extra=log_extra,
            exc_info=True,
        )


def _make_request_logger(context_name: str, spider: Spider) -> Callable:
async def _log_request(request: PlaywrightRequest) -> None:
referrer = await request.header_value("referer")
Expand Down

0 comments on commit af282e9

Please sign in to comment.