Skip to content

Commit

Permalink
Refactor _download_request_with_page
Browse files (browse the repository at this point in the history)
  • Loading branch information
elacuesta committed Sep 5, 2023
1 parent a1618db commit e253ebf
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
@@ -349,13 +349,17 @@ async def _download_request_with_page(
if request.meta.get("playwright_include_page"):
request.meta["playwright_page"] = page

context_name = request.meta.setdefault("playwright_context", DEFAULT_CONTEXT_NAME)

start_time = time()
context_name = request.meta.get("playwright_context")
page_goto_kwargs = request.meta.get("playwright_page_goto_kwargs") or {}
page_goto_kwargs.pop("url", None)

start_time = time()
response = await page.goto(url=request.url, **page_goto_kwargs)
if response is None:
if response is not None:
await _set_redirect_meta(request=request, response=response)
headers = Headers(await response.all_headers())
headers.pop("Content-Encoding", None)
else:
logger.warning(
"Navigating to %s returned None, the response"
" will have empty headers and status 200",
@@ -368,10 +372,7 @@ async def _download_request_with_page(
},
)
headers = Headers()
else:
await _set_redirect_meta(request=request, response=response)
headers = Headers(await response.all_headers())
headers.pop("Content-Encoding", None)

await self._apply_page_methods(page, request, spider)
body_str = await _get_page_content(
page=page,
@@ -383,12 +384,11 @@ async def _download_request_with_page(
request.meta["download_latency"] = time() - start_time

server_ip_address = None
with suppress(AttributeError, KeyError, TypeError, ValueError):
server_addr = await response.server_addr()
server_ip_address = ip_address(server_addr["ipAddress"])

with suppress(AttributeError):
if response is not None:
request.meta["playwright_security_details"] = await response.security_details()
with suppress(KeyError, TypeError, ValueError):
server_addr = await response.server_addr()
server_ip_address = ip_address(server_addr["ipAddress"])

if not request.meta.get("playwright_include_page"):
await page.close()
Expand Down

0 comments on commit e253ebf

Please sign in to comment.