Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into close-inactive-contexts
Browse files (browse the repository at this point in the history)
  • Loading branch information
elacuesta committed Aug 29, 2023
2 parents a609ae5 + f1004bd commit 12eb537
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.0.30
+current_version = 0.0.31
commit = True
tag = True

Expand Down
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# scrapy-playwright changelog


### [v0.0.31](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.31) (2023-08-28)

* Do not fail when getting referer header for debug log messages (#225)
* Do not override headers with values from asset requests (#226)


### [v0.0.30](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.30) (2023-08-17)

* Fix page_init_callback duplication (#222)
Expand Down
2 changes: 1 addition & 1 deletion scrapy_playwright/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-__version__ = "0.0.30"
+__version__ = "0.0.31"
13 changes: 7 additions & 6 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,13 +524,8 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
self.browser_type_name, playwright_request, headers
)
)
# the request that reaches the callback should contain the final headers
headers.clear()
headers.update(final_headers)
del final_headers

# if the request is triggered by scrapy, not playwright
original_playwright_method: str = playwright_request.method
# if the current request corresponds to the original scrapy one
if (
playwright_request.url.rstrip("/") == url.rstrip("/")
and playwright_request.is_navigation_request()
Expand All @@ -539,7 +534,13 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
overrides["method"] = method
if body:
overrides["post_data"] = body.decode(encoding)
# the request that reaches the callback should contain the final headers
headers.clear()
headers.update(final_headers)

del final_headers

original_playwright_method: str = playwright_request.method
try:
await route.continue_(**overrides)
if overrides.get("method"):
Expand Down
7 changes: 6 additions & 1 deletion scrapy_playwright/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,14 @@ async def use_scrapy_headers(
scrapy_headers_str.setdefault("user-agent", playwright_headers.get("user-agent"))

if playwright_request.is_navigation_request():
# if referer header is set via playwright_page_goto_kwargs
if referer := playwright_headers.get("referer"):
scrapy_headers_str.setdefault("referer", referer)

# otherwise it fails with playwright.helper.Error: NS_ERROR_NET_RESET
if browser_type == "firefox":
# otherwise this fails with playwright.helper.Error: NS_ERROR_NET_RESET
scrapy_headers_str["host"] = urlparse(playwright_request.url).netloc

return scrapy_headers_str

# override user agent, for consistency with other requests
Expand Down

0 comments on commit 12eb537

Please sign in to comment.