Skip to content

Commit

Permalink
feat: support custom profile in playwright
Browse files Browse the repository at this point in the history
If the user_data_dir option is present in browser_options,
the browser is started with launch_persistent_context instead of launch,
and the user_data_dir option is passed through to Playwright.
  • Loading branch information
sherpya committed Aug 23, 2024
1 parent fc8223b commit 084e1e7
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 8 deletions.
21 changes: 13 additions & 8 deletions src/crawlee/browsers/playwright_browser_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,19 @@ async def new_browser(self) -> PlaywrightBrowserController:
if not self._playwright:
raise RuntimeError('Playwright browser plugin is not initialized.')

if self._browser_type == 'chromium':
browser = await self._playwright.chromium.launch(**self._browser_options)
elif self._browser_type == 'firefox':
browser = await self._playwright.firefox.launch(**self._browser_options)
elif self._browser_type == 'webkit':
browser = await self._playwright.webkit.launch(**self._browser_options)
else:
raise ValueError(f'Invalid browser type: {self._browser_type}')
if self._browser_type not in ('chromium', 'firefox', 'webkit'):
raise ValueError(f'Invalid browser type: {self._browser_type}.')

module = getattr(self._playwright, self._browser_type, None)
if module is None:
raise ValueError(f'Invalid browser type: {self._browser_type}.')

# Determine whether to launch browser with persistent context or not
launch_fn = 'launch_persistent_context' if 'user_data_dir' in self._browser_options else 'launch'
launch = getattr(module, launch_fn, None)
if launch is None:
raise RuntimeError(f'Playwright {self._browser_type} browser module does not implement {launch_fn}.')
browser = await launch(**self._browser_options)

return PlaywrightBrowserController(
browser,
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/browsers/test_playwright_browser_plugin_with_profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

from pathlib import Path
from typing import AsyncGenerator

import pytest

from crawlee.browsers import PlaywrightBrowserPlugin


@pytest.fixture()
async def plugin(tmp_path_factory) -> AsyncGenerator[PlaywrightBrowserPlugin, None]:
    """Yield an entered ``PlaywrightBrowserPlugin`` configured with a custom profile directory.

    The ``user_data_dir`` browser option points at a ``profile`` path inside a
    fresh temporary directory created through ``tmp_path_factory``. The plugin
    is used as an async context manager, so it is exited once the test that
    requested the fixture has finished.
    """
    profile_dir = tmp_path_factory.mktemp('data') / 'profile'
    options = {'user_data_dir': profile_dir}

    async with PlaywrightBrowserPlugin(browser_options=options) as browser_plugin:
        yield browser_plugin


async def test_new_browser(plugin: PlaywrightBrowserPlugin, httpbin: str) -> None:
    """Check that a browser launched with ``user_data_dir`` writes into that directory.

    A browser is created from the plugin, a page is opened and navigated to the
    ``httpbin`` URL, and everything is closed again. Afterwards the directory
    configured as ``user_data_dir`` must contain at least one entry.
    """
    browser_controller = await plugin.new_browser()

    # NOTE(review): the original test had `is_browser_connected` assertions
    # commented out before and after the page visit; presumably they do not
    # hold for a persistent context -- confirm before re-enabling them.
    try:
        page = await browser_controller.new_page()
        try:
            await page.goto(f'{httpbin}')
        finally:
            # Close the page even when navigation fails.
            await page.close()
    finally:
        # Always shut the browser down so a failing test does not leak a process.
        await browser_controller.close()

    # Index instead of `.get()`: a missing option should surface as a clear
    # KeyError rather than a TypeError raised by `Path(None)`.
    profile_dir = Path(plugin.browser_options['user_data_dir'])

    # The profile directory must have been populated by the browser.
    assert any(profile_dir.iterdir())

0 comments on commit 084e1e7

Please sign in to comment.