Skip to content

Commit

Permalink
refactor: introduce a fixture for httpbin url (#168)
Browse files Browse the repository at this point in the history
This is the non-controversial part of #167.
  • Loading branch information
janbuchar authored Jun 3, 2024
1 parent 200ebfa commit 94448e4
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 39 deletions.
40 changes: 20 additions & 20 deletions tests/unit/browsers/test_browser_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,52 +6,52 @@
from crawlee.browsers.playwright_browser_plugin import PlaywrightBrowserPlugin


async def test_new_page_single_plugin(httpbin: str) -> None:
    """A pool with a single Chromium plugin should serve every new page from that plugin.

    Args:
        httpbin: Base URL of the httpbin service (fixture).
    """
    plugin = PlaywrightBrowserPlugin(browser_type='chromium')

    async with BrowserPool([plugin]) as browser_pool:
        assert browser_pool.plugins == [plugin]

        page_1 = await browser_pool.new_page()
        await page_1.page.goto(f'{httpbin}/get')
        assert page_1.browser_type == 'chromium'
        assert page_1.page.url == f'{httpbin}/get'
        assert '<html' in await page_1.page.content()  # there is some HTML content

        page_2 = await browser_pool.new_page()
        await page_2.page.goto(f'{httpbin}/status/200')
        assert page_2.browser_type == 'chromium'
        assert page_2.page.url == f'{httpbin}/status/200'
        # NOTE(review): the original asserted on page_1 here, re-checking the first page
        # instead of the one just opened — changed to inspect page_2.
        assert '<html' in await page_2.page.content()  # there is some HTML content


async def test_new_page_multiple_plugins(httpbin: str) -> None:
    """Pages requested from a multi-plugin pool should rotate through the plugins round-robin.

    With [chromium, firefox] registered, consecutive new_page() calls are expected to
    yield chromium, firefox, chromium, ...

    Args:
        httpbin: Base URL of the httpbin service (fixture).
    """
    plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
    plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')

    async with BrowserPool([plugin_chromium, plugin_firefox]) as browser_pool:
        assert browser_pool.plugins == [plugin_chromium, plugin_firefox]

        page_1 = await browser_pool.new_page()
        await page_1.page.goto(f'{httpbin}/get')
        assert page_1.browser_type == 'chromium'
        assert page_1.page.url == f'{httpbin}/get'
        assert '<html' in await page_1.page.content()  # there is some HTML content

        page_2 = await browser_pool.new_page()
        await page_2.page.goto(f'{httpbin}/headers')
        assert page_2.browser_type == 'firefox'
        assert page_2.page.url == f'{httpbin}/headers'
        assert '<html' in await page_2.page.content()  # there is some HTML content

        page_3 = await browser_pool.new_page()
        await page_3.page.goto(f'{httpbin}/user-agent')
        assert page_3.browser_type == 'chromium'
        assert page_3.page.url == f'{httpbin}/user-agent'
        assert '<html' in await page_3.page.content()  # there is some HTML content


async def test_new_page_with_each_plugin() -> None:
async def test_new_page_with_each_plugin(httpbin: str) -> None:
plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')

Expand All @@ -63,22 +63,22 @@ async def test_new_page_with_each_plugin() -> None:
assert pages[0].browser_type == 'chromium'
assert pages[1].browser_type == 'firefox'

await pages[0].page.goto('https://httpbin.org/get')
assert pages[0].page.url == 'https://httpbin.org/get'
await pages[0].page.goto(f'{httpbin}/get')
assert pages[0].page.url == f'{httpbin}/get'
assert '<html' in await pages[0].page.content() # there is some HTML content

await pages[1].page.goto('https://httpbin.org/headers')
assert pages[1].page.url == 'https://httpbin.org/headers'
await pages[1].page.goto(f'{httpbin}/headers')
assert pages[1].page.url == f'{httpbin}/headers'
assert '<html' in await pages[1].page.content()


async def test_resource_management() -> None:
async def test_resource_management(httpbin: str) -> None:
playwright_plugin = PlaywrightBrowserPlugin(browser_type='chromium')

async with BrowserPool([playwright_plugin]) as browser_pool:
page = await browser_pool.new_page()
await page.page.goto('https://httpbin.org/get')
assert page.page.url == 'https://httpbin.org/get'
await page.page.goto(f'{httpbin}/get')
assert page.page.url == f'{httpbin}/get'
assert '<html' in await page.page.content() # there is some HTML content

# The page should be closed
Expand Down
16 changes: 8 additions & 8 deletions tests/unit/browsers/test_playwright_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from crawlee.browsers.playwright_browser_plugin import PlaywrightBrowserPlugin


async def test_new_page() -> None:
async def test_new_page(httpbin: str) -> None:
async with PlaywrightBrowserPlugin() as plugin:
# Get a new page with default options
page_1 = await plugin.new_page()
await page_1.goto('https://httpbin.org/get')
assert page_1.url == 'https://httpbin.org/get'
await page_1.goto(f'{httpbin}/get')
assert page_1.url == f'{httpbin}/get'
assert '<html' in await page_1.content() # there is some HTML content

page_options = {
Expand All @@ -22,20 +22,20 @@ async def test_new_page() -> None:
async with PlaywrightBrowserPlugin(page_options=page_options) as plugin:
# Get a new page with custom options
page_2 = await plugin.new_page()
await page_2.goto('https://httpbin.org/user-agent')
assert page_2.url == 'https://httpbin.org/user-agent'
await page_2.goto(f'{httpbin}/user-agent')
assert page_2.url == f'{httpbin}/user-agent'
assert '<html' in await page_2.content() # there is some HTML content


async def test_resource_management() -> None:
async def test_resource_management(httpbin: str) -> None:
async with PlaywrightBrowserPlugin() as plugin:
assert plugin.browser is not None
# Browser should be connected
assert plugin.browser.is_connected() is True

page = await plugin.new_page()
await page.goto('https://httpbin.org/get')
assert page.url == 'https://httpbin.org/get'
await page.goto(f'{httpbin}/get')
assert page.url == f'{httpbin}/get'
assert '<html' in await page.content() # there is some HTML content

# The page should be closed
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,8 @@ def memory_storage_client(tmp_path: Path) -> MemoryStorageClient:
crawlee_local_storage_dir=str(tmp_path), # type: ignore
)
return MemoryStorageClient(cfg)


@pytest.fixture()
def httpbin() -> str:
    """Base URL of the httpbin service used by the tests.

    Defaults to the public https://httpbin.org instance; set the HTTPBIN_URL
    environment variable to point the suite at a local deployment instead.
    """
    return os.getenv('HTTPBIN_URL', 'https://httpbin.org')
8 changes: 4 additions & 4 deletions tests/unit/http_crawler/test_http_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,17 @@ async def test_handles_server_error(
assert server['500_endpoint'].called


async def test_stores_cookies() -> None:
async def test_stores_cookies(httpbin: str) -> None:
visit = Mock()
track_session_usage = Mock()

session_pool = SessionPool(max_pool_size=1)
crawler = HttpCrawler(
request_provider=RequestList(
[
'https://httpbin.org/cookies/set?a=1',
'https://httpbin.org/cookies/set?b=2',
'https://httpbin.org/cookies/set?c=3',
f'{httpbin}/cookies/set?a=1',
f'{httpbin}/cookies/set?b=2',
f'{httpbin}/cookies/set?c=3',
]
),
session_pool=session_pool,
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/httpx_client/test_httpx_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ async def proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, None]:
yield proxy_info


async def test_proxy(proxy: ProxyInfo) -> None:
async def test_proxy(proxy: ProxyInfo, httpbin: str) -> None:
client = HttpxClient()
request = Request(url='https://httpbin.org/status/222', unique_key='42', id='42', user_data={})
request = Request(url=f'{httpbin}/status/222', unique_key='42', id='42', user_data={})

async with Statistics() as statistics:
result = await client.crawl(request, None, proxy, statistics)
Expand All @@ -65,9 +65,9 @@ async def disabled_proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, Non
yield proxy_info


async def test_proxy_disabled(disabled_proxy: ProxyInfo) -> None:
async def test_proxy_disabled(disabled_proxy: ProxyInfo, httpbin: str) -> None:
client = HttpxClient()
request = Request(url='https://httpbin.org/status/222', unique_key='42', id='42', user_data={})
request = Request(url=f'{httpbin}/status/222', unique_key='42', id='42', user_data={})

with pytest.raises(ProxyError):
async with Statistics() as statistics:
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/playwright_crawler/test_playwright_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from crawlee.playwright_crawler import PlaywrightCrawlingContext


async def test_basic_request() -> None:
request_provider = RequestList(['https://httpbin.org/'])
async def test_basic_request(httpbin: str) -> None:
request_provider = RequestList([f'{httpbin}/'])
crawler = PlaywrightCrawler(request_provider=request_provider)
result: dict = {}

Expand All @@ -24,6 +24,6 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:

await crawler.run()

assert result.get('request_url') == result.get('page_url') == 'https://httpbin.org/'
assert result.get('request_url') == result.get('page_url') == f'{httpbin}/'
assert 'httpbin' in result.get('page_title', '')
assert '<html' in result.get('page_content', '') # there is some HTML content

0 comments on commit 94448e4

Please sign in to comment.