From 94448e4c183808967914edf8d875a9ad28208b32 Mon Sep 17 00:00:00 2001
From: Jan Buchar
Date: Mon, 3 Jun 2024 21:41:24 +0200
Subject: [PATCH] refactor: introduce a fixture for httpbin url (#168)

This is the non-controversial part of #167.
---
 tests/unit/browsers/test_browser_pool.py      | 40 +++++++++----------
 tests/unit/browsers/test_playwright_plugin.py | 16 ++++----
 tests/unit/conftest.py                        |  5 +++
 tests/unit/http_crawler/test_http_crawler.py  |  8 ++--
 tests/unit/httpx_client/test_httpx_client.py  |  8 ++--
 .../test_playwright_crawler.py                |  6 +--
 6 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/tests/unit/browsers/test_browser_pool.py b/tests/unit/browsers/test_browser_pool.py
index 69d67509f..74d035c91 100644
--- a/tests/unit/browsers/test_browser_pool.py
+++ b/tests/unit/browsers/test_browser_pool.py
@@ -6,26 +6,26 @@ from crawlee.browsers.playwright_browser_plugin import PlaywrightBrowserPlugin


-async def test_new_page_single_plugin() -> None:
+async def test_new_page_single_plugin(httpbin: str) -> None:
     plugin = PlaywrightBrowserPlugin(browser_type='chromium')

     async with BrowserPool([plugin]) as browser_pool:
         assert browser_pool.plugins == [plugin]

         page_1 = await browser_pool.new_page()
-        await page_1.page.goto('https://httpbin.org/get')
+        await page_1.page.goto(f'{httpbin}/get')
         assert page_1.browser_type == 'chromium'
-        assert page_1.page.url == 'https://httpbin.org/get'
+        assert page_1.page.url == f'{httpbin}/get'
         assert '<html' in await page_1.page.content()


-async def test_new_page_multiple_plugins() -> None:
+async def test_new_page_multiple_plugins(httpbin: str) -> None:
     plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
     plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')

@@ -33,25 +33,25 @@ async def test_new_page_multiple_plugins() -> None:
         assert browser_pool.plugins == [plugin_chromium, plugin_firefox]

         page_1 = await browser_pool.new_page()
-        await page_1.page.goto('https://httpbin.org/get')
+        await page_1.page.goto(f'{httpbin}/get')
         assert page_1.browser_type == 'chromium'
-        assert page_1.page.url == 'https://httpbin.org/get'
+        assert page_1.page.url == f'{httpbin}/get'
         assert '<html' in await page_1.page.content()


-async def test_new_page_with_each_plugin() -> None:
+async def test_new_page_with_each_plugin(httpbin: str) -> None:
     plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
     plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')

@@ -63,22 +63,22 @@ async def test_new_page_with_each_plugin() -> None:
         assert pages[0].browser_type == 'chromium'
         assert pages[1].browser_type == 'firefox'

-        await pages[0].page.goto('https://httpbin.org/get')
-        assert pages[0].page.url == 'https://httpbin.org/get'
+        await pages[0].page.goto(f'{httpbin}/get')
+        assert pages[0].page.url == f'{httpbin}/get'
         assert '<html' in await pages[0].page.content()


-async def test_resource_management() -> None:
+async def test_resource_management(httpbin: str) -> None:
     playwright_plugin = PlaywrightBrowserPlugin(browser_type='chromium')

     async with BrowserPool([playwright_plugin]) as browser_pool:
         page = await browser_pool.new_page()
-        await page.page.goto('https://httpbin.org/get')
-        assert page.page.url == 'https://httpbin.org/get'
+        await page.page.goto(f'{httpbin}/get')
+        assert page.page.url == f'{httpbin}/get'
         assert '<html' in await page.page.content()
diff --git a/tests/unit/browsers/test_playwright_plugin.py b/tests/unit/browsers/test_playwright_plugin.py
--- a/tests/unit/browsers/test_playwright_plugin.py
+++ b/tests/unit/browsers/test_playwright_plugin.py
@@ ... @@
-async def test_new_page() -> None:
+async def test_new_page(httpbin: str) -> None:
     async with PlaywrightBrowserPlugin() as plugin:
         # Get a new page with default options
         page_1 = await plugin.new_page()
-        await page_1.goto('https://httpbin.org/get')
-        assert page_1.url == 'https://httpbin.org/get'
+        await page_1.goto(f'{httpbin}/get')
+        assert page_1.url == f'{httpbin}/get'
         assert '<html' in await page_1.content()
@@ ... @@ async def test_new_page() -> None:
     async with PlaywrightBrowserPlugin(page_options=page_options) as plugin:
         # Get a new page with custom options
         page_2 = await plugin.new_page()
-        await page_2.goto('https://httpbin.org/user-agent')
-        assert page_2.url == 'https://httpbin.org/user-agent'
+        await page_2.goto(f'{httpbin}/user-agent')
+        assert page_2.url == f'{httpbin}/user-agent'
         assert '<html' in await page_2.content()


-async def test_resource_management() -> None:
+async def test_resource_management(httpbin: str) -> None:
     async with PlaywrightBrowserPlugin() as plugin:
         assert plugin.browser is not None
         # Browser should be connected
         assert plugin.browser.is_connected() is True

         page = await plugin.new_page()
-        await page.goto('https://httpbin.org/get')
-        assert page.url == 'https://httpbin.org/get'
+        await page.goto(f'{httpbin}/get')
+        assert page.url == f'{httpbin}/get'
         assert '<html' in await page.content()
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ ... @@ def memory_storage_client(tmp_path: Path) -> MemoryStorageClient:
         crawlee_local_storage_dir=str(tmp_path),  # type: ignore
     )
     return MemoryStorageClient(cfg)
+
+
+@pytest.fixture()
+def httpbin() -> str:
+    return os.environ.get('HTTPBIN_URL', 'https://httpbin.org')
diff --git a/tests/unit/http_crawler/test_http_crawler.py b/tests/unit/http_crawler/test_http_crawler.py
index 355dbcff1..c2bb11c97 100644
--- a/tests/unit/http_crawler/test_http_crawler.py
+++ b/tests/unit/http_crawler/test_http_crawler.py
@@ -107,7 +107,7 @@ async def test_handles_server_error(
     assert server['500_endpoint'].called


-async def test_stores_cookies() -> None:
+async def test_stores_cookies(httpbin: str) -> None:
     visit = Mock()
     track_session_usage = Mock()

@@ -115,9 +115,9 @@ async def test_stores_cookies() -> None:
         crawler = HttpCrawler(
             request_provider=RequestList(
                 [
-                    'https://httpbin.org/cookies/set?a=1',
-                    'https://httpbin.org/cookies/set?b=2',
-                    'https://httpbin.org/cookies/set?c=3',
+                    f'{httpbin}/cookies/set?a=1',
+                    f'{httpbin}/cookies/set?b=2',
+                    f'{httpbin}/cookies/set?c=3',
                 ]
             ),
             session_pool=session_pool,
diff --git a/tests/unit/httpx_client/test_httpx_client.py b/tests/unit/httpx_client/test_httpx_client.py
index 5dad75583..0f7eca5a4 100644
--- a/tests/unit/httpx_client/test_httpx_client.py
+++ b/tests/unit/httpx_client/test_httpx_client.py
@@ -39,9 +39,9 @@ async def proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, None]:
     yield proxy_info


-async def test_proxy(proxy: ProxyInfo) -> None:
+async def test_proxy(proxy: ProxyInfo, httpbin: str) -> None:
     client = HttpxClient()
-    request = Request(url='https://httpbin.org/status/222', unique_key='42', id='42', user_data={})
+    request = Request(url=f'{httpbin}/status/222', unique_key='42', id='42', user_data={})

     async with Statistics() as statistics:
         result = await client.crawl(request, None, proxy, statistics)
@@ -65,9 +65,9 @@ async def disabled_proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, None]:
     yield proxy_info


-async def test_proxy_disabled(disabled_proxy: ProxyInfo) -> None:
+async def test_proxy_disabled(disabled_proxy: ProxyInfo, httpbin: str) -> None:
     client = HttpxClient()
-    request = Request(url='https://httpbin.org/status/222', unique_key='42', id='42', user_data={})
+    request = Request(url=f'{httpbin}/status/222', unique_key='42', id='42', user_data={})

     with pytest.raises(ProxyError):
         async with Statistics() as statistics:
diff --git a/tests/unit/playwright_crawler/test_playwright_crawler.py b/tests/unit/playwright_crawler/test_playwright_crawler.py
index 37048f344..6729ac5e8 100644
--- a/tests/unit/playwright_crawler/test_playwright_crawler.py
+++ b/tests/unit/playwright_crawler/test_playwright_crawler.py
@@ -9,8 +9,8 @@ from crawlee.playwright_crawler import PlaywrightCrawlingContext


-async def test_basic_request() -> None:
-    request_provider = RequestList(['https://httpbin.org/'])
+async def test_basic_request(httpbin: str) -> None:
+    request_provider = RequestList([f'{httpbin}/'])
     crawler = PlaywrightCrawler(request_provider=request_provider)
     result: dict = {}

@@ -24,6 +24,6 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
     await crawler.run()

-    assert result.get('request_url') == result.get('page_url') == 'https://httpbin.org/'
+    assert result.get('request_url') == result.get('page_url') == f'{httpbin}/'
     assert 'httpbin' in result.get('page_title', '')
     assert '<html' in result.get('page_content', '')
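
The heart of this change is the new `httpbin` fixture in `tests/unit/conftest.py`: tests now build endpoint URLs from the fixture instead of hardcoding `https://httpbin.org`, so the whole suite can be pointed at a self-hosted httpbin instance via the `HTTPBIN_URL` environment variable. A minimal sketch of the pattern, assuming only what the diff above shows — the fixture body is taken verbatim from the patch, while `test_fetches_get_endpoint` and the use of httpx are illustrative, not part of the change:

import os

import httpx
import pytest


@pytest.fixture()
def httpbin() -> str:
    # Resolve the base URL once per test; CI or local runs can override it
    # (e.g. HTTPBIN_URL=http://localhost:8080) to stay off the public service.
    return os.environ.get('HTTPBIN_URL', 'https://httpbin.org')


def test_fetches_get_endpoint(httpbin: str) -> None:
    # Hypothetical consumer: endpoint paths are appended to the fixture value,
    # mirroring the f'{httpbin}/get' pattern used throughout the patch.
    response = httpx.get(f'{httpbin}/get')
    assert response.status_code == 200

Running e.g. `HTTPBIN_URL=http://localhost:8080 pytest tests/unit` would then exercise every converted test against the local instance.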