From 00d1b8619066125049de6ecf8dfbc2a35dcbf404 Mon Sep 17 00:00:00 2001
From: Puneet Saraswat
Date: Sat, 2 Sep 2023 11:25:18 -0500
Subject: [PATCH] cleanup

---
 querent/collectors/webscaper/web_scraper_collector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/querent/collectors/webscaper/web_scraper_collector.py b/querent/collectors/webscaper/web_scraper_collector.py
index 62212c2e..a6c3e9f4 100644
--- a/querent/collectors/webscaper/web_scraper_collector.py
+++ b/querent/collectors/webscaper/web_scraper_collector.py
@@ -32,7 +32,7 @@ async def scrape_website(self, website_url: str):
         async with ClientSession(connector=TCPConnector(ssl=False)) as session:
             async with session.get(website_url) as response:
                 content = await response.text()
-                max_length = len(" ".join(content.split(" ")[:600]))
+                max_length = len(content)
                 return CollectedBytes(
                     data=content[:max_length], file=None, error=None
                 )