Crawler fails when the Content-Type header includes a charset parameter

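I have a website that returns the following header: 'Content-Type': 'text/html; charset=utf-8'. The crawler compared the header to the exact string "text/html", so this page was rejected with an "Invalid content type" exception. This change checks instead whether "text/html" appears in the header value, so HTML responses that carry a charset parameter are accepted.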
dairiley · Jan 23, 2024 · 1aa3280 · 1aa3280
1 parent 5b1b997
commit 1aa3280
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/lib/shared/layers/python-sdk/python/genai_core/websites/crawler.py b/lib/shared/layers/python-sdk/python/genai_core/websites/crawler.py
@@ -101,7 +101,7 @@ def parse_url(url: str):
     base_url = f"{root_url_parse.scheme}://{root_url_parse.netloc}"
 
     response = requests.get(url, timeout=20)
-    if response.headers["Content-Type"] != "text/html":
+    if "text/html" not in response.headers["Content-Type"]:
         raise Exception(f"Invalid content type {response.headers['Content-Type']}")
     soup = BeautifulSoup(response.content, "html.parser")
     content = soup.text