diff --git a/bot/fetcher.py b/bot/fetcher.py
index 375b93f..5352685 100644
--- a/bot/fetcher.py
+++ b/bot/fetcher.py
@@ -9,7 +9,10 @@ class Fetcher:
     """Retrieves remote content over HTTP."""
 
     # Matches non-quoted URLs in text
-    url_re = re.compile(r"(?:[^'\"]|^)(https?://\S+)(?:[^'\"]|$)")
+    # The quote checks are zero-width lookarounds so that the character
+    # separating two URLs is not consumed by the first match. The trailing
+    # \b drops punctuation that follows a URL, e.g. a sentence-ending period.
+    url_re = re.compile(r"(?<!['\"])\b(https?://\S+)\b(?!['\"])")
     timeout = 3  # seconds
 
     def __init__(self):
diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py
index 180ec25..7a7fdfe 100644
--- a/tests/test_fetcher.py
+++ b/tests/test_fetcher.py
@@ -61,6 +61,27 @@ async def test_nothing_to_substitute(self):
         text = await self.fetcher.substitute_urls(src)
         self.assertEqual(text, src)
 
+    def test_extract_urls(self):
+        # Several URLs in one text are returned in order of appearance
+        text = "Compare https://example.org/first and https://example.org/second"
+        urls = self.fetcher._extract_urls(text)
+        self.assertEqual(urls, ["https://example.org/first", "https://example.org/second"])
+
+        # URLs separated by a single character must both be found
+        text = "https://example.org/first https://example.org/second"
+        urls = self.fetcher._extract_urls(text)
+        self.assertEqual(urls, ["https://example.org/first", "https://example.org/second"])
+
+        # Trailing punctuation is not part of the URL
+        text = "Extract https://example.org/first."
+        urls = self.fetcher._extract_urls(text)
+        self.assertEqual(urls, ["https://example.org/first"])
+
+        # Quoted URLs are ignored
+        text = 'Extract "https://example.org/first"'
+        urls = self.fetcher._extract_urls(text)
+        self.assertEqual(urls, [])
+
 
 class ContentTest(unittest.TestCase):
     def test_extract_as_is(self):