Skip to content

Commit

Permalink
fix: terminate url on word boundary
Browse files Browse the repository at this point in the history
  • Loading branch information
nalgeon committed Jul 19, 2024
1 parent 8dc4c80 commit 6f5b8d1
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
2 changes: 1 addition & 1 deletion bot/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Fetcher:
"""Retrieves remote content over HTTP."""

# Matches non-quoted URLs in text
url_re = re.compile(r"(?:[^'\"]|^)(https?://\S+)(?:[^'\"]|$)")
url_re = re.compile(r"(?:[^'\"]|^)\b(https?://\S+)\b(?:[^'\"]|$)")
timeout = 3 # seconds

def __init__(self):
Expand Down
13 changes: 13 additions & 0 deletions tests/test_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,19 @@ async def test_nothing_to_substitute(self):
text = await self.fetcher.substitute_urls(src)
self.assertEqual(text, src)

def test_extract_urls(self):
    """Check which URLs ``_extract_urls`` picks out of free-form text.

    The cases pinned below: two bare URLs are both returned in order,
    a sentence-ending period is left out of the URL, and a URL wrapped
    in double quotes yields no result at all.
    """
    cases = (
        # Two bare URLs separated by ordinary words.
        (
            "Compare https://example.org/first and https://example.org/second",
            ["https://example.org/first", "https://example.org/second"],
        ),
        # The trailing period must not end up inside the extracted URL.
        ("Extract https://example.org/first.", ["https://example.org/first"]),
        # A double-quoted URL is not extracted.
        ('Extract "https://example.org/first"', []),
    )
    for source, expected in cases:
        found = self.fetcher._extract_urls(source)
        self.assertEqual(found, expected)


class ContentTest(unittest.TestCase):
def test_extract_as_is(self):
Expand Down

0 comments on commit 6f5b8d1

Please sign in to comment.