From 08670d2fdb1a88d05f2eed15d0e9751194ae209f Mon Sep 17 00:00:00 2001
From: James Gerity
Date: Wed, 29 Nov 2023 18:08:53 -0500
Subject: [PATCH 1/2] wikipedia: fix unreliable urlparse() usage

Co-authored-by: dgw
Co-authored-by: Florian Strzelecki
---
 sopel/builtins/wikipedia.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/sopel/builtins/wikipedia.py b/sopel/builtins/wikipedia.py
index f55d8c3b2..148d10e1a 100644
--- a/sopel/builtins/wikipedia.py
+++ b/sopel/builtins/wikipedia.py
@@ -327,12 +327,16 @@ def mw_image_description(server, image):
 def mw_info(bot, trigger, match=None):
     """Retrieves and outputs a snippet of the linked page."""
     server = match.group(1)
-    page_info = urlparse(match.group(2))
-    article = unquote(page_info.path)
+    page_info = urlparse(match.group(0))
+    # in Python 3.9+ this can be str.removeprefix() instead, but we're confident that
+    # "/wiki/" is at the start of the path anyway since it's part of the pattern
+    trim_offset = len("/wiki/")
+    article = unquote(page_info.path)[trim_offset:]
     section = unquote(page_info.fragment)
 
     if section:
-        if section.startswith('cite_note-'):  # Don't bother trying to retrieve a snippet when cite-note is linked
+        if section.startswith('cite_note-'):
+            # Don't bother trying to retrieve a section snippet if cite-note is linked
             say_snippet(bot, trigger, server, article, show_url=False)
         elif section.startswith('/media'):
             # gh2316: media fragments are usually images; try to get an image description

From 43b56a42f86f24a888648c701bf7f5dd367f27a6 Mon Sep 17 00:00:00 2001
From: James Gerity
Date: Wed, 29 Nov 2023 18:09:05 -0500
Subject: [PATCH 2/2] wikipedia: special-case pages in Special:* namespace

Co-authored-by: dgw
---
 sopel/builtins/wikipedia.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sopel/builtins/wikipedia.py b/sopel/builtins/wikipedia.py
index 148d10e1a..e90402d9b 100644
--- a/sopel/builtins/wikipedia.py
+++ b/sopel/builtins/wikipedia.py
@@ -334,6 +334,12 @@ def mw_info(bot, trigger, match=None):
     article = unquote(page_info.path)[trim_offset:]
     section = unquote(page_info.fragment)
 
+    if article.startswith("Special:"):
+        # The MediaWiki query API does not include pages in the Special:
+        # namespace, so there's no point bothering when we know this will error
+        LOGGER.debug("Ignoring page in Special: namespace")
+        return False
+
     if section:
         if section.startswith('cite_note-'):
             # Don't bother trying to retrieve a section snippet if cite-note is linked
@@ -367,6 +373,11 @@ def wikipedia(bot, trigger):
     if not query:
         bot.reply('What do you want me to look up?')
         return plugin.NOLIMIT
+
+    if query.startswith("Special:"):
+        bot.reply("Sorry, the MediaWiki API doesn't support querying the Special: namespace.")
+        return False
+
     server = lang + '.wikipedia.org'
     query = mw_search(server, query, 1)
     if not query: