From 1cc5e289d289da6b350e8a1764b89eb6575625a7 Mon Sep 17 00:00:00 2001
From: Florian Strzelecki <florian.strzelecki@gmail.com>
Date: Sun, 13 Jan 2019 23:17:04 +0100
Subject: [PATCH 1/9] core: 2-bytes unicode characters are not truncated
 anymore

To send a long message, Sopel needs to split the message in multiple
lines of text, and used to split the bytestring version of the
unicode text.

This would cause an issue when multi-byte unicode characters got cut in
half, resulting in garbled output.

In this commit, I split the unicode string properly, and I truncate it
until its encoded version is below the max length.

See also the test/test_tools.py file for all the use cases handled by
this change.
---
 sopel/bot.py            | 28 ++++---------
 sopel/tools/__init__.py | 34 +++++++++++++++
 test/test_tools.py      | 93 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+), 20 deletions(-)
 create mode 100644 test/test_tools.py

diff --git a/sopel/bot.py b/sopel/bot.py
index 28be3539b3..7b2b09801d 100644
--- a/sopel/bot.py
+++ b/sopel/bot.py
@@ -290,27 +290,15 @@ def say(self, text, recipient, max_messages=1):
         message will contain the entire remainder, which may be truncated by
         the server.
         """
-        # We're arbitrarily saying that the max is 400 bytes of text when
-        # messages will be split. Otherwise, we'd have to acocunt for the bot's
-        # hostmask, which is hard.
-        max_text_length = 400
-        # Encode to bytes, for propper length calculation
-        if isinstance(text, unicode):
-            encoded_text = text.encode('utf-8')
-        else:
-            encoded_text = text
         excess = ''
-        if max_messages > 1 and len(encoded_text) > max_text_length:
-            last_space = encoded_text.rfind(' '.encode('utf-8'), 0, max_text_length)
-            if last_space == -1:
-                excess = encoded_text[max_text_length:]
-                encoded_text = encoded_text[:max_text_length]
-            else:
-                excess = encoded_text[last_space + 1:]
-                encoded_text = encoded_text[:last_space]
-        # We'll then send the excess at the end
-        # Back to unicode again, so we don't screw things up later.
-        text = encoded_text.decode('utf-8')
+        if not isinstance(text, unicode):
+            # Make sure we are dealing with unicode string
+            text = text.decode('utf-8')
+
+        if max_messages > 1:
+            # Manage multi-line only when needed
+            text, excess = tools.get_sendable_message(text)
+
         try:
             self.sending.acquire()
 
diff --git a/sopel/tools/__init__.py b/sopel/tools/__init__.py
index b210f71c94..738a46af84 100644
--- a/sopel/tools/__init__.py
+++ b/sopel/tools/__init__.py
@@ -152,6 +152,40 @@ def get_nickname_command_pattern(command):
         """.format(command=command)
 
 
+def get_sendable_message(text, max_length=400):
+    """Get a sendable ``text`` message, with its excess when needed.
+
+    :param str text: unicode string of text to send
+    :param int max_length: maximum length, in bytes, of the sendable text
+    :return: a tuple of two values, the sendable text and its excess text
+
+    We're arbitrarily saying that the max is 400 bytes of text when
+    messages will be split. Otherwise, we'd have to account for the bot's
+    hostmask, which is hard.
+
+    The ``max_length`` is in **bytes**, but multi-byte characters are never
+    cut in half: the unicode string is shortened (at a space when possible)
+    until its UTF-8 encoded version fits within ``max_length``.
+    """
+    unicode_max_length = max_length
+    excess = ''
+
+    while len(text.encode('utf-8')) > max_length:
+        last_space = text.rfind(' ', 0, unicode_max_length)
+        if last_space == -1:
+            # No last space, just split where it is possible
+            excess = text[unicode_max_length:] + excess
+            text = text[:unicode_max_length]
+            # Decrease max length for the unicode string
+            unicode_max_length = unicode_max_length - 1
+        else:
+            # Split at the last space; keep what earlier passes cut off
+            excess = text[last_space:] + excess
+            text = text[:last_space]
+
+    return text, excess.lstrip()
+
+
 def deprecated(old):
     def new(*args, **kwargs):
         print('Function %s is deprecated.' % old.__name__, file=sys.stderr)
diff --git a/test/test_tools.py b/test/test_tools.py
new file mode 100644
index 0000000000..c4289b738a
--- /dev/null
+++ b/test/test_tools.py
@@ -0,0 +1,93 @@
+# coding=utf-8
+"""Tests sopel.tools"""
+from __future__ import unicode_literals, absolute_import, print_function, division
+
+
+from sopel import tools
+
+
+def test_get_sendable_message_default():
+    """A short message is returned as is, without any excess."""
+    message = 'aaaa'
+    sendable, leftover = tools.get_sendable_message(message)
+
+    assert (sendable, leftover) == (message, '')
+
+
+def test_get_sendable_message_limit():
+    """A message of exactly 400 bytes fits without being split."""
+    message = 'a' * 400
+    sendable, leftover = tools.get_sendable_message(message)
+
+    assert (sendable, leftover) == (message, '')
+
+
+def test_get_sendable_message_excess():
+    """The 401st character of an unbroken message becomes the excess."""
+    message = 'a' * 401
+    sendable, leftover = tools.get_sendable_message(message)
+
+    assert (sendable, leftover) == ('a' * 400, 'a')
+
+
+def test_get_sendable_message_excess_space():
+    """A message that is too long is split at its last usable space."""
+    head, tail = 'a' * 200, 'b' * 200
+    message = head + ' ' + tail
+    sendable, leftover = tools.get_sendable_message(message)
+
+    assert (sendable, leftover) == (head, tail)
+
+
+def test_get_sendable_message_excess_space_limit():
+    """With a space right after the first 400 bytes, neither part keeps it."""
+    head, tail = 'a' * 400, 'b' * 200
+    message = head + ' ' + tail
+    sendable, leftover = tools.get_sendable_message(message)
+
+    assert (sendable, leftover) == (head, tail)
+
+
+def test_get_sendable_message_excess_bigger():
+    """With no space within the limit, the message is cut at 400 bytes."""
+    message = 'a' * 401 + ' ' + 'b' * 1000
+    sendable, leftover = tools.get_sendable_message(message)
+
+    assert sendable == 'a' * 400
+    assert leftover == 'a ' + 'b' * 1000
+
+
+def test_get_sendable_message_optional():
+    """An explicit ``max_length`` overrides the default 400-byte limit."""
+    # (message, (sendable text, excess)) with a limit of 3 bytes
+    expectations = [
+        ('aaaa', ('aaa', 'a')),
+        ('aaa bbb', ('aaa', 'bbb')),
+        ('aa bb cc', ('aa', 'bb cc')),
+    ]
+
+    for message, expected in expectations:
+        sendable, leftover = tools.get_sendable_message(message, 3)
+        assert (sendable, leftover) == expected
+
+
+def test_get_sendable_message_two_bytes():
+    """Two-byte characters are never cut in half by the byte limit."""
+
+    # (message, max_length, (sendable text, excess))
+    expectations = [
+        # no space: hard cut on a character boundary
+        ('αααα', 4, ('αα', 'αα')),
+        # an odd byte limit cannot hold half a character
+        ('αααα', 5, ('αα', 'αα')),
+        # a space is available before the limit
+        ('α ααα', 4, ('α', 'ααα')),
+        # splitting at the space leaves a first part of exactly 4 bytes
+        ('αα αα', 4, ('αα', 'αα')),
+        # the only space leaves a first part that is still too long
+        ('ααα α', 4, ('αα', 'α α')),
+    ]
+
+    for message, max_length, expected in expectations:
+        sendable, leftover = tools.get_sendable_message(message, max_length)
+        assert (sendable, leftover) == expected

From ac5bd8b11ee72c3959da2b43253d712380c099e9 Mon Sep 17 00:00:00 2001
From: Florian Strzelecki <florian.strzelecki@gmail.com>
Date: Sun, 13 Jan 2019 23:49:12 +0100
Subject: [PATCH 2/9] core: irc.bot write method properly truncates unicode
 messages

---
 sopel/irc.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/sopel/irc.py b/sopel/irc.py
index 7b259ab08d..aa0597bcae 100644
--- a/sopel/irc.py
+++ b/sopel/irc.py
@@ -138,13 +138,24 @@ def write(self, args, text=None):
             # CR-LF (Carriage Return - Line Feed) pair, and these messages SHALL
             # NOT exceed 512 characters in length, counting all characters
             # including the trailing CR-LF. Thus, there are 510 characters
-            # maximum allowed for the command and its parameters.  There is no
+            # maximum allowed for the command and its parameters. There is no
             # provision for continuation of message lines.
 
+            max_length = unicode_max_length = 510
             if text is not None:
-                temp = (' '.join(args) + ' :' + text)[:510] + '\r\n'
+                temp = (' '.join(args) + ' :' + text)
             else:
-                temp = ' '.join(args)[:510] + '\r\n'
+                temp = ' '.join(args)
+
+            # The max length of 512 is in bytes, not unicode
+            while len(temp.encode('utf-8')) > max_length:
+                temp = temp[:unicode_max_length]
+                unicode_max_length = unicode_max_length - 1
+
+            # Ends the message with CR-LF
+            temp = temp + '\r\n'
+
+            # Log and output the message
             self.log_raw(temp, '>>')
             self.send(temp.encode('utf-8'))
         finally:

From 9d83b54aa151780643334dc974c829eb3df9bb4b Mon Sep 17 00:00:00 2001
From: Humorous Baby <44451911+HumorBaby@users.noreply.github.com>
Date: Tue, 29 Jan 2019 15:25:15 -0500
Subject: [PATCH 3/9] cli: fix configpath issue for wizard

Now checks if the config file passed to `-c` already has a `.cfg` extension before appending one.

Fixes #1463
---
 sopel/config/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sopel/config/__init__.py b/sopel/config/__init__.py
index 473fff3521..ea72ef81e8 100644
--- a/sopel/config/__init__.py
+++ b/sopel/config/__init__.py
@@ -212,7 +212,7 @@ def _modules(self):
 
 def _wizard(section, config=None):
     dotdir = os.path.expanduser('~/.sopel')
-    configpath = os.path.join(dotdir, (config or 'default') + '.cfg')
+    configpath = os.path.join(dotdir, ((config or 'default.cfg') + ('.cfg' if config and not config.endswith('.cfg') else '')))
     if section == 'all':
         _create_config(configpath)
     elif section == 'mod':

From 163a74f2c1bdd920c5df07fe2e34824388725ec7 Mon Sep 17 00:00:00 2001
From: dgw <dgw@technobabbl.es>
Date: Tue, 29 Jan 2019 16:34:38 -0600
Subject: [PATCH 4/9] travis: allow building maintenance branches

Maintenance branches will always look like Major.Minor.X, so it seemed
easier to give them a separate branch rule from release tags.
---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index d49bfd81ed..7a8b80d28e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ git:
 branches:
   only:
     - master
+    - /^\d+\.\d+\.x$/  # allows building maintenance branches
     - /^v?\d+\.\d+(\.\d+)?(-\S*)?$/  # allows building version tags
 sudo: false  # Enables running on faster infrastructure.
 cache:

From 4e8f28d13aa3772eabeed06ad92e4788921ac241 Mon Sep 17 00:00:00 2001
From: Florian Strzelecki <florian.strzelecki@gmail.com>
Date: Wed, 23 Jan 2019 20:13:07 +0100
Subject: [PATCH 5/9] coretasks: split AUTHENTICATE token in 400-byte chunks
 (fix #975)

---
 sopel/coretasks.py | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/sopel/coretasks.py b/sopel/coretasks.py
index 0098fedd1c..b2c8f8adf2 100644
--- a/sopel/coretasks.py
+++ b/sopel/coretasks.py
@@ -539,6 +539,37 @@ def recieve_cap_ack_sasl(bot):
     bot.write(('AUTHENTICATE', mech))
 
 
+def send_authenticate(bot, token):
+    """Authenticate against the server by sending the SASL ``token``.
+
+    :param bot: instance of IRC bot that must authenticate
+    :param str token: authentication token
+
+    The ``token`` is UTF-8 encoded, then base64 encoded, and the result is
+    sent through ``AUTHENTICATE`` commands carrying at most 400 bytes each.
+    When the encoded payload's length is a multiple of 400 (so the last
+    chunk is full, or there is no chunk at all), a final ``AUTHENTICATE +``
+    (empty line) is sent so the server knows the payload is complete.
+
+    .. seealso::
+
+        https://ircv3.net/specs/extensions/sasl-3.1.html#the-authenticate-command
+
+    """
+    max_chunk = 400
+    encoded = base64.b64encode(token.encode('utf-8'))
+    total = len(encoded)
+
+    # one AUTHENTICATE command per slice of at most ``max_chunk`` bytes
+    for start in range(0, total, max_chunk):
+        bot.write(('AUTHENTICATE', encoded[start:start + max_chunk]))
+
+    # an exact multiple of ``max_chunk`` needs a terminating empty (+) command
+    remainder = total % max_chunk
+    if remainder == 0:
+        bot.write(('AUTHENTICATE', '+'))
+
+
 @sopel.module.event('AUTHENTICATE')
 @sopel.module.rule('.*')
 def auth_proceed(bot, trigger):
@@ -549,8 +580,7 @@ def auth_proceed(bot, trigger):
     sasl_username = bot.config.core.auth_username or bot.nick
     sasl_password = bot.config.core.auth_password
     sasl_token = '\0'.join((sasl_username, sasl_username, sasl_password))
-    # Spec says we do a base 64 encode on the SASL stuff
-    bot.write(('AUTHENTICATE', base64.b64encode(sasl_token.encode('utf-8'))))
+    send_authenticate(bot, sasl_token)
 
 
 @sopel.module.event(events.RPL_SASLSUCCESS)

From ee9fe88ac8b4bddca3edca3603f1b8562403d37a Mon Sep 17 00:00:00 2001
From: Rusty Bower <rusty@rustybower.com>
Date: Wed, 9 Jan 2019 09:50:29 -0600
Subject: [PATCH 6/9] wiktionary: fix query logic (see #1214)

wiktionary: fixing .lower() logic
---
 sopel/modules/wiktionary.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sopel/modules/wiktionary.py b/sopel/modules/wiktionary.py
index 0b7c4d81c5..2f33844665 100644
--- a/sopel/modules/wiktionary.py
+++ b/sopel/modules/wiktionary.py
@@ -99,8 +99,11 @@ def wiktionary(bot, trigger):
 
     _etymology, definitions = wikt(word)
     if not definitions:
-        bot.say("Couldn't get any definitions for %s." % word)
-        return
+        # Cast word to lower to check in case of mismatched user input
+        _etymology, definitions = wikt(word.lower())
+        if not definitions:
+            bot.say("Couldn't get any definitions for %s." % word)
+            return
 
     result = format(word, definitions)
     if len(result) < 150:

From 6bd094e8af26e158f324f1c8720325f85f854145 Mon Sep 17 00:00:00 2001
From: dgw <dgw@technobabbl.es>
Date: Fri, 1 Feb 2019 02:33:43 -0600
Subject: [PATCH 7/9] split the hair again on supported IPython versions

I can't WAIT to drop the ipython module from core.

Less than a year after dropping py2 support, upstream has also dumped
support for Python below 3.5. They're moving too fast for our slow-ass
release cycle (and it's not even an important dependency).
---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2494b29236..88c8349cd4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ praw<6.0.0
 pyenchant; python_version < '3.7'
 geoip2
 ipython<6.0; python_version < '3.3'
-ipython>=6.0,<7.0; python_version >= '3.3'
+ipython>=6.0,<7.0; python_version >= '3.3' and python_version < '3.5'
+ipython>=7.0,<8.0; python_version >= '3.5'
 requests>=2.0.0,<3.0.0
 dnspython

From 4ac49457ae1e7b8c2125853430a1dd859c47f4f2 Mon Sep 17 00:00:00 2001
From: dgw <dgw@technobabbl.es>
Date: Fri, 1 Feb 2019 02:52:33 -0600
Subject: [PATCH 8/9] Get specific about dnspython requirement

dnspython dropped support for Python 3.3 in version 1.16.0, so we have
to special-case that. Otherwise, py2.7 and 3.4+ are supported until
version 2.0 comes out.

This is the part where I maybe come to regret adding this dependency,
isn't it? Might be worth trying to make dnspython optional later.
---
 requirements.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2494b29236..b1dd623b1d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,6 @@ geoip2
 ipython<6.0; python_version < '3.3'
 ipython>=6.0,<7.0; python_version >= '3.3'
 requests>=2.0.0,<3.0.0
-dnspython
+dnspython<2.0; python_version >= '2.7' and python_version < '3.0'
+dnspython<1.16.0; python_version == '3.3'
+dnspython<3.0; python_version >= '3.4'

From 90a091614f7428fc6f419f5a50810850446b36f6 Mon Sep 17 00:00:00 2001
From: dgw <dgw@technobabbl.es>
Date: Fri, 1 Feb 2019 03:08:31 -0600
Subject: [PATCH 9/9] Release 6.6.2

---
 NEWS              | 13 +++++++++++++
 sopel/__init__.py |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index db7aebdfa2..94635c0689 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,16 @@
+Changes between 6.6.1 and 6.6.2
+===============================
+Module changes:
+* wiktionary tries harder to get a valid result before erroring out
+
+Core changes:
+* Fixed an inconsistency between interpretations of the --config option in
+  normal operation vs. wizard mode
+* Requirement specifiers tightened up to reduce/prevent pip trying to install
+  incompatible dependency versions (IPython, dnspython)
+* SASL token is now split when required according to spec
+* Multi-byte Unicode characters are now handled correctly when splitting lines
+
 Changes between 6.6.0 and 6.6.1
 ===============================
 Module changes:
diff --git a/sopel/__init__.py b/sopel/__init__.py
index bdd8ba6ed2..f796233139 100644
--- a/sopel/__init__.py
+++ b/sopel/__init__.py
@@ -30,7 +30,7 @@
 import traceback
 import signal
 
-__version__ = '6.6.1'
+__version__ = '6.6.2'
 
 
 def _version_info(version=__version__):