From d11eab3773832d635fdad8341031fc899812288b Mon Sep 17 00:00:00 2001 From: Nikolay Panov Date: Sun, 25 Nov 2018 11:42:11 -0800 Subject: [PATCH] Core refactoring and cleanup. --- .gitignore | 41 +--- .travis.yml | 15 +- MANIFEST.in | 1 - Makefile | 11 ++ README.md | 45 +++-- pyproject.toml | 25 +++ setup.cfg | 2 - setup.py | 29 --- tests/__init__.py | 0 tests/test_deconstruct_url.py | 32 +++ tests/test_generic_url_cleanup.py | 20 ++ tests/test_normalize_fragment.py | 20 ++ tests/test_normalize_host.py | 19 ++ tests/test_normalize_path.py | 39 ++++ tests/test_normalize_port.py | 13 ++ tests/test_normalize_query.py | 21 ++ tests/test_normalize_scheme.py | 13 ++ tests/test_normalize_userinfo.py | 19 ++ tests/test_provide_url_scheme.py | 20 ++ tests/test_reconstruct_url.py | 38 ++++ tests/test_url_normalize.py | 194 +++++++------------ tox.ini | 36 ++-- url_normalize/__init__.py | 48 ++++- url_normalize/tools.py | 100 ++++++++++ url_normalize/url_normalize.py | 311 ++++++++++++++++++------------ 25 files changed, 752 insertions(+), 360 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/test_deconstruct_url.py create mode 100644 tests/test_generic_url_cleanup.py create mode 100644 tests/test_normalize_fragment.py create mode 100644 tests/test_normalize_host.py create mode 100644 tests/test_normalize_path.py create mode 100644 tests/test_normalize_port.py create mode 100644 tests/test_normalize_query.py create mode 100644 tests/test_normalize_scheme.py create mode 100644 tests/test_normalize_userinfo.py create mode 100644 tests/test_provide_url_scheme.py create mode 100644 tests/test_reconstruct_url.py mode change 100755 => 100644 tests/test_url_normalize.py create mode 100644 url_normalize/tools.py mode change 100755 => 100644 url_normalize/url_normalize.py diff --git a/.gitignore b/.gitignore 
index 8f3f281..8ea4eca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,38 +1,9 @@ -*.py[cod] - -# C extensions -*.so - -# Packages -*.egg -*.eggs -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 - -# Installer logs -pip-log.txt - -# Unit test / coverage reports .coverage +.*cache .tox -nosetests.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject .vscode -.cache +dist +*.lock +__pycache__ +*.pyc +*.egg-info diff --git a/.travis.yml b/.travis.yml index a2e4c62..ae0f0ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,11 @@ language: python +sudo: required +dist: xenial python: - - "3.6" + - "2.7" + - "3.7" install: - - "pip install coverage" - - "pip install coveralls" -script: - - "coverage run --source=url_normalize setup.py test" -after_success: - coveralls + - "pip install coveralls poetry" + - "poetry install -v" +script: "pytest" +after_success: coveralls diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index bb3ec5f..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include README.md diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f20d6d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +tox: + @tox + +test: + @py.test + +build: + @poetry build + +publish: + @poetry publish diff --git a/README.md b/README.md index 25d6a40..1699c1c 100644 --- a/README.md +++ b/README.md @@ -5,21 +5,23 @@ url-normalize [![Coverage Status](https://coveralls.io/repos/github/niksite/url-normalize/badge.svg?branch=master)](https://coveralls.io/github/niksite/url-normalize?branch=master) URI Normalization function: - * Take care of IDN domains. - * Always provide the URI scheme in lowercase characters. - * Always provide the host, if any, in lowercase characters. - * Only perform percent-encoding where it is essential. - * Always use uppercase A-through-F characters when percent-encoding. - * Prevent dot-segments appearing in non-relative URI paths. 
- * For schemes that define a default authority, use an empty authority if the default is desired. - * For schemes that define an empty path to be equivalent to a path of "/", use "/". - * For schemes that define a port, use an empty port if the default is desired - * All portions of the URI must be utf-8 encoded NFC from Unicode strings + +* Take care of IDN domains. +* Always provide the URI scheme in lowercase characters. +* Always provide the host, if any, in lowercase characters. +* Only perform percent-encoding where it is essential. +* Always use uppercase A-through-F characters when percent-encoding. +* Prevent dot-segments appearing in non-relative URI paths. +* For schemes that define a default authority, use an empty authority if the default is desired. +* For schemes that define an empty path to be equivalent to a path of "/", use "/". +* For schemes that define a port, use an empty port if the default is desired +* All portions of the URI must be utf-8 encoded NFC from Unicode strings Inspired by Sam Ruby's urlnorm.py: http://intertwingly.net/blog/2004/08/04/Urlnorm Example: -``` + +```sh $ pip install url-normalize Collecting url-normalize ... @@ -30,17 +32,20 @@ Python 3.6.1 (default, Jul 8 2017, 05:00:20) Type "help", "copyright", "credits" or "license" for more information. > from url_normalize import url_normalize > print(url_normalize('www.foo.com:80/foo')) -> http://www.foo.com/foo +> https://www.foo.com/foo ``` History: - * 07 Jul 2017: Python 2/3 compatibility. - * 05 Jan 2016: Python 3 compatibility - * 29 Dec 2015: PEP8, setup.py - * 10 Mar 2010: support for shebang (#!) urls - * 28 Feb 2010: using 'http' schema by default when appropriate - * 28 Feb 2010: added handling of IDN domains - * 28 Feb 2010: code pep8-zation - * 27 Feb 2010: forked from Sam Ruby's urlnorm.py + +* 1.4.0: A bit of code refactoring and cleanup +* 1.3.2: Support empty string and double slash urls (//domain.tld) +* 1.3.1: Same code support both Python 3 and Python 2. 
+* 1.3: Python 3 compatibility +* 1.2: PEP8, setup.py +* 1.1.2: support for shebang (#!) urls +* 1.1.1: using 'http' schema by default when appropriate +* 1.1: added handling of IDN domains +* 1.0: code pep8-zation +* 0.1: forked from Sam Ruby's urlnorm.py License: "Python" (PSF) License diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8ed8581 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,25 @@ +[tool.poetry] +name = "url-normalize" +version = "1.4.0" +description = "URL normalization for Python" +authors = ["Nikolay Panov "] +license = "PSF" +readme = "README.md" +repository = "https://github.com/niksite/url-normalize" +homepage = "https://github.com/niksite/url-normalize" +keywords = ['url', 'normalization', 'normalize'] + +[tool.poetry.dependencies] +python = "~2.7 || ^3.6" +six = "^1.11" + +[tool.poetry.dev-dependencies] +pytest = "^3.0" +pytest-cov = "^2.6" +tox = "^3.5" +pytest-flakes = "^4.0" +pytest-socket = "^0.3.1" + +[build-system] +requires = ["poetry>=0.12"] +build-backend = "poetry.masonry.api" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b7e4789..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[aliases] -test=pytest diff --git a/setup.py b/setup.py deleted file mode 100644 index cafd0a7..0000000 --- a/setup.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from setuptools import setup - -setup( - name="url-normalize", - version="1.3.3", - author="Nikolay Panov", - author_email="github@niksite.ru", - description="URL normalization for Python", - long_description=open("README.md").read(), - license="Python", - url="https://github.com/niksite/url-normalize", - packages=['url_normalize'], - classifiers=[ - "Environment :: Web Environment", - "Intended Audience :: Developers", - "Operating System :: OS Independent", - "Topic :: Text Processing :: Indexing", - "Topic :: Utilities", - "Topic :: Internet", - "Topic :: Software Development :: Libraries 
:: Python Modules", - "Programming Language :: Python", - "Programming Language :: Python :: 3" - ], - install_requires=['future'], - setup_requires=['pytest-runner'], - tests_require=['pytest'], -) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_deconstruct_url.py b/tests/test_deconstruct_url.py new file mode 100644 index 0000000..94f008b --- /dev/null +++ b/tests/test_deconstruct_url.py @@ -0,0 +1,32 @@ +"""Deconstruct url tests.""" +from url_normalize.tools import deconstruct_url, URL + +EXPECTED_DATA = { + "http://site.com": URL( + fragment="", + host="site.com", + path="", + port="", + query="", + scheme="http", + userinfo="", + ), + "http://user@www.example.com:8080/path/index.html?param=val#fragment": URL( + fragment="fragment", + host="www.example.com", + path="/path/index.html", + port="8080", + query="param=val", + scheme="http", + userinfo="user@", + ), +} + + +def test_deconstruct_url_result_is_expected(): + """Assert we got expected results from the deconstruct_url function.""" + for url, expected in EXPECTED_DATA.items(): + + result = deconstruct_url(url) + + assert result == expected, url diff --git a/tests/test_generic_url_cleanup.py b/tests/test_generic_url_cleanup.py new file mode 100644 index 0000000..a6ef393 --- /dev/null +++ b/tests/test_generic_url_cleanup.py @@ -0,0 +1,20 @@ +"""Tests for generic_url_cleanup function.""" +from url_normalize.url_normalize import generic_url_cleanup + +EXPECTED_DATA = { + "//site/#!fragment": "//site/?_escaped_fragment_=fragment", + "//site/?utm_source=some source¶m=value": "//site/?param=value", + "//site/?utm_source=some source": "//site/", + "//site/?param=value&utm_source=some source": "//site/?param=value", + "//site/page": "//site/page", + "//site/?& ": "//site/", +} + + +def test_generic_url_cleanup_result_is_expected(): + """Assert we got expected results from the generic_url_cleanup function.""" + for url, expected in 
EXPECTED_DATA.items(): + + result = generic_url_cleanup(url) + + assert result == expected, url diff --git a/tests/test_normalize_fragment.py b/tests/test_normalize_fragment.py new file mode 100644 index 0000000..9607c5b --- /dev/null +++ b/tests/test_normalize_fragment.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +"""Tests for normalize_fragment function.""" +from url_normalize.url_normalize import normalize_fragment + +EXPECTED_DATA = { + "": "", + "fragment": "fragment", + "пример": "%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80", + "!fragment": "%21fragment", + "~fragment": "~fragment", +} + + +def test_normalize_fragment_result_is_expected(): + """Assert we got expected results from the normalize_fragment function.""" + for url, expected in EXPECTED_DATA.items(): + + result = normalize_fragment(url) + + assert result == expected, url diff --git a/tests/test_normalize_host.py b/tests/test_normalize_host.py new file mode 100644 index 0000000..09a8756 --- /dev/null +++ b/tests/test_normalize_host.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +"""Tests for normalize_host function.""" +from url_normalize.url_normalize import normalize_host + +EXPECTED_DATA = { + "site.com": "site.com", + "SITE.COM": "site.com", + "site.com.": "site.com", + "пример.испытание": "xn--e1afmkfd.xn--80akhbyknj4f", +} + + +def test_normalize_host_result_is_expected(): + """Assert we got expected results from the normalize_host function.""" + for url, expected in EXPECTED_DATA.items(): + + result = normalize_host(url) + + assert result == expected, url diff --git a/tests/test_normalize_path.py b/tests/test_normalize_path.py new file mode 100644 index 0000000..b2b72a5 --- /dev/null +++ b/tests/test_normalize_path.py @@ -0,0 +1,39 @@ +"""Tests for normalize_path function.""" +from url_normalize.url_normalize import normalize_path + +EXPECTED_DATA = { + "": "/", + "/": "/", + "..": "/", + "/foo/bar/.": "/foo/bar/", + "/foo/bar/./": "/foo/bar/", + "/foo/bar/..": "/foo/", + "/foo/bar/../": "/foo/", + 
"/foo/bar/../baz": "/foo/baz", + "/foo/bar/../..": "/", + "/foo/bar/../../": "/", + "/foo/bar/../../baz": "/baz", + "/foo/bar/../../../baz": "/baz", + "/foo/bar/../../../../baz": "/baz", + "/./foo": "/foo", + "/../foo": "/foo", + "/foo.": "/foo.", + "/.foo": "/.foo", + "/foo..": "/foo..", + "/..foo": "/..foo", + "/./../foo": "/foo", + "/./foo/.": "/foo/", + "/foo/./bar": "/foo/bar", + "/foo/../bar": "/bar", + "/foo//": "/foo/", + "/foo///bar//": "/foo/bar/", +} + + +def test_normalize_host_result_is_expected(): + """Assert we got expected results from the normalize_path function.""" + for url, expected in EXPECTED_DATA.items(): + + result = normalize_path(url, "http") + + assert result == expected, url diff --git a/tests/test_normalize_port.py b/tests/test_normalize_port.py new file mode 100644 index 0000000..78eeb8e --- /dev/null +++ b/tests/test_normalize_port.py @@ -0,0 +1,13 @@ +"""Tests for normalize_port function.""" +from url_normalize.url_normalize import normalize_port + +EXPECTED_DATA = {"8080": "8080", "": "", "80": "", "string": "string"} + + +def test_normalize_port_result_is_expected(): + """Assert we got expected results from the normalize_port function.""" + for url, expected in EXPECTED_DATA.items(): + + result = normalize_port(url, "http") + + assert result == expected, url diff --git a/tests/test_normalize_query.py b/tests/test_normalize_query.py new file mode 100644 index 0000000..f963ef0 --- /dev/null +++ b/tests/test_normalize_query.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +"""Tests for normalize_query function.""" + +from url_normalize.url_normalize import normalize_query + +EXPECTED_DATA = { + "": "", + "param1=val1¶m2=val2": "param1=val1¶m2=val2", + "Ç=Ç": "%C3%87=%C3%87", + "%C3%87=%C3%87": "%C3%87=%C3%87", + "q=C%CC%A7": "q=%C3%87", +} + + +def test_normalize_query_result_is_expected(): + """Assert we got expected results from the normalize_query function.""" + for url, expected in EXPECTED_DATA.items(): + + result = 
normalize_query(url) + + assert result == expected, url diff --git a/tests/test_normalize_scheme.py b/tests/test_normalize_scheme.py new file mode 100644 index 0000000..b615e3a --- /dev/null +++ b/tests/test_normalize_scheme.py @@ -0,0 +1,13 @@ +"""Tests for normalize_scheme function.""" +from url_normalize.url_normalize import normalize_scheme + +EXPECTED_DATA = {"http": "http", "HTTP": "http"} + + +def test_normalize_scheme_result_is_expected(): + """Assert we got expected results from the normalize_scheme function.""" + for url, expected in EXPECTED_DATA.items(): + + result = normalize_scheme(url) + + assert result == expected, url diff --git a/tests/test_normalize_userinfo.py b/tests/test_normalize_userinfo.py new file mode 100644 index 0000000..05e9b28 --- /dev/null +++ b/tests/test_normalize_userinfo.py @@ -0,0 +1,19 @@ +"""Tests for normalize_userinfo function.""" +from url_normalize.url_normalize import normalize_userinfo + +EXPECTED_DATA = { + ":@": "", + "": "", + "@": "", + "user:password@": "user:password@", + "user@": "user@", +} + + +def test_normalize_userinfo_result_is_expected(): + """Assert we got expected results from the normalize_userinfo function.""" + for url, expected in EXPECTED_DATA.items(): + + result = normalize_userinfo(url) + + assert result == expected, url diff --git a/tests/test_provide_url_scheme.py b/tests/test_provide_url_scheme.py new file mode 100644 index 0000000..abde7de --- /dev/null +++ b/tests/test_provide_url_scheme.py @@ -0,0 +1,20 @@ +"""Tests for provide_url_scheme function.""" +from url_normalize.url_normalize import provide_url_scheme + +EXPECTED_DATA = { + "": "", + "-": "-", + "/file/path": "/file/path", + "//site/path": "https://site/path", + "ftp://site/": "ftp://site/", + "site/page": "https://site/page", +} + + +def test_provide_url_scheme_result_is_expected(): + """Assert we got expected results from the provide_url_scheme function.""" + for url, expected in EXPECTED_DATA.items(): + + result = 
provide_url_scheme(url) + + assert result == expected, url diff --git a/tests/test_reconstruct_url.py b/tests/test_reconstruct_url.py new file mode 100644 index 0000000..bfaa0fc --- /dev/null +++ b/tests/test_reconstruct_url.py @@ -0,0 +1,38 @@ +"""Reconstruct url tests.""" +from url_normalize.tools import reconstruct_url, URL + +EXPECTED_DATA = ( + ( + URL( + fragment="", + host="site.com", + path="", + port="", + query="", + scheme="http", + userinfo="", + ), + "http://site.com", + ), + ( + URL( + fragment="fragment", + host="www.example.com", + path="/path/index.html", + port="8080", + query="param=val", + scheme="http", + userinfo="user@", + ), + "http://user@www.example.com:8080/path/index.html?param=val#fragment", + ), +) + + +def test_deconstruct_url_result_is_expected(): + """Assert we got expected results from the deconstruct_url function.""" + for url, expected in EXPECTED_DATA: + + result = reconstruct_url(url) + + assert result == expected, url diff --git a/tests/test_url_normalize.py b/tests/test_url_normalize.py old mode 100755 new mode 100644 index a9f2016..a98fb99 --- a/tests/test_url_normalize.py +++ b/tests/test_url_normalize.py @@ -1,126 +1,82 @@ # -*- coding: utf-8 -*- -"""URI normalizator tests.""" -from __future__ import unicode_literals - +"""Integrations tests.""" from url_normalize import url_normalize + EXPECTED_RESULTS = { - '': - '', # empty string - '/foo/bar/.': - '/foo/bar/', - '/foo/bar/./': - '/foo/bar/', - '/foo/bar/..': - '/foo/', - '/foo/bar/../': - '/foo/', - '/foo/bar/../baz': - '/foo/baz', - '/foo/bar/../..': - '/', - '/foo/bar/../../': - '/', - '/foo/bar/../../baz': - '/baz', - '/foo/bar/../../../baz': - '/baz', # was: '/../baz', - '/foo/bar/../../../../baz': - '/baz', - '/./foo': - '/foo', - '/../foo': - '/foo', # was: '/../foo', - '/foo.': - '/foo.', - '/.foo': - '/.foo', - '/foo..': - '/foo..', - '/..foo': - '/..foo', - '/./../foo': - '/foo', # was: '/../foo', - '/./foo/.': - '/foo/', - '/foo/./bar': - '/foo/bar', - 
'/foo/../bar': - '/bar', - '/foo//': - '/foo/', - '/foo///bar//': - '/foo/bar/', - '//www.foo.com/': - 'https://www.foo.com/', - 'http://www.foo.com:80/foo': - 'http://www.foo.com/foo', - 'http://www.foo.com:8000/foo': - 'http://www.foo.com:8000/foo', - 'http://www.foo.com./foo/bar.html': - 'http://www.foo.com/foo/bar.html', - 'http://www.foo.com.:81/foo': - 'http://www.foo.com:81/foo', - 'http://www.foo.com/%7ebar': - 'http://www.foo.com/~bar', - 'http://www.foo.com/%7Ebar': - 'http://www.foo.com/~bar', - 'ftp://user:pass@ftp.foo.net/foo/bar': - 'ftp://user:pass@ftp.foo.net/foo/bar', - 'http://USER:pass@www.Example.COM/foo/bar': - 'http://USER:pass@www.example.com/foo/bar', - 'http://www.example.com./': - 'http://www.example.com/', - '-': - '-', - 'пример.испытание/Служебная:Search/Test': - 'http://xn--e1afmkfd.xn--80akhbyknj4f/' - '%D0%A1%D0%BB%D1%83%D0%B6%D0%B5%D0%B1%' - 'D0%BD%D0%B0%D1%8F:Search/Test', - 'http://lifehacker.com/#!5753509/' - 'hello-world-this-is-the-new-lifehacker': - 'http://lifehacker.com/?_escaped_fragment' - '_=5753509/hello-world-this-is-the-new-lifehacker', + "/../foo": "/foo", # was: '/../foo', + "/./../foo": "/foo", # was: '/../foo', + "/./foo": "/foo", + "/./foo/.": "/foo/", + "//www.foo.com/": "https://www.foo.com/", + "/foo/../bar": "/bar", + "/foo/./bar": "/foo/bar", + "/foo//": "/foo/", + "/foo///bar//": "/foo/bar/", + "/foo/bar/..": "/foo/", + "/foo/bar/../..": "/", + "/foo/bar/../../../../baz": "/baz", + "/foo/bar/../../../baz": "/baz", # was: '/../baz', + "/foo/bar/../../": "/", + "/foo/bar/../../baz": "/baz", + "/foo/bar/../": "/foo/", + "/foo/bar/../baz": "/foo/baz", + "/foo/bar/.": "/foo/bar/", + "/foo/bar/./": "/foo/bar/", + "http://:@example.com/": "http://example.com/", + "http://@example.com/": "http://example.com/", + "http://127.0.0.1:80/": "http://127.0.0.1/", + "http://example.com:081/": "http://example.com:81/", + "http://example.com:80/": "http://example.com/", + "http://example.com": "http://example.com/", + 
"http://example.com/?b&a": "http://example.com/?a&b", + "http://example.com/?q=%5c": "http://example.com/?q=%5C", + "http://example.com/?q=%C7": "http://example.com/?q=%EF%BF%BD", + "http://example.com/?q=C%CC%A7": "http://example.com/?q=%C3%87", + "http://EXAMPLE.COM/": "http://example.com/", + "http://example.com/%7Ejane": "http://example.com/~jane", + "http://example.com/a/../a/b": "http://example.com/a/b", + "http://example.com/a/./b": "http://example.com/a/b", + "http://lifehacker.com/#!5753509/hello-world-this-is-the-new-lifehacker": "http://lifehacker.com/?_escaped_fragment_=5753509/hello-world-this-is-the-new-lifehacker", + "http://USER:pass@www.Example.COM/foo/bar": "http://USER:pass@www.example.com/foo/bar", + "http://www.example.com./": "http://www.example.com/", + "http://www.foo.com:80/foo": "http://www.foo.com/foo", + "http://www.foo.com.:81/foo": "http://www.foo.com:81/foo", + "http://www.foo.com./foo/bar.html": "http://www.foo.com/foo/bar.html", + "http://www.foo.com/%7Ebar": "http://www.foo.com/~bar", + "http://www.foo.com/%7ebar": "http://www.foo.com/~bar", + "пример.испытание/Служебная:Search/Test": "https://xn--e1afmkfd.xn--80akhbyknj4f/%D0%A1%D0%BB%D1%83%D0%B6%D0%B5%D0%B1%D0%BD%D0%B0%D1%8F:Search/Test", } -EXPECTED_CHANGES = [ - (False, "http://:@example.com/"), - (False, "http://@example.com/"), - (False, "http://example.com"), - (False, "HTTP://example.com/"), - (False, "http://EXAMPLE.COM/"), - (False, "http://example.com/%7Ejane"), - (False, "http://example.com/?q=%C7"), - (False, "http://example.com/?q=%5c"), - (False, "http://example.com/?q=C%CC%A7"), - (False, "http://example.com/a/../a/b"), - (False, "http://example.com/a/./b"), - (False, "http://example.com:80/"), - (True, "http://example.com/"), - (True, "http://example.com/?q=%C3%87"), - (True, "http://example.com/?q=%E2%85%A0"), - (True, "http://example.com/?q=%5C"), - (True, "http://example.com/~jane"), - (True, "http://example.com/a/b"), - (True, "http://example.com:8080/"), - 
(True, "http://user:password@example.com/"), +NO_CHANGES_EXPECTED = ( + "-", + "", + "/..foo", + "/.foo", + "/foo..", + "/foo.", + "ftp://user:pass@ftp.foo.net/foo/bar", + "http://127.0.0.1/", + "http://example.com:8080/", + "http://example.com/?a&b", + "http://example.com/?q=%5C", + "http://example.com/?q=%C3%87", + "http://example.com/?q=%E2%85%A0", + "http://example.com/", + "http://example.com/~jane", + "http://example.com/a/b", + "http://user:password@example.com/", + "http://www.foo.com:8000/foo", # from rfc2396bis - (True, "ftp://ftp.is.co.za/rfc/rfc1808.txt"), - (True, "http://www.ietf.org/rfc/rfc2396.txt"), - (True, "ldap://[2001:db8::7]/c=GB?objectClass?one"), - (True, "mailto:John.Doe@example.com"), - (True, "news:comp.infosystems.www.servers.unix"), - (True, "tel:+1-816-555-1212"), - (True, "telnet://192.0.2.16:80/"), - (True, "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"), - # other - (True, "http://127.0.0.1/"), - (False, "http://127.0.0.1:80/"), - (True, "http://www.w3.org/2000/01/rdf-schema#"), - (False, "http://example.com:081/"), - (True, "http://example.com/?a&b"), - (False, "http://example.com/?b&a"), -] + "ftp://ftp.is.co.za/rfc/rfc1808.txt", + "http://www.ietf.org/rfc/rfc2396.txt", + "ldap://[2001:db8::7]/c=GB?objectClass?one", + "mailto:John.Doe@example.com", + "news:comp.infosystems.www.servers.unix", + "tel:+1-816-555-1212", + "telnet://192.0.2.16:80/", + "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", +) def test_url_normalize_changes(): @@ -128,11 +84,11 @@ def test_url_normalize_changes(): http://www.intertwingly.net/wiki/pie/PaceCanonicalIds """ - for (expected, value) in EXPECTED_CHANGES: - assert expected == (url_normalize(value) == value) + for value in NO_CHANGES_EXPECTED: + assert url_normalize(value) == value def test_url_normalize_results(): """Assert url_normalize return expected results.""" for value, expected in EXPECTED_RESULTS.items(): - assert expected == url_normalize(value) + assert expected == 
url_normalize(value), value diff --git a/tox.ini b/tox.ini index 7ff4fa1..c388cfb 100644 --- a/tox.ini +++ b/tox.ini @@ -1,21 +1,25 @@ [tox] -envlist=py27, py36 +skipsdist = True +envlist = py27, py37 [testenv] -deps= - pytest - coverage - pytest-cov -setenv= - PYTHONWARNINGS=all +whitelist_externals = poetry +skip_install = true +commands = + poetry install -v + poetry run pytest -[testenv:py27] -commands=pytest url_normalize +[pytest] +addopts = + --cov-fail-under=99 + --cov-report=term-missing:skip-covered + --cov=url_normalize + --disable-socket + --flakes + -v +python_files = tests.py test_*.py *_tests.py -[testenv:py36] -commands=pytest url_normalize - -[testenv:cov] -usedevelop=true -basepython=python3.6 -commands=pytest --cov=url_normalize --cov-report term +[flake8] +max-line-length = 80 +select = C,E,F,W,B,B950 +ignore = E501 diff --git a/url_normalize/__init__.py b/url_normalize/__init__.py index b4debc5..a59dfe8 100644 --- a/url_normalize/__init__.py +++ b/url_normalize/__init__.py @@ -1,9 +1,43 @@ -"""URI normalizator.""" -from __future__ import (absolute_import, division, print_function, - unicode_literals) -from future import standard_library +# -*- coding: utf-8 -*- +""" +URI normalizator. -standard_library.install_aliases() +URI Normalization function: + * Take care of IDN domains. + * Always provide the URI scheme in lowercase characters. + * Always provide the host, if any, in lowercase characters. + * Only perform percent-encoding where it is essential. + * Always use uppercase A-through-F characters when percent-encoding. + * Prevent dot-segments appearing in non-relative URI paths. + * For schemes that define a default authority, use an empty authority if the + default is desired. + * For schemes that define an empty path to be equivalent to a path of "/", + use "/". 
+ * For schemes that define a port, use an empty port if the default is desired + * All portions of the URI must be utf-8 encoded NFC from Unicode strings -# pylint: disable=C0413 -from .url_normalize import url_normalize # NOQA +Inspired by Sam Ruby's urlnorm.py: + http://intertwingly.net/blog/2004/08/04/Urlnorm +This fork author: Nikolay Panov () + +History: + * 1.4.0: A bit of code refactoring and cleanup + * 1.3.2: Support empty string and double slash urls (//domain.tld) + * 1.3.1: Same code support both Python 3 and Python 2. + * 1.3: Python 3 compatibility + * 1.2: PEP8, setup.py + * 1.1.2: support for shebang (#!) urls + * 1.1.1: using 'http' schema by default when appropriate + * 1.1: added handling of IDN domains + * 1.0: code pep8-zation + * 0.1: forked from Sam Ruby's urlnorm.py +""" + +from __future__ import absolute_import + +from .url_normalize import url_normalize + +__license__ = "Python" +__version__ = "1.4.0" + +__all__ = ["url_normalize"] diff --git a/url_normalize/tools.py b/url_normalize/tools.py new file mode 100644 index 0000000..4828e82 --- /dev/null +++ b/url_normalize/tools.py @@ -0,0 +1,100 @@ +"""Url normalize tools (py27/py37 compatible).""" +import re +import unicodedata +from collections import namedtuple + +import six +from six.moves.urllib.parse import quote as quote_orig +from six.moves.urllib.parse import unquote as unquote_orig +from six.moves.urllib.parse import urlsplit, urlunsplit + +URL = namedtuple( + "URL", ["scheme", "userinfo", "host", "port", "path", "query", "fragment"] +) + + +def deconstruct_url(url): + """Tranform the url into URL structure. 
+ + Params: + url : string : the URL + + Returns: + URL + + """ + scheme, auth, path, query, fragment = urlsplit(url.strip()) + (userinfo, host, port) = re.search("([^@]*@)?([^:]*):?(.*)", auth).groups() + return URL( + fragment=fragment, + host=host, + path=path, + port=port, + query=query, + scheme=scheme, + userinfo=userinfo or "", + ) + + +def reconstruct_url(url): + """Reconstruct string url from URL. + + Params: + url : URL object instance + + Returns: + string : reconstructed url string + + """ + auth = (url.userinfo or "") + url.host + if url.port: + auth += ":" + url.port + return urlunsplit((url.scheme, auth, url.path, url.query, url.fragment)) + + +def force_unicode(string, charset="utf-8"): + """Convert string to unicode if it is not yet unicode. + + Params: + string : string/unicode : an input string + charset : string : optional : output encoding + + Returns: + unicode + + """ + if isinstance(string, six.text_type): # Always True on Py3 + return string + return string.decode(charset, "replace") # Py2 only + + +def unquote(string, charset="utf-8"): + """Unquote and normalize unicode string. + + Params: + string : string to be unquoted + charset : string : optional : output encoding + + Returns: + string : an unquoted and normalized string + + """ + string = unquote_orig(string) + string = force_unicode(string, charset) + string = unicodedata.normalize("NFC", string).encode(charset) + return string + + +def quote(string, safe="/"): + """Quote string. + + Params: + string : string to be quoted + safe : string of safe characters + + Returns: + string : quoted string + + """ + string = quote_orig(string, safe) + return string diff --git a/url_normalize/url_normalize.py b/url_normalize/url_normalize.py old mode 100755 new mode 100644 index b4e2280..44aee13 --- a/url_normalize/url_normalize.py +++ b/url_normalize/url_normalize.py @@ -1,139 +1,153 @@ # -*- coding: utf-8 -*- -"""URI normalizator. - -URI Normalization function: - * Take care of IDN domains. 
- * Always provide the URI scheme in lowercase characters. - * Always provide the host, if any, in lowercase characters. - * Only perform percent-encoding where it is essential. - * Always use uppercase A-through-F characters when percent-encoding. - * Prevent dot-segments appearing in non-relative URI paths. - * For schemes that define a default authority, use an empty authority if the - default is desired. - * For schemes that define an empty path to be equivalent to a path of "/", - use "/". - * For schemes that define a port, use an empty port if the default is desired - * All portions of the URI must be utf-8 encoded NFC from Unicode strings - -Inspired by Sam Ruby's urlnorm.py: - http://intertwingly.net/blog/2004/08/04/Urlnorm -This fork author: Nikolay Panov () - -History: - * 28 Oct 2018: Support empty string and double slash urls (//domain.tld/foo.html) - * 07 Jul 2017: Same code support both Python 3 and Python 2. - * 05 Jan 2016: Python 3 compatibility, please use version 1.2 on python 2 - * 29 Dec 2015: PEP8, setup.py - * 10 Mar 2010: support for shebang (#!) urls - * 28 Feb 2010: using 'http' schema by default when appropriate - * 28 Feb 2010: added handling of IDN domains - * 28 Feb 2010: code pep8-zation - * 27 Feb 2010: forked from Sam Ruby's urlnorm.py -""" -from __future__ import unicode_literals - +"""URL normalize main module.""" import re -import unicodedata -from urllib.parse import quote, unquote, urlsplit, urlunsplit -__license__ = "Python" -__version__ = "1.3.4" +from .tools import deconstruct_url, force_unicode, quote, reconstruct_url, unquote +DEFAULT_PORT = { + "ftp": "21", + "gopher": "70", + "http": "80", + "https": "443", + "news": "119", + "nntp": "119", + "snews": "563", + "snntp": "563", + "telnet": "23", + "ws": "80", + "wss": "443", +} +DEFAULT_SCHEME = "https" -def _clean(string, charset='utf-8'): - """Unquote and normalize unicode string. + +def provide_url_scheme(url): + """Make sure we have valid url scheme. 
Params: - charset : string : optional : output encoding + url : string : the URL Returns: - string : an unquoted and normalized string + string : updated url with validated/attached scheme """ - string = unquote(string) - return unicodedata.normalize('NFC', string).encode(charset) + has_scheme = ":" in url[:7] + is_default_scheme = url.startswith("//") + is_file_path = url == "-" or (url.startswith("/") and not is_default_scheme) + if not url or has_scheme or is_file_path: + return url + if is_default_scheme: + return DEFAULT_SCHEME + ":" + url + return DEFAULT_SCHEME + "://" + url -DEFAULT_PORT = { - 'ftp': 21, - 'telnet': 23, - 'http': 80, - 'ws': 80, - 'gopher': 70, - 'news': 119, - 'nntp': 119, - 'prospero': 191, - 'https': 443, - 'wss': 443, - 'snews': 563, - 'snntp': 563, -} +def generic_url_cleanup(url): + """Cleanup the URL from unnecessary data and convert to final form. -def url_normalize(url, charset='utf-8'): - """URI normalization routine. + Converts shebang urls to final form, removed unnecessary data from the url. - Sometimes you get an URL by a user that just isn't a real - URL because it contains unsafe characters like ' ' and so on. This - function can fix some of the problems in a similar way browsers - handle data entered by the user: + Params: + url : string : the URL + + Returns: + string : update url + + """ + url = url.replace("#!", "?_escaped_fragment_=") + url = re.sub(r"utm_source=[^&]+&?", "", url) + url = url.rstrip("&? ") + return url - >>> url_normalize(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)') - 'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29' + +def normalize_scheme(scheme): + """Normalize scheme part of the url. Params: - charset : string : The target charset for the URL if the url was - given as unicode string. + scheme : string : url scheme, e.g., 'https' + + Returns: + string : normalized scheme data. 
+ """ + return scheme.lower() - # invalid empty / null url - if url is None or len(url) == 0: - return url - # if there is no scheme use http as default scheme - if url[0] not in ['/', '-'] and ':' not in url[:7]: - url = 'https://' + url +def normalize_userinfo(userinfo): + """Normalize userinfo part of the url. - # protocol indeferent url (http|https), prepend https - if len(url) > 2 and url[0] == '/' and url[1] == '/' and ':' not in url[:7]: - url = 'https:' + url + Params: + userinfo : string : url userinfo, e.g., 'user@' - # shebang urls support - url = url.replace('#!', '?_escaped_fragment_=') + Returns: + string : normalized userinfo data. - # remove feedburner's crap - url = re.sub(r'\?utm_source=feedburner.+$', '', url) + """ + if userinfo in ["@", ":@"]: + return "" + return userinfo - # splitting url to useful parts - scheme, auth, path, query, fragment = urlsplit(url.strip()) - (userinfo, host, port) = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups() - # Always provide the URI scheme in lowercase characters. - scheme = scheme.lower() +def normalize_host(host, charset="utf-8"): + """Normalize host part of the url. - # Always provide the host, if any, in lowercase characters. - host = host.lower() - if host and host[-1] == '.': - host = host[:-1] + Lowercase and strip of final dot. + Also, take care about IDN domains. - # take care about IDN domains + Params: + host : string : url host, e.g., 'site.com' + + Returns: + string : normalized host data. + + """ + host = force_unicode(host, charset) + host = host.lower() + host = host.strip(".") host = host.encode("idna").decode(charset) + return host + + +def normalize_port(port, scheme): + """Normalize port part of the url. + + Remove mention of default port number + + Params: + port : string : url port, e.g., '8080' + scheme : string : url scheme, e.g., 'http' + + Returns: + string : normalized port data. 
+ + """ + if not port.isdigit(): + return port + port = str(int(port)) + if DEFAULT_PORT[scheme] == port: + return "" + return port + +def normalize_path(path, scheme): + """Normalize path part of the url. + + Remove mention of default path number + + Params: + path : string : url path, e.g., '/section/page.html' + scheme : string : url scheme, e.g., 'http' + + Returns: + string : normalized path data. + + """ # Only perform percent-encoding where it is essential. # Always use uppercase A-through-F characters when percent-encoding. # All portions of the URI must be utf-8 encoded NFC from Unicode strings - path = quote(_clean(path), "~:/?#[]@!$&'()*+,;=") - fragment = quote(_clean(fragment), "~") - - # note care must be taken to only encode & and = characters as values - query = "&".join( - sorted(["=".join( - [quote(_clean(t), "~:/?#[]@!$'()*+,;=") - for t in q.split("=", 1)]) for q in query.split("&")])) - + path = quote(unquote(path), "~:/?#[]@!$&'()*+,;=") # Prevent dot-segments appearing in non-relative URI paths. if scheme in ["", "http", "https", "ftp", "file"]: output, part = [], None - for part in path.split('/'): + for part in path.split("/"): if part == "": if not output: output.append(part) @@ -146,31 +160,80 @@ def url_normalize(url, charset='utf-8'): output.append(part) if part in ["", ".", ".."]: output.append("") - path = '/'.join(output) - - # For schemes that define a default authority, use an empty authority if - # the default is desired. - if userinfo in ["@", ":@"]: - userinfo = "" - + path = "/".join(output) # For schemes that define an empty path to be equivalent to a path of "/", # use "/". - if path == "" and scheme in ["http", "https", "ftp", "file"]: + if not path and scheme in ["http", "https", "ftp", "file"]: path = "/" + return path + + +def normalize_fragment(fragment): + """Normalize fragment part of the url. + + Params: + fragment : string : url fragment, e.g., 'fragment' + + Returns: + string : normalized fragment data. 
def normalize_fragment(fragment):
    """Normalize fragment part of the url.

    Only perform percent-encoding where it is essential; '~' is kept
    literal.

    Params:
        fragment : string : url fragment, e.g., 'fragment'

    Returns:
        string : normalized fragment data.

    """
    return quote(unquote(fragment), "~")


def normalize_query(query):
    """Normalize query part of the url.

    Sorts the query parameters and re-encodes names and values, only
    performing percent-encoding where it is essential.

    Params:
        query : string : url query, e.g., 'param1=val1&param2=val2'

    Returns:
        string : normalized query data.

    """
    # note: care must be taken to only encode & and = characters as values
    safe_chars = "~:/?#[]@!$'()*+,;="
    params = [
        "=".join(quote(unquote(part), safe_chars) for part in param.split("=", 1))
        for param in query.split("&")
    ]
    return "&".join(sorted(params))


def url_normalize(url, charset="utf-8"):
    """URI normalization routine.

    Sometimes you get an URL by a user that just isn't a real
    URL because it contains unsafe characters like ' ' and so on.
    This function can fix some of the problems in a similar way
    browsers handle data entered by the user:

    >>> url_normalize('http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'

    Params:
        url : string : the URL to normalize
        charset : string : optional
            The target charset for the URL if the url was given as unicode string.

    Returns:
        string : normalized url; falsy input is returned unchanged.

    """
    if not url:
        return url
    url = provide_url_scheme(url)
    url = generic_url_cleanup(url)
    url_elements = deconstruct_url(url)
    url_elements = url_elements._replace(
        scheme=normalize_scheme(url_elements.scheme),
        userinfo=normalize_userinfo(url_elements.userinfo),
        host=normalize_host(url_elements.host, charset),
        query=normalize_query(url_elements.query),
        fragment=normalize_fragment(url_elements.fragment),
    )
    # port and path normalization depend on the already-normalized scheme
    url_elements = url_elements._replace(
        port=normalize_port(url_elements.port, url_elements.scheme),
        path=normalize_path(url_elements.path, url_elements.scheme),
    )
    url = reconstruct_url(url_elements)
    return url