diff --git a/setup.py b/setup.py index 04f451da..e8270014 100644 --- a/setup.py +++ b/setup.py @@ -109,7 +109,7 @@ def get_long_description(): "certifi", "charset_normalizer >= 3.0.1; python_version < '3.7'", "charset_normalizer >= 3.1.0; python_version >= '3.7'", - "courlan @ git+https://github.com/adbar/courlan@compatibility", + "courlan @ git+https://github.com/adbar/courlan", "htmldate >= 1.4.3", "justext >= 3.0.0", "lxml >= 4.9.3 ; platform_system != 'Darwin'", diff --git a/tests/feeds_tests.py b/tests/feeds_tests.py index 40f69ae8..0e008de3 100644 --- a/tests/feeds_tests.py +++ b/tests/feeds_tests.py @@ -65,7 +65,7 @@ def test_atom_extraction(): 'example.org', 'http://example.org/', 'http://example.org', - ) == ['http://example.org/article1'] + ) == ['http://example.org/article1/'] # TODO: remove slash? def test_rss_extraction(): @@ -87,7 +87,7 @@ def test_rss_extraction(): 'example.org', 'http://example.org/', '', - ) == ['http://example.org/article1'] + ) == ['http://example.org/article1/'] # TODO: remove slash? # spaces assert len(feeds.extract_links(XMLDECL + '\r\n https://www.ak-kurier.de/akkurier/www/artikel/108815-sinfonisches-blasorchester-spielt-1500-euro-fuer-kinder-in-drk-krankenhaus-kirchen-ein ', 'ak-kurier.de', 'https://www.ak-kurier.de/', '')) == 1 assert ( diff --git a/tests/sitemaps_tests.py b/tests/sitemaps_tests.py index a6701565..76fd79ce 100644 --- a/tests/sitemaps_tests.py +++ b/tests/sitemaps_tests.py @@ -164,7 +164,6 @@ def test_robotstxt(): def test_whole(): "Test whole process." results = sitemaps.sitemap_search("https://www.sitemaps.org", target_lang="de") - print(results) assert len(results) == 8 diff --git a/tests/spider_tests.py b/tests/spider_tests.py index 04593940..7cabb958 100644 --- a/tests/spider_tests.py +++ b/tests/spider_tests.py @@ -87,7 +87,7 @@ def test_process_links(): spider.process_links(htmlstring, base_url, language='en') todo = spider.URL_STORE.find_unvisited_urls(base_url) known_links = spider.URL_STORE.find_known_urls(base_url) - assert 'https://example.org/en/page1' in todo and len(known_links) == 4 + assert 'https://example.org/en/page1/' in todo and len(known_links) == 4 # TODO: remove slash? # wrong language htmlstring = '' spider.process_links(htmlstring, base_url, language='de')