From b022c3c8761ee65de3fe2eac734e873318a02fd1 Mon Sep 17 00:00:00 2001 From: meyt Date: Thu, 22 Apr 2021 18:10:34 +0430 Subject: [PATCH] Use redirected URL, closes #7 --- linkpreview/compose.py | 2 +- linkpreview/grabber.py | 2 +- tests/test_grabber.py | 19 +++++++++++++++++++ tests/test_preview.py | 22 ++++++++++++++++++++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/linkpreview/compose.py b/linkpreview/compose.py index 91be3f0..8db9caa 100644 --- a/linkpreview/compose.py +++ b/linkpreview/compose.py @@ -11,7 +11,7 @@ def link_preview( if content is None: try: grabber = LinkGrabber() - content = grabber.get_content(url) + content, url = grabber.get_content(url) except InvalidMimeTypeError: content = '' diff --git a/linkpreview/grabber.py b/linkpreview/grabber.py index 418b292..45f1fc3 100644 --- a/linkpreview/grabber.py +++ b/linkpreview/grabber.py @@ -58,4 +58,4 @@ def get_content(self, url: str, headers: dict = None): content += chunk - return content + return content, r.url diff --git a/tests/test_grabber.py b/tests/test_grabber.py index c60b496..4976055 100644 --- a/tests/test_grabber.py +++ b/tests/test_grabber.py @@ -45,6 +45,20 @@ class FakeResponse(Response): headers={"content-length": "100000"}, ) ) + httpserver.expect_request("/redirected").respond_with_response( + FakeResponse( + mimetype="text/html", + response=b"done!", + ) + ) + redirected_url = "http://%s:%s/redirected" % (httpserver.host, httpserver.port) + httpserver.expect_request("/redirection").respond_with_response( + FakeResponse( + mimetype="text/html", + headers={"location": redirected_url}, + status=301 + ) + ) # success grabber = LinkGrabber(maxsize=100) @@ -80,3 +94,8 @@ class FakeResponse(Response): grabber = LinkGrabber() with pytest.raises(exceptions.InvalidContentError): grabber.get_content(httpserver.url_for("/badmime")) + + # redirection + grabber = LinkGrabber() + content, url = grabber.get_content(httpserver.url_for("/redirection")) + assert url == redirected_url diff --git a/tests/test_preview.py b/tests/test_preview.py index 65b99ca..383d310 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -2,6 +2,8 @@ from pytest_httpserver import HTTPServer +from werkzeug.wrappers.response import Response + from linkpreview import Link, link_preview from linkpreview.preview import OpenGraph, TwitterCard, Schema, Generic @@ -184,6 +186,18 @@ def test_link_preview(httpserver: HTTPServer): '{}', headers={"content-type": "application/json"}, ) + httpserver.expect_request("/redirected").respond_with_data( + get_sample("generic/h1-img.html"), + headers={"content-type": "text/html"}, + ) + redirected_url = "http://%s:%s/redirected" % (httpserver.host, httpserver.port) + httpserver.expect_request("/redirection").respond_with_response( + Response( + mimetype="text/html", + headers={"location": redirected_url}, + status=301 + ) + ) url = httpserver.url_for("/preview1") preview = link_preview(url) @@ -235,3 +249,11 @@ def test_link_preview(httpserver: HTTPServer): assert preview.image is None assert preview.absolute_image is None assert preview.force_title == "Preview 3" + + url = httpserver.url_for("/redirection") + preview = link_preview(url) + assert preview.link.url == redirected_url + assert preview.title == "This title is from the first h1 tag." + assert preview.description is None + assert preview.image == "http://localhost:8000/img/heck.jpg" + assert preview.absolute_image == "http://localhost:8000/img/heck.jpg"