From 3d629367cca8a3821d1c44032fc730410f1f28b6 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Thu, 3 Aug 2023 18:26:30 -0300 Subject: [PATCH] update twitter item tests (and postgres update) - We don't need that many integration tests, so we are moving some of them back to test/models/parser/twitter_item_test.rb, and updating them to not make live requests; instead, they will be stubbed. - I added tests to check the basic request functionality that we now have, and removed "assigns values to hash from the API response" since that is already being tested in "it makes a get request to the tweet lookup endpoint successfully". - "should decode html entities" was removed because that happens inside Media and is not done by the individual parser, which means the test actually fails (as it should). - fake_tweet and fake_twitter_user were removed, since they used methods from the old Twitter gem. Now we are stubbing a response from our new method: tweet_lookup. - Added .squish to parsed_data['raw']['api']['data'][0]['text'] to clean up line breaks from title and description. Our test was failing because the line breaks were not being removed. Also, since title and description are the same, I just set the description to be the same as the title instead of parsing twice. - Separated the stub from the response, so we can also have a failed response. Changed the response fixture to be a success one, and added an error one. - Changed the id and user to make it clear that those are fake and being stubbed. - Removed the test for truncated text: that behavior is no longer present in the v2 API; only retweets might be truncated (we don't fetch those), and the way to deal with it is different. It does not take truncated as a query param. 
- @url.gsub!(/\s/, '') -> remove whitespaces from the url - raise ApiError.new("#{e.class}: #{e.message}") -> I can get the response code and body, but I get an error when I try the same for the error - upgrade postgres image to 13 (#373) I had upgraded to postgres12-bullseye because of a issue we had when building on Travis: We had an issue with building on Travis that seems related to a change the maintainers of the postgres docker images have made to the underlying OS image layer: Previous: Debian 11 (bullseye) New: Debian 12 (bookworm). The workaround seems to be using postgres-bullseye.More on this here https://stackoverflow.com/questions/76555305/postgres-container-failed-to-start-with-initdb-error-popen-failure-cannot-allo/76591040#76591040 Now updating to 13 I checked which image Devin used in Alegre and am using the same one here. --- app/models/concerns/provider_twitter.rb | 2 +- app/models/parser/twitter_item.rb | 14 +- docker-compose.yml | 4 +- test/data/twitter-item-response-error.json | 13 + ...son => twitter-item-response-success.json} | 6 +- test/integration/parsers/twitter_item_test.rb | 40 --- test/models/parser/twitter_item_test.rb | 303 ++++++------------ 7 files changed, 123 insertions(+), 259 deletions(-) create mode 100644 test/data/twitter-item-response-error.json rename test/data/{twitter-item-response.json => twitter-item-response-success.json} (91%) diff --git a/app/models/concerns/provider_twitter.rb b/app/models/concerns/provider_twitter.rb index e9997cea..bcd082fe 100644 --- a/app/models/concerns/provider_twitter.rb +++ b/app/models/concerns/provider_twitter.rb @@ -62,8 +62,8 @@ def get(path, params) raise ApiResponseCodeError.new("#{response.class}: #{response.code} #{response.message} - #{response.body}") unless response.code.to_i < 400 JSON.parse(response.body) rescue StandardError => e - raise ApiError.new("#{e.class}: #{e.code} #{e.message} - #{e.body}") PenderSentry.notify(e, url: url) + raise ApiError.new("#{e.class}: 
#{e.message}") end end diff --git a/app/models/parser/twitter_item.rb b/app/models/parser/twitter_item.rb index 5d8eb257..a83e8e6b 100644 --- a/app/models/parser/twitter_item.rb +++ b/app/models/parser/twitter_item.rb @@ -19,8 +19,10 @@ def patterns # Main function for class def parse_data_for_parser(_doc, _original_url, _jsonld_array) @url.gsub!(/(%23|#)!\//, '') + @url.gsub!(/\s/, '') @url = replace_subdomain_pattern(url) parts = url.match(TWITTER_ITEM_URL) + user, id = parts['user'], parts['id'] @parsed_data['raw']['api'] = {} @@ -36,16 +38,16 @@ def parse_data_for_parser(_doc, _original_url, _jsonld_array) published_at = '' html = '' author_name = user - author_url = get_author_url(url, user) || RequestHelper.top_url(url) + author_url = get_author_url(user) elsif @parsed_data[:error].nil? - title = parsed_data['raw']['api']['data'][0]['text'] - description = parsed_data['raw']['api']['data'][0]['text'] + title = parsed_data['raw']['api']['data'][0]['text'].squish + description = title picture = get_twitter_item_picture(parsed_data) author_picture = parsed_data['raw']['api']['includes']['users'][0]['profile_image_url'].gsub('_normal', '') published_at = parsed_data['raw']['api']['data'][0]['created_at'] html = html_for_twitter_item(url) author_name = parsed_data['raw']['api']['includes']['users'][0]['name'] - author_url = get_author_url(url, user) || parsed_data['raw']['api']['includes']['users'][0]['url'] || RequestHelper.top_url(url) + author_url = get_author_url(user) || parsed_data['raw']['api']['includes']['users'][0]['url'] || RequestHelper.top_url(url) end @parsed_data.merge!({ @@ -63,8 +65,8 @@ def parse_data_for_parser(_doc, _original_url, _jsonld_array) parsed_data end - def get_author_url(url, user) - URI(url).host + '/' + user + def get_author_url(user) + 'https://twitter.com/' + user end def get_twitter_item_picture(parsed_data) diff --git a/docker-compose.yml b/docker-compose.yml index 8657aae0..0b843757 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -2,7 +2,7 @@ version: "2.2" volumes: redis: minio: - postgres12: + postgres13: services: redis: image: redis:5 @@ -21,7 +21,7 @@ services: MINIO_ACCESS_KEY: AKIAIOSFODNN7EXAMPLE MINIO_SECRET_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY postgres: - image: postgres:12-bullseye + image: postgres:13-buster ports: - "5432:5432" environment: diff --git a/test/data/twitter-item-response-error.json b/test/data/twitter-item-response-error.json new file mode 100644 index 00000000..86132bcd --- /dev/null +++ b/test/data/twitter-item-response-error.json @@ -0,0 +1,13 @@ +{ + "errors": [ + { + "value": "1111111111111111111", + "detail": "Could not find tweet with ids: [1111111111111111111].", + "title": "Not Found Error", + "resource_type": "tweet", + "parameter": "ids", + "resource_id": "1111111111111111111", + "type": "https://api.twitter.com/2/problems/resource-not-found" + } + ] +} \ No newline at end of file diff --git a/test/data/twitter-item-response.json b/test/data/twitter-item-response-success.json similarity index 91% rename from test/data/twitter-item-response.json rename to test/data/twitter-item-response-success.json index 93195d3e..000a472a 100644 --- a/test/data/twitter-item-response.json +++ b/test/data/twitter-item-response-success.json @@ -1,9 +1,9 @@ { "data": [ { - "id": "1686748612506632192", + "id": "1111111111111111111", "edit_history_tweet_ids": [ - "1686748612506632192" + "1111111111111111111" ], "attachments": { "media_keys": [ @@ -25,7 +25,7 @@ ], "users": [ { - "username": "NASAWebb", + "username": "fake_user", "url": "https://t.co/ZpTf8zeokA", "name": "NASA Webb Telescope", "profile_image_url": "https://pbs.twimg.com/profile_images/685182791496134658/Wmyak8D6_normal.jpg", diff --git a/test/integration/parsers/twitter_item_test.rb b/test/integration/parsers/twitter_item_test.rb index 3224be1f..e01c991a 100644 --- a/test/integration/parsers/twitter_item_test.rb +++ b/test/integration/parsers/twitter_item_test.rb @@ -11,45 
+11,5 @@ class TwitterItemIntegrationTest < ActiveSupport::TestCase assert_nil data['picture'] assert_not_nil data['author_picture'] end - - test "should parse valid link with spaces" do - # skip("twitter api key is not currently working") - m = create_media url: ' https://twitter.com/caiosba/status/742779467521773568 ' - data = m.as_json - assert_match 'I\'ll be talking in @rubyconfbr this year! More details soon...', data['title'] - assert_match 'Caio Almeida', data['author_name'] - assert_match '@caiosba', data['username'] - assert_nil data['picture'] - assert_not_nil data['author_picture'] - end - - test "should fill in html when html parsing fails but API works" do - # skip("twitter api key is not currently working") - url = 'https://twitter.com/codinghorror/status/1276934067015974912' - OpenURI.stubs(:open_uri).raises(OpenURI::HTTPError.new('','429 Too Many Requests')) - m = create_media url: url - data = m.as_json - assert_match /twitter-tweet.*#{url}/, data[:html] - end - - test "should not parse a twitter post when passing the twitter api bearer token is missing" do - # skip("this might be broke befcause of twitter api changes - needs fixing") - key = create_api_key application_settings: { config: { twitter_bearer_token: '' } } - m = create_media url: 'https://twitter.com/cal_fire/status/919029734847025152', key: key - assert_equal '', PenderConfig.get(:twitter_bearer_token) - data = m.as_json - assert_equal m.url, data['title'] - assert_match "401 Unauthorized", data['error']['message'] - end - - test "should store oembed data of a twitter profile" do - # skip("twitter api key is not currently working") - m = create_media url: 'https://twitter.com/meedan' - data = m.as_json - - assert data['raw']['oembed'].is_a? 
Hash - assert_equal "https:\/\/twitter.com", data['raw']['oembed']['provider_url'] - assert_equal "Twitter", data['raw']['oembed']['provider_name'] - end end diff --git a/test/models/parser/twitter_item_test.rb b/test/models/parser/twitter_item_test.rb index 092bbff0..9e5f775d 100644 --- a/test/models/parser/twitter_item_test.rb +++ b/test/models/parser/twitter_item_test.rb @@ -1,67 +1,5 @@ require 'test_helper' -class TwitterItemIntegrationTest < ActiveSupport::TestCase - test "should parse tweet" do - skip("twitter api key is not currently working") - m = create_media url: 'https://twitter.com/caiosba/status/742779467521773568' - data = m.as_json - assert_match 'I\'ll be talking in @rubyconfbr this year! More details soon...', data['title'] - assert_match 'Caio Almeida', data['author_name'] - assert_match '@caiosba', data['username'] - assert_nil data['picture'] - assert_not_nil data['author_picture'] - end - - test "should parse valid link with spaces" do - skip("twitter api key is not currently working") - m = create_media url: ' https://twitter.com/caiosba/status/742779467521773568 ' - data = m.as_json - assert_match 'I\'ll be talking in @rubyconfbr this year! 
More details soon...', data['title'] - assert_match 'Caio Almeida', data['author_name'] - assert_match '@caiosba', data['username'] - assert_nil data['picture'] - assert_not_nil data['author_picture'] - end - - test "should fill in html when html parsing fails but API works" do - skip("twitter api key is not currently working") - url = 'https://twitter.com/codinghorror/status/1276934067015974912' - OpenURI.stubs(:open_uri).raises(OpenURI::HTTPError.new('','429 Too Many Requests')) - m = create_media url: url - data = m.as_json - assert_match /twitter-tweet.*#{url}/, data[:html] - end - - test "should not parse a twitter post when passing the twitter api key or subkey missing" do - skip("this might be broke befcause of twitter api changes - needs fixing") - key = create_api_key application_settings: { config: { twitter_consumer_key: 'consumer_key', twitter_consumer_secret: '' } } - m = create_media url: 'https://twitter.com/cal_fire/status/919029734847025152', key: key - assert_equal 'consumer_key', PenderConfig.get(:twitter_consumer_key) - assert_equal '', PenderConfig.get(:twitter_consumer_secret) - data = m.as_json - assert_equal m.url, data['title'] - assert_match "Twitter::Error::Unauthorized", data['raw']['api']['error']['message'] - PenderConfig.current = nil - - key = create_api_key application_settings: { config: { twitter_consumer_key: '' } } - m = create_media url: 'https://twitter.com/cal_fire/status/919029734847025152' , key: key - assert_equal '', PenderConfig.get(:twitter_consumer_key) - data = m.as_json - assert_equal m.url, data['title'] - assert_match "Twitter::Error::BadRequest", data['raw']['api']['error']['message'] - end - - test "should store oembed data of a twitter profile" do - skip("twitter api key is not currently working") - m = create_media url: 'https://twitter.com/meedan' - data = m.as_json - - assert data['raw']['oembed'].is_a? 
Hash - assert_equal "https:\/\/twitter.com", data['raw']['oembed']['provider_url'] - assert_equal "Twitter", data['raw']['oembed']['provider_name'] - end -end - class TwitterItemUnitTest < ActiveSupport::TestCase def setup isolated_setup @@ -71,22 +9,32 @@ def teardown isolated_teardown end - def fake_twitter_user - return @fake_twitter_user unless @fake_twitter_user.blank? - # https://github.com/sferik/twitter/blob/master/lib/twitter/user.rb - api_response = response_fixture_from_file('twitter-profile-response.json', parse_as: :json) - @fake_twitter_user = Twitter::User.new(api_response.with_indifferent_access) + def empty_doc + Nokogiri::HTML('') end - def fake_tweet - return @fake_tweet unless @fake_tweet.blank? - # https://github.com/sferik/twitter/blob/master/lib/twitter/tweet.rb - api_response = response_fixture_from_file('twitter-item-response.json', parse_as: :json) - @fake_tweet = Twitter::Tweet.new(api_response.with_indifferent_access) + def query + params = { + "ids": "1111111111111111111", + "tweet.fields": "author_id,created_at,text", + "expansions": "author_id,attachments.media_keys", + "user.fields": "profile_image_url,username,url", + "media.fields": "url", + } + Rack::Utils.build_query(params) end - def empty_doc - Nokogiri::HTML('') + def twitter_item_response_success + JSON.parse(response_fixture_from_file('twitter-item-response-success.json')) + end + + def twitter_item_response_error + JSON.parse(response_fixture_from_file('twitter-item-response-error.json')) + end + + def stub_tweet_lookup + Parser::TwitterItem.any_instance.stubs(:tweet_lookup) + .with('1111111111111111111') end test "returns provider and type" do @@ -123,171 +71,112 @@ def empty_doc assert_equal true, match_seven.is_a?(Parser::TwitterItem) end - test "assigns values to hash from the API response" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).returns(fake_tweet) - 
Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) - - data = Parser::TwitterItem.new('https://twitter.com/fakeaccount/status/123456789').parse_data(empty_doc) - - assert_equal '123456789', data['external_id'] - assert_equal '@fakeaccount', data['username'] - assert_match /I'll be talking in @rubyconfbr this year!/, data['title'] - assert_match /I'll be talking in @rubyconfbr this year!/, data['description'] - assert_nil data['picture'] - assert_match /pbs.twimg.com\/profile_images\/1217299193217388544\/znpkNtDr.jpg/, data['author_picture'] - assert_match /
/, data['html'] - assert_match 'Caio Almeida', data['author_name'] - assert_match /twitter.com\/TEDTalks/, data['author_url'] - assert_not_nil data['published_at'] - - assert_nil data['error'] - end - - test "should store data of post returned by twitter API" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).returns(fake_tweet) - Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) - - data = Parser::TwitterItem.new('https://twitter.com/fakeaccount/status/123456789').parse_data(empty_doc) + test "it makes a get request to the tweet lookup endpoint successfully" do + stub_configs({'twitter_bearer_token' => 'test' }) + + WebMock.stub_request(:get, "https://api.twitter.com/2/tweets") + .with(query: query) + .with(headers: { "Authorization": "Bearer test" }) + .to_return(status: 200, body: response_fixture_from_file('twitter-item-response-success.json')) - assert data['raw']['api'].is_a? Hash - assert !data['raw']['api'].empty? + data = Parser::TwitterItem.new('https://m.twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + + assert_equal '1111111111111111111', data['external_id'] + assert_equal '@fake_user', data['username'] end - # I'm not confident this is testing anything about HTML decoding as written - test "should decode html entities" do - skip("this might be broke befcause of twitter api changes - needs fixing") - tweet = Twitter::Tweet.new( - id: "123", - text: " [update] between Calistoga and Santa Rosa (Napa & Sonoma County) is now 35,270 acres and 44% contained. 
" - ) - Twitter::REST::Client.any_instance.stubs(:status).returns(tweet) - Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) - - data = Parser::TwitterItem.new('https://twitter.com/fakeaccount/status/123456789').parse_data(empty_doc) - assert_no_match /&/, data['title'] - end + test "it makes a get request to the tweet lookup endpoint and raises an error when 401 is returned" do + stub_configs({'twitter_bearer_token' => 'test' }) - test "should throw Pender::Exception::ApiLimitReached when Twitter::Error::TooManyRequests is thrown when parsing tweet" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).raises(Twitter::Error::TooManyRequests) + WebMock.stub_request(:get, "https://api.twitter.com/2/tweets") + .with(query: query) + .with(headers: { "Authorization": "Bearer test" }) + .to_return(status: 401) - assert_raises Pender::Exception::ApiLimitReached do - Parser::TwitterItem.new('https://twitter.com/fake-account/status/123456789').parse_data(empty_doc) + assert_raises ProviderTwitter::ApiError do + Parser::TwitterItem.new('https://m.twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) end end - test "logs error resulting from non-ratelimit tweet lookup, and return default values with html blank" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).raises(Twitter::Error::NotFound) - Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) + test "it makes a get request to the tweet lookup endpoint, raises an error when 500 is returned and notifies sentry" do + stub_configs({'twitter_bearer_token' => 'test' }) - data = {} - sentry_call_count = 0 - arguments_checker = Proc.new do |e| - sentry_call_count += 1 - assert_equal Twitter::Error::NotFound, e.class - end + WebMock.stub_request(:get, "https://api.twitter.com/2/tweets") + .with(query: query) 
+ .with(headers: { "Authorization": "Bearer test" }) + .to_return(status: 500, body: response_fixture_from_file('twitter-item-response-error.json')) - PenderSentry.stub(:notify, arguments_checker) do - data = Parser::TwitterItem.new('https://twitter.com/fake-account/status/123456789').parse_data(empty_doc) - assert_equal 1, sentry_call_count - end - assert_match /Twitter::Error::NotFound/, data['error']['message'] - assert_equal "123456789", data['external_id'] - assert_equal "@fake-account", data['username'] - assert data['html'].empty? + sentry_call_count = 0 + arguments_checker = Proc.new do |e| + sentry_call_count += 1 + end + + PenderSentry.stub(:notify, arguments_checker) do + assert_raises ProviderTwitter::ApiError do + Parser::TwitterItem.new('https://twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + end + assert_equal 1, sentry_call_count + end end - # This swallows rate limiting errors, which we're surfacing in a different - # exception catching block in the same class. It also doesn't surface errors. - # We may want to reconsider both of these things for consistency. 
- test "logs error resulting from looking up user information, and returns tweet info" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).returns(fake_tweet) - Twitter::REST::Client.any_instance.stubs(:user).raises(Twitter::Error) - - data = {} - sentry_call_count = 0 - arguments_checker = Proc.new do |e| - sentry_call_count += 1 - assert_equal Twitter::Error, e.class - end + test "should store data of post returned by twitter API" do + stub_tweet_lookup.returns(twitter_item_response_success) - PenderSentry.stub(:notify, arguments_checker) do - data = Parser::TwitterItem.new('https://twitter.com/fakeaccount/status/123456789').parse_data(empty_doc) - assert_equal 1, sentry_call_count - end - assert_nil data['error'] - assert_equal "123456789", data['external_id'] - assert_equal "@fakeaccount", data['username'] - assert_match /I'll be talking in @rubyconfbr this year!/, data['title'] + data = Parser::TwitterItem.new('https://twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + + assert data['raw']['api'].is_a? Hash + assert !data['raw']['api'].empty? 
end - # This is current behavior, but I wonder if we might want something like https://twitter.com/fakeaccount - test "falls back to top_url when user information can't be retrieved" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).returns(fake_tweet) - Twitter::REST::Client.any_instance.stubs(:user).raises(Twitter::Error) + test "sets the author_url o be https://twitter.com/ even if an error is returned" do + stub_tweet_lookup.returns(twitter_item_response_error) - data = Parser::TwitterItem.new('https://twitter.com/fakeaccount/status/123456789').parse_data(empty_doc) - assert_nil data['error'] - assert_equal 'https://twitter.com', data['author_url'] + data = Parser::TwitterItem.new('https://twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + + assert_not_nil data['error'] + assert_equal 'https://twitter.com/fake_user', data['author_url'] end test "should remove line breaks from Twitter item title" do - skip("this might be broke befcause of twitter api changes - needs fixing") - tweet = Twitter::Tweet.new( - id: '123', - text: "LA Times- USC Dornsife Sunday Poll: \n Donald Trump Retains 2 Point \n Lead Over Hillary" - ) - Twitter::REST::Client.any_instance.stubs(:status).returns(tweet) - Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) - - data = Parser::TwitterItem.new('https://twitter.com/fake-account/status/123456789').parse_data(empty_doc) - assert_match 'LA Times- USC Dornsife Sunday Poll: Donald Trump Retains 2 Point Lead Over Hillary', data['title'] + stub_tweet_lookup.returns(twitter_item_response_success) + + data = Parser::TwitterItem.new('https://twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + + assert_match 'Youths! 
Webb observed galaxy cluster El Gordo', data['title'] end test "should parse tweet url with special chars, and strip them" do - skip("this might be broke befcause of twitter api changes - needs fixing") - Twitter::REST::Client.any_instance.stubs(:status).returns(fake_tweet) - Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) + stub_tweet_lookup.returns(twitter_item_response_success) - parser = Parser::TwitterItem.new('https://twitter.com/#!/salmaeldaly/status/45532711472992256') + parser = Parser::TwitterItem.new('https://twitter.com/#!/fake_user/status/1111111111111111111') data = parser.parse_data(empty_doc) - assert_match 'https://twitter.com/salmaeldaly/status/45532711472992256', parser.url + assert_match 'https://twitter.com/fake_user/status/1111111111111111111', parser.url - parser = Parser::TwitterItem.new('https://twitter.com/%23!/salmaeldaly/status/45532711472992256') + parser = Parser::TwitterItem.new('https://twitter.com/%23!/fake_user/status/1111111111111111111') data = parser.parse_data(empty_doc) - assert_match 'https://twitter.com/salmaeldaly/status/45532711472992256', parser.url - end - - # I'm not confident this is testing anything about truncation as written - test "should get all information of a truncated tweet" do - skip("this might be broke befcause of twitter api changes - needs fixing") - tweet = Twitter::Tweet.new( - id: "123", - full_text: "Anti immigrant graffiti in a portajon on a residential construction site in Mtn Brook, AL. Job has about 50% Latino workers. 
https://t.co/bS5vI4Jq7I", - truncated: true, - entities: { - media: [ - { media_url_https: "https://pbs.twimg.com/media/C7dYir1VMAAi46b.jpg" } - ] - } - ) - Twitter::REST::Client.any_instance.stubs(:status).returns(tweet) - Twitter::REST::Client.any_instance.stubs(:user).returns(fake_twitter_user) - - data = Parser::TwitterItem.new('https://twitter.com/fake-account/status/123456789').parse_data(nil) - - assert_equal 'https://pbs.twimg.com/media/C7dYir1VMAAi46b.jpg', data['picture'] + assert_match 'https://twitter.com/fake_user/status/1111111111111111111', parser.url end test "#oembed_url returns URL with the instance URL" do oembed_url = Parser::TwitterItem.new('https://twitter.com/fake-account/status/1234').oembed_url assert_equal 'https://publish.twitter.com/oembed?url=https://twitter.com/fake-account/status/1234', oembed_url end + + test "should parse valid link with spaces" do + stub_tweet_lookup.returns(twitter_item_response_success) + + data = Parser::TwitterItem.new(' https://twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + + assert_match 'Youths! Webb observed galaxy cluster El Gordo', data['title'] + end + + test "should fill in html when html parsing fails but API works" do + stub_tweet_lookup.returns(twitter_item_response_success) + + data = Parser::TwitterItem.new('https://twitter.com/fake_user/status/1111111111111111111').parse_data(empty_doc) + + assert_match "