diff --git a/app/models/concerns/provider_instagram.rb b/app/models/concerns/provider_instagram.rb index 17213ece..50d7f9cc 100644 --- a/app/models/concerns/provider_instagram.rb +++ b/app/models/concerns/provider_instagram.rb @@ -8,6 +8,10 @@ class ApiAuthenticationError < StandardError; end class_methods do def ignored_urls [ + { + pattern: /^https:\/\/(www\.)?instagram\.com/, + reason: :login_page + }, { pattern: /^https:\/\/www\.instagram\.com\/accounts\/login/, reason: :login_page diff --git a/app/models/parser/instagram_item.rb b/app/models/parser/instagram_item.rb index 159c47b4..3e449ee6 100644 --- a/app/models/parser/instagram_item.rb +++ b/app/models/parser/instagram_item.rb @@ -3,6 +3,7 @@ class InstagramItem < Base include ProviderInstagram INSTAGRAM_ITEM_URL = /^https?:\/\/(www\.)?instagram\.com\/(p|tv|reel)\/([^\/]+)/ + INSTAGRAM_HOME_URL = /^https?:\/\/(www\.)?instagram\.com\/?$/ class << self def type @@ -10,14 +11,14 @@ def type end def patterns - [INSTAGRAM_ITEM_URL] + [INSTAGRAM_ITEM_URL, INSTAGRAM_HOME_URL] end end private # Main function for class - def parse_data_for_parser(doc, _original_url, _jsonld_array) + def parse_data_for_parser(doc, original_url, _jsonld_array) id = url.match(INSTAGRAM_ITEM_URL)[3] @parsed_data.merge!(external_id: id) diff --git a/test/models/parser/instagram_item_test.rb b/test/models/parser/instagram_item_test.rb index c1235115..30137765 100644 --- a/test/models/parser/instagram_item_test.rb +++ b/test/models/parser/instagram_item_test.rb @@ -9,6 +9,13 @@ class InstagramItemIntegrationTest < ActiveSupport::TestCase assert !data['title'].blank? end + test "should parse Instagram item when the final url is instagram.com" do + m = Media.new url: 'https://instagram.com/' + data = m.as_json + assert_equal 'instagram', data['provider'] + assert_equal 'https://instagram.com/', data['title'] + end + test "should get canonical URL parsed from html tags" do media1 = create_media url: 'https://www.instagram.com/p/CAdW7PMlTWc/?taken-by=kikoloureiro' assert_match /https:\/\/www.instagram.com\/p\/CAdW7PMlTWc/, media1.url @@ -50,6 +57,9 @@ def doc match_three = Parser::InstagramItem.match?('https://www.instagram.com/reel/CAdW7PMlTWc') assert_equal true, match_three.is_a?(Parser::InstagramItem) + + match_four = Parser::InstagramItem.match?('https://www.instagram.com/') + assert_equal true, match_four.is_a?(Parser::InstagramItem) end test "should set profile defaults to URL upon error" do @@ -158,4 +168,20 @@ def doc assert data['raw']['metatags'].present? assert data['raw']['api'].present? end + + test "should return url as title when redirected to instagram main page" do + url = 'https://www.instagram.com/p/CdOk-lLKmyH/' + instagram_main_page = 'https://instagram.com/' + + WebMock.stub_request(:get, url).to_return(status: 302, headers: { 'location' => instagram_main_page }) + WebMock.stub_request(:get, instagram_main_page).to_return(status: 200, body: 'Instagram') + WebMock.stub_request(:get, "https://www.instagram.com/p/CdOk-lLKmyH/?__a=1&__d=a").to_return(status: 200) + + media = Media.new(url: url) + data = media.as_json + + assert_equal 'https://www.instagram.com/p/CdOk-lLKmyH', data['title'] + assert_equal 'instagram', data['provider'] + assert_equal 'item', data['type'] + end end