diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1e023eb0..8d07463c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,10 +41,8 @@ deploy_qa: - pip install ecs-deploy==1.11.0 - aws ssm get-parameters-by-path --region $AWS_DEFAULT_REGION --path /qa/pender/ --recursive --with-decryption --output text --query "Parameters[].[Name]" | sed -E 's#/qa/pender/##' > env.qa.names - rm -f qa-pender-c.env.args; for NAME in `cat env.qa.names`; do echo -n "-s qa-pender-c $NAME /qa/pender/$NAME " >> qa-pender-c.env.args; done - - echo -n "-s qa-pender-c GITHUB_TOKEN arn:aws:secretsmanager:eu-west-1:848416313321:secret:GithubToken-Plain-BUhwIw" >> qa-pender-c.env.args - ecs deploy ecs-qa qa-pender --image qa-pender-c $ECR_API_BASE_URL/qa/pender/api:$CI_COMMIT_SHA --exclusive-env -e qa-pender-c APP pender -e qa-pender-c DEPLOY_ENV qa -e qa-pender-c AWS_REGION $AWS_DEFAULT_REGION --timeout 3600 --exclusive-secrets `cat qa-pender-c.env.args` - rm -f qa-pender-background.env.args; for NAME in `cat env.qa.names`; do echo -n "-s qa-pender-background $NAME /qa/pender/$NAME " >> qa-pender-background.env.args; done - - echo -n "-s qa-pender-background GITHUB_TOKEN arn:aws:secretsmanager:eu-west-1:848416313321:secret:GithubToken-Plain-BUhwIw" >> qa-pender-background.env.args - ecs deploy ecs-qa qa-pender-background --image qa-pender-background $ECR_API_BASE_URL/qa/pender/api:$CI_COMMIT_SHA --exclusive-env -e qa-pender-background APP pender -e qa-pender-background DEPLOY_ENV qa -e qa-pender-background AWS_REGION $AWS_DEFAULT_REGION --timeout 3600 --exclusive-secrets `cat qa-pender-background.env.args` - echo "new Image was deployed $ECR_API_BASE_URL/qa/pender/api:$CI_COMMIT_SHA" only: @@ -90,10 +88,8 @@ deploy_live: - pip install ecs-deploy==1.11.0 - aws ssm get-parameters-by-path --region $AWS_DEFAULT_REGION --path /live/pender/ --recursive --with-decryption --output text --query "Parameters[].[Name]" | sed -E 's#/live/pender/##' > env.live.names - rm -f live-pender-c.env.args; for 
NAME in `cat env.live.names`; do echo -n "-s live-pender-c $NAME /live/pender/$NAME " >> live-pender-c.env.args; done - - echo -n "-s live-pender-c GITHUB_TOKEN arn:aws:secretsmanager:eu-west-1:848416313321:secret:GithubToken-Plain-BUhwIw" >> live-pender-c.env.args - ecs deploy ecs-live live-pender --image live-pender-c $ECR_API_BASE_URL/live/pender/api:$CI_COMMIT_SHA --exclusive-env -e live-pender-c APP pender -e live-pender-c DEPLOY_ENV live -e live-pender-c AWS_REGION $AWS_DEFAULT_REGION --timeout 3600 --exclusive-secrets `cat live-pender-c.env.args` - rm -f live-pender-background.env.args; for NAME in `cat env.live.names`; do echo -n "-s live-pender-background $NAME /live/pender/$NAME " >> live-pender-background.env.args; done - - echo -n "-s live-pender-background GITHUB_TOKEN arn:aws:secretsmanager:eu-west-1:848416313321:secret:GithubToken-Plain-BUhwIw" >> live-pender-background.env.args - ecs deploy ecs-live live-pender-background --image live-pender-background $ECR_API_BASE_URL/live/pender/api:$CI_COMMIT_SHA --exclusive-env -e live-pender-background APP pender -e live-pender-background DEPLOY_ENV live -e live-pender-background AWS_REGION $AWS_DEFAULT_REGION --timeout 3600 --exclusive-secrets `cat live-pender-background.env.args` - echo "new Image was deployed $ECR_API_BASE_URL/live/pender/api:$CI_COMMIT_SHA" only: diff --git a/.travis.yml b/.travis.yml index 5c9c449c..f660bbed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,8 +10,8 @@ before_install: before_script: - docker-compose exec pender test/setup-parallel script: -- docker-compose exec pender bundle exec rake "parallel:test[3]" -- docker-compose exec pender bundle exec rake parallel:spec +- docker-compose exec -e TEST_RETRY_COUNT=5 pender bundle exec rake "parallel:test[3]" +- docker-compose exec -e TEST_RETRY_COUNT=5 pender bundle exec rake parallel:spec after_script: - docker-compose exec pender cat tmp/performance.csv - docker-compose exec -e TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST -e 
TRAVIS_TEST_RESULT=$TRAVIS_TEST_RESULT -e TRAVIS_BRANCH=$TRAVIS_BRANCH -e CC_TEST_REPORTER_ID=$CC_TEST_REPORTER_ID -e GIT_COMMIT_SHA=$GIT_COMMIT_SHA -e GIT_COMMITTED_AT=$GIT_COMMITTED_AT pender test/test-coverage diff --git a/CODEOWNERS b/CODEOWNERS index 77a4c9f9..86843834 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1 +1 @@ -* @caiosba @melsawy +* @caiosba @melsawy @vasconsaurus diff --git a/app/models/concerns/media_archiver.rb b/app/models/concerns/media_archiver.rb index 5782b488..7199b68e 100644 --- a/app/models/concerns/media_archiver.rb +++ b/app/models/concerns/media_archiver.rb @@ -61,10 +61,6 @@ def declare_archiver(name, patterns, modifier, enabled = true) def give_up(info = {}) url, archiver, key_id = info[:args][0], info[:args][1], info[:args][2] - PenderSentry.notify( - StandardError.new(info[:error_message]), - info.merge({ url: url, archiver: archiver, key_id: key_id }) - ) Rails.logger.warn level: 'WARN', message: "[#{info[:error_class]}] #{info[:error_message]}", url: url, archiver: archiver data = { error: { message: info[:error_message], code: Lapis::ErrorCodes::const_get('ARCHIVER_FAILURE') }} Media.notify_webhook_and_update_cache(archiver, url, data, key_id) diff --git a/app/models/metrics.rb b/app/models/metrics.rb index 438ef481..09583670 100644 --- a/app/models/metrics.rb +++ b/app/models/metrics.rb @@ -97,21 +97,22 @@ def verify_facebook_metrics_response(url, response) error = JSON.parse(response.body)['error'] is_retryable = (RETRYABLE_FACEBOOK_ERROR_CODES + FACEBOOK_RATE_LIMIT_CODES).include?(error['code'].to_i) + fb_error_hash = { + 'app.api_key' => ApiKey.current&.id, + 'facebook.metrics.error.code' => error['code'], + 'facebook.metrics.error.message' => error['message'], + 'facebook.metrics.url' => url, + 'facebook.metrics.retryable' => is_retryable + } Rails.logger.warn level: 'WARN', message: "Facebook metrics error: #{error['code']} - #{error['message']}", url: url, key_id: ApiKey.current&.id, error: error, retryable: is_retryable 
TracingService.set_error_status( "Facebook metrics error", - attributes: { - 'app.api_key' => ApiKey.current&.id, - 'facebook.metrics.error.code' => error['code'], - 'facebook.metrics.error.message' => error['message'], - 'facebook.metrics.url' => url, - 'facebook.metrics.retryable' => is_retryable - } + attributes: fb_error_hash ) if is_retryable @locker.lock(3600) if FACEBOOK_RATE_LIMIT_CODES.include?(error['code'].to_i) - raise Pender::Exception::RetryLater, 'Metrics request failed' + raise Pender::Exception::RetryLater, "Metrics request failed.\n#{fb_error_hash.to_json}" end end diff --git a/app/models/parser/twitter_profile.rb b/app/models/parser/twitter_profile.rb index 693759e0..a8823813 100644 --- a/app/models/parser/twitter_profile.rb +++ b/app/models/parser/twitter_profile.rb @@ -9,10 +9,7 @@ def type def patterns [ - /^https?:\/\/(www\.)?twitter\.com\/(?[\w\d]+)(\?+.*)$/, - /^https?:\/\/(0|m|mobile)\.twitter\.com\/(?[\w\d]+)(\?+.*)$/, - /^https?:\/\/(www\.)?twitter\.com\/(?[^\/]+)$/, - /^https?:\/\/(0|m|mobile)\.twitter\.com\/(?[^\/]+)$/ + /^https?:\/\/((0|m|mobile|www)\.)?twitter\.com\/(?[\w]{4,15})[\/]*(\?.*)?[\/]*$/ ] end end diff --git a/config/initializers/02_sentry.rb b/config/initializers/02_sentry.rb index 4f1c2653..556bd635 100644 --- a/config/initializers/02_sentry.rb +++ b/config/initializers/02_sentry.rb @@ -10,4 +10,7 @@ # Any exceptions we want to prevent sending to Sentry config.excluded_exceptions += ['Pender::Exception::RetryLater'] + + # report_after_job_retries when turned on, the SDK will only report the exception after all retries have failed. 
+ config.sidekiq.report_after_job_retries = true end diff --git a/config/initializers/03_sidekiq.rb b/config/initializers/03_sidekiq.rb index cd9bd6d1..1272fb2b 100644 --- a/config/initializers/03_sidekiq.rb +++ b/config/initializers/03_sidekiq.rb @@ -10,11 +10,11 @@ Sidekiq.configure_server do |config| config.redis = redis_config - config.death_handlers << ->(job, ex) do - if ex.is_a?(Pender::Exception::RetryLater) - ex = Pender::Exception::RetryLimitHit.new(ex) + config.death_handlers << ->(job, original_exception) do + if original_exception.is_a?(Pender::Exception::RetryLater) + limit_hit_exception = Pender::Exception::RetryLimitHit.new(original_exception) end - Sentry.capture_exception(ex) + PenderSentry.notify(limit_hit_exception, {job: job, original_exception: original_exception.cause}) if limit_hit_exception # nil unless the job died on RetryLater; never notify with a nil exception end end diff --git a/pull_request_template.md b/pull_request_template.md index e52b7f67..55d06382 100644 --- a/pull_request_template.md +++ b/pull_request_template.md @@ -21,6 +21,7 @@ Please describe parts of the change that require extra attention during code rev - [ ] I have added unit and feature tests, if the PR implements a new feature or otherwise would benefit from additional testing - [ ] I have added regression tests, if the PR fixes a bug - [ ] I have added logging, exception reporting, and custom tracing with any additional information required for debugging +- [ ] I considered secure coding practices when writing this code. Any security concerns are noted above. 
- [ ] I have commented my code in hard-to-understand areas, if any - [ ] I have made needed changes to the README - [ ] My changes generate no new warnings diff --git a/test/integration/parsers/facebook_item_test.rb b/test/integration/parsers/facebook_item_test.rb new file mode 100644 index 00000000..2c9ac0b4 --- /dev/null +++ b/test/integration/parsers/facebook_item_test.rb @@ -0,0 +1,60 @@ +require 'test_helper' + +class FacebookItemIntegrationTest < ActiveSupport::TestCase + test "should get facebook post with valid data from crowdtangle" do + m = create_media url: 'https://www.facebook.com/144585402276277/posts/1127489833985824' + data = m.as_json + + assert_equal 'facebook', data['provider'] + assert_equal 'item', data['type'] + assert_equal '144585402276277_1127489833985824', data['external_id'] + assert data['error'].nil? + assert !data['title'].blank? + assert !data['username'].blank? + assert !data['author_name'].blank? + assert !data['author_picture'].blank? + assert !data['author_url'].blank? + assert !data['description'].blank? + assert !data['text'].blank? + assert !data['picture'].blank? + assert !data['published_at'].blank? + # data['html'] started to be returned as an empty string for this test + # which is extra weird since we get it even when the page does not exist + # will come back to this + # assert !data['html'].blank? + end + + test "should get facebook data even if crowdtangle fails" do + m = create_media url: 'https://www.facebook.com/ECRG.TheBigO/posts/pfbid036xece5JjgLH7rD9RnCr1ASnjETq7QThCHiH1HqYAcfUZNHav4gFJdYUY7nGU8JB6l' + data = m.as_json + + assert_equal 'facebook', data['provider'] + assert_equal 'item', data['type'] + assert data['external_id'].blank? + assert data['error'].nil? + assert !data['raw']['crowdtangle']['error'].blank? + assert !data['title'].blank? + assert !data['description'].blank? + assert !data['picture'].blank? + assert !data['html'].blank? 
+ end + + test "should return data even if post does not exist" do + m = create_media url: 'https://www.facebook.com/111111111111111/posts/1111111111111111' + data = m.as_json + + assert_equal 'facebook', data['provider'] + assert_equal 'item', data['type'] + assert_equal '111111111111111_1111111111111111', data['external_id'] + assert_equal 'https://www.facebook.com/111111111111111/posts/1111111111111111', data['title'] + assert !data['raw']['crowdtangle']['error'].blank? + assert_equal '', data['username'] + assert_equal '', data['author_name'] + assert_equal '', data['author_picture'] + assert_equal '', data['author_url'] + assert_equal '', data['description'] + assert_equal '', data['picture'] + assert_equal '', data['published_at'] + assert !data['html'].blank? + end +end diff --git a/test/integration/parsers/facebook_profile_test.rb b/test/integration/parsers/facebook_profile_test.rb new file mode 100644 index 00000000..fd03ab51 --- /dev/null +++ b/test/integration/parsers/facebook_profile_test.rb @@ -0,0 +1,48 @@ +require 'test_helper' + +class FacebookProfileIntegrationTest < ActiveSupport::TestCase + test "should parse Facebook page" do + media = create_media url: 'https://www.facebook.com/ironmaiden/?fref=ts' + data = media.as_json + + assert !data['title'].blank? + assert_equal 'ironmaiden', data['username'] + assert_equal 'facebook', data['provider'] + assert_equal 'profile', data['type'] + + # Requires login, so cannot fetch ID from HTML + assert data['id'].blank? + assert data['external_id'].blank? + end + + test "should parse Facebook page with numeric id" do + media = create_media url: 'https://www.facebook.com/pages/Meedan/105510962816034?fref=ts' + data = media.as_json + + assert !data['title'].blank? 
+ assert_equal 'Meedan', data['username'] + assert_not_nil data['description'] + assert_not_nil data['picture'] + assert_not_nil data['published_at'] + assert_equal 'facebook', data['provider'] + assert_equal 'profile', data['type'] + + # Parsed from URL + assert_equal '105510962816034', data['id'] + assert_equal '105510962816034', data['external_id'] + end + + test "should return data even if Facebook page does not exist" do + media = create_media url: 'https://www.facebook.com/pages/fakepage/1111111111111' + data = media.as_json + + assert_equal 'https://www.facebook.com/pages/fakepage/1111111111111', data['title'] + assert_equal 'fakepage', data['username'] + assert data['description'].blank? + assert data['picture'].blank? + assert data['published_at'].blank? + assert_equal 'facebook', data['provider'] + assert_equal 'profile', data['type'] + end +end + diff --git a/test/models/archiver_test.rb b/test/models/archiver_test.rb index a3774b75..b3553a5e 100644 --- a/test/models/archiver_test.rb +++ b/test/models/archiver_test.rb @@ -742,4 +742,36 @@ def quietly_redefine_constant(klass, constant, new_value) ensure WebMock.disable! 
end + + test "MediaArchiver should not notify Sentry when the worker hits the maximum number of retries" do + WebMock.stub_request(:post, /example.com\/webhook/).to_return(status: 200, body: '') + + Media.any_instance.stubs(:follow_redirections) + Media.any_instance.stubs(:get_canonical_url).returns(true) + Media.any_instance.stubs(:try_https) + Media.any_instance.stubs(:parse) + Media.any_instance.stubs(:archive) + + a = create_api_key application_settings: { config: { 'perma_cc_key': 'my-perma-key' }, 'webhook_url': 'https://example.com/webhook.php', 'webhook_token': 'test' } + url = 'http://example.com' + + data = {} + sentry_call_count = 0 + arguments_checker = Proc.new do |e| + sentry_call_count += 1 + assert_equal StandardError, e.class + end + + assert_raises StandardError do + Media.new(url: url, key: a).as_json(archivers: 'perma_cc') + end + + PenderSentry.stub(:notify, arguments_checker) do + Media.give_up({ args: [url, 'perma_cc', nil], error_message: 'Test Archiver' }) + end + + assert_equal 0, sentry_call_count + ensure + WebMock.disable! + end end diff --git a/test/models/parser/facebook_item_test.rb b/test/models/parser/facebook_item_test.rb index 90a53f2a..2b221386 100644 --- a/test/models/parser/facebook_item_test.rb +++ b/test/models/parser/facebook_item_test.rb @@ -1,139 +1,5 @@ require 'test_helper' -class FacebookItemIntegrationTest < ActiveSupport::TestCase - test "should get facebook post with valid data from crowdtangle" do - m = create_media url: 'https://www.facebook.com/144585402276277/posts/1127489833985824' - data = m.as_json - - assert_equal 'facebook', data['provider'] - assert_equal 'item', data['type'] - assert_equal '144585402276277_1127489833985824', data['external_id'] - assert data['error'].blank? - assert !data['title'].blank? - assert !data['username'].blank? - assert !data['author_name'].blank? - assert !data['author_picture'].blank? - assert !data['author_url'].blank? - assert !data['description'].blank? 
- assert !data['text'].blank? - assert !data['picture'].blank? - assert !data['subtype'].blank? - assert !data['published_at'].blank? - end - - test "should set title to URL if an item is unavailable" do - # This URL requires login to see - m = create_media url: 'https://www.facebook.com/caiosba/posts/8457689347638947' - data = m.as_json - - assert_equal 'facebook', data['provider'] - assert_equal 'item', data['type'] - assert_equal 'https://www.facebook.com/caiosba/posts/8457689347638947', data['title'] - end - - # Previous integration tests - test "should get canonical URL from facebook object 3" do - url = 'https://www.facebook.com/54212446406/photos/a.397338611406/10157431603156407/?type=3&theater' - media = Media.new(url: url) - media.as_json({ force: 1 }) - assert_match 'https://www.facebook.com/54212446406/photos/a.397338611406/10157431603156407', media.url - end - - test "should create Facebook post from mobile URL" do - m = create_media url: 'https://m.facebook.com/KIKOLOUREIROofficial/photos/a.10150618138397252/10152555300292252/?type=3&theater' - data = m.as_json - assert !data['title'].blank? - assert_equal 'facebook', data['provider'] - assert_equal 'item', data['type'] - end - - test "should not use Facebook embed if is a link to redirect" do - url = 'https://l.facebook.com/l.php?u=https://hindi.indiatvnews.com/paisa/business-1-07-cr-new-taxpayers-added-dropped-filers-down-at-25-22-lakh-in-fy18-630914&h=AT1WAU-mDHKigOgFNrUsxsS2doGO0_F5W9Yck7oYUx-IsYAHx8JqyHwO02-N0pX8UOlcplZO50px8mkTA1XNyKig8Z2CfX6t3Sh0bHtO9MYPtWqacCm6gOXs5lbC6VGMLjDALNXZ6vg&s=1' - - m = create_media url: url - data = m.as_json - assert !data['title'].blank? 
- assert_equal '', data['html'] - end - - test "should not change url when redirected to login page" do - url = 'https://www.facebook.com/ugmhmyanmar/posts/2850282508516442' - canonical_url = 'https://www.facebook.com/ugmhmyanmar/posts/ugmh-%E1%80%80%E1%80%95%E1%80%BC%E1%80%B1%E1%80%AC%E1%80%90%E1%80%B2%E1%80%B7-ugmh-%E1%80%A1%E1%80%80%E1%80%BC%E1%80%B1%E1%80%AC%E1%80%84%E1%80%BA%E1%80%B8%E1%80%A1%E1%80%95%E1%80%AD%E1%80%AF%E1%80%84%E1%80%BA%E1%80%B8-%E1%81%84%E1%80%80%E1%80%90%E1%80%AD%E1%80%99%E1%80%90%E1%80%8A%E1%80%BA%E1%80%81%E1%80%BC%E1%80%84%E1%80%BA%E1%80%B8-%E1%80%80%E1%80%9C%E1%80%AD%E1%80%94%E1%80%BA%E1%80%80%E1%80%BB%E1%80%85%E1%80%BA%E1%80%80%E1%80%BB%E1%80%81%E1%80%BC%E1%80%84%E1%80%BA%E1%80%B8%E1%80%9B%E1%80%B2%E1%80%B7-%E1%80%A1%E1%80%80%E1%80%BB%E1%80%AD%E1%80%AF%E1%80%B8%E1%80%86%E1%80%80%E1%80%BA%E1%80%9F%E1%80%AC/2850282508516442/' - redirection_to_login_page = 'https://www.facebook.com/login/' - response = 'mock'; response.stubs(:code).returns('302') - response.stubs(:header).returns({ 'location' => redirection_to_login_page }) - response_login_page = 'mock'; response_login_page.stubs(:code).returns('200') - RequestHelper.stubs(:request_url).with(url, 'Get').returns(response) - RequestHelper.stubs(:request_url).with(canonical_url, 'Get').returns(response) - RequestHelper.stubs(:request_url).with(redirection_to_login_page, 'Get').returns(response_login_page) - RequestHelper.stubs(:request_url).with(redirection_to_login_page + '?next=https%3A%2F%2Fwww.facebook.com%2Fugmhmyanmar%2Fposts%2F2850282508516442', 'Get').returns(response_login_page) - - m = create_media url: url - assert_equal canonical_url, m.url - assert_equal url, m.original_url - end - - test "should add login required error, return html and empty description" do - html = "Log in or sign up to view" - RequestHelper.stubs(:get_html).returns(Nokogiri::HTML(html)) - Media.any_instance.stubs(:follow_redirections) - - m = create_media url: 
'https://www.facebook.com/caiosba/posts/3588207164560845' - data = m.as_json - - assert_equal 'Login required to see this profile', data[:error][:message] - assert_equal Lapis::ErrorCodes::const_get('LOGIN_REQUIRED'), data[:error][:code] - assert_equal m.url, data[:title] - assert data[:description].empty? - assert_match "
", data['html'] - end - - test "should get canonical URL parsed from facebook html when it is relative" do - relative_url = '/dina.samak/posts/10153679232246949' - url = "https://www.facebook.com#{relative_url}" - RequestHelper.stubs(:get_html).returns(Nokogiri::HTML("")) - Media.any_instance.stubs(:follow_redirections) - m = create_media url: url - assert_equal url, m.url - end - - test "should get canonical URL parsed from facebook html when it is a page" do - canonical_url = 'https://www.facebook.com/CyrineOfficialPage/posts/10154332542247479' - Media.any_instance.stubs(:get_html).returns(Nokogiri::HTML("")) - Media.any_instance.stubs(:follow_redirections) - Media.stubs(:validate_url).with(canonical_url).returns(true) - m = create_media url: 'https://www.facebook.com/CyrineOfficialPage/posts/10154332542247479?pnref=story.unseen-section' - assert_equal canonical_url, m.url - end - - test "should get the group name when parsing group post" do - url = 'https://www.facebook.com/groups/memetics.hacking/permalink/1580570905320222/' - m = Media.new url: url - data = m.as_json - assert_match /(memetics.hacking|exploring belief systems)/, data['title'] - assert_match /permalink\/1580570905320222/, data['url'] - assert_equal 'facebook', data['provider'] - assert_equal 'item', data['type'] - end - - test "should return html even when FB url is private" do - url = 'https://www.facebook.com/caiosba/posts/1913749825339929' - m = create_media url: url - data = m.as_json - assert_equal 'facebook', data['provider'] - assert_match "
", data['html'] - end - - test "should store oembed data of a facebook post" do - m = create_media url: 'https://www.facebook.com/144585402276277/posts/1127489833985824' - m.as_json - - assert m.data['raw']['oembed'].is_a? Hash - assert_match /facebook.com/, m.data['oembed']['provider_url'] - assert_equal "facebook", m.data['oembed']['provider_name'].downcase - end -end - class FacebookItemUnitTest < ActiveSupport::TestCase def setup isolated_setup @@ -151,14 +17,6 @@ def empty_doc @empty_doc ||= Nokogiri::HTML('') end - def post_doc - @post_doc ||= response_fixture_from_file('facebook-item-page_ironmaiden.html', parse_as: :html) - end - - def pfbid_doc - @pfbid_doc ||= response_fixture_from_file('facebook-item-page_pfbid.html', parse_as: :html) - end - def crowdtangle_response <<~JSON { @@ -296,7 +154,7 @@ def crowdtangle_response_not_found end test "sends tracing information to honeycomb, including updated URL" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) TracingService.expects(:add_attributes_to_current_span).with({ 'app.parser.type' => 'facebook_item', @@ -307,8 +165,28 @@ def crowdtangle_response_not_found Parser::FacebookItem.new('https://www.facebook.com/123456789276277/posts/1127489833985824').parse_data(empty_doc, 'https://www.facebook.com/fakeaccount/posts/original-123456789') end - test "sets fallbacks from metatags on crowdtangle error, and populates HTML" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + test "sets information from crowdtangle" do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) + + parser = Parser::FacebookItem.new('https://www.facebook.com/123456789276277/posts/1127489833985824') + data = parser.parse_data(empty_doc, 
throwaway_url) + + assert data['error'].blank? + assert_equal '123456789276277_1127489833985824', data['external_id'] + assert_equal 'Trent Aric - Meteorologist', data['author_name'] + assert_equal 'TrentAricTV', data['username'] + assert_match /273572839_489238069228086_8419777016738266396_n.jpg/, data['author_picture'] + assert_equal 'https://www.facebook.com/123456789276277', data['author_url'] + assert_match /Look at what the long range computer models are indicating/, data['title'] + assert_match /Look at what the long range computer models are indicating/, data['description'] + assert_match /Look at what the long range computer models are indicating/, data['text'] + assert_match /14602101_1127500960651378_1143375978446192640_n.jpg\?_nc_cat=107&ccb=1-6/, data['picture'] + assert_equal 'native_video', data['subtype'] + assert_equal '2016-10-05 11:15:30', data['published_at'] + end + + test "sets fallbacks from metatags and populates HTML for post on crowdtangle error" do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) doc = Nokogiri::HTML(<<~HTML) @@ -328,14 +206,13 @@ def crowdtangle_response_not_found assert_match /data-href="https:\/\/www.facebook.com\/fakeaccount\/posts\/123456789"/, data.dig('html') end - test "sets fallbacks from title metatags for event and watch URLS on crowdtangle error, and populates HTML" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + test "sets fallbacks from metatags for event and watch URLS on crowdtangle error" do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) doc = Nokogiri::HTML(<<~HTML) - this is also a page title | Facebook HTML data = Parser::FacebookItem.new('https://www.facebook.com/events/331430157280289').parse_data(doc, throwaway_url) @@ -351,9 +228,27 @@ def crowdtangle_response_not_found assert_equal 
'this is the page description', data['description'] end + # not sure if this test is needed + test "should parse and set data from mobile URL" do + url = 'https://m.facebook.com/KIKOLOUREIROofficial/photos/a.10150618138397252/10152555300292252/?type=3&theater' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + + HTML + + data = Parser::FacebookItem.new(url).parse_data(doc, url) + + assert !data['title'].blank? + end + # Implicitly testing MediaCrowdtangleItem + # This test is flaky, need to come back to it test "sends error to sentry when we receive unexpected response from crowdtangle API" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: '') + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: 'something unexpected') data = {} sentry_call_count = 0 @@ -371,49 +266,31 @@ def crowdtangle_response_not_found Parser::FacebookItem::IdsGrabber.any_instance.stubs(:uuid).returns(nil) data = Parser::FacebookItem.new('https://www.facebook.com/55555/posts/123456789').parse_data(empty_doc, throwaway_url) + assert data['error'].blank? assert_match /No ID given for Crowdtangle/, data.dig('raw', 'crowdtangle', 'error', 'message') end test 'sets raw error when crowdtangle request fails' do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + data = Parser::FacebookItem.new('https://www.facebook.com/55555/posts/123456789').parse_data(empty_doc, throwaway_url) assert data['error'].blank? 
assert_match /No results received from Crowdtangle/, data.dig('raw', 'crowdtangle', 'error', 'message') end - test "sets information from crowdtangle" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) - - parser = Parser::FacebookItem.new('https://www.facebook.com/123456789276277/posts/1127489833985824') - data = parser.parse_data(empty_doc, throwaway_url) - - assert data['error'].blank? - assert_equal '123456789276277_1127489833985824', data['external_id'] - assert_equal 'Trent Aric - Meteorologist', data['author_name'] - assert_equal 'TrentAricTV', data['username'] - assert_match /273572839_489238069228086_8419777016738266396_n.jpg/, data['author_picture'] - assert_equal 'https://www.facebook.com/123456789276277', data['author_url'] - assert_match /Look at what the long range computer models are indicating/, data['title'] - assert_match /Look at what the long range computer models are indicating/, data['description'] - assert_match /Look at what the long range computer models are indicating/, data['text'] - assert_match /14602101_1127500960651378_1143375978446192640_n.jpg\?_nc_cat=107&ccb=1-6/, data['picture'] - assert_equal 'native_video', data['subtype'] - assert_equal '2016-10-05 11:15:30', data['published_at'] - end - test "updates URL if different than received from crowdtangle" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) parser = Parser::FacebookItem.new('https://www.facebook.com/123456789276277/posts/1127489833985824') - data = parser.parse_data(empty_doc, throwaway_url) + parser.parse_data(empty_doc, throwaway_url) assert_equal 'https://www.facebook.com/123456789276277/posts/1127489833985824/woo', parser.url end - test 'when crowdtangle returns a different post than we tried to request' do - WebMock.stub_request(:any, 
/api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) + test 'sets raw error when crowdtangle returns a different post than we tried to request' do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) data = Parser::FacebookItem.new('https://www.facebook.com/12345/posts/55555').parse_data(empty_doc, 'https://www.facebook.com/12345/posts/55555') @@ -423,31 +300,36 @@ def crowdtangle_response_not_found assert_nil data['description'] end - test "should return empty html for deleted posts (when doc cannot be returned)" do - RequestHelper.stubs(:get_html).returns(nil) + # facebook started returning an answer for unavailable items (https://github.com/meedan/pender/pull/374) + # the html gives the user more information (that it is an issue on FB's side and not check) + test "sets html for deleted/unavailable posts" do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) - data = Parser::FacebookItem.new('https://www.facebook.com/fakeaccount/posts/12345').parse_data(nil, throwaway_url) - assert_equal '', data[:html] + doc = Nokogiri::HTML(<<~HTML) + + + + HTML + + WebMock.stub_request(:get, 'https://www.facebook.com/fakeaccount/posts/12345').to_return(status: 200, body: doc.to_s) + + data = Parser::FacebookItem.new('https://www.facebook.com/fakeaccount/posts/12345').parse_data(doc, throwaway_url) + assert !data[:html].blank? 
end - test "should return empty html when FB url is from group and cannot be embedded" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: {}.to_json) + test "should return empty html when FB url is from group or event and cannot be embedded" do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) data = Parser::FacebookItem.new('https://www.facebook.com/groups/133819471984630/').parse_data(empty_doc, throwaway_url) - assert_equal '', data['html'] - end - - test "should return empty html when FB url is event and cannot be embedded" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: {}.to_json) data = Parser::FacebookItem.new('https://www.facebook.com/events/331430157280289').parse_data(empty_doc, throwaway_url) - assert_equal '', data['html'] end test "should reject default page titles" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: {}.to_json) + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + parser = Parser::FacebookItem.new('https://www.facebook.com/fakeaccount/posts/12345') doc = Nokogiri::HTML(<<~HTML) @@ -464,12 +346,20 @@ def crowdtangle_response_not_found end test "sets unique title from page description when FB post ID is obscured in URL" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: {}.to_json) + url = "https://www.facebook.com/LittleMix/posts/pfbid0E7xrT6BDrv7r7Ry3kHUSdw2naE6BdFBgH2gTsEY9h1a64DdM3vqPyq8gXaFY5rqhl" + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + + HTML - parser = Parser::FacebookItem.new('https://www.facebook.com/LittleMix/posts/pfbid0E7xrT6BDrv7r7Ry3kHUSdw2naE6BdFBgH2gTsEY9h1a64DdM3vqPyq8gXaFY5rqhl') - data = 
parser.parse_data(pfbid_doc, throwaway_url) + parser = Parser::FacebookItem.new(url) + data = parser.parse_data(doc, throwaway_url) - assert_match /Nothing comes between us/, data['title'] + assert_match "this is the page description", data['title'] end test "#oembed_url returns URL with the instance URL" do @@ -477,20 +367,191 @@ def crowdtangle_response_not_found assert_equal 'https://www.facebook.com/plugins/post/oembed.json/?url=https://www.facebook.com/fakeaccount/posts/1234', oembed_url end - test "should return default data when redirected to login page" do - WebMock.stub_request(:any, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + # we need to test Media here because that's where the default values are set after the FB parser ran + # which makes me wonder if we should be testing this here at all or only in Media + test "should return default data (set title to URL and description to empty string) when redirected to login page" do + url = 'https://m.facebook.com/groups/593719938050039/permalink/1184073722347988' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) doc = Nokogiri::HTML(<<~HTML) HTML - WebMock.stub_request(:any, 'https://m.facebook.com/groups/593719938050039/permalink/1184073722347988/').to_return(status: 200, body: doc.to_s) + WebMock.stub_request(:get, url).to_return(status: 200, body: doc.to_s) - media = Media.new(url: 'https://m.facebook.com/groups/593719938050039/permalink/1184073722347988/') + media = Media.new(url: url) data = media.as_json - assert_equal 'https://m.facebook.com/groups/593719938050039/permalink/1184073722347988', data['title'] + assert_equal url, data['title'] assert_match '', data['description'] end + + # we need to test Media here because that's where we get_canonical_url after the FB parser ran + # which makes me wonder if we should be testing this here at all or only in Media + test "should get canonical 
URL from facebook object 3" do + url_from_facebook_object_3 = 'https://www.facebook.com/54212446406/photos/a.397338611406/10157431603156407/?type=3&theater' + canonical_url = "https://www.facebook.com/54212446406/photos/a.397338611406/10157431603156407" + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + HTML + + WebMock.stub_request(:get, url_from_facebook_object_3).to_return(status: 200, body: doc.to_s) + WebMock.stub_request(:get, canonical_url).to_return(status: 200) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{canonical_url}").to_return(status: 200) + + media = Media.new(url: url_from_facebook_object_3) + data = media.as_json + + assert_match canonical_url, data['url'] + end + + # we need to test Media here because that's where we get_canonical_url after the FB parser ran + # which makes me wonder if we should be testing this here at all or only in Media + test "should return canonical url when redirected to login page" do + url = 'https://www.facebook.com/ugmhmyanmar/posts/2850282508516442' + canonical_url = 'https://www.facebook.com/ugmhmyanmar/posts/ugmh-%E1%80%80%E1%80%95%E1%80%BC%E1%80%B1%E1%80%AC%E1%80%90%E1%80%B2%E1%80%B7-ugmh-%E1%80%A1%E1%80%80%E1%80%BC%E1%80%B1%E1%80%AC%E1%80%84%E1%80%BA%E1%80%B8%E1%80%A1%E1%80%95%E1%80%AD%E1%80%AF%E1%80%84%E1%80%BA%E1%80%B8-%E1%81%84%E1%80%80%E1%80%90%E1%80%AD%E1%80%99%E1%80%90%E1%80%8A%E1%80%BA%E1%80%81%E1%80%BC%E1%80%84%E1%80%BA%E1%80%B8-%E1%80%80%E1%80%9C%E1%80%AD%E1%80%94%E1%80%BA%E1%80%80%E1%80%BB%E1%80%85%E1%80%BA%E1%80%80%E1%80%BB%E1%80%81%E1%80%BC%E1%80%84%E1%80%BA%E1%80%B8%E1%80%9B%E1%80%B2%E1%80%B7-%E1%80%A1%E1%80%80%E1%80%BB%E1%80%AD%E1%80%AF%E1%80%B8%E1%80%86%E1%80%80%E1%80%BA%E1%80%9F%E1%80%AC/2850282508516442/' + redirection_to_login_page = 'https://www.facebook.com/login/' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, 
body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + + HTML + + WebMock.stub_request(:get, url).to_return(status: 302, headers: { 'location' => redirection_to_login_page }) + WebMock.stub_request(:get, canonical_url).to_return(status: 302, headers: { 'location' => redirection_to_login_page }) + WebMock.stub_request(:get, redirection_to_login_page).to_return(status: 200, body: doc.to_s) + + media = Media.new(url: url) + + assert_equal canonical_url, media.url + assert_equal url, media.original_url + end + + # we need to test Media here because that's where we get_canonical_url after the FB parser ran + # which makes me wonder if we should be testing this here at all or only in Media + test "should set parser url to full URL when the facebook html og:url is relative" do + relative_url = '/dina.samak/posts/10153679232246949' + url = "https://www.facebook.com#{relative_url}" + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + + HTML + + WebMock.stub_request(:get, url).to_return(status: 200, body: doc.to_s) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{url}").to_return(status: 200) + + media = Media.new(url: url) + data = media.as_json + + assert_equal url, data['url'] + end + + test "should get canonical URL parsed from facebook html when it is a page" do + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) + + canonical_url = 'https://www.facebook.com/CyrineOfficialPage/posts/10154332542247479' + url = 'https://www.facebook.com/CyrineOfficialPage/posts/10154332542247479?pnref=story.unseen-section' + + doc = Nokogiri::HTML(<<~HTML) + + HTML + + WebMock.stub_request(:get, url).to_return(status: 200, body: doc.to_s) + WebMock.stub_request(:get, canonical_url).to_return(status: 200) + WebMock.stub_request(:get, 
"https://www.facebook.com/plugins/post/oembed.json/?url=#{canonical_url}").to_return(status: 200) + + media = Media.new(url: url) + data = media.as_json + + assert_equal canonical_url, data['url'] + end + + test "should add login required error, return html and empty description when redirected to login" do + url = 'https://m.facebook.com/groups/593719938050039/permalink/1184073722347988/' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + + HTML + + WebMock.stub_request(:get, url).to_return(status: 200, body: doc.to_s) + + parser = Parser::FacebookItem.new(url) + data = parser.parse_data(doc, url) + + assert_equal 'Login required to see this profile', data[:error][:message] + assert_equal Lapis::ErrorCodes::const_get('LOGIN_REQUIRED'), data[:error][:code] + assert data[:description].empty? + assert_match "
", data['html'] + end + + test "should return html and empty description when FB url is private" do + url = 'https://www.facebook.com/caiosba/posts/1913749825339929' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + + HTML + + WebMock.stub_request(:get, url).to_return(status: 200, body: doc.to_s) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{url}").to_return(status: 200) + + media = Media.new(url: url) + data = media.as_json + + assert data[:description].empty? + assert_match "
", data['html'] + end + + test "should get the group name when parsing group post" do + url = 'https://www.facebook.com/groups/memetics.hacking/permalink/1580570905320222' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response_not_found) + + doc = Nokogiri::HTML(<<~HTML) + + + HTML + + WebMock.stub_request(:get, url).to_return(status: 200) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{url}").to_return(status: 200, body: doc.to_s) + + parser = Parser::FacebookItem.new(url) + data = parser.parse_data(doc, url) + + assert_match 'this is the page description', data['title'] + end + + test "should store oembed data of a facebook post" do + url = 'https://www.facebook.com/144585402276277/posts/1127489833985824' + + WebMock.stub_request(:get, /api.crowdtangle.com\/post/).to_return(status: 200, body: crowdtangle_response) + WebMock.stub_request(:get, url).to_return(status: 200) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{url}").to_return(status: 200) + + media = Media.new(url: url) + data = media.as_json + + assert data['oembed'].is_a? Hash + assert_match /facebook.com/, data['oembed']['provider_url'] + assert_equal "facebook", data['oembed']['provider_name'].downcase + end end diff --git a/test/models/parser/facebook_profile_test.rb b/test/models/parser/facebook_profile_test.rb index 7afb871c..9c657bfb 100644 --- a/test/models/parser/facebook_profile_test.rb +++ b/test/models/parser/facebook_profile_test.rb @@ -1,73 +1,5 @@ require 'test_helper' -class FacebookProfileIntegrationTest < ActiveSupport::TestCase - test "should parse Facebook page" do - m = create_media url: 'https://www.facebook.com/ironmaiden/?fref=ts' - data = m.as_json - assert !data['title'].blank? 
- assert_match 'ironmaiden', data['username'] - assert_equal 'facebook', data['provider'] - assert_equal 'profile', data['type'] - - # Requires login, so cannot fetch ID from HTML - assert data['id'].blank? - assert data['external_id'].blank? - end - - test "should parse Facebook page with numeric id" do - m = create_media url: 'https://www.facebook.com/pages/Meedan/105510962816034?fref=ts' - data = m.as_json - assert !data['title'].blank? - assert_match 'Meedan', data['username'] - assert_equal 'facebook', data['provider'] - assert_equal 'profile', data['type'] - assert_not_nil data['description'] - assert_not_nil data['picture'] - assert_not_nil data['published_at'] - - # Parsed from URL - assert_equal '105510962816034', data['id'] - assert_equal '105510962816034', data['external_id'] - end - - test "should parse Facebook with numeric id" do - m = create_media url: 'http://facebook.com/513415662050479' - data = m.as_json - assert_match /facebook.com\/(NautilusMag|513415662050479)/, data['url'] - assert !data['title'].blank? - assert_equal 'facebook', data['provider'] - assert_equal 'profile', data['type'] - - # Parsed from URL - assert_equal '513415662050479', data['id'] - assert_equal '513415662050479', data['external_id'] - end - - test "should parse Arabic Facebook page" do - m = create_media url: 'https://www.facebook.com/%D8%A7%D9%84%D9%85%D8%B1%D9%83%D8%B2-%D8%A7%D9%84%D8%AB%D9%82%D8%A7%D9%81%D9%8A-%D8%A7%D9%84%D9%82%D8%A8%D8%B7%D9%8A-%D8%A7%D9%84%D8%A3%D8%B1%D8%AB%D9%88%D8%B0%D9%83%D8%B3%D9%8A-%D8%A8%D8%A7%D9%84%D9%85%D8%A7%D9%86%D9%8A%D8%A7-179240385797/' - data = m.as_json - assert !data['title'].blank? 
- assert_equal 'facebook', data['provider'] - assert_equal 'profile', data['type'] - end - - test "should parse Arabic URLs" do - assert_nothing_raised do - m = create_media url: 'https://www.facebook.com/إدارة-تموين-أبنوب-217188161807938/' - data = m.as_json - end - end - - test "should store oembed data of a public facebook page" do - m = create_media url: 'https://www.facebook.com/heymeedan' - m.as_json - - assert m.data['raw']['oembed'].is_a?(Hash), "Expected #{m.data['raw']['oembed']} to be a Hash" - assert !m.data['oembed']['author_name'].blank? - assert !m.data['oembed']['title'].blank? - end -end - class FacebookProfileUnitTest < ActiveSupport::TestCase def setup isolated_setup @@ -110,17 +42,7 @@ def throwaway_url end # Note: Not all of these URLs can be visited successfully without logging in - test "matches known URL patterns, and returns instance on success" do - assert_nil Parser::FacebookProfile.match?('https://example.com') - # Below are all Facebook Items - assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/pages/Meedan/105510962816034/photos/') - assert_nil Parser::FacebookProfile.match?('https://m.facebook.com/permalink.php?story_fbid=10154534111016407&id=54212446406') - assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/permalink.php?story_fbid=10154534111016407&id=54212446406') - assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/story.php?story_fbid=10154534111016407&id=54212446406') - assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/photo.php?story_fbid=10154534111016407&id=54212446406') - assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/livemap?story_fbid=10154534111016407&id=54212446406') - assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/watch?story_fbid=10154534111016407&id=54212446406') - + test "matches known URL patterns, and returns instance on success" do assert 
Parser::FacebookProfile.match?('https://facebook.com/heymeedan').is_a?(Parser::FacebookProfile) assert Parser::FacebookProfile.match?('https://m.facebook.com/heymeedan').is_a?(Parser::FacebookProfile) assert Parser::FacebookProfile.match?('https://www.facebook.com/heymeedan').is_a?(Parser::FacebookProfile) @@ -130,8 +52,79 @@ def throwaway_url assert Parser::FacebookProfile.match?('https://www.facebook.com/people/Meedan/105510962816034?fref=ts').is_a?(Parser::FacebookProfile) assert Parser::FacebookProfile.match?('https://www.facebook.com/profile.php?id=105510962816034').is_a?(Parser::FacebookProfile) end + + test "should not match patterns from Facebook items" do + assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/pages/Meedan/105510962816034/photos/') + assert_nil Parser::FacebookProfile.match?('https://m.facebook.com/permalink.php?story_fbid=10154534111016407&id=54212446406') + assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/permalink.php?story_fbid=10154534111016407&id=54212446406') + assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/story.php?story_fbid=10154534111016407&id=54212446406') + assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/photo.php?story_fbid=10154534111016407&id=54212446406') + assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/livemap?story_fbid=10154534111016407&id=54212446406') + assert_nil Parser::FacebookProfile.match?('https://www.facebook.com/watch?story_fbid=10154534111016407&id=54212446406') + end + + test "should not match a page that isn't a Facebook page" do + assert_nil Parser::FacebookProfile.match?('https://example.com') + end + + test "should parse Facebook page" do + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(meedan_doc, throwaway_url) + + assert_equal 'Meedan', data['title'] + assert_equal 'fakeaccount', data['username'] + assert_equal 'Meedan. 3,783 likes · 65 were here. 
Make sense of the global web.', data['description'] + assert_equal '54421674438', data['external_id'] + assert_equal '54421674438', data['id'] + end + + # following the redirections and setting the url to canonical happen in Media + test "should parse Facebook with numeric id and set data['url'] to the canonical url" do + url = 'https://facebook.com/513415662050479' + canonical_url = 'https://www.facebook.com/heymeedan' + picture_url = 'https://scontent-lax3-1.xx.fbcdn.net/v/t39.30808-1/310513247_435753678699138_2623398131510754475_n.png?_nc_cat=110&_nc_ht=scontent-lax3-1.xx&_nc_ohc=d6UgzKKHMJ8AX9tPN2o&_nc_sid=d36de4&ccb=1-7&oe=63DDB83C&oh=00_AfDH7lP98qp_etN0a2ZMms1tp6vx51198IAobPHbRLnSyA' + + WebMock.stub_request(:get, url).to_return(status: 200, body: meedan_doc.to_s) + WebMock.stub_request(:get, canonical_url).to_return(status: 200) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{canonical_url}").to_return(status: 200) + WebMock.stub_request(:get, picture_url).to_return(status: 200) + + media = create_media url: url + data = media.as_json + + assert_equal canonical_url, data['url'] + assert_equal 'Meedan', data['title'] + assert_equal 'facebook', data['provider'] + assert_equal 'profile', data['type'] + + # Parsed from URL + assert_equal '513415662050479', data['id'] + assert_equal '513415662050479', data['external_id'] + end - test "sets error if problem parsing" do + test "should parse Arabic Facebook page" do + parser = Parser::FacebookProfile.new('https://www.facebook.com/%D8%A7%D9%84%D9%85%D8%B1%D9%83%D8%B2-%D8%A7%D9%84%D8%AB%D9%82%D8%A7%D9%81%D9%8A-%D8%A7%D9%84%D9%82%D8%A8%D8%B7%D9%8A-%D8%A7%D9%84%D8%A3%D8%B1%D8%AB%D9%88%D8%B0%D9%83%D8%B3%D9%8A-%D8%A8%D8%A7%D9%84%D9%85%D8%A7%D9%86%D9%8A%D8%A7-179240385797/') + data = parser.parse_data(arabic_doc, throwaway_url) + + assert_equal 'المركز الثقافي القبطي الأرثوذكسي بالمانيا', data['title'] + assert_equal 'المركز-الثقافي-القبطي-الأرثوذكسي-بالمانيا-179240385797', 
data['username'] + assert_match /Bad Kreuznach/ , data['description'] + assert_equal '179240385797', data['external_id'] + assert_equal '179240385797', data['id'] + end + + test "should parse Arabic URLs" do + parser = Parser::FacebookProfile.new('https://www.facebook.com/إدارة-تموين-أبنوب-217188161807938/') + data = parser.parse_data(arabic_doc, throwaway_url) + + assert_equal 'المركز الثقافي القبطي الأرثوذكسي بالمانيا', data['title'] + assert_equal 'إدارة-تموين-أبنوب-217188161807938', data['username'] + assert_match /Bad Kreuznach/ , data['description'] + assert_equal '179240385797', data['external_id'] + assert_equal '179240385797', data['id'] + end + + test "sets error if problem parsing and notifies Sentry" do data = {} sentry_call_count = 0 arguments_checker = Proc.new do |e| @@ -141,7 +134,8 @@ def throwaway_url Parser::FacebookProfile.stub(:get_id_from_doc, -> (_) { raise NoMethodError.new('fake for test') }) do PenderSentry.stub(:notify, arguments_checker) do - data = Parser::FacebookProfile.new('https://www.facebook.com/fake-account').parse_data(nil, 'https://www.facebook.com/fake-account') + parser = Parser::FacebookProfile.new('https://www.facebook.com/fakeaccount') + data = parser.parse_data(nil, 'https://www.facebook.com/fakeaccount') assert_equal 1, sentry_call_count end end @@ -149,7 +143,8 @@ def throwaway_url end test "sets error if login page URL detected" do - data = Parser::FacebookProfile.new('https://www.facebook.com/login/?next=').parse_data(meedan_doc, 'https://www.facebook.com/login/?next=') + parser = Parser::FacebookProfile.new('https://www.facebook.com/login/?next=') + data = parser.parse_data(meedan_doc, 'https://www.facebook.com/login/?next=') assert_equal Lapis::ErrorCodes::const_get('LOGIN_REQUIRED'), data[:error][:code] assert_match /Login required/, data[:error][:message] @@ -158,7 +153,8 @@ def throwaway_url end test "sets error if login page detected from HTML, but not apparent from URL" do - data = 
Parser::FacebookProfile.new('https://facebook.com/fake-ironmaiden').parse_data(login_doc, 'https://facebook.com/fake-ironmaiden') + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(login_doc, 'https://facebook.com/fakeaccount') assert_equal Lapis::ErrorCodes::const_get('LOGIN_REQUIRED'), data[:error][:code] assert_match /Login required/, data[:error][:message] @@ -167,79 +163,91 @@ def throwaway_url end test "sets external_id when it can be extracted from the URL" do - data = Parser::FacebookProfile.new('https://facebook.com/54421674438').parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') + parser = Parser::FacebookProfile.new('https://facebook.com/54421674438') + data = parser.parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] - data = Parser::FacebookProfile.new('https://facebook.com/profile.php?id=54421674438').parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') + parser = Parser::FacebookProfile.new('https://facebook.com/profile.php?id=54421674438') + data = parser.parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] - data = Parser::FacebookProfile.new('https://facebook.com/people/fakeaccount/54421674438').parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') + parser = Parser::FacebookProfile.new('https://facebook.com/people/fakeaccount/54421674438') + data = parser.parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] - data = Parser::FacebookProfile.new('https://facebook.com/pages/fakeaccount/54421674438').parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') + parser = 
Parser::FacebookProfile.new('https://facebook.com/pages/fakeaccount/54421674438') + data = parser.parse_data(meedan_doc, 'https://facebook.com/fake-inconsequential-url') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] end test "sets external_id when it can be extracted from the original URL, but not current URL" do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(meedan_doc, 'https://facebook.com/54421674438') + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(meedan_doc, 'https://facebook.com/54421674438') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] end test "sets external_id from original url if current url is a login redirect" do - data = Parser::FacebookProfile.new('https://facebook.com/login.php?/id=12345').parse_data(meedan_doc,'https://facebook.com/profile.php?id=54421674438') + parser = Parser::FacebookProfile.new('https://facebook.com/login.php?/id=12345') + data = parser.parse_data(meedan_doc,'https://facebook.com/profile.php?id=54421674438') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] end test "sets external_id from HTML if URL matching does not work, but ID present in doc" do - parser = Parser::FacebookProfile.new('https://facebook.com/fake-heymeedan') + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') - data = parser.parse_data(meedan_doc,'https://facebook.com/fake-heymeedan') + data = parser.parse_data(meedan_doc,'https://facebook.com/fakeaccount') assert_equal '54421674438', data['id'] assert_equal '54421674438', data['external_id'] - data = parser.parse_data(arabic_doc, 'https://facebook.com/fake-heymeedan') + data = parser.parse_data(arabic_doc, 'https://facebook.com/fakeaccount') assert_equal '179240385797', data['id'] assert_equal '179240385797', data['external_id'] end test "leaves external_id empty if ID cannot be 
found in URL or HTML" do - data = Parser::FacebookProfile.new('https://facebook.com/fake-ironmaiden').parse_data(login_doc, 'https://facebook.com/fake-ironmaiden') + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(login_doc, 'https://facebook.com/fakeaccount') assert data['external_id'].empty? end test "sets pictures from og:image metatag" do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(meedan_doc, throwaway_url) assert_match /scontent-lax3-1.xx.fbcdn.net\/v\/t39.30808-1\/310513247_435753678699138_2623398131510754475_n.png/, data['picture'] assert_match /scontent-lax3-1.xx.fbcdn.net\/v\/t39.30808-1\/310513247_435753678699138_2623398131510754475_n.png/, data['author_picture'] end test "leaves pictures blank when og:image metatag missing" do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(old_meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(old_meedan_doc, throwaway_url) assert_nil data['picture'] assert_nil data['author_picture'] end test 'sets title from og:title tag if present' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(arabic_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(arabic_doc, throwaway_url) assert_match /المركز الثقافي القبطي الأرثوذكسي بالمانيا/, data['title'] end test 'sets title from title html tag if og:title not present' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(old_meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(old_meedan_doc, throwaway_url) assert_match /Meedan - Nonprofit 
Organization/, data['title'] end test 'returns nil if og or html title tags not present' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(empty_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(empty_doc, throwaway_url) assert_nil data['title'] end @@ -256,12 +264,13 @@ def throwaway_url doc = Nokogiri::HTML(<<~HTML) Watch HTML - parser.parse_data(doc, 'https://facebook.com/fakeaccount') + data = parser.parse_data(doc, 'https://facebook.com/fakeaccount') assert_nil data['title'] end test "should strip '| Facebook' from page titles" do parser = Parser::FacebookProfile.new('https://www.facebook.com/fakeaccount') + doc = Nokogiri::HTML(<<~HTML) Piglet the Dog's post | Facebook HTML @@ -276,96 +285,118 @@ def throwaway_url end test 'sets description from og:description metatag if present' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal "Meedan. 3,783 likes · 65 were here. Make sense of the global web.", data['description'] end test 'sets description from description metatag if og:description not present' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(old_meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(old_meedan_doc, throwaway_url) assert_equal "Meedan. 66 likes. 
Meedan is a non-profit social technology company which aims to increase cross-language interaction on the web, with particular emphasis...", data['description'] end test 'leaves description empty if description not present in HTML present' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(empty_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(empty_doc, throwaway_url) assert_nil data['description'] end test 'gets username from URL when possible' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal 'fakeaccount', data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/people/fakeaccount/123456789').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/people/fakeaccount/123456789') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal 'fakeaccount', data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/pages/fakeaccount/123456789').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/pages/fakeaccount/123456789') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal 'fakeaccount', data['username'] end test 'returns empty username if not clear from URL' do - data = Parser::FacebookProfile.new('https://facebook.com/events/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/events/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/live/123456').parse_data(meedan_doc, throwaway_url) + parser = 
Parser::FacebookProfile.new('https://facebook.com/live/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/livemap/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/livemap/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/watch/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/watch/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/story.php/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/story.php/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/category/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/category/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/photo/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/photo/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] - data = Parser::FacebookProfile.new('https://facebook.com/photo.php/123456').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/photo.php/123456') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] # Note: we don't expect to realistically get this URL pattern in the parser because it would be # redirected to the human-readable link before we parse data - data = 
Parser::FacebookProfile.new('https://facebook.com/123456789').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/123456789') + data = parser.parse_data(meedan_doc, throwaway_url) assert_nil data['username'] end test 'sets author name from URL if possible' do - data = Parser::FacebookProfile.new('https://facebook.com/fakeaccount').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/fakeaccount') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal 'fakeaccount', data['author_name'] - data = Parser::FacebookProfile.new('https://facebook.com/people/fakeaccount/123456789').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/people/fakeaccount/123456789') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal 'fakeaccount', data['author_name'] - data = Parser::FacebookProfile.new('https://facebook.com/pages/fakeaccount/123456789').parse_data(meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/pages/fakeaccount/123456789') + data = parser.parse_data(meedan_doc, throwaway_url) assert_equal 'fakeaccount', data['author_name'] end test 'sets author name from og:title tag if not parseable from URL' do - data = Parser::FacebookProfile.new('https://facebook.com/1234567').parse_data(arabic_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/1234567') + data = parser.parse_data(arabic_doc, throwaway_url) assert_match /المركز الثقافي القبطي الأرثوذكسي بالمانيا/, data['author_name'] end test 'sets author name from title html tag if if not parseable from URL and og:title not present' do - data = Parser::FacebookProfile.new('https://facebook.com/1234567').parse_data(old_meedan_doc, throwaway_url) + parser = Parser::FacebookProfile.new('https://facebook.com/1234567') + data = parser.parse_data(old_meedan_doc, throwaway_url) assert_equal 'Meedan - Nonprofit 
Organization', data['author_name'] end test 'sets author_url to the passed url' do - data = Parser::FacebookProfile.new('https://facebook.com/fake-passed-url').parse_data(meedan_doc, 'https://facebook.com/fake-original-url') + parser = Parser::FacebookProfile.new('https://facebook.com/fake-passed-url') + data = parser.parse_data(meedan_doc, 'https://facebook.com/fake-original-url') assert_equal 'https://facebook.com/fake-passed-url', data['author_url'] end test 'sets author_url to original url if passed url is forbidden' do - data = Parser::FacebookProfile.new('https://facebook.com/login/web').parse_data(meedan_doc, 'https://facebook.com/fake-original-url') + parser = Parser::FacebookProfile.new('https://facebook.com/login/web') + data = parser.parse_data(meedan_doc, 'https://facebook.com/fake-original-url') assert_equal 'https://facebook.com/fake-original-url', data['author_url'] end @@ -374,4 +405,22 @@ def throwaway_url oembed_url = Parser::FacebookProfile.new('https://www.facebook.com/fakeaccount').oembed_url assert_equal 'https://www.facebook.com/plugins/post/oembed.json/?url=https://www.facebook.com/fakeaccount', oembed_url end + + test "should store oembed data of a public facebook page" do + url = 'https://facebook.com/513415662050479' + canonical_url = 'https://www.facebook.com/heymeedan' + picture_url = 'https://scontent-lax3-1.xx.fbcdn.net/v/t39.30808-1/310513247_435753678699138_2623398131510754475_n.png?_nc_cat=110&_nc_ht=scontent-lax3-1.xx&_nc_ohc=d6UgzKKHMJ8AX9tPN2o&_nc_sid=d36de4&ccb=1-7&oe=63DDB83C&oh=00_AfDH7lP98qp_etN0a2ZMms1tp6vx51198IAobPHbRLnSyA' + + WebMock.stub_request(:get, url).to_return(status: 200, body: meedan_doc.to_s) + WebMock.stub_request(:get, canonical_url).to_return(status: 200) + WebMock.stub_request(:get, "https://www.facebook.com/plugins/post/oembed.json/?url=#{canonical_url}").to_return(status: 200) + WebMock.stub_request(:get, picture_url).to_return(status: 200) + + media = create_media url: url + data = media.as_json + + 
assert data['oembed'].is_a?(Hash), "Expected #{data['oembed']} to be a Hash" + assert_equal 'heymeedan', data['oembed']['author_name'] + assert_equal 'Meedan', data['oembed']['title'] + end end diff --git a/test/models/parser/twitter_profile_test.rb b/test/models/parser/twitter_profile_test.rb index e48a2fa5..e1afa595 100644 --- a/test/models/parser/twitter_profile_test.rb +++ b/test/models/parser/twitter_profile_test.rb @@ -39,6 +39,32 @@ def stub_profile_lookup end test "matches known URL patterns, and returns instance on success" do + # Standard profile + match_zero = Parser::TwitterProfile.match?('https://twitter.com/username/') + assert_equal true, match_zero.is_a?(Parser::TwitterProfile) + match_one = Parser::TwitterProfile.match?('https://twitter.com/username') + assert_equal true, match_one.is_a?(Parser::TwitterProfile) + match_two = Parser::TwitterProfile.match?('https://twitter.com/user_name') + assert_equal true, match_two.is_a?(Parser::TwitterProfile) + + # Profile with query + match_three = Parser::TwitterProfile.match?('https://twitter.com/username?ref_src=twsrc%5Etfw') + assert_equal true, match_three.is_a?(Parser::TwitterProfile) + match_four = Parser::TwitterProfile.match?('https://twitter.com/username/?t=1') + assert_equal true, match_four.is_a?(Parser::TwitterProfile) + + # Mobile patterns + match_five = Parser::TwitterProfile.match?('https://0.twitter.com/username') + assert_equal true, match_five.is_a?(Parser::TwitterProfile) + match_six = Parser::TwitterProfile.match?('https://m.twitter.com/username') + assert_equal true, match_six.is_a?(Parser::TwitterProfile) + match_seven = Parser::TwitterProfile.match?('https://mobile.twitter.com/username') + assert_equal true, match_seven.is_a?(Parser::TwitterProfile) + match_eight = Parser::TwitterProfile.match?('https://mobile.twitter.com/username?ref_src=twsrc%5Etfw') + assert_equal true, match_eight.is_a?(Parser::TwitterProfile) + end + + test "does not match pages that should be parsed by pages" do 
assert_nil Parser::TwitterProfile.match?('https://example.com') # Blog posts -> should beparsed as pages @@ -46,24 +72,20 @@ def stub_profile_lookup assert_nil Parser::TwitterProfile.match?('https://blog.twitter.com/official/en_us/topics/events/2018/Embrace-Ramadan-with-various-Twitter-only-activations.html') assert_nil Parser::TwitterProfile.match?('https://business.twitter.com') assert_nil Parser::TwitterProfile.match?('https://business.twitter.com/en/blog/4-tips-Tweeting-live-events.html') + end - # Standard profile - match_one = Parser::TwitterProfile.match?('https://twitter.com/meedan') - assert_equal true, match_one.is_a?(Parser::TwitterProfile) - - # Profile with query - match_one = Parser::TwitterProfile.match?('https://twitter.com/meedan?ref_src=twsrc%5Etfw') - assert_equal true, match_one.is_a?(Parser::TwitterProfile) + test "does not match patterns with usernames that are not permitted by twitter" do + assert_nil Parser::TwitterProfile.match?('https://twitter.com/user whitespace') + assert_nil Parser::TwitterProfile.match?('https://twitter.com/user*@symbols$') + assert_nil Parser::TwitterProfile.match?('https://twitter.com/user-–dash—') + assert_nil Parser::TwitterProfile.match?('https://twitter.com/userwithareallylongusername') + assert_nil Parser::TwitterProfile.match?('https://twitter.com/me') + end - # Mobile patterns - match_two = Parser::TwitterProfile.match?('https://0.twitter.com/meedan') - assert_equal true, match_two.is_a?(Parser::TwitterProfile) - match_three = Parser::TwitterProfile.match?('https://m.twitter.com/meedan') - assert_equal true, match_three.is_a?(Parser::TwitterProfile) - match_four = Parser::TwitterProfile.match?('https://mobile.twitter.com/meedan') - assert_equal true, match_four.is_a?(Parser::TwitterProfile) - match_five = Parser::TwitterProfile.match?('https://mobile.twitter.com/meedan?ref_src=twsrc%5Etfw') - assert_equal true, match_five.is_a?(Parser::TwitterProfile) + test "matches and extracts username correctly even with 
a trailing slash" do + match = 'https://twitter.com/username/'.match(Parser::TwitterProfile.patterns[0]) + username = match['username'] + assert_equal username, 'username' end test "it makes a get request to the user lookup by username endpoint successfully" do diff --git a/test/test_helper.rb b/test/test_helper.rb index 9fb8257f..2b18a190 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -12,7 +12,7 @@ require 'minitest/mock' require 'mocha/minitest' -Minitest::Retry.use!(retry_count: 5) +Minitest::Retry.use!(retry_count: ENV['TEST_RETRY_COUNT'].to_i || 0) Minitest::Retry.on_failure do |_klass, _test_name| sleep 10