From 498e521da4bb2a67a45bb82972c8e65cdf658fbe Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Tue, 1 Aug 2023 11:05:16 -0300 Subject: [PATCH] Update parsers to use the new API. Made some small fixes and removed methods that no longer work and that, I believe, are no longer needed. Note on author_url: author_url(url, user) || parsed_data['raw']['api']['includes']['users'][0]['url'] The API returns an author URL for us, but it is an external URL, not the twitter_author_url. So I added a small method that returns the author's Twitter URL; if that fails, the external URL is returned instead. --- app/models/parser/twitter_item.rb | 34 +++++++++------------------- app/models/parser/twitter_profile.rb | 14 ++++++------ 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/app/models/parser/twitter_item.rb b/app/models/parser/twitter_item.rb index 7ead9ecd..ad9eaacb 100644 --- a/app/models/parser/twitter_item.rb +++ b/app/models/parser/twitter_item.rb @@ -24,38 +24,26 @@ def parse_data_for_parser(_doc, _original_url, _jsonld_array) user, id = parts['user'], parts['id'] @parsed_data['raw']['api'] = {} - handle_twitter_exceptions do - @parsed_data['raw']['api'] = tweet_lookup(id) - end + @parsed_data['raw']['api'] = tweet_lookup(id) + @parsed_data[:error] = parsed_data.dig(:raw, :api, :error) @parsed_data.merge!({ external_id: id, username: '@' + user, - title: parsed_data['raw']['api']['data'][0]['text'] || stripped_title(parsed_data), - description: parsed_data['raw']['api']['data'][0]['text'] || parsed_data.dig('raw', 'api', 'text') || parsed_data.dig('raw', 'api', 'full_text'), - picture: parsed_data['raw']['api']['description'] || picture_url(parsed_data), - author_picture: parsed_data['raw']['api']['includes']['users'][0]['profile_image_url'] || author_picture_url(parsed_data), - published_at: parsed_data['raw']['api']['data'][0]['created_at'] || parsed_data.dig('raw', 'api', 'created_at'), + title: parsed_data['raw']['api']['data'][0]['text'], + description: 
parsed_data['raw']['api']['data'][0]['text'], + picture: parsed_data['raw']['api']['includes']['media'][0]['url'], + author_picture: parsed_data['raw']['api']['includes']['users'][0]['profile_image_url'].gsub('_normal', ''), + published_at: parsed_data['raw']['api']['data'][0]['created_at'], html: html_for_twitter_item(parsed_data, url), - author_name: parsed_data['raw']['api']['includes']['users'][0]['name'] || parsed_data.dig('raw', 'api', 'user', 'name'), - author_url: parsed_data['raw']['api']['includes']['users'][0]['url'] || twitter_author_url(user) || RequestHelper.top_url(url) + author_name: parsed_data['raw']['api']['includes']['users'][0]['name'], + author_url: author_url(url, user) || parsed_data['raw']['api']['includes']['users'][0]['url'] }) parsed_data end - def stripped_title(data) - title = (data.dig('raw', 'api', 'text') || data.dig('raw', 'api', 'full_text')) - title.gsub(/\s+/, ' ') if title - end - - def author_picture_url(data) - picture_url = data.dig('raw', 'api', 'user', 'profile_image_url_https') - picture_url.gsub('_normal', '') if picture_url - end - - def picture_url(data) - item_media = data.dig('raw', 'api', 'entities', 'media') - (item_media.dig(0, 'media_url_https') || item_media.dig(0, 'media_url')) if item_media + def author_url(url, user) + URI(url).host + '/' + user end def html_for_twitter_item(data, url) diff --git a/app/models/parser/twitter_profile.rb b/app/models/parser/twitter_profile.rb index 60d26a41..7e96b27b 100644 --- a/app/models/parser/twitter_profile.rb +++ b/app/models/parser/twitter_profile.rb @@ -22,13 +22,13 @@ def parse_data_for_parser(doc, _original_url, _jsonld_array) @url = replace_subdomain_pattern(url) username = url.match(/^https?:\/\/(www\.)?twitter\.com\/([^\/]+)$/)[2] - @parsed_data[:raw][:api] = {} - handle_twitter_exceptions do - @parsed_data[:raw][:api] = user_lookup_by_username(username) - picture_url = parsed_data[:raw][:api]['data'][0]['profile_image_url'].gsub('_normal', '') - 
set_data_field('picture', picture_url) - set_data_field('author_picture', picture_url) - end + @parsed_data[:raw][:api] = {} + @parsed_data[:raw][:api] = user_lookup_by_username(username) + + picture_url = parsed_data[:raw][:api]['data'][0]['profile_image_url'].gsub('_normal', '') + set_data_field('picture', picture_url) + set_data_field('author_picture', picture_url) + @parsed_data[:error] = parsed_data.dig(:raw, :api, :error) username = parsed_data['raw']['api']['data'][0]['username'] @parsed_data.merge!({