Skip to content

Commit

Permalink
update parsers to use the new api
Browse files Browse the repository at this point in the history
Made some small fixes and cleaned up methods that no longer work and that,
I believe, are no longer needed.

note:
author_url: author_url(url, user) || parsed_data['raw']['api']['includes']['users'][0]['url']

The API returns an author URL, but it is an external URL, not the
twitter_author_url. So I added a small method that returns the author's Twitter URL;
if that fails, the external URL is returned instead.
  • Loading branch information
vasconsaurus committed Aug 1, 2023
1 parent a512116 commit 498e521
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 30 deletions.
34 changes: 11 additions & 23 deletions app/models/parser/twitter_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,26 @@ def parse_data_for_parser(_doc, _original_url, _jsonld_array)
user, id = parts['user'], parts['id']

@parsed_data['raw']['api'] = {}
handle_twitter_exceptions do
@parsed_data['raw']['api'] = tweet_lookup(id)
end
@parsed_data['raw']['api'] = tweet_lookup(id)

@parsed_data[:error] = parsed_data.dig(:raw, :api, :error)
@parsed_data.merge!({
external_id: id,
username: '@' + user,
title: parsed_data['raw']['api']['data'][0]['text'] || stripped_title(parsed_data),
description: parsed_data['raw']['api']['data'][0]['text'] || parsed_data.dig('raw', 'api', 'text') || parsed_data.dig('raw', 'api', 'full_text'),
picture: parsed_data['raw']['api']['description'] || picture_url(parsed_data),
author_picture: parsed_data['raw']['api']['includes']['users'][0]['profile_image_url'] || author_picture_url(parsed_data),
published_at: parsed_data['raw']['api']['data'][0]['created_at'] || parsed_data.dig('raw', 'api', 'created_at'),
title: parsed_data['raw']['api']['data'][0]['text'],
description: parsed_data['raw']['api']['data'][0]['text'],
picture: parsed_data['raw']['api']['includes']['media'][0]['url'],
author_picture: parsed_data['raw']['api']['includes']['users'][0]['profile_image_url'].gsub('_normal', ''),
published_at: parsed_data['raw']['api']['data'][0]['created_at'],
html: html_for_twitter_item(parsed_data, url),
author_name: parsed_data['raw']['api']['includes']['users'][0]['name'] || parsed_data.dig('raw', 'api', 'user', 'name'),
author_url: parsed_data['raw']['api']['includes']['users'][0]['url'] || twitter_author_url(user) || RequestHelper.top_url(url)
author_name: parsed_data['raw']['api']['includes']['users'][0]['name'],
author_url: author_url(url, user) || parsed_data['raw']['api']['includes']['users'][0]['url']
})
parsed_data
end

# Returns the tweet text with every run of whitespace collapsed to a single
# space. The text is read from raw.api.text, falling back to
# raw.api.full_text; returns nil when neither is present.
def stripped_title(data)
  raw_text = data.dig('raw', 'api', 'text') || data.dig('raw', 'api', 'full_text')
  return unless raw_text

  raw_text.gsub(/\s+/, ' ')
end

# Returns the author's profile image URL read from
# raw.api.user.profile_image_url_https with every '_normal' occurrence
# removed, or nil when that field is absent.
def author_picture_url(data)
  avatar = data.dig('raw', 'api', 'user', 'profile_image_url_https')
  return unless avatar

  avatar.gsub('_normal', '')
end

def picture_url(data)
item_media = data.dig('raw', 'api', 'entities', 'media')
(item_media.dig(0, 'media_url_https') || item_media.dig(0, 'media_url')) if item_media
# Builds the author's profile address from the host of +url+ and the
# username, e.g. "twitter.com/some_user". No scheme is prepended.
#
# Returns nil when +url+ cannot be parsed, has no host, or +user+ is nil.
# The caller chains an API-provided URL with `||`, so failures must yield
# nil rather than raise for that fallback to be reachable.
def author_url(url, user)
  host = URI(url).host
  return unless host && user

  "#{host}/#{user}"
rescue URI::InvalidURIError, ArgumentError
  # Deliberate: an unparsable or nil URL means "no author URL", not an error.
  nil
end

def html_for_twitter_item(data, url)
Expand Down
14 changes: 7 additions & 7 deletions app/models/parser/twitter_profile.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ def parse_data_for_parser(doc, _original_url, _jsonld_array)
@url = replace_subdomain_pattern(url)
username = url.match(/^https?:\/\/(www\.)?twitter\.com\/([^\/]+)$/)[2]

@parsed_data[:raw][:api] = {}
handle_twitter_exceptions do
@parsed_data[:raw][:api] = user_lookup_by_username(username)
picture_url = parsed_data[:raw][:api]['data'][0]['profile_image_url'].gsub('_normal', '')
set_data_field('picture', picture_url)
set_data_field('author_picture', picture_url)
end
@parsed_data[:raw][:api] = {}
@parsed_data[:raw][:api] = user_lookup_by_username(username)

picture_url = parsed_data[:raw][:api]['data'][0]['profile_image_url'].gsub('_normal', '')
set_data_field('picture', picture_url)
set_data_field('author_picture', picture_url)

@parsed_data[:error] = parsed_data.dig(:raw, :api, :error)
username = parsed_data['raw']['api']['data'][0]['username']
@parsed_data.merge!({
Expand Down

0 comments on commit 498e521

Please sign in to comment.