Skip to content

Commit

Permalink
Update the parser to fall back to the URL for the title, and update the tests
Browse files Browse the repository at this point in the history
The title we were getting from the ld+json data could be identical for
multiple videos from the same author, so we now set the title to
the URL instead.

Since the title and description we get back might vary, the
integration test now only checks that those two values are strings.
  • Loading branch information
vasconsaurus committed Sep 11, 2023
1 parent d437f68 commit 473706b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
2 changes: 1 addition & 1 deletion app/models/parser/kwai_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def parse_data_for_parser(doc, _original_url, jsonld_array)
handle_exceptions(StandardError) do
jsonld = (jsonld_array.find{|item| item.dig('@type') == 'VideoObject'} || {})

title = get_kwai_text_from_tag(doc, '.info .title') || jsonld.dig('name')
title = get_kwai_text_from_tag(doc, '.info .title')
name = get_kwai_text_from_tag(doc, '.name') || jsonld.dig('creator','name')
description = get_kwai_text_from_tag(doc, '.info .title') || jsonld.dig('transcript') || jsonld.dig('description')
@parsed_data.merge!({
Expand Down
19 changes: 12 additions & 7 deletions test/models/parser/kwai_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ class KwaiIntegrationTest < ActiveSupport::TestCase
assert_equal 'item', data['type']
assert_equal 'kwai', data['provider']
assert_equal 'Arthur Virgilio', data['author_name']
assert_match 'Presidente Zelensky foi consagrado numa sala de reuniões', data['title']
assert_match 'foi consagrado numa sala de reuniões do G7', data['description']
assert_kind_of String, data['title']
assert_kind_of String, data['description']
assert_nil data['error']
end
end
Expand Down Expand Up @@ -41,21 +41,26 @@ def teardown
doc = response_fixture_from_file('kwai-page.html', parse_as: :html)

data = Parser::KwaiItem.new('https://s.kw.ai/p/example').parse_data(doc)
assert_equal 'A special video', data[:title]
assert_equal 'A special video', data[:description]
assert_equal "A special video", data[:title]
assert_equal "A special video", data[:description]
assert_equal 'Reginaldo Silva2871', data[:author_name]
assert_equal 'Reginaldo Silva2871', data[:username]
end

test "assigns values to hash from the json+ld" do
empty_doc = Nokogiri::HTML('')

jsonld = [{"url"=>"https://www.kwai.com/@fakeuser/video/5221229445268222050", "name"=>"Fake User. Áudio original criado por Fake User.", "description"=>"#tag1 #tag2 #tag3", "transcript"=>"video transcript", "creator"=>{"name"=>"Fake User", "description"=>"Fake User Description", "alternateName"=>"fakeuser", "url"=>"https://www.kwai.com/@fakeuser"}, "@context"=>"https://schema.org/", "@type"=>"VideoObject"}]
doc = Nokogiri::HTML(<<~HTML)
<script data-n-head="ssr" type="application/ld+json" id="VideoObject">{"url":"https://www.kwai.com/@fakeuser/video/5221229445268222050","name":"Fake User. Áudio original criado por Fake User. ","description":"#tag1 #tag2 #tag3","transcript":"video transcript","thumbnailUrl":["http://ak-br-pic.kwai.net/kimg/fake_image_thumbnail.webp"],"uploadDate":"2023-05-22 01:41:04","contentUrl":"https://aws-br-cdn.kwai.net/upic/2023/05/22/01/fake_link.mp4?tag=1-1694439486-s-0-rnlkpacssc-56115f1493ef597d","commentCount":105,"duration":"PT1M7S","width":592,"height":1280,"audio":{"name":"Áudio original criado por Fake User","author":"Fake User","@type":"CreativeWork"},"creator":{"name":"Fake User","image":"https://aws-br-pic.kwai.net/bs2/overseaHead/fake_image.jpg","description":"Fake User Description","alternateName":"fakeuser","url":"https://www.kwai.com/@fakeuser","genre":["News","Politics & Economics"],"mainEntityOfPage":{"@id":"https://www.kwai.com/@fakeuser/video/5221229445268222050","@type":"ItemPage"},"@context":"https://schema.org/","@type":"VideoObject"}</script>
HTML

data = Parser::KwaiItem.new('https://www.kwai.com/fakelink').parse_data(empty_doc, 'https://www.kwai.com/fakelink', jsonld)
WebMock.stub_request(:any, 'https://www.kwai.com/@fakeuser/video/5221229445268222050').to_return(status: 200, body: doc.to_s)

media = Media.new(url: 'https://www.kwai.com/@fakeuser/video/5221229445268222050')
data = media.as_json

assert_equal 'video transcript', data['description']
assert_equal 'Fake User. Áudio original criado por Fake User.', data['title']
assert_equal 'https://www.kwai.com/@fakeuser/video/5221229445268222050', data['title']
assert_equal 'Fake User', data['author_name']
end
end

0 comments on commit 473706b

Please sign in to comment.