Skip to content

Commit

Permalink
[ie/youtube:tab] Fix channel metadata extraction (yt-dlp#10071)
Browse files (browse the repository at this point in the history)
Closes yt-dlp#9893, Closes yt-dlp#10090
Authored by: bashonly, shoxie007

Co-authored-by: shoxie007 <[email protected]>
  • Loading branch information
bashonly and shoxie007 authored Jun 13, 2024
1 parent ea88129 commit a0d9967
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions yt_dlp/extractor/youtube.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -885,14 +885,14 @@ def _get_count(self, data, *path_list):
return count

@staticmethod
def _extract_thumbnails(data, *path_list):
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
"""
Extract thumbnails from thumbnails dict
@param path_list: path list to level that contains the final_key key
@param final_key: name of the key holding the list of thumbnail dicts (default: 'thumbnails')
"""
thumbnails = []
for path in path_list or [()]:
for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)):
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
Expand Down Expand Up @@ -5124,6 +5124,10 @@ def _extract_metadata_from_tabs(self, item_id, data):
else:
metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

# pageHeaderViewModel slow rollout began April 2024
page_header_view_model = traverse_obj(data, (
'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))

# We can get the uncropped banner/avatar by replacing the crop params with '=s0'
# See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
def _get_uncropped(url):
Expand All @@ -5139,8 +5143,10 @@ def _get_uncropped(url):
'preference': 1,
})

channel_banners = self._extract_thumbnails(
data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
channel_banners = (
self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
or self._extract_thumbnails(
page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
for banner in channel_banners:
banner['preference'] = -10

Expand All @@ -5167,7 +5173,11 @@ def _get_uncropped(url):
or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
or info['id']),
'availability': self._extract_availability(data),
'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
'channel_follower_count': (
self._get_count(data, ('header', ..., 'subscriberCountText'))
or traverse_obj(page_header_view_model, (
'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
Expand Down

0 comments on commit a0d9967

Please sign in to comment.