mirror of https://github.com/ytdl-org/youtube-dl
Compare commits
2 Commits
48ddab1f3a
...
40bd5c1815
Author | SHA1 | Date |
---|---|---|
Aaron Tan | 40bd5c1815 | 3 months ago |
dirkf | 70f230f9cf | 3 months ago |
@ -0,0 +1,79 @@ |
|||||||
|
# coding: utf-8 |
||||||
|
from __future__ import unicode_literals |
||||||
|
|
||||||
|
from .common import InfoExtractor |
||||||
|
from ..utils import ( |
||||||
|
determine_ext, |
||||||
|
int_or_none, |
||||||
|
merge_dicts, |
||||||
|
parse_iso8601, |
||||||
|
T, |
||||||
|
traverse_obj, |
||||||
|
txt_or_none, |
||||||
|
urljoin, |
||||||
|
) |
||||||
|
|
||||||
|
|
||||||
|
class CaffeineTVIE(InfoExtractor): |
||||||
|
_VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)' |
||||||
|
_TESTS = [{ |
||||||
|
'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', |
||||||
|
'info_dict': { |
||||||
|
'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', |
||||||
|
'ext': 'mp4', |
||||||
|
'title': 'GOOOOD MORNINNNNN #highlights', |
||||||
|
'timestamp': 1654702180, |
||||||
|
'upload_date': '20220608', |
||||||
|
'uploader': 'TsuSurf', |
||||||
|
'duration': 3145, |
||||||
|
'age_limit': 17, |
||||||
|
}, |
||||||
|
'params': { |
||||||
|
'format': 'bestvideo', |
||||||
|
}, |
||||||
|
}] |
||||||
|
|
||||||
|
def _real_extract(self, url): |
||||||
|
video_id = self._match_id(url) |
||||||
|
json_data = self._download_json( |
||||||
|
'https://api.caffeine.tv/social/public/activity/' + video_id, |
||||||
|
video_id) |
||||||
|
broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {} |
||||||
|
title = broadcast_info['broadcast_title'] |
||||||
|
video_url = broadcast_info['video_url'] |
||||||
|
|
||||||
|
ext = determine_ext(video_url) |
||||||
|
if ext == 'm3u8': |
||||||
|
formats = self._extract_m3u8_formats( |
||||||
|
video_url, video_id, 'mp4', entry_protocol='m3u8', |
||||||
|
fatal=False) |
||||||
|
else: |
||||||
|
formats = [{'url': video_url}] |
||||||
|
self._sort_formats(formats) |
||||||
|
|
||||||
|
return merge_dicts({ |
||||||
|
'id': video_id, |
||||||
|
'title': title, |
||||||
|
'formats': formats, |
||||||
|
}, traverse_obj(json_data, { |
||||||
|
'uploader': ((None, 'user'), 'username'), |
||||||
|
}, get_all=False), traverse_obj(json_data, { |
||||||
|
'like_count': ('like_count', T(int_or_none)), |
||||||
|
'view_count': ('view_count', T(int_or_none)), |
||||||
|
'comment_count': ('comment_count', T(int_or_none)), |
||||||
|
'tags': ('tags', Ellipsis, T(txt_or_none)), |
||||||
|
'is_live': 'is_live', |
||||||
|
'uploader': ('user', 'name'), |
||||||
|
}), traverse_obj(broadcast_info, { |
||||||
|
'duration': ('content_duration', T(int_or_none)), |
||||||
|
'timestamp': ('broadcast_start_time', T(parse_iso8601)), |
||||||
|
'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))), |
||||||
|
'age_limit': ('content_rating', T(lambda r: r and { |
||||||
|
# assume Apple Store ratings [1] |
||||||
|
# 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system |
||||||
|
'FOUR_PLUS': 0, |
||||||
|
'NINE_PLUS': 9, |
||||||
|
'TWELVE_PLUS': 12, |
||||||
|
'SEVENTEEN_PLUS': 17, |
||||||
|
}.get(r, 17))), |
||||||
|
})) |
@ -0,0 +1,139 @@ |
|||||||
|
# coding: utf-8 |
||||||
|
from __future__ import unicode_literals |
||||||
|
|
||||||
|
from .common import InfoExtractor |
||||||
|
from ..utils import ( |
||||||
|
extract_attributes, |
||||||
|
ExtractorError, |
||||||
|
T, |
||||||
|
traverse_obj, |
||||||
|
txt_or_none, |
||||||
|
url_or_none, |
||||||
|
) |
||||||
|
|
||||||
|
|
||||||
|
class GBNewsIE(InfoExtractor): |
||||||
|
IE_DESC = 'GB News clips, features and live stream' |
||||||
|
|
||||||
|
# \w+ is normally shows or news, but apparently any word redirects to the correct URL |
||||||
|
_VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)' |
||||||
|
|
||||||
|
_PLATFORM = 'safari' |
||||||
|
_SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php' |
||||||
|
_TESTS = [{ |
||||||
|
'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889', |
||||||
|
'info_dict': { |
||||||
|
'id': '106889', |
||||||
|
'ext': 'mp4', |
||||||
|
'title': "Andrew Neil's message to companies choosing to boycott GB News", |
||||||
|
'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351', |
||||||
|
}, |
||||||
|
'skip': '404 not found', |
||||||
|
}, { |
||||||
|
'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row', |
||||||
|
'info_dict': { |
||||||
|
'id': '52264136', |
||||||
|
'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row', |
||||||
|
'ext': 'mp4', |
||||||
|
'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism', |
||||||
|
'description': 'The post was criticised by former employers of the broadcaster', |
||||||
|
}, |
||||||
|
}, { |
||||||
|
'url': 'https://www.gbnews.uk/watchlive', |
||||||
|
'info_dict': { |
||||||
|
'id': '1069', |
||||||
|
'display_id': 'watchlive', |
||||||
|
'ext': 'mp4', |
||||||
|
'title': 'GB News Live', |
||||||
|
'is_live': True, |
||||||
|
}, |
||||||
|
'params': { |
||||||
|
'skip_download': 'm3u8', |
||||||
|
}, |
||||||
|
}] |
||||||
|
|
||||||
|
def _real_extract(self, url): |
||||||
|
display_id = self._match_id(url).split('/')[-1] |
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id) |
||||||
|
# extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341 |
||||||
|
''' |
||||||
|
<div id="video-106908" |
||||||
|
class="simplestream" |
||||||
|
data-id="GB001" |
||||||
|
data-type="vod" |
||||||
|
data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs" |
||||||
|
data-token="f9c317c727dc07f515b20036c8ef14a6" |
||||||
|
data-expiry="1624300052" |
||||||
|
data-uvid="37900558" |
||||||
|
data-poster="https://thumbnails.simplestreamcdn.com/gbnews/ondemand/37900558.jpg?width=700&" |
||||||
|
data-npaw="false" |
||||||
|
data-env="production"> |
||||||
|
''' |
||||||
|
# exception if no match |
||||||
|
video_data = self._search_regex( |
||||||
|
r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)', |
||||||
|
webpage, 'video data') |
||||||
|
|
||||||
|
video_data = extract_attributes(video_data) |
||||||
|
ss_id = video_data.get('data-id') |
||||||
|
if not ss_id: |
||||||
|
raise ExtractorError('Simplestream ID not found') |
||||||
|
|
||||||
|
json_data = self._download_json( |
||||||
|
self._SSMP_URL, display_id, |
||||||
|
note='Downloading Simplestream JSON metadata', |
||||||
|
errnote='Unable to download Simplestream JSON metadata', |
||||||
|
query={ |
||||||
|
'id': ss_id, |
||||||
|
'env': video_data.get('data-env', 'production'), |
||||||
|
}, fatal=False) |
||||||
|
|
||||||
|
meta_url = traverse_obj(json_data, ('response', 'api_hostname')) |
||||||
|
if not meta_url: |
||||||
|
raise ExtractorError('No API host found') |
||||||
|
|
||||||
|
uvid = video_data['data-uvid'] |
||||||
|
dtype = video_data.get('data-type') |
||||||
|
stream_data = self._download_json( |
||||||
|
'%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid), |
||||||
|
uvid, |
||||||
|
query={ |
||||||
|
'key': video_data.get('data-key'), |
||||||
|
'platform': self._PLATFORM, |
||||||
|
}, |
||||||
|
headers={ |
||||||
|
'Token': video_data.get('data-token'), |
||||||
|
'Token-Expiry': video_data.get('data-expiry'), |
||||||
|
'Uvid': uvid, |
||||||
|
}, fatal=False) |
||||||
|
|
||||||
|
stream_url = traverse_obj(stream_data, ( |
||||||
|
'response', 'stream', T(url_or_none))) |
||||||
|
if not stream_url: |
||||||
|
raise ExtractorError('No stream data/URL') |
||||||
|
|
||||||
|
# now known to be a dict |
||||||
|
stream_data = stream_data['response'] |
||||||
|
drm = stream_data.get('drm') |
||||||
|
if drm: |
||||||
|
self.report_drm(uvid) |
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats( |
||||||
|
stream_url, uvid, ext='mp4', entry_protocol='m3u8_native', |
||||||
|
fatal=False) |
||||||
|
# exception if no formats |
||||||
|
self._sort_formats(formats) |
||||||
|
|
||||||
|
return { |
||||||
|
'id': uvid, |
||||||
|
'display_id': display_id, |
||||||
|
'title': (traverse_obj(stream_data, ('title', T(txt_or_none))) |
||||||
|
or self._og_search_title(webpage, default=None) |
||||||
|
or display_id.replace('-', ' ').capitalize()), |
||||||
|
'description': self._og_search_description(webpage, default=None), |
||||||
|
'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none))) |
||||||
|
or self._og_search_thumbnail(webpage)), |
||||||
|
'formats': formats, |
||||||
|
'is_live': (dtype == 'live') or None, |
||||||
|
} |
Loading…
Reference in new issue