|
|
@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
from .adobepass import AdobePassIE
|
|
|
|
from .adobepass import AdobePassIE
|
|
|
|
|
|
|
|
from ..compat import compat_str
|
|
|
|
from ..utils import (
|
|
|
|
from ..utils import (
|
|
|
|
int_or_none,
|
|
|
|
int_or_none,
|
|
|
|
determine_ext,
|
|
|
|
determine_ext,
|
|
|
|
parse_age_limit,
|
|
|
|
parse_age_limit,
|
|
|
|
|
|
|
|
try_get,
|
|
|
|
urlencode_postdata,
|
|
|
|
urlencode_postdata,
|
|
|
|
ExtractorError,
|
|
|
|
ExtractorError,
|
|
|
|
)
|
|
|
|
)
|
|
|
@ -116,6 +118,18 @@ class GoIE(AdobePassIE):
|
|
|
|
# m3u8 download
|
|
|
|
# m3u8 download
|
|
|
|
'skip_download': True,
|
|
|
|
'skip_download': True,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
}, {
|
|
|
|
|
|
|
|
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
|
|
|
|
|
|
|
|
'info_dict': {
|
|
|
|
|
|
|
|
'id': 'VDKA22600213',
|
|
|
|
|
|
|
|
'ext': 'mp4',
|
|
|
|
|
|
|
|
'title': 'Pilot',
|
|
|
|
|
|
|
|
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
'params': {
|
|
|
|
|
|
|
|
# m3u8 download
|
|
|
|
|
|
|
|
'skip_download': True,
|
|
|
|
|
|
|
|
},
|
|
|
|
}, {
|
|
|
|
}, {
|
|
|
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
|
|
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
|
|
|
'only_matching': True,
|
|
|
|
'only_matching': True,
|
|
|
@ -149,14 +163,30 @@ class GoIE(AdobePassIE):
|
|
|
|
brand = site_info.get('brand')
|
|
|
|
brand = site_info.get('brand')
|
|
|
|
if not video_id or not site_info:
|
|
|
|
if not video_id or not site_info:
|
|
|
|
webpage = self._download_webpage(url, display_id or video_id)
|
|
|
|
webpage = self._download_webpage(url, display_id or video_id)
|
|
|
|
video_id = self._search_regex(
|
|
|
|
data = self._parse_json(
|
|
|
|
(
|
|
|
|
self._search_regex(
|
|
|
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
|
|
|
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
|
|
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
|
|
|
'data', default='{}'),
|
|
|
|
r'data-video-id=["\']*(VDKA\w+)',
|
|
|
|
display_id or video_id, fatal=False)
|
|
|
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
|
|
|
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
|
|
|
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
|
|
|
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
|
|
|
), webpage, 'video id', default=video_id)
|
|
|
|
video_id = None
|
|
|
|
|
|
|
|
if layout:
|
|
|
|
|
|
|
|
video_id = try_get(
|
|
|
|
|
|
|
|
layout,
|
|
|
|
|
|
|
|
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
|
|
|
|
|
|
|
compat_str)
|
|
|
|
|
|
|
|
if not video_id:
|
|
|
|
|
|
|
|
video_id = self._search_regex(
|
|
|
|
|
|
|
|
(
|
|
|
|
|
|
|
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
|
|
|
|
|
|
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
|
|
|
|
|
|
|
r'data-video-id=["\']*(VDKA\w+)',
|
|
|
|
|
|
|
|
# page.analytics.videoIdCode
|
|
|
|
|
|
|
|
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
|
|
|
|
|
|
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
|
|
|
|
|
|
|
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
|
|
|
|
|
|
|
), webpage, 'video id', default=video_id)
|
|
|
|
if not site_info:
|
|
|
|
if not site_info:
|
|
|
|
brand = self._search_regex(
|
|
|
|
brand = self._search_regex(
|
|
|
|
(r'data-brand=\s*["\']\s*(\d+)',
|
|
|
|
(r'data-brand=\s*["\']\s*(\d+)',
|
|
|
|