7
7
from .common import InfoExtractor
8
8
from ..compat import compat_kwargs
9
9
from ..utils import (
10
+ base_url ,
10
11
determine_ext ,
11
12
ExtractorError ,
12
13
float_or_none ,
13
14
merge_dicts ,
14
15
T ,
15
16
traverse_obj ,
16
17
txt_or_none ,
18
+ url_basename ,
17
19
url_or_none ,
18
20
)
19
21
@@ -33,26 +35,28 @@ class Vbox7IE(InfoExtractor):
33
35
'''
34
36
_EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)' ]
35
37
_GEO_COUNTRIES = ['BG' ]
36
- _GEO_BYPASS = False
37
38
_TESTS = [{
39
+ # the http: URL just redirects here
38
40
'url' : 'https://vbox7.com/play:0946fff23c' ,
39
41
'md5' : '50ca1f78345a9c15391af47d8062d074' ,
40
42
'info_dict' : {
41
43
'id' : '0946fff23c' ,
42
44
'ext' : 'mp4' ,
43
45
'title' : 'Борисов: Притеснен съм за бъдещето на България' ,
44
46
'description' : 'По думите му е опасно страната ни да бъде обявена за "сигурна"' ,
45
- 'thumbnail' : r're:^https?://.*\.jpg$' ,
46
47
'timestamp' : 1470982814 ,
47
48
'upload_date' : '20160812' ,
48
49
'uploader' : 'zdraveibulgaria' ,
50
+ 'thumbnail' : r're:^https?://.*\.jpg$' ,
51
+ 'view_count' : int ,
52
+ 'duration' : 2640 ,
49
53
},
50
54
'expected_warnings' : [
51
55
'Unable to download webpage' ,
52
56
],
53
57
}, {
54
58
'url' : 'http://vbox7.com/play:249bb972c2' ,
55
- 'md5' : 'aaf19465e37ec0b30b918df83ec32c50 ' ,
59
+ 'md5' : '99f65c0c9ef9b682b97313e052734c3f ' ,
56
60
'info_dict' : {
57
61
'id' : '249bb972c2' ,
58
62
'ext' : 'mp4' ,
@@ -61,7 +65,11 @@ class Vbox7IE(InfoExtractor):
61
65
'timestamp' : 1360215023 ,
62
66
'upload_date' : '20130207' ,
63
67
'uploader' : 'svideteliat_ot_varshava' ,
68
+ 'thumbnail' : 'https://i49.vbox7.com/o/249/249bb972c20.jpg' ,
69
+ 'view_count' : int ,
70
+ 'duration' : 83 ,
64
71
},
72
+ 'expected_warnings' : ['Failed to download m3u8 information' ],
65
73
}, {
66
74
'url' : 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1' ,
67
75
'only_matching' : True ,
@@ -76,6 +84,9 @@ def _extract_url(cls, webpage):
76
84
if mobj :
77
85
return mobj .group ('url' )
78
86
87
+ # specialisation to transform what looks like ld+json that
88
+ # may contain invalid character combinations
89
+
79
90
# transform_source=None, fatal=True
80
91
def _parse_json (self , json_string , video_id , * args , ** kwargs ):
81
92
if '"@context"' in json_string [:30 ]:
@@ -103,49 +114,64 @@ def _real_extract(self, url):
103
114
104
115
now = time .time ()
105
116
response = self ._download_json (
106
- 'https://www.vbox7.com/aj/player/item/options?vid=%s' % ( video_id ,) ,
107
- video_id , headers = {'Referer' : url })
117
+ 'https://www.vbox7.com/aj/player/item/options' , video_id ,
118
+ query = { 'vid' : video_id } , headers = {'Referer' : url })
108
119
# estimate time to which possible `ago` member is relative
109
120
now = now + 0.5 * (time .time () - now )
110
121
111
- if 'error' in response :
122
+ if traverse_obj ( response , 'error' ) :
112
123
raise ExtractorError (
113
124
'%s said: %s' % (self .IE_NAME , response ['error' ]), expected = True )
114
125
115
- video_url = traverse_obj (response , ('options' , 'src' , T (url_or_none )))
126
+ src_url = traverse_obj (response , ('options' , 'src' , T (url_or_none ))) or ''
116
127
117
- if '/na.mp4' in video_url or '' :
128
+ fmt_base = url_basename (src_url ).rsplit ('.' , 1 )[0 ].rsplit ('_' , 1 )[0 ]
129
+ if fmt_base in ('na' , 'vn' ):
118
130
self .raise_geo_restricted (countries = self ._GEO_COUNTRIES )
119
131
120
- ext = determine_ext (video_url )
132
+ ext = determine_ext (src_url )
121
133
if ext == 'mpd' :
122
- # In case MPD cannot be parsed, or anyway, get mp4 combined
123
- # formats usually provided to Safari, iOS, and old Windows
134
+ # extract MPD
124
135
try :
125
136
formats , subtitles = self ._extract_mpd_formats_and_subtitles (
126
- video_url , video_id , 'dash' , fatal = False )
127
- except KeyError :
137
+ src_url , video_id , 'dash' , fatal = False )
138
+ except KeyError : # fatal doesn't catch this
128
139
self .report_warning ('Failed to parse MPD manifest' )
129
140
formats , subtitles = [], {}
141
+ elif ext != 'm3u8' :
142
+ formats = [{
143
+ 'url' : src_url ,
144
+ }] if src_url else []
145
+ subtitles = {}
130
146
147
+ if src_url :
148
+ # possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100
149
+ fmt_base = base_url (src_url ) + fmt_base
150
+ # prepare for _extract_m3u8_formats_and_subtitles()
151
+ # hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
152
+ hls_formats = self ._extract_m3u8_formats (
153
+ '{0}.m3u8' .format (fmt_base ), video_id , m3u8_id = 'hls' , fatal = False )
154
+ formats .extend (hls_formats )
155
+ # self._merge_subtitles(hls_subs, target=subtitles)
156
+
157
+ # Whether or not MPD/HLS could be parsed, also get the mp4 combined
158
+ # formats usually provided to Safari, iOS, and old Windows
131
159
video = response ['options' ]
132
160
resolutions = (1080 , 720 , 480 , 240 , 144 )
133
- highest_res = traverse_obj (video , ('highestRes' , T (int ))) or resolutions [0 ]
134
- for res in traverse_obj (video , ('resolutions' , lambda _ , r : int (r ) > 0 )) or resolutions :
135
- if res > highest_res :
136
- continue
137
- formats .append ({
138
- 'url' : video_url .replace ('.mpd' , '_%d.mp4' % res ),
139
- 'format_id' : '%dp' % res ,
161
+ highest_res = traverse_obj (video , (
162
+ 'highestRes' , T (int ))) or resolutions [0 ]
163
+ resolutions = traverse_obj (video , (
164
+ 'resolutions' , lambda _ , r : highest_res >= int (r ) > 0 )) or resolutions
165
+ mp4_formats = traverse_obj (resolutions , (
166
+ Ellipsis , T (lambda res : {
167
+ 'url' : '{0}_{1}.mp4' .format (fmt_base , res ),
168
+ 'format_id' : 'http-{0}' .format (res ),
140
169
'height' : res ,
141
- })
170
+ })))
142
171
# if above formats are flaky, enable the line below
143
- # self._check_formats(formats, video_id)
144
- else :
145
- formats = [{
146
- 'url' : video_url ,
147
- }]
148
- subtitles = {}
172
+ # self._check_formats(mp4_formats, video_id)
173
+ formats .extend (mp4_formats )
174
+
149
175
self ._sort_formats (formats )
150
176
151
177
webpage = self ._download_webpage (url , video_id , fatal = False ) or ''
0 commit comments