|
5 | 5 | import time
|
6 | 6 |
|
7 | 7 | from .common import InfoExtractor
|
| 8 | +from ..compat import compat_kwargs |
8 | 9 | from ..utils import (
|
9 | 10 | determine_ext,
|
10 | 11 | ExtractorError,
|
@@ -75,6 +76,27 @@ def _extract_url(cls, webpage):
|
75 | 76 | if mobj:
|
76 | 77 | return mobj.group('url')
|
77 | 78 |
|
| 79 | + # transform_source=None, fatal=True |
def _parse_json(self, json_string, video_id, *args, **kwargs):
    """Parse JSON, repairing raw line breaks in ld+json string values.

    When `json_string` looks like ld+json (a '"@context"' key within its
    first 30 characters) and the caller supplied no `transform_source`
    (either as the first extra positional argument or as a keyword), a
    default transform is installed that rewrites raw (CR)LF characters
    found inside JSON string literals as the escaped sequence `\\n`.
    A caller-supplied `transform_source` is always left untouched.

    All arguments are then forwarded unchanged to the superclass
    `_parse_json`, whose return value is returned as-is.
    """
    if '"@context"' in json_string[:30]:
        # this is ld+json, or that's the way to bet
        transform_source = args[0] if args else kwargs.get('transform_source')
        if not transform_source:

            def fix_chars(src):
                # fix malformed ld+json: replace raw (CR)LFs inside string
                # literals with escaped LFs.
                # The pattern matches a complete JSON string literal:
                # it accepts the empty string "" and skips over
                # backslash-escaped characters (notably \"), so the
                # opening/closing quote pairing cannot drift and line
                # breaks *between* tokens are never rewritten.
                return re.sub(
                    r'(?s)"(?:[^"\\]|\\.)*"',
                    lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)

            if args:
                # transform_source was passed positionally (as a falsy
                # value): replace it in place, keep the other positionals
                args = (fix_chars,) + args[1:]
            else:
                kwargs['transform_source'] = fix_chars
                # normalise the keyword dict through the project's compat
                # helper before it is expanded with ** below
                kwargs = compat_kwargs(kwargs)

    return super(Vbox7IE, self)._parse_json(
        json_string, video_id, *args, **kwargs)
| 99 | + |
78 | 100 | def _real_extract(self, url):
|
79 | 101 | video_id = self._match_id(url)
|
80 | 102 | url = 'https://vbox7.com/play:%s' % (video_id,)
|
|
0 commit comments