Merge pull request #12 from iawia002/qq

iawia002 · web-flow · commit 894c7171e25b · 2018-01-20T10:34:26.000+08:00
fix qq.com
diff --git a/lulu/extractors/qq.py b/lulu/extractors/qq.py
@@ -1,18 +1,36 @@
 #!/usr/bin/env python
 
+import re
+import json
+
+from lulu.util import log
+from lulu.extractors.qie import download as qieDownload
+from lulu.extractors.qie_video import download_by_url as qie_video_download
+from lulu.common import (
+    match1,
+    matchall,
+    url_info,
+    print_info,
+    get_content,
+    url_locations,
+    download_urls,
+    playlist_not_supported,
+)
+
+
 __all__ = ['qq_download']
+site_info = 'QQ.com'
 
-from ..common import *
-from ..util.log import *
-from .qie import download as qieDownload
-from .qie_video import download_by_url as qie_video_download
-from urllib.parse import urlparse,parse_qs
 
-def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
-    info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&vid={}'.format(vid)
+def qq_download_by_vid(
+    vid, title, output_dir='.', merge=True, info_only=False
+):
+    info_api = (
+        'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333'
+        '&platform=11&defnpayver=1&vid={}'.format(vid)
+    )
     info = get_content(info_api)
     video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
-
     fn_pre = video_json['vl']['vi'][0]['lnk']
     title = video_json['vl']['vi'][0]['ti']
     host = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
@@ -21,23 +39,30 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
     if seg_cnt == 0:
         seg_cnt = 1
 
-    best_quality = streams[-1]['name']
+    # best_quality = streams[-1]['name']
     part_format_id = streams[-1]['id']
 
-    part_urls= []
+    part_urls = []
     total_size = 0
     for part in range(1, seg_cnt+1):
-        #if seg_cnt == 1 and video_json['vl']['vi'][0]['vh'] <= 480:
-        #    filename = fn_pre + '.mp4'
-        #else:
-        #    filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4'
-        filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4'
-        key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename)
+        filename = '{}.p{}.{}.mp4'.format(
+            fn_pre, str(part_format_id % 10000), str(part)
+        )
+        key_api = (
+            'http://vv.video.qq.com/getkey?otype=json&platform=11&'
+            'format={}&vid={}&filename={}&appver=3.2.19.333'.format(
+                part_format_id, vid, filename
+            )
+        )
         part_info = get_content(key_api)
         key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
         if key_json.get('key') is None:
             vkey = video_json['vl']['vi'][0]['fvkey']
-            url = '{}{}?vkey={}'.format(video_json['vl']['vi'][0]['ul']['ui'][0]['url'], fn_pre + '.mp4', vkey)
+            url = '{}{}?vkey={}'.format(
+                video_json['vl']['vi'][0]['ul']['ui'][0]['url'],
+                fn_pre + '.mp4',
+                vkey
+            )
         else:
             vkey = key_json['key']
             url = '{}{}?vkey={}'.format(host, filename, vkey)
@@ -54,11 +79,20 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
 
     print_info(site_info, title, ext, total_size)
     if not info_only:
-        download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge)
+        download_urls(
+            part_urls, title, ext, total_size, output_dir=output_dir,
+            merge=merge
+        )
 
-def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=False):
+
+def kg_qq_download_by_shareid(
+    shareid, output_dir='.', info_only=False, caption=False
+):
     BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail'
-    params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid
+    params_str = (
+        '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&'
+        'v=4&outCharset=utf-8&shareid={}'.format(shareid)
+    )
     url = BASE_URL + params_str
     content = get_content(url)
     json_str = content[len('jsonpcallback('):-1]
@@ -70,7 +104,10 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
     real_url = real_url.replace('\/', '/')
 
     ksong_mid = json_data['data']['ksong_mid']
-    lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid
+    lyric_url = (
+        'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&'
+        'outCharset=utf-8&ksongmid={}'.format(ksong_mid)
+    )
     lyric_data = get_content(lyric_url)
     lyric_string = lyric_data[len('jsopgetlrcdata('):-1]
     lyric_json = json.loads(lyric_string)
@@ -94,24 +131,35 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
                     f.write(line)
                     f.write('\n')
 
+
 def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    """"""
     if re.match(r'https?://egame.qq.com/live\?anchorid=(\d+)', url):
         from . import qq_egame
-        qq_egame.qq_egame_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
+        qq_egame.qq_egame_download(
+            url, output_dir=output_dir, merge=merge, info_only=info_only,
+            **kwargs
+        )
         return
 
     if 'kg.qq.com' in url or 'kg2.qq.com' in url:
         shareid = url.split('?s=')[-1]
         caption = kwargs['caption']
-        kg_qq_download_by_shareid(shareid, output_dir=output_dir, info_only=info_only, caption=caption)
+        kg_qq_download_by_shareid(
+            shareid, output_dir=output_dir, info_only=info_only,
+            caption=caption
+        )
         return
 
     if 'live.qq.com' in url:
         if 'live.qq.com/video/v' in url:
-            qie_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
+            qie_video_download(
+                url, output_dir=output_dir, merge=merge, info_only=info_only,
+                **kwargs
+            )
         else:
-            qieDownload(url, output_dir=output_dir, merge=merge, info_only=info_only)
+            qieDownload(
+                url, output_dir=output_dir, merge=merge, info_only=info_only
+            )
         return
 
     if 'mp.weixin.qq.com/s?' in url:
@@ -121,19 +169,21 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
             qq_download_by_vid(vid, vid, output_dir, merge, info_only)
         return
 
-    #do redirect
+    # do redirect
     if 'v.qq.com/page' in url:
         # for URLs like this:
         # http://v.qq.com/page/k/9/7/k0194pwgw97.html
         new_url = url_locations([url])[0]
         if url == new_url:
-            #redirect in js?
+            # redirect in js?
             content = get_content(url)
-            url = match1(content,r'window\.location\.href="(.*?)"')
+            url = match1(content, r'window\.location\.href="(.*?)"')
         else:
             url = new_url
 
-    if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url):
+    if 'kuaibao.qq.com' in url or re.match(
+        r'http://daxue.qq.com/content/content/id/\d+', url
+    ):
         content = get_content(url)
         vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
         title = match1(content, r'title">([^"]+)</p>')
@@ -144,22 +194,34 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         title = vid
     else:
         content = get_content(url)
-        #vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid  like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
-        rurl = match1(content, r'<link.*?rel\s*=\s*"canonical".*?href\s*="(.+?)".*?>') #https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
-        vid = ""
+        # vid = parse_qs(urlparse(url).query).get('vid')
+        # for links specified vid like
+        # http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
+        rurl = match1(
+            content, r'<link.*?rel\s*=\s*"canonical".*?href\s*="(.+?)".*?>'
+        )  # https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
+        vid = ''
         if rurl:
             vid = rurl.split('/')[-1].split('.')[0]
-        vid = vid if vid else url.split('/')[-1].split('.')[0] #https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html?
-        vid = vid if vid else match1(content, r'vid"*\s*:\s*"\s*([^"]+)"') #general fallback
+        if vid == 'undefined':
+            vid = ''
+        # https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html?
+        vid = vid if vid else url.split('/')[-1].split('.')[0]
+        # general fallback
+        vid = vid if vid else match1(content, r'vid"*\s*:\s*"\s*([^"]+)"')
         if not vid:
             vid = match1(content, r'id"*\s*:\s*"(.+?)"')
-        title = match1(content,r'<a.*?id\s*=\s*"%s".*?title\s*=\s*"(.+?)".*?>'%vid)
+        title = match1(
+            content, r'<a.*?id\s*=\s*"{}".*?title\s*=\s*"(.+?)".*?>'.format(
+                vid
+            )
+        )
         title = match1(content, r'title">([^"]+)</p>') if not title else title
         title = match1(content, r'"title":"([^"]+)"') if not title else title
-        title = vid if not title else title #general fallback
+        title = vid if not title else title  # general fallback
 
     qq_download_by_vid(vid, title, output_dir, merge, info_only)
 
-site_info = "QQ.com"
+
 download = qq_download
-download_playlist = playlist_not_supported('qq')
+download_playlist = playlist_not_supported(site_info)
diff --git a/lulu/version.py b/lulu/version.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
-__version__ = '0.1.4'
+__version__ = '0.1.5'
 script_name = 'lulu'
diff --git a/tests/download.py b/tests/download.py
@@ -12,6 +12,7 @@
     miaopai,
     # netease,
     youku,
+    qq,
 )
 
 
@@ -79,6 +80,21 @@ def test_youku(self):
             info_only=True
         )
 
+    def test_qq(self):
+        qq.download('https://v.qq.com/x/page/n0528cwq4xr.html', info_only=True)
+        qq.download(
+            'https://v.qq.com/x/cover/ps6mnfqyrfo7es3/q0181hpdvo5.html?',
+            info_only=True
+        )
+        qq.download(
+            'http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5',
+            info_only=True
+        )
+        qq.download(
+            'https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html',
+            info_only=True
+        )
+
 
 if __name__ == '__main__':
     unittest.main()