git statuscustom

lockmatrix · SomeoneKong · commit 18b60f9dfd25 · 2025-10-23T22:01:19.000+08:00
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -81,7 +81,7 @@ jobs:
 
       - uses: actions/setup-python@v6
         with:
-          python-version: "3.10"  # Keep this in sync with test-workflows.yml
+          python-version: "3.12.8"
 
       - name: Process inputs
         id: process_inputs
@@ -175,7 +175,7 @@ jobs:
           fetch-depth: 0
       - uses: actions/setup-python@v6
         with:
-          python-version: "3.10"
+          python-version: "3.12.8"
 
       - name: Install Requirements
         run: |
@@ -243,7 +243,7 @@ jobs:
           merge-multiple: true
       - uses: actions/setup-python@v6
         with:
-          python-version: "3.10"
+          python-version: "3.12.8"
 
       - name: Generate release notes
         env:
diff --git a/requirements.txt b/requirements.txt
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
@@ -3547,7 +3547,7 @@ def ffmpeg_fixup(cndn, msg, cls):
                                      and info_dict.get('container') == 'm4a_dash',
                                      'writing DASH m4a. Only some players support this container',
                                      FFmpegFixupM4aPP)
-                        ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts'))
+                        ffmpeg_fixup((downloader in ('hlsnative', 'hlsnative_fake_header') and not self.params.get('hls_use_mpegts'))
                                      or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None),
                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                      FFmpegFixupM3u8PP)
diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
@@ -36,6 +36,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
 from .websocket import WebSocketFragmentFD
 from .youtube_live_chat import YoutubeLiveChatFD
 from .bunnycdn import BunnyCdnFD
+from .hls_fake_header import HlsFakeHeaderFD
 
 PROTOCOL_MAP = {
     'rtmp': RtmpFD,
@@ -108,6 +109,9 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
         if info_dict.get('is_live') and (external_downloader or '').lower() != 'native':
             return FFmpegFD
 
+    if protocol == 'm3u8_fake_header':
+        return HlsFakeHeaderFD
+
     if protocol in ('m3u8', 'm3u8_native'):
         if info_dict.get('is_live'):
             return FFmpegFD
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
@@ -143,6 +143,9 @@ def _read_fragment(self, ctx):
         down.close()
         return frag_content
 
+    def _fixup_fragment(self, ctx, frag_bytes):
+        return frag_bytes
+
     def _append_fragment(self, ctx, frag_content):
         try:
             ctx['dest_stream'].write(frag_content)
@@ -497,7 +500,8 @@ def _download_fragment(fragment):
                             'fragment_filename_sanitized': frag_filename,
                             'fragment_index': frag_index,
                         })
-                        if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
+                        frag_bytes = self._fixup_fragment(ctx, self._read_fragment(ctx))
+                        if not append_fragment(decrypt_fragment(fragment, frag_bytes), frag_index, ctx):
                             return False
                 except KeyboardInterrupt:
                     self._finish_multiline_status()
@@ -511,8 +515,9 @@ def _download_fragment(fragment):
                     break
                 try:
                     download_fragment(fragment, ctx)
+                    frag_bytes = self._fixup_fragment(ctx, self._read_fragment(ctx))
                     result = append_fragment(
-                        decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
+                        decrypt_fragment(fragment, frag_bytes), fragment['frag_index'], ctx)
                 except KeyboardInterrupt:
                     if info_dict.get('is_live'):
                         break
diff --git a/yt_dlp/downloader/hls_fake_header.py b/yt_dlp/downloader/hls_fake_header.py
@@ -0,0 +1,44 @@
+
+from . import HlsFD
+
+
+class HlsFakeHeaderFD(HlsFD):
+    """
+    Native HLS downloader for streams whose fragments start with a fake header
+
+    Each fragment is trimmed so that it begins at the first MPEG-TS start marker
+    (sync byte 0x47 followed by 0x40); whatever precedes the marker is dropped.
+    """
+
+    FD_NAME = 'hlsnative_fake_header'
+
+    # Byte sequence taken as the start of the real MPEG-TS payload
+    _TS_START_MARKER = b'\x47\x40'
+
+    # Becomes True once the "no fake header" notice has been shown
+    has_warned = False
+
+    def _fixup_fragment(self, ctx, frag_bytes):
+        """
+        Drop everything that precedes the first TS start marker in a fragment
+
+        @param ctx          Download context; not used by this implementation
+        @param frag_bytes   Fragment content as bytes, or None
+        @returns            None for None input; the input unchanged if it has
+                            no marker; otherwise the bytes from the marker on
+        """
+        if frag_bytes is None:
+            return None
+
+        ts_start_pos = frag_bytes.find(self._TS_START_MARKER)
+        if ts_start_pos < 0:
+            # bytes.find() yields -1 when the marker is absent, and slicing from -1
+            # would keep only the final byte. Pass the fragment through untouched
+            return frag_bytes
+
+        if ts_start_pos == 0 and not self.has_warned:
+            self.to_screen("")
+            self.to_screen("There is no fake header")
+            self.has_warned = True
+
+        return frag_bytes[ts_start_pos:]
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
@@ -260,6 +260,7 @@
     BiliIntlIE,
     BiliIntlSeriesIE,
     BiliLiveIE,
+    BilibiliCheeseIE,
 )
 from .biobiochiletv import BioBioChileTVIE
 from .bitchute import (
@@ -2477,6 +2478,7 @@
     XHamsterUserIE,
 )
 from .xiaohongshu import XiaoHongShuIE
+from .xiaoyuzhou import XiaoYuZhouIE
 from .ximalaya import (
     XimalayaAlbumIE,
     XimalayaIE,
diff --git a/yt_dlp/extractor/xiaoyuzhou.py b/yt_dlp/extractor/xiaoyuzhou.py
@@ -0,0 +1,85 @@
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    js_to_json,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+from datetime import datetime
+
+
+class XiaoYuZhouIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.xiaoyuzhoufm\.com/episode/(?P<id>[\da-f]+)'
+    IE_DESC = '小宇宙'
+    _TESTS = [{
+        'url': 'https://www.xiaoyuzhoufm.com/episode/670f2a7e0d2f24f289727fdc',
+        'info_dict': {
+            'id': '670f2a7e0d2f24f289727fdc',
+            'ext': 'm4a',
+            'description': str,
+            'title': '是不飘了？研究上私募了？没100万也不耽误听',
+            'duration': 6741,
+            'uploader': '面基',
+            'uploader_id': '6388760f22567e8ea6ad070f',
+            'uploader_url': 'https://www.xiaoyuzhoufm.com/podcast/6388760f22567e8ea6ad070f',
+        },
+    }]
+
+    @staticmethod
+    def _parse_pub_date(pub_date):
+        """
+        Convert a `pubDate` string such as `2024-10-16T09:30:00.000Z` to a UNIX timestamp
+
+        @param pub_date     The date string, or None
+        @returns            Seconds since the epoch as a float, or None if the value
+                            is missing or does not match the expected format
+        """
+        if not pub_date:
+            return None
+        try:
+            parsed = datetime.strptime(pub_date, '%Y-%m-%dT%H:%M:%S.%fZ')
+        except ValueError:
+            return None
+        # The trailing `Z` means UTC, but strptime() returns a naive datetime and
+        # datetime.timestamp() would read that as local time. Subtracting the naive
+        # epoch keeps the result independent of the machine's timezone
+        return (parsed - datetime(1970, 1, 1)).total_seconds()
+
+    def _real_extract(self, url):
+        """Extract a single episode from the JSON embedded in its web page"""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        initial_state = self._search_json(
+            r'<script id="__NEXT_DATA__" type="application/json">', webpage, 'json_data', display_id)
+
+        episode_info = traverse_obj(initial_state, ('props', 'pageProps', 'episode'))
+
+        formats = []
+        audio_url = traverse_obj(episode_info, ('enclosure', 'url', {url_or_none}))
+        if audio_url:
+            # Take the extension from the last path segment only, so that a query
+            # string or fragment cannot leak into it
+            basename = audio_url.partition('#')[0].partition('?')[0].rpartition('/')[2]
+            formats.append({
+                'url': audio_url,
+                'vcodec': 'none',
+                'ext': basename.rpartition('.')[2] if '.' in basename else None,
+            })
+
+        # A podcast is a show that contains many episodes and may have several hosts;
+        # the show itself is reported as the uploader
+        podcast_id = traverse_obj(episode_info, ('pid', {str}))
+
+        return {
+            'id': display_id,
+            'formats': formats,
+            'title': traverse_obj(episode_info, ('title', {str})),
+            'description': traverse_obj(episode_info, ('description', {str})),
+            'duration': traverse_obj(episode_info, ('duration', {float_or_none})),
+            'timestamp': self._parse_pub_date(traverse_obj(episode_info, ('pubDate', {str}))),
+            'uploader': traverse_obj(episode_info, ('podcast', 'title', {str})),
+            'uploader_id': podcast_id,
+            'uploader_url': f'https://www.xiaoyuzhoufm.com/podcast/{podcast_id}' if podcast_id else None,
+        }
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
@@ -1116,6 +1116,12 @@ def _preset_alias_callback(option, opt_str, value, parser):
             'For ffmpeg, arguments can be passed to different positions using the same syntax as --postprocessor-args. '
             'You can use this option multiple times to give different arguments to different downloaders '
             '(Alias: --external-downloader-args)'))
+    downloader.add_option(
+        '--selenium-browner-timeout', dest='selenium_browner_timeout', metavar='NUMBER', default=20, type='float')
+    downloader.add_option(
+        '--selenium-browner-no-headless', dest='selenium_browner_headless', action='store_false')
+    downloader.add_option(
+        '--selenium-browner-headless', dest='selenium_browner_headless', action='store_true')
 
     workarounds = optparse.OptionGroup(parser, 'Workarounds')
     workarounds.add_option(
diff --git a/yt_dlp/selenium_container.py b/yt_dlp/selenium_container.py