teng-lin · octo-patch · Apr 11, 2026 · Apr 11, 2026 · coderabbitai · Apr 11, 2026
diff --git a/src/notebooklm/_sources.py b/src/notebooklm/_sources.py
@@ -102,12 +102,24 @@ async def list(self, notebook_id: str) -> list[Source]:
                 src_id = src[0][0] if isinstance(src[0], list) else src[0]
                 title = src[1] if len(src) > 1 else None
 
-                # Extract URL if present (at src[2][7])
+                # Extract URL if present (at src[2][7] for web/PDF, src[2][5] for YouTube)
                 url = None
-                if len(src) > 2 and isinstance(src[2], list) and len(src[2]) > 7:
-                    url_list = src[2][7]
-                    if isinstance(url_list, list) and len(url_list) > 0:
-                        url = url_list[0]
+                if len(src) > 2 and isinstance(src[2], list):
+                    if len(src[2]) > 7:
+                        url_list = src[2][7]
+                        if isinstance(url_list, list) and len(url_list) > 0:
+                            url = url_list[0]
+                    if not url and len(src[2]) > 5:
+                        yt_data = src[2][5]
+                        if (
+                            isinstance(yt_data, list)
+                            and len(yt_data) > 0
+                            and isinstance(yt_data[0], str)
+                        ):
+                            url = yt_data[0]
+                    if not url and len(src[2]) > 0:
+                        if isinstance(src[2][0], str) and src[2][0].startswith("http"):
+                            url = src[2][0]
-                    if len(src[2]) > 7:
-                        url_list = src[2][7]
-                        if isinstance(url_list, list) and len(url_list) > 0:
-                            url = url_list[0]
-                    if not url and len(src[2]) > 5:
-                        yt_data = src[2][5]
-                        if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
-                            url = yt_data[0]
-                    if not url and len(src[2]) > 0:
-                        if isinstance(src[2][0], str) and src[2][0].startswith("http"):
-                            url = src[2][0]
+                    # Handle varying API response formats for URL extraction.
+                    if len(src[2]) > 7 and isinstance(src[2][7], list) and len(src[2][7]) > 0 and isinstance(src[2][7][0], str):
+                        url = src[2][7][0]
+                    if not url and len(src[2]) > 5 and isinstance(src[2][5], list) and len(src[2][5]) > 0 and isinstance(src[2][5][0], str):
+                        url = src[2][5][0]
+                    if not url and len(src[2]) > 0:
+                        if isinstance(src[2][0], str) and src[2][0].startswith("http"):
+                            url = src[2][0]
-                    if len(src[2]) > 7:
-                        url_list = src[2][7]
-                        if isinstance(url_list, list) and len(url_list) > 0:
-                            url = url_list[0]
-                    if not url and len(src[2]) > 5:
-                        yt_data = src[2][5]
-                        if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
-                            url = yt_data[0]
-                    if not url and len(src[2]) > 0:
-                        if isinstance(src[2][0], str) and src[2][0].startswith("http"):
-                            url = src[2][0]
+                    # Handle varying API response formats for URL extraction.
+                    if len(src[2]) > 7 and isinstance(src[2][7], list) and len(src[2][7]) > 0 and isinstance(src[2][7][0], str):
+                        url = src[2][7][0]
+                    if not url and len(src[2]) > 5 and isinstance(src[2][5], list) and len(src[2][5]) > 0 and isinstance(src[2][5][0], str):
+                        url = src[2][5][0]
+                    if not url and len(src[2]) > 0:
+                        if isinstance(src[2][0], str) and src[2][0].startswith("http"):
+                            url = src[2][0]
 
                 # Extract timestamp from src[2][2] - [seconds, nanoseconds]
                 created_at = None

diff --git a/src/notebooklm/types.py b/src/notebooklm/types.py
@@ -582,11 +582,19 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> "
                     source_id = entry[0][0] if isinstance(entry[0], list) else entry[0]
                     title = entry[1] if len(entry) > 1 else None
 
-                    # Try to extract URL if present
+                    # Try to extract URL if present (web/PDF at [2][7], YouTube at [2][5])
                     url = None
                     if len(entry) > 2 and isinstance(entry[2], list):
                         if len(entry[2]) > 7 and isinstance(entry[2][7], list):
                             url = entry[2][7][0] if entry[2][7] else None
+                        if not url and len(entry[2]) > 5:
+                            yt_data = entry[2][5]
+                            if (
+                                isinstance(yt_data, list)
+                                and len(yt_data) > 0
+                                and isinstance(yt_data[0], str)
+                            ):
+                                url = yt_data[0]
-                        if len(entry[2]) > 7 and isinstance(entry[2][7], list):
-                            url = entry[2][7][0] if entry[2][7] else None
-                        if not url and len(entry[2]) > 5:
-                            yt_data = entry[2][5]
-                            if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
-                                url = yt_data[0]
+                        # Handle varying API response formats for URL extraction.
+                        if len(entry[2]) > 7 and isinstance(entry[2][7], list) and len(entry[2][7]) > 0 and isinstance(entry[2][7][0], str):
+                            url = entry[2][7][0]
+                        if not url and len(entry[2]) > 5 and isinstance(entry[2][5], list) and len(entry[2][5]) > 0 and isinstance(entry[2][5][0], str):
+                            url = entry[2][5][0]
+                        if not url and len(entry[2]) > 0 and isinstance(entry[2][0], str) and entry[2][0].startswith("http"):
+                            url = entry[2][0]
-                        if len(entry[2]) > 7 and isinstance(entry[2][7], list):
-                            url = entry[2][7][0] if entry[2][7] else None
-                        if not url and len(entry[2]) > 5:
-                            yt_data = entry[2][5]
-                            if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
-                                url = yt_data[0]
+                        # Handle varying API response formats for URL extraction.
+                        if len(entry[2]) > 7 and isinstance(entry[2][7], list) and len(entry[2][7]) > 0 and isinstance(entry[2][7][0], str):
+                            url = entry[2][7][0]
+                        if not url and len(entry[2]) > 5 and isinstance(entry[2][5], list) and len(entry[2][5]) > 0 and isinstance(entry[2][5][0], str):
+                            url = entry[2][5][0]
+                        if not url and len(entry[2]) > 0 and isinstance(entry[2][0], str) and entry[2][0].startswith("http"):
+                            url = entry[2][0]
 
                     return cls(id=str(source_id), title=title, url=url, _type_code=None)
 
@@ -598,6 +606,14 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> "
                         url_list = entry[2][7]
                         if isinstance(url_list, list) and len(url_list) > 0:
                             url = url_list[0]
+                    if not url and len(entry[2]) > 5:
+                        yt_data = entry[2][5]
+                        if (
+                            isinstance(yt_data, list)
+                            and len(yt_data) > 0
+                            and isinstance(yt_data[0], str)
+                        ):
+                            url = yt_data[0]
                     if not url and len(entry[2]) > 0:
                         if isinstance(entry[2][0], str) and entry[2][0].startswith("http"):
                             url = entry[2][0]

diff --git a/tests/integration/test_sources.py b/tests/integration/test_sources.py
@@ -215,6 +215,62 @@ async def test_list_sources(
         assert sources[0].kind == "web_page"
         assert sources[0].url == "https://example.com"
         assert sources[2].kind == "youtube"
+        assert sources[2].url == "https://youtube.com/watch?v=abc"
+
+    @pytest.mark.asyncio
+    async def test_list_sources_youtube_url_at_index_5(
+        self,
+        auth_tokens,
+        httpx_mock: HTTPXMock,
+        build_rpc_response,
+    ):
+        """Test that YouTube source URLs stored at src[2][5] are extracted correctly.
+
+        The NotebookLM API stores YouTube URLs at index 5 of the source metadata
+        array as [url, video_id, channel_name], not at index 7 like web/PDF sources.
+        """
+        response = build_rpc_response(
+            RPCMethod.GET_NOTEBOOK,
+            [
+                [
+                    "Test Notebook",
+                    [
+                        [
+                            ["src_yt"],
+                            "YouTube Video",
+                            [
+                                None,
+                                11,
+                                [1704240000, 0],
+                                None,
+                                9,  # YOUTUBE type code
+                                [
+                                    "https://www.youtube.com/watch?v=dcWU-qD8ISQ",
+                                    "dcWU-qD8ISQ",
+                                    "john newquist",
+                                ],
+                                None,
+                                None,  # index 7 is None for YouTube sources
+                            ],
+                            [None, 2],
+                        ],
+                    ],
+                    "nb_123",
+                    "📘",
+                    None,
+                    [None, None, None, None, None, [1704067200, 0]],
+                ]
+            ],
+        )
+        httpx_mock.add_response(content=response.encode())
+
+        async with NotebookLMClient(auth_tokens) as client:
+            sources = await client.sources.list("nb_123")
+
+        assert len(sources) == 1
+        assert sources[0].id == "src_yt"
+        assert sources[0].kind == "youtube"
+        assert sources[0].url == "https://www.youtube.com/watch?v=dcWU-qD8ISQ"
 
     @pytest.mark.asyncio
     async def test_list_sources_empty(

diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py
@@ -161,6 +161,40 @@ def test_from_api_response_youtube_source(self):
         assert source.kind == SourceType.YOUTUBE
         assert source.kind == "youtube"  # str enum comparison
 
+    def test_from_api_response_youtube_url_at_index_5(self):
+        """Test that YouTube sources store URL at src[2][5], not src[2][7].
+
+        The NotebookLM API stores YouTube URLs at index 5 of the metadata array
+        as [url, video_id, channel_name], while web/PDF sources use index 7.
+        """
+        data = [
+            [
+                [
+                    ["src_yt2"],
+                    "YouTube Video Title",
+                    [
+                        None,
+                        None,
+                        None,
+                        None,
+                        9,
+                        [
+                            "https://www.youtube.com/watch?v=dcWU-qD8ISQ",
+                            "dcWU-qD8ISQ",
+                            "john newquist",
+                        ],
+                        None,
+                        None,
+                    ],
+                ]
+            ]
+        ]
+        source = Source.from_api_response(data)
+
+        assert source.id == "src_yt2"
+        assert source.url == "https://www.youtube.com/watch?v=dcWU-qD8ISQ"
+        assert source.kind == SourceType.YOUTUBE
+
     def test_from_api_response_web_page_source(self):
         """Test that web page sources are parsed with type code 5."""
         data = [