From 37250527dbb100d533d885b08d5c68653870819e Mon Sep 17 00:00:00 2001
From: octo-patch <octo-patch@github.com>
Date: Sat, 11 Apr 2026 21:34:53 +0800
Subject: [PATCH 1/2] fix: extract YouTube source URL from src[2][5] in list()
 and from_api_response() (fixes #265)

---
 src/notebooklm/_sources.py        | 18 +++++++---
 src/notebooklm/types.py           | 10 +++++-
 tests/integration/test_sources.py | 56 +++++++++++++++++++++++++++++++
 tests/unit/test_types.py          | 30 +++++++++++++++++
 4 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/src/notebooklm/_sources.py b/src/notebooklm/_sources.py
index 7263b485..ce32480d 100644
--- a/src/notebooklm/_sources.py
+++ b/src/notebooklm/_sources.py
@@ -102,12 +102,20 @@ async def list(self, notebook_id: str) -> list[Source]:
                 src_id = src[0][0] if isinstance(src[0], list) else src[0]
                 title = src[1] if len(src) > 1 else None
 
-                # Extract URL if present (at src[2][7])
+                # Extract URL: src[2][7] (web/PDF), src[2][5] (YouTube), or src[2][0] as fallback
                 url = None
-                if len(src) > 2 and isinstance(src[2], list) and len(src[2]) > 7:
-                    url_list = src[2][7]
-                    if isinstance(url_list, list) and len(url_list) > 0:
-                        url = url_list[0]
+                if len(src) > 2 and isinstance(src[2], list):
+                    if len(src[2]) > 7:
+                        url_list = src[2][7]
+                        if isinstance(url_list, list) and len(url_list) > 0:
+                            url = url_list[0]
+                    if not url and len(src[2]) > 5:
+                        yt_data = src[2][5]
+                        if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
+                            url = yt_data[0]
+                    if not url and len(src[2]) > 0:
+                        if isinstance(src[2][0], str) and src[2][0].startswith("http"):
+                            url = src[2][0]
 
                 # Extract timestamp from src[2][2] - [seconds, nanoseconds]
                 created_at = None
diff --git a/src/notebooklm/types.py b/src/notebooklm/types.py
index 852e1316..43080a93 100644
--- a/src/notebooklm/types.py
+++ b/src/notebooklm/types.py
@@ -582,11 +582,15 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> "
                     source_id = entry[0][0] if isinstance(entry[0], list) else entry[0]
                     title = entry[1] if len(entry) > 1 else None
 
-                    # Try to extract URL if present
+                    # Try to extract URL if present (web/PDF at [2][7], YouTube at [2][5])
                     url = None
                     if len(entry) > 2 and isinstance(entry[2], list):
                         if len(entry[2]) > 7 and isinstance(entry[2][7], list):
                             url = entry[2][7][0] if entry[2][7] else None
+                        if not url and len(entry[2]) > 5:
+                            yt_data = entry[2][5]
+                            if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
+                                url = yt_data[0]
 
                     return cls(id=str(source_id), title=title, url=url, _type_code=None)
 
@@ -598,6 +602,10 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> "
                         url_list = entry[2][7]
                         if isinstance(url_list, list) and len(url_list) > 0:
                             url = url_list[0]
+                    if not url and len(entry[2]) > 5:
+                        yt_data = entry[2][5]
+                        if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
+                            url = yt_data[0]
                     if not url and len(entry[2]) > 0:
                         if isinstance(entry[2][0], str) and entry[2][0].startswith("http"):
                             url = entry[2][0]
diff --git a/tests/integration/test_sources.py b/tests/integration/test_sources.py
index 24d6bf7e..0d8cd26f 100644
--- a/tests/integration/test_sources.py
+++ b/tests/integration/test_sources.py
@@ -215,6 +215,62 @@ async def test_list_sources(
         assert sources[0].kind == "web_page"
         assert sources[0].url == "https://example.com"
         assert sources[2].kind == "youtube"
+        assert sources[2].url == "https://youtube.com/watch?v=abc"
+
+    @pytest.mark.asyncio
+    async def test_list_sources_youtube_url_at_index_5(
+        self,
+        auth_tokens,
+        httpx_mock: HTTPXMock,
+        build_rpc_response,
+    ):
+        """Test that YouTube source URLs stored at src[2][5] are extracted correctly.
+
+        The NotebookLM API stores YouTube URLs at index 5 of the source metadata
+        array as [url, video_id, channel_name], not at index 7 like web/PDF sources.
+        """
+        response = build_rpc_response(
+            RPCMethod.GET_NOTEBOOK,
+            [
+                [
+                    "Test Notebook",
+                    [
+                        [
+                            ["src_yt"],
+                            "YouTube Video",
+                            [
+                                None,
+                                11,
+                                [1704240000, 0],
+                                None,
+                                9,  # YOUTUBE type code
+                                [
+                                    "https://www.youtube.com/watch?v=dcWU-qD8ISQ",
+                                    "dcWU-qD8ISQ",
+                                    "john newquist",
+                                ],
+                                None,
+                                None,  # index 7 is None for YouTube sources
+                            ],
+                            [None, 2],
+                        ],
+                    ],
+                    "nb_123",
+                    "📘",
+                    None,
+                    [None, None, None, None, None, [1704067200, 0]],
+                ]
+            ],
+        )
+        httpx_mock.add_response(content=response.encode())
+
+        async with NotebookLMClient(auth_tokens) as client:
+            sources = await client.sources.list("nb_123")
+
+        assert len(sources) == 1
+        assert sources[0].id == "src_yt"
+        assert sources[0].kind == "youtube"
+        assert sources[0].url == "https://www.youtube.com/watch?v=dcWU-qD8ISQ"
 
     @pytest.mark.asyncio
     async def test_list_sources_empty(
diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py
index 58b5da18..250807e0 100644
--- a/tests/unit/test_types.py
+++ b/tests/unit/test_types.py
@@ -161,6 +161,36 @@ def test_from_api_response_youtube_source(self):
         assert source.kind == SourceType.YOUTUBE
         assert source.kind == "youtube"  # str enum comparison
 
+    def test_from_api_response_youtube_url_at_index_5(self):
+        """Test that a YouTube source's URL is read from src[2][5], not src[2][7].
+
+        The NotebookLM API stores YouTube URLs at index 5 of the metadata array
+        as [url, video_id, channel_name], while web/PDF sources use index 7.
+        """
+        data = [
+            [
+                [
+                    ["src_yt2"],
+                    "YouTube Video Title",
+                    [
+                        None,
+                        None,
+                        None,
+                        None,
+                        9,
+                        ["https://www.youtube.com/watch?v=dcWU-qD8ISQ", "dcWU-qD8ISQ", "john newquist"],
+                        None,
+                        None,
+                    ],
+                ]
+            ]
+        ]
+        source = Source.from_api_response(data)
+
+        assert source.id == "src_yt2"
+        assert source.url == "https://www.youtube.com/watch?v=dcWU-qD8ISQ"
+        assert source.kind == SourceType.YOUTUBE
+
     def test_from_api_response_web_page_source(self):
         """Test that web page sources are parsed with type code 5."""
         data = [

From 56c00c5fd312f8a42e00dac6ffdfca450cdae205 Mon Sep 17 00:00:00 2001
From: octo-patch <octo-patch@github.com>
Date: Sat, 11 Apr 2026 21:38:18 +0800
Subject: [PATCH 2/2] style: apply ruff formatting

---
 src/notebooklm/_sources.py |  6 +++++-
 src/notebooklm/types.py    | 12 ++++++++++--
 tests/unit/test_types.py   |  6 +++++-
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/notebooklm/_sources.py b/src/notebooklm/_sources.py
index ce32480d..34875eb5 100644
--- a/src/notebooklm/_sources.py
+++ b/src/notebooklm/_sources.py
@@ -111,7 +111,11 @@ async def list(self, notebook_id: str) -> list[Source]:
                             url = url_list[0]
                     if not url and len(src[2]) > 5:
                         yt_data = src[2][5]
-                        if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
+                        if (
+                            isinstance(yt_data, list)
+                            and len(yt_data) > 0
+                            and isinstance(yt_data[0], str)
+                        ):
                             url = yt_data[0]
                     if not url and len(src[2]) > 0:
                         if isinstance(src[2][0], str) and src[2][0].startswith("http"):
diff --git a/src/notebooklm/types.py b/src/notebooklm/types.py
index 43080a93..7307be0e 100644
--- a/src/notebooklm/types.py
+++ b/src/notebooklm/types.py
@@ -589,7 +589,11 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> "
                             url = entry[2][7][0] if entry[2][7] else None
                         if not url and len(entry[2]) > 5:
                             yt_data = entry[2][5]
-                            if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
+                            if (
+                                isinstance(yt_data, list)
+                                and len(yt_data) > 0
+                                and isinstance(yt_data[0], str)
+                            ):
                                 url = yt_data[0]
 
                     return cls(id=str(source_id), title=title, url=url, _type_code=None)
@@ -604,7 +608,11 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> "
                             url = url_list[0]
                     if not url and len(entry[2]) > 5:
                         yt_data = entry[2][5]
-                        if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str):
+                        if (
+                            isinstance(yt_data, list)
+                            and len(yt_data) > 0
+                            and isinstance(yt_data[0], str)
+                        ):
                             url = yt_data[0]
                     if not url and len(entry[2]) > 0:
                         if isinstance(entry[2][0], str) and entry[2][0].startswith("http"):
diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py
index 250807e0..bd15a45d 100644
--- a/tests/unit/test_types.py
+++ b/tests/unit/test_types.py
@@ -178,7 +178,11 @@ def test_from_api_response_youtube_url_at_index_5(self):
                         None,
                         None,
                         9,
-                        ["https://www.youtube.com/watch?v=dcWU-qD8ISQ", "dcWU-qD8ISQ", "john newquist"],
+                        [
+                            "https://www.youtube.com/watch?v=dcWU-qD8ISQ",
+                            "dcWU-qD8ISQ",
+                            "john newquist",
+                        ],
                         None,
                         None,
                     ],