From 37250527dbb100d533d885b08d5c68653870819e Mon Sep 17 00:00:00 2001 From: octo-patch Date: Sat, 11 Apr 2026 21:34:53 +0800 Subject: [PATCH 1/2] fix: extract YouTube source URL from src[2][5] in list() and from_api_response() (fixes #265) --- src/notebooklm/_sources.py | 18 +++++++--- src/notebooklm/types.py | 10 +++++- tests/integration/test_sources.py | 56 +++++++++++++++++++++++++++++++ tests/unit/test_types.py | 30 +++++++++++++++++ 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/src/notebooklm/_sources.py b/src/notebooklm/_sources.py index 7263b485..ce32480d 100644 --- a/src/notebooklm/_sources.py +++ b/src/notebooklm/_sources.py @@ -102,12 +102,20 @@ async def list(self, notebook_id: str) -> list[Source]: src_id = src[0][0] if isinstance(src[0], list) else src[0] title = src[1] if len(src) > 1 else None - # Extract URL if present (at src[2][7]) + # Extract URL if present (at src[2][7] for web/PDF, src[2][5] for YouTube) url = None - if len(src) > 2 and isinstance(src[2], list) and len(src[2]) > 7: - url_list = src[2][7] - if isinstance(url_list, list) and len(url_list) > 0: - url = url_list[0] + if len(src) > 2 and isinstance(src[2], list): + if len(src[2]) > 7: + url_list = src[2][7] + if isinstance(url_list, list) and len(url_list) > 0: + url = url_list[0] + if not url and len(src[2]) > 5: + yt_data = src[2][5] + if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str): + url = yt_data[0] + if not url and len(src[2]) > 0: + if isinstance(src[2][0], str) and src[2][0].startswith("http"): + url = src[2][0] # Extract timestamp from src[2][2] - [seconds, nanoseconds] created_at = None diff --git a/src/notebooklm/types.py b/src/notebooklm/types.py index 852e1316..43080a93 100644 --- a/src/notebooklm/types.py +++ b/src/notebooklm/types.py @@ -582,11 +582,15 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> " source_id = entry[0][0] if isinstance(entry[0], list) else entry[0] title = entry[1] if len(entry) > 1 else None - # Try to extract URL if present + # Try to extract URL if present (web/PDF at [2][7], YouTube at [2][5]) url = None if len(entry) > 2 and isinstance(entry[2], list): if len(entry[2]) > 7 and isinstance(entry[2][7], list): url = entry[2][7][0] if entry[2][7] else None + if not url and len(entry[2]) > 5: + yt_data = entry[2][5] + if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str): + url = yt_data[0] return cls(id=str(source_id), title=title, url=url, _type_code=None) @@ -598,6 +602,10 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> " url_list = entry[2][7] if isinstance(url_list, list) and len(url_list) > 0: url = url_list[0] + if not url and len(entry[2]) > 5: + yt_data = entry[2][5] + if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str): + url = yt_data[0] if not url and len(entry[2]) > 0: if isinstance(entry[2][0], str) and entry[2][0].startswith("http"): url = entry[2][0] diff --git a/tests/integration/test_sources.py b/tests/integration/test_sources.py index 24d6bf7e..0d8cd26f 100644 --- a/tests/integration/test_sources.py +++ b/tests/integration/test_sources.py @@ -215,6 +215,62 @@ async def test_list_sources( assert sources[0].kind == "web_page" assert sources[0].url == "https://example.com" assert sources[2].kind == "youtube" + assert sources[2].url == "https://youtube.com/watch?v=abc" + + @pytest.mark.asyncio + async def test_list_sources_youtube_url_at_index_5( + self, + auth_tokens, + httpx_mock: HTTPXMock, + build_rpc_response, + ): + """Test that YouTube source URLs stored at src[2][5] are extracted correctly. + + The NotebookLM API stores YouTube URLs at index 5 of the source metadata + array as [url, video_id, channel_name], not at index 7 like web/PDF sources. + """ + response = build_rpc_response( + RPCMethod.GET_NOTEBOOK, + [ + [ + "Test Notebook", + [ + [ + ["src_yt"], + "YouTube Video", + [ + None, + 11, + [1704240000, 0], + None, + 9, # YOUTUBE type code + [ + "https://www.youtube.com/watch?v=dcWU-qD8ISQ", + "dcWU-qD8ISQ", + "john newquist", + ], + None, + None, # index 7 is None for YouTube sources + ], + [None, 2], + ], + ], + "nb_123", + "📘", + None, + [None, None, None, None, None, [1704067200, 0]], + ] + ], + ) + httpx_mock.add_response(content=response.encode()) + + async with NotebookLMClient(auth_tokens) as client: + sources = await client.sources.list("nb_123") + + assert len(sources) == 1 + assert sources[0].id == "src_yt" + assert sources[0].kind == "youtube" + assert sources[0].url == "https://www.youtube.com/watch?v=dcWU-qD8ISQ" @pytest.mark.asyncio async def test_list_sources_empty( diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index 58b5da18..250807e0 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -161,6 +161,36 @@ def test_from_api_response_youtube_source(self): assert source.kind == SourceType.YOUTUBE assert source.kind == "youtube" # str enum comparison + def test_from_api_response_youtube_url_at_index_5(self): + """Test that YouTube sources store URL at src[2][5], not src[2][7]. + + The NotebookLM API stores YouTube URLs at index 5 of the metadata array + as [url, video_id, channel_name], while web/PDF sources use index 7. + """ + data = [ + [ + [ + ["src_yt2"], + "YouTube Video Title", + [ + None, + None, + None, + None, + 9, + ["https://www.youtube.com/watch?v=dcWU-qD8ISQ", "dcWU-qD8ISQ", "john newquist"], + None, + None, + ], + ] + ] + ] + source = Source.from_api_response(data) + + assert source.id == "src_yt2" + assert source.url == "https://www.youtube.com/watch?v=dcWU-qD8ISQ" + assert source.kind == SourceType.YOUTUBE + def test_from_api_response_web_page_source(self): """Test that web page sources are parsed with type code 5.""" data = [ From 56c00c5fd312f8a42e00dac6ffdfca450cdae205 Mon Sep 17 00:00:00 2001 From: octo-patch Date: Sat, 11 Apr 2026 21:38:18 +0800 Subject: [PATCH 2/2] style: apply ruff formatting --- src/notebooklm/_sources.py | 6 +++++- src/notebooklm/types.py | 12 ++++++++++-- tests/unit/test_types.py | 6 +++++- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/notebooklm/_sources.py b/src/notebooklm/_sources.py index ce32480d..34875eb5 100644 --- a/src/notebooklm/_sources.py +++ b/src/notebooklm/_sources.py @@ -111,7 +111,11 @@ async def list(self, notebook_id: str) -> list[Source]: url = url_list[0] if not url and len(src[2]) > 5: yt_data = src[2][5] - if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str): + if ( + isinstance(yt_data, list) + and len(yt_data) > 0 + and isinstance(yt_data[0], str) + ): url = yt_data[0] if not url and len(src[2]) > 0: if isinstance(src[2][0], str) and src[2][0].startswith("http"): diff --git a/src/notebooklm/types.py b/src/notebooklm/types.py index 43080a93..7307be0e 100644 --- a/src/notebooklm/types.py +++ b/src/notebooklm/types.py @@ -589,7 +589,11 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> " url = entry[2][7][0] if entry[2][7] else None if not url and len(entry[2]) > 5: yt_data = entry[2][5] - if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str): + if ( + isinstance(yt_data, list) + and len(yt_data) > 0 + and isinstance(yt_data[0], str) + ): url = yt_data[0] return cls(id=str(source_id), title=title, url=url, _type_code=None) @@ -604,7 +608,11 @@ def from_api_response(cls, data: list[Any], notebook_id: str | None = None) -> " url = url_list[0] if not url and len(entry[2]) > 5: yt_data = entry[2][5] - if isinstance(yt_data, list) and len(yt_data) > 0 and isinstance(yt_data[0], str): + if ( + isinstance(yt_data, list) + and len(yt_data) > 0 + and isinstance(yt_data[0], str) + ): url = yt_data[0] if not url and len(entry[2]) > 0: if isinstance(entry[2][0], str) and entry[2][0].startswith("http"): diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index 250807e0..bd15a45d 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -178,7 +178,11 @@ def test_from_api_response_youtube_url_at_index_5(self): None, None, 9, - ["https://www.youtube.com/watch?v=dcWU-qD8ISQ", "dcWU-qD8ISQ", "john newquist"], + [ + "https://www.youtube.com/watch?v=dcWU-qD8ISQ", + "dcWU-qD8ISQ", + "john newquist", + ], None, None, ],