From f2bdf477bb0df12089ff4fdda98892be5a7d5694 Mon Sep 17 00:00:00 2001 From: eldar702 Date: Mon, 8 Jun 2026 14:03:04 +0300 Subject: [PATCH] Fix KeyError on missing image/docx keys in configure_content (#1317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit configure_content guarded the image and docx indexing branches with a bracket lookup (files["image"] / files["docx"]) while every other content type — and even the bodies of these same two blocks — used the safe files.get(...). On a fresh install with the default t=all, files often lacks these keys, so the guard raised KeyError. The inner try/except swallowed it into success=False, surfacing as HTTP 500 "Failed to update content index" from GET /api/update. Use files.get("image") / files.get("docx") in the guards to match the convention already used throughout the function. Add DB-free regression tests asserting configure_content returns True when the image/docx keys are absent. Co-Authored-By: Claude Opus 4.8 --- src/khoj/routers/helpers.py | 8 +++++--- tests/test_helpers.py | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 29a0df0ff..1b65f4bee 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -3139,9 +3139,9 @@ def configure_content( try: # Initialize Image Search - if (search_type == state.SearchType.All.value or search_type == state.SearchType.Image.value) and files[ + if (search_type == state.SearchType.All.value or search_type == state.SearchType.Image.value) and files.get( "image" - ]: + ): logger.info("🖼️ Setting up search for images") # Extract Entries, Generate Image Embeddings text_search.setup( @@ -3154,7 +3154,9 @@ def configure_content( logger.error(f"🚨 Failed to setup images: {e}", exc_info=True) success = False try: - if (search_type == state.SearchType.All.value or search_type == state.SearchType.Docx.value) and files["docx"]: + if (search_type == state.SearchType.All.value or search_type == state.SearchType.Docx.value) and files.get( + "docx" + ): logger.info("📄 Setting up search for docx") text_search.setup( DocxToEntries, diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 131c35536..d983f1f41 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -11,7 +11,9 @@ read_webpage_at_url, read_webpage_with_olostep, ) +from khoj.routers.helpers import configure_content from khoj.utils import helpers +from khoj.utils.config import SearchType def test_get_from_null_dict(): @@ -116,3 +118,27 @@ async def test_reading_webpage_with_olostep(): "An alarm sent from the area near the fire also failed to register at the courthouse where the fire watchmen were" in response ) + + +# Regression tests for https://github.com/khoj-ai/khoj/issues/1317. +# When the indexed `files` dict omits the "image"/"docx" keys, configure_content used a +# bracket lookup (files["image"] / files["docx"]) in the guard conditions while every +# other content type used the safe files.get(...). The missing key raised KeyError, which +# was swallowed into success=False and surfaced as an HTTP 500 from /api/update. +# Passing a non-empty unrelated file type keeps no_client_sent_documents False so the +# Github/Notion server-side indexing branches (which hit the DB) are skipped, keeping the +# test DB-free. +def test_configure_content_handles_missing_image_key(): + files = {"markdown": {"note.md": "# hi"}} + + success = configure_content(user=None, files=files, regenerate=False, t=SearchType.Image) + + assert success is True + + +def test_configure_content_handles_missing_docx_key(): + files = {"markdown": {"note.md": "# hi"}} + + success = configure_content(user=None, files=files, regenerate=False, t=SearchType.Docx) + + assert success is True