Skip to content

Commit ae2bf18

Browse files
deptrai and claude committed
feat: sync upstream PRs MODSetter#893, MODSetter#894, MODSetter#605 with bug fixes
PR MODSetter#893 — fix: BookStack + Obsidian missing from periodic scheduler
- Add index_bookstack_pages_task and index_obsidian_vault_task to the schedule_checker_task.py imports and task_map

PR MODSetter#894 — feat: BookStack shelf exclusion filter
- bookstack_connector.py: add get_all_shelves() and build_book_to_shelf_map(), which returns dict[int, set[int]] (fixes the book-in-multiple-shelves edge case); get_all_pages/get_pages_by_date_range build the shelf map once, outside the pagination loop (fixes N+1)
- search_source_connectors_routes.py: POST /bookstack/shelves endpoint
- bookstack_indexer.py: read BOOKSTACK_EXCLUDED_SHELF_IDS from the connector config and pass it to get_pages_by_date_range
- bookstack-connect-form.tsx + bookstack-config.tsx: shelf picker UI with a loading guard to prevent a race condition on rapid clicks
- connectors-api.service.ts: listBookStackShelves() API method

PR MODSetter#605 — fix: OLLAMA_BASE_URL support for external Ollama embeddings
- config/__init__.py: add the OLLAMA_BASE_URL env var; only inject base_url into embedding_kwargs when EMBEDDING_MODEL starts with "ollama://" (fixes the original PR's bug, where it was injected unconditionally for all providers)
- .env.example: document OLLAMA_BASE_URL with usage notes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 51560ed commit ae2bf18

9 files changed

Lines changed: 511 additions & 10 deletions

File tree

surfsense_backend/.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,7 @@ LANGSMITH_TRACING=true
202202
LANGSMITH_ENDPOINT=https://api.smith.langchain.com
203203
LANGSMITH_API_KEY=lsv2_pt_.....
204204
LANGSMITH_PROJECT=surfsense
205+
206+
# Ollama Configuration (only used when EMBEDDING_MODEL starts with "ollama://")
207+
# Use host.docker.internal on Docker Desktop (Mac/Windows), or host IP on Linux
208+
# OLLAMA_BASE_URL=http://host.docker.internal:11434

surfsense_backend/app/config/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,13 +417,17 @@ def is_cloud(cls) -> bool:
417417
# Azure OpenAI credentials from environment variables
418418
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
419419
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
420+
# Ollama base URL — only applied when EMBEDDING_MODEL starts with "ollama://"
421+
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
420422

421-
# Pass Azure credentials to embeddings when using Azure OpenAI
423+
# Pass provider-specific credentials to embeddings
422424
embedding_kwargs = {}
423425
if AZURE_OPENAI_ENDPOINT:
424426
embedding_kwargs["azure_endpoint"] = AZURE_OPENAI_ENDPOINT
425427
if AZURE_OPENAI_API_KEY:
426428
embedding_kwargs["azure_api_key"] = AZURE_OPENAI_API_KEY
429+
if OLLAMA_BASE_URL and EMBEDDING_MODEL and EMBEDDING_MODEL.startswith("ollama://"):
430+
embedding_kwargs["base_url"] = OLLAMA_BASE_URL
427431

428432
embedding_model_instance = AutoEmbeddings.get_embeddings(
429433
EMBEDDING_MODEL,

surfsense_backend/app/connectors/bookstack_connector.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,81 @@ def make_api_request(
155155
except requests.exceptions.RequestException as e:
156156
raise Exception(f"BookStack API request failed: {e!s}") from e
157157

158-
def get_all_pages(self, count: int = 500) -> list[dict[str, Any]]:
158+
def get_all_shelves(self, count: int = 500) -> list[dict[str, Any]]:
    """
    Fetch all shelves from BookStack with pagination.

    Args:
        count: Number of records per request (capped at 500)

    Returns:
        List of shelf objects

    Raises:
        Exception: If a response is not a dict containing a "data" key.
    """
    # The page size never changes between requests, so compute it once.
    page_size = min(count, 500)
    all_shelves: list[dict[str, Any]] = []
    offset = 0

    while True:
        result = self.make_api_request(
            "shelves", {"count": page_size, "offset": offset}
        )

        if not isinstance(result, dict) or "data" not in result:
            raise Exception("Invalid response from BookStack API")

        shelves = result["data"]
        all_shelves.extend(shelves)

        logger.info(f"Fetched {len(shelves)} shelves (offset: {offset})")

        # An empty page cannot advance the offset. Without this guard a
        # response whose "total" overstates the available records would
        # make the loop request the same offset forever.
        if not shelves:
            break

        offset += len(shelves)
        if offset >= result.get("total", 0):
            break

    logger.info(f"Total shelves fetched: {len(all_shelves)}")
    return all_shelves
195+
196+
def build_book_to_shelf_map(self) -> dict[int, set[int]]:
    """
    Build a mapping from book_id to the set of shelf_ids that contain it.

    Lists every shelf via get_all_shelves(), then requests each shelf's
    detail record ("shelves/{id}") and reads its "books" list. The result
    is the lookup table used for filtering pages by shelf. A book can
    belong to multiple shelves, so each value is a set.

    Returns:
        Dict mapping book_id -> set of shelf_ids
    """
    book_to_shelves: dict[int, set[int]] = {}

    for shelf in self.get_all_shelves():
        shelf_id = shelf["id"]
        shelf_detail = self.make_api_request(f"shelves/{shelf_id}")
        if not isinstance(shelf_detail, dict):
            # Unexpected payload shape: this shelf contributes nothing.
            continue

        # `or []` covers both a missing "books" key and an explicit null.
        for book in shelf_detail.get("books") or []:
            book_to_shelves.setdefault(book["id"], set()).add(shelf_id)

    return book_to_shelves
221+
222+
def get_all_pages(
223+
self,
224+
count: int = 500,
225+
excluded_shelf_ids: list[int] | None = None,
226+
) -> list[dict[str, Any]]:
159227
"""
160228
Fetch all pages from BookStack with pagination.
161229
162230
Args:
163231
count: Number of records per request (max 500)
232+
excluded_shelf_ids: Optional list of shelf IDs whose pages should be excluded
164233
165234
Returns:
166235
List of page objects
@@ -195,6 +264,18 @@ def get_all_pages(self, count: int = 500) -> list[dict[str, Any]]:
195264

196265
offset += len(pages)
197266

267+
# Filter by excluded shelves if specified — build map once, outside the loop
268+
if excluded_shelf_ids:
269+
book_to_shelves = self.build_book_to_shelf_map()
270+
excluded = set(excluded_shelf_ids)
271+
all_pages = [
272+
p
273+
for p in all_pages
274+
if not any(
275+
s in excluded for s in book_to_shelves.get(p.get("book_id"), set())
276+
)
277+
]
278+
198279
logger.info(f"Total pages fetched: {len(all_pages)}")
199280
return all_pages
200281

@@ -268,6 +349,7 @@ def get_pages_by_date_range(
268349
start_date: str,
269350
end_date: str,
270351
count: int = 500,
352+
excluded_shelf_ids: list[int] | None = None,
271353
) -> tuple[list[dict[str, Any]], str | None]:
272354
"""
273355
Fetch pages updated within a specific date range.
@@ -278,6 +360,7 @@ def get_pages_by_date_range(
278360
start_date: Start date in YYYY-MM-DD format
279361
end_date: End date in YYYY-MM-DD format (currently unused, for future use)
280362
count: Number of records per request (max 500)
363+
excluded_shelf_ids: Optional list of shelf IDs whose pages should be excluded
281364
282365
Returns:
283366
Tuple of (list of page objects, error message or None)
@@ -316,6 +399,19 @@ def get_pages_by_date_range(
316399

317400
offset += len(pages)
318401

402+
# Filter by excluded shelves if specified — build map once, outside the loop
403+
if excluded_shelf_ids and all_pages:
404+
book_to_shelves = self.build_book_to_shelf_map()
405+
excluded = set(excluded_shelf_ids)
406+
all_pages = [
407+
p
408+
for p in all_pages
409+
if not any(
410+
s in excluded
411+
for s in book_to_shelves.get(p.get("book_id"), set())
412+
)
413+
]
414+
319415
if not all_pages:
320416
return [], f"No pages found updated after {start_date}"
321417

surfsense_backend/app/routes/search_source_connectors_routes.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,60 @@ async def list_github_repositories(
133133
) from e
134134

135135

136+
class BookStackCredentialsRequest(BaseModel):
    """Request body carrying the credentials needed to call a BookStack API."""

    # All three fields are required (no default value is provided).
    base_url: str = Field(default=..., description="BookStack instance base URL")
    token_id: str = Field(default=..., description="BookStack API Token ID")
    token_secret: str = Field(
        default=..., description="BookStack API Token Secret"
    )
142+
143+
144+
def _fetch_bookstack_shelf_summaries(
    base_url: str, token_id: str, token_secret: str
) -> list[dict[str, Any]]:
    """Blocking helper: list every shelf together with the books it contains."""
    from app.connectors.bookstack_connector import BookStackConnector

    client = BookStackConnector(
        base_url=base_url,
        token_id=token_id,
        token_secret=token_secret,
    )

    summaries: list[dict[str, Any]] = []
    for shelf in client.get_all_shelves():
        # The shelf listing is followed by one detail request per shelf to
        # obtain that shelf's "books" list.
        detail = client.make_api_request(f"shelves/{shelf['id']}")
        books = detail.get("books", []) if isinstance(detail, dict) else []
        summaries.append(
            {
                "id": shelf["id"],
                "name": shelf["name"],
                "book_count": len(books),
                "books": [{"id": b["id"], "name": b["name"]} for b in books],
            }
        )
    return summaries


@router.post("/bookstack/shelves", response_model=list[dict[str, Any]])
async def list_bookstack_shelves(
    creds: BookStackCredentialsRequest,
    user: User = Depends(current_active_user),
):
    """
    Fetches all shelves from a BookStack instance.
    Used by the frontend to let users select which shelves to exclude from indexing.

    Returns one entry per shelf with its id, name, book_count and books
    (each book reduced to id and name).

    Raises:
        HTTPException: 400 when a ValueError is raised while fetching,
            500 for any other failure.
    """
    import asyncio

    # NOTE(review): base_url is supplied by the caller and the server issues
    # HTTP requests to it. Consider restricting the allowed hosts/schemes to
    # limit server-side request forgery.
    try:
        # The connector performs synchronous HTTP calls (one per shelf).
        # Running them in a worker thread keeps the event loop free to
        # serve other requests in the meantime.
        return await asyncio.to_thread(
            _fetch_bookstack_shelf_summaries,
            creds.base_url,
            creds.token_id,
            creds.token_secret,
        )
    except ValueError as e:
        logger.error(
            f"BookStack credential validation failed for user {user.id}: {e!s}"
        )
        raise HTTPException(
            status_code=400, detail=f"Invalid BookStack credentials: {e!s}"
        ) from e
    except Exception as e:
        logger.error(f"Failed to fetch BookStack shelves for user {user.id}: {e!s}")
        raise HTTPException(
            status_code=500, detail=f"Failed to fetch BookStack shelves: {e!s}"
        ) from e
188+
189+
136190
@router.post("/search-source-connectors", response_model=SearchSourceConnectorRead)
137191
async def create_search_source_connector(
138192
connector: SearchSourceConnectorCreate,
@@ -1106,8 +1160,12 @@ async def index_connector_content(
11061160
)
11071161
response_message = "Luma indexing started in the background."
11081162

1109-
elif connector.connector_type == SearchSourceConnectorType.DEXSCREENER_CONNECTOR:
1110-
from app.tasks.celery_tasks.connector_tasks import index_dexscreener_pairs_task
1163+
elif (
1164+
connector.connector_type == SearchSourceConnectorType.DEXSCREENER_CONNECTOR
1165+
):
1166+
from app.tasks.celery_tasks.connector_tasks import (
1167+
index_dexscreener_pairs_task,
1168+
)
11111169

11121170
logger.info(
11131171
f"Triggering DexScreener indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"

surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ async def _check_and_trigger_schedules():
5454
# Import all indexing tasks
5555
from app.tasks.celery_tasks.connector_tasks import (
5656
index_airtable_records_task,
57+
index_bookstack_pages_task,
5758
index_clickup_tasks_task,
5859
index_confluence_pages_task,
5960
index_crawled_urls_task,
@@ -68,6 +69,7 @@ async def _check_and_trigger_schedules():
6869
index_linear_issues_task,
6970
index_luma_events_task,
7071
index_notion_pages_task,
72+
index_obsidian_vault_task,
7173
index_slack_messages_task,
7274
)
7375

@@ -88,6 +90,8 @@ async def _check_and_trigger_schedules():
8890
SearchSourceConnectorType.DEXSCREENER_CONNECTOR: index_dexscreener_pairs_task,
8991
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
9092
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
93+
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: index_bookstack_pages_task,
94+
SearchSourceConnectorType.OBSIDIAN_CONNECTOR: index_obsidian_vault_task,
9195
SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
9296
# Composio connector types (unified with native Google tasks)
9397
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,

surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ async def index_bookstack_pages(
104104
bookstack_token_id = connector.config.get("BOOKSTACK_TOKEN_ID")
105105
bookstack_token_secret = connector.config.get("BOOKSTACK_TOKEN_SECRET")
106106

107+
# Optional: shelf IDs to exclude from indexing
108+
excluded_shelf_ids = connector.config.get("BOOKSTACK_EXCLUDED_SHELF_IDS", [])
109+
107110
if (
108111
not bookstack_base_url
109112
or not bookstack_token_id
@@ -148,7 +151,9 @@ async def index_bookstack_pages(
148151
# Get pages within date range
149152
try:
150153
pages, error = bookstack_client.get_pages_by_date_range(
151-
start_date=start_date_str, end_date=end_date_str
154+
start_date=start_date_str,
155+
end_date=end_date_str,
156+
excluded_shelf_ids=excluded_shelf_ids,
152157
)
153158

154159
if error:

0 commit comments

Comments
 (0)