myk-org
diff --git a/CLAUDE.md b/CLAUDE.md
Lines changed: 2 additions & 2 deletions
diff --git a/src/docsfy/api/projects.py b/src/docsfy/api/projects.py
Lines changed: 12 additions & 1 deletion
diff --git a/src/docsfy/config.py b/src/docsfy/config.py
Lines changed: 5 additions & 0 deletions
diff --git a/src/docsfy/generator.py b/src/docsfy/generator.py
Lines changed: 162 additions & 54 deletions
@@ -28,11 +28,11 @@ When adding new code:
 
 | Resource Type | Location | Examples |
 |---|---|---|
-| Python constants | `src/docsfy/models.py` | `VALID_PROVIDERS`, `DEFAULT_BRANCH`, `DOCSFY_DOCS_URL`, `DOCSFY_REPO_URL` |
+| Python constants | `src/docsfy/models.py` | `VALID_PROVIDERS`, `DEFAULT_BRANCH`, `PAGE_TYPES`, `DOCSFY_DOCS_URL`, `DOCSFY_REPO_URL` |
 | Data models | `src/docsfy/models.py` | `GenerateRequest`, `DocPlan`, `DocPage`, `NavGroup` |
 | DB constants & validators | `src/docsfy/storage.py` | `VALID_STATUSES`, `VALID_ROLES`, `_validate_name()`, `_validate_owner()` |
 | Git timeouts | `src/docsfy/repository.py` | `_CLONE_TIMEOUT`, `_FETCH_TIMEOUT`, `_DIFF_TIMEOUT` |
-| Prompt constants | `src/docsfy/prompts.py` | `_MAX_DIFF_LENGTH`, `_PAGE_WRITING_RULES` |
+| Prompt constants | `src/docsfy/prompts.py` | `_MAX_DIFF_LENGTH`, `_GUIDE_WRITING_RULES`, `_REFERENCE_WRITING_RULES`, `_RECIPE_WRITING_RULES`, `_CONCEPT_WRITING_RULES`, `_INCREMENTAL_WRITING_RULES`, `truncate_diff_content()` |
 | Frontend constants | `frontend/src/lib/constants.ts` | API base URL, poll intervals, toast durations |
 | Frontend types | `frontend/src/types/index.ts` | `Project`, `User`, `Variant`, `AuthState` |
 | Frontend API client | `frontend/src/lib/api.ts` | `fetchProjects()`, `login()`, `generateDocs()` |
 
@@ -30,7 +30,13 @@
     VALID_PROVIDERS,
     GenerateRequest,
 )
-from docsfy.postprocess import add_cross_links, detect_version, validate_pages
+from docsfy.postprocess import (
+    add_cross_links,
+    detect_version,
+    fix_broken_internal_links,
+    linkify_plain_references,
+    validate_pages,
+)
 from docsfy.renderer import render_site
 from docsfy.repository import (
     clone_repo,
@@ -1016,6 +1022,11 @@ async def _on_page_generated(page_count: int) -> None:
             current_stage="cross_linking",
             page_count=len(pages),
         )
+        pages = fix_broken_internal_links(pages, plan, project_name=project_name)
+        try:
+            pages = linkify_plain_references(pages, plan, project_name=project_name)
+        except Exception as exc:
+            logger.warning(f"[{project_name}] linkify_plain_references failed: {exc}")
         pages = await add_cross_links(
             pages=pages,
             plan=plan,
 
@@ -20,6 +20,11 @@ class Settings(BaseSettings):
     log_level: str = "INFO"
     data_dir: str = "/data"
     secure_cookies: bool = True  # Set to False for local HTTP dev
+    max_concurrent_pages: int = Field(
+        default=10,
+        gt=0,
+        description="Maximum number of AI CLI calls to run in parallel during page generation and validation",
+    )
 
 
 @lru_cache
 
@@ -1,5 +1,9 @@
 from __future__ import annotations
 
+import json
+import re
+import shutil
+import tempfile
 from collections.abc import Awaitable, Callable
 from pathlib import Path
 from typing import Any
@@ -10,12 +14,13 @@
 from docsfy.json_parser import parse_json_array_response, parse_json_response
 from pydantic import ValidationError
 
-from docsfy.models import DEFAULT_BRANCH, MAX_CONCURRENT_PAGES, DocPlan
+from docsfy.models import DEFAULT_BRANCH, PAGE_TYPES, DocPlan
 from docsfy.prompts import (
     build_incremental_page_prompt,
     build_incremental_planner_prompt,
     build_page_prompt,
     build_planner_prompt,
+    truncate_diff_content,
 )
 
 logger = get_logger(name=__name__)
@@ -37,6 +42,52 @@ def _strip_ai_preamble(text: str) -> str:
     return text
 
 
+_AI_COMMENTARY_END_MARKERS = (
+    "\nWait -",
+    "\nWait,",
+    "\nLet me refine",
+    "\nLet me remove",
+    "\nI should ",
+    "\nI'll also ",
+    "\nI'll remove",
+    "\nSo I should",
+    "\n`</think>`",
+)
+_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
+_COMMENTARY_TAIL_CHARS = 500  # only this many trailing characters are scanned
+
+
+def _strip_ai_artifacts(text: str) -> str:
+    """Strip AI thinking/reasoning artifacts from generated content.
+
+    Removes paired ``<think>...</think>`` blocks, self-referential commentary
+    that starts in the last ``_COMMENTARY_TAIL_CHARS`` characters (see
+    ``_AI_COMMENTARY_END_MARKERS``), and any orphan ``<think>``/``</think>``
+    tags. Texts no longer than the tail window are not scanned for commentary.
+
+    Returns the cleaned text with surrounding whitespace stripped.
+    """
+    # Repeat only while something was removed: a bare ``while "<think>" in
+    # text and "</think>" in text`` never terminates when the closing tag
+    # precedes the opening one, because the pattern then matches nothing.
+    removed = 1
+    while removed:
+        text, removed = _THINK_BLOCK_RE.subn("", text)
+
+    # Scan before orphan tags are deleted, otherwise the "\n`</think>`" marker
+    # can never match. Cut at the earliest marker so no commentary survives.
+    if len(text) > _COMMENTARY_TAIL_CHARS:
+        tail_offset = len(text) - _COMMENTARY_TAIL_CHARS
+        hits = [text.find(m, tail_offset) for m in _AI_COMMENTARY_END_MARKERS]
+        hits = [idx for idx in hits if idx >= 0]
+        if hits:
+            text = text[: min(hits)]
+
+    # Remove orphan opening/closing tags left behind by truncated reasoning.
+    text = text.replace("</think>", "").replace("<think>", "")
+    return text.strip()
+
+
 async def _call_ai_or_raise(
     prompt: str,
     repo_path: Path,
@@ -156,12 +207,16 @@ async def generate_full_page_content(
     ai_model: str,
     ai_cli_timeout: int | None = None,
     exclusions_path: str | None = None,
+    page_type: str = "guide",
+    other_pages_path: str | None = None,
 ) -> str:
     prompt = build_page_prompt(
         project_name=project_name,
         page_title=page_title,
         page_description=page_description,
+        page_type=page_type,
         exclusions_path=exclusions_path,
+        other_pages_path=other_pages_path,
     )
     output = await _call_ai_or_raise(
         prompt=prompt,
@@ -170,7 +225,7 @@ async def generate_full_page_content(
         ai_model=ai_model,
         ai_cli_timeout=ai_cli_timeout,
     )
-    return _strip_ai_preamble(output)
+    return _strip_ai_artifacts(_strip_ai_preamble(output))
 
 
 async def _generate_incremental_page_content(
@@ -184,22 +239,35 @@ async def _generate_incremental_page_content(
     ai_provider: str,
     ai_model: str,
     ai_cli_timeout: int | None = None,
+    page_type: str = "guide",
 ) -> str:
-    prompt = build_incremental_page_prompt(
-        project_name=project_name,
-        page_title=page_title,
-        page_description=page_description,
-        existing_content=existing_content,
-        changed_files=changed_files,
-        diff_content=diff_content,
-    )
-    output = await _call_ai_or_raise(
-        prompt=prompt,
-        repo_path=repo_path,
-        ai_provider=ai_provider,
-        ai_model=ai_model,
-        ai_cli_timeout=ai_cli_timeout,
-    )
+    job_dir = Path(tempfile.mkdtemp(prefix="docsfy-incremental-page-"))
+    try:
+        existing_page_file = job_dir / "existing_page.md"
+        existing_page_file.write_text(existing_content, encoding="utf-8")
+
+        truncated_diff = truncate_diff_content(diff_content)
+        diff_file = job_dir / "diff.patch"
+        diff_file.write_text(truncated_diff, encoding="utf-8")
+
+        prompt = build_incremental_page_prompt(
+            project_name=project_name,
+            page_title=page_title,
+            page_description=page_description,
+            existing_page_path=str(existing_page_file),
+            changed_files=changed_files,
+            diff_path=str(diff_file),
+            page_type=page_type,
+        )
+        output = await _call_ai_or_raise(
+            prompt=prompt,
+            repo_path=repo_path,
+            ai_provider=ai_provider,
+            ai_model=ai_model,
+            ai_cli_timeout=ai_cli_timeout,
+        )
+    finally:
+        shutil.rmtree(job_dir, ignore_errors=True)
     return _apply_incremental_page_updates(existing_content, output)
 
 
@@ -260,6 +328,8 @@ async def generate_page(
     diff_content: str | None = None,
     branch: str = DEFAULT_BRANCH,
     on_page_generated: Callable[[int], Awaitable[None]] | None = None,
+    page_type: str = "guide",
+    other_pages_path: str | None = None,
 ) -> str:
     _label = project_name or repo_path.name
     prompt_project_name = project_name or repo_path.name
@@ -291,6 +361,7 @@ async def generate_page(
                     ai_provider=ai_provider,
                     ai_model=ai_model,
                     ai_cli_timeout=ai_cli_timeout,
+                    page_type=page_type,
                 )
             except (RuntimeError, ValueError) as exc:
                 logger.warning(
@@ -305,6 +376,8 @@ async def generate_page(
                     ai_provider=ai_provider,
                     ai_model=ai_model,
                     ai_cli_timeout=ai_cli_timeout,
+                    page_type=page_type,
+                    other_pages_path=other_pages_path,
                 )
         else:
             output = await generate_full_page_content(
@@ -315,6 +388,8 @@ async def generate_page(
                 ai_provider=ai_provider,
                 ai_model=ai_model,
                 ai_cli_timeout=ai_cli_timeout,
+                page_type=page_type,
+                other_pages_path=other_pages_path,
             )
     except RuntimeError as exc:
         logger.warning(f"[{_label}] Failed to generate page '{slug}': {exc}")
@@ -380,40 +455,64 @@ async def generate_all_pages(
             if is_unsafe_slug(slug):
                 logger.warning(f"[{_label}] Skipping path-unsafe slug: '{slug}'")
                 continue
+            _page_type = page.get("type", "guide")
+            if _page_type not in PAGE_TYPES:
+                logger.warning(
+                    f"[{_label}] Unknown page type '{_page_type}' for slug '{slug}', "
+                    f"falling back to 'guide'"
+                )
+                _page_type = "guide"
             all_pages.append(
                 {
                     "slug": slug,
                     "title": title,
                     "description": page.get("description", ""),
+                    "type": _page_type,
                 }
             )
 
-    _existing_pages = existing_pages or {}
-    coroutines = [
-        generate_page(
-            repo_path=repo_path,
-            slug=p["slug"],
-            title=p["title"],
-            description=p["description"],
-            cache_dir=cache_dir,
-            ai_provider=ai_provider,
-            ai_model=ai_model,
-            ai_cli_timeout=ai_cli_timeout,
-            use_cache=use_cache,
-            project_name=project_name,
-            owner=owner,
-            existing_content=_existing_pages.get(p["slug"]),
-            changed_files=changed_files,
-            diff_content=diff_content,
-            branch=branch,
-            on_page_generated=on_page_generated,
-        )
-        for p in all_pages
-    ]
+    # Write page manifest once for cross-referencing (GOLDEN RULE: don't inline in prompts)
+    pages_manifest_dir = Path(tempfile.mkdtemp(prefix="docsfy-pages-manifest-"))
+    try:
+        pages_manifest_path = pages_manifest_dir / "pages.txt"
+        manifest_lines = [
+            f"- [{p['title']}]({p['slug']}.html) \u2014 {p['description']}"
+            for p in all_pages
+        ]
+        pages_manifest_path.write_text("\n".join(manifest_lines), encoding="utf-8")
+
+        _existing_pages = existing_pages or {}
+        coroutines = [
+            generate_page(
+                repo_path=repo_path,
+                slug=p["slug"],
+                title=p["title"],
+                description=p["description"],
+                cache_dir=cache_dir,
+                page_type=p["type"],
+                ai_provider=ai_provider,
+                ai_model=ai_model,
+                ai_cli_timeout=ai_cli_timeout,
+                use_cache=use_cache,
+                project_name=project_name,
+                owner=owner,
+                existing_content=_existing_pages.get(p["slug"]),
+                changed_files=changed_files,
+                diff_content=diff_content,
+                branch=branch,
+                on_page_generated=on_page_generated,
+                other_pages_path=str(pages_manifest_path),
+            )
+            for p in all_pages
+        ]
 
-    results = await run_parallel_with_limit(
-        coroutines, max_concurrency=MAX_CONCURRENT_PAGES
-    )
+        from docsfy.config import get_settings
+
+        results = await run_parallel_with_limit(
+            coroutines, max_concurrency=get_settings().max_concurrent_pages
+        )
+    finally:
+        shutil.rmtree(pages_manifest_dir, ignore_errors=True)
     pages: dict[str, str] = {}
     for page_info, result in zip(all_pages, results):
         if isinstance(result, Exception):
@@ -443,20 +542,29 @@ async def run_incremental_planner(
     logger.info(
         f"[{project_name}] Running incremental planner for {len(changed_files)} changed files"
     )
-    prompt = build_incremental_planner_prompt(
-        project_name, changed_files, existing_plan
-    )
+    job_dir = Path(tempfile.mkdtemp(prefix="docsfy-incremental-plan-"))
     try:
-        output = await _call_ai_or_raise(
-            prompt=prompt,
-            repo_path=repo_path,
-            ai_provider=ai_provider,
-            ai_model=ai_model,
-            ai_cli_timeout=ai_cli_timeout,
+        plan_file = job_dir / "existing_plan.json"
+        plan_file.write_text(json.dumps(existing_plan, indent=2), encoding="utf-8")
+
+        prompt = build_incremental_planner_prompt(
+            project_name, changed_files, str(plan_file)
         )
-    except RuntimeError:
-        logger.warning(f"[{project_name}] Incremental planner failed, regenerating all")
-        return ["all"]
+        try:
+            output = await _call_ai_or_raise(
+                prompt=prompt,
+                repo_path=repo_path,
+                ai_provider=ai_provider,
+                ai_model=ai_model,
+                ai_cli_timeout=ai_cli_timeout,
+            )
+        except RuntimeError:
+            logger.warning(
+                f"[{project_name}] Incremental planner failed, regenerating all"
+            )
+            return ["all"]
+    finally:
+        shutil.rmtree(job_dir, ignore_errors=True)
 
     raw_result = parse_json_array_response(output)
     if raw_result is None or not isinstance(raw_result, list):