HKUDS · txxxxz · Apr 14, 2026 · Apr 14, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -52,6 +52,18 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-pip-${{ matrix.python-version }}-
 
+      - name: Install WeasyPrint system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            libcairo2 \
+            libpango-1.0-0 \
+            libpangoft2-1.0-0 \
+            libgdk-pixbuf-2.0-0 \
+            libharfbuzz0b \
+            libharfbuzz-subset0 \
+            shared-mime-info
+
       - name: Install minimal dependencies for import check
         run: |
           python -m pip install --upgrade pip
@@ -92,6 +104,18 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-pip-3.11-
 
+      - name: Install WeasyPrint system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            libcairo2 \
+            libpango-1.0-0 \
+            libpangoft2-1.0-0 \
+            libgdk-pixbuf-2.0-0 \
+            libharfbuzz0b \
+            libharfbuzz-subset0 \
+            shared-mime-info
+
       - name: Install smoke test dependencies
         run: |
           python -m pip install --upgrade pip

diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "third_party/PageIndex"]
+	path = third_party/PageIndex
+	url = https://github.com/VectifyAI/PageIndex.git
diff --git a/Dockerfile b/Dockerfile
@@ -67,7 +67,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 WORKDIR /app
 
 # Install system dependencies
-# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru)
+# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru).
+# Pango/Cairo/GDK-PixBuf/HarfBuzz libraries and shared-mime-info are required by WeasyPrint PDF export.
 # Rust is required for building tiktoken and other packages without pre-built wheels
 RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
@@ -78,7 +79,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libsm6 \
     libxext6 \
     libxrender1 \
+    libcairo2 \
+    libpango-1.0-0 \
+    libpangoft2-1.0-0 \
+    libgdk-pixbuf-2.0-0 \
+    libharfbuzz0b \
+    libharfbuzz-subset0 \
+    shared-mime-info \
     pkg-config \
+    libffi-dev \
     libssl-dev \
     && rm -rf /var/lib/apt/lists/* \
     && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
@@ -114,7 +123,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 WORKDIR /app
 
 # Install system dependencies
-# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru)
+# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru).
+# Pango/Cairo/GDK-PixBuf/HarfBuzz libraries and shared-mime-info are required by WeasyPrint PDF export.
 RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
     ca-certificates \
@@ -125,6 +135,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libsm6 \
     libxext6 \
     libxrender1 \
+    libcairo2 \
+    libpango-1.0-0 \
+    libpangoft2-1.0-0 \
+    libgdk-pixbuf-2.0-0 \
+    libharfbuzz0b \
+    libharfbuzz-subset0 \
+    shared-mime-info \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy Node.js from node-runtime stage (platform-matched binary)

diff --git a/deeptutor/agents/__init__.py b/deeptutor/agents/__init__.py
@@ -8,6 +8,7 @@
 - co_writer: Co-writing agents (EditAgent, NarratorAgent)
 - question: Question generation agents (ReAct architecture, separate base)
 - chat: Lightweight conversational agent with session management
+- structure_note: Structure Note generation workflow and artifact management
 
 Usage:
     from deeptutor.agents.base_agent import BaseAgent

diff --git a/deeptutor/agents/structure_note/__init__.py b/deeptutor/agents/structure_note/__init__.py
@@ -0,0 +1,40 @@
+from .difficulty import DifficultyPreset, get_difficulty_preset
+from .manager import StructureNoteManager
+from .models import (
+    CitationEntry,
+    DifficultyLevel,
+    DocumentPlan,
+    ExplanationStyleLevel,
+    GenerationChunk,
+    ImagePlaceholder,
+    JobStatus,
+    NoteLanguage,
+    PageIndexPage,
+    SectionEvidence,
+    SectionPlan,
+    SectionTreeNode,
+    StructureNoteArtifact,
+    StructureNoteProject,
+)
+from .storage import StructureNoteStorage
+
+__all__ = [
+    "CitationEntry",
+    "DifficultyLevel",
+    "DifficultyPreset",
+    "DocumentPlan",
+    "ExplanationStyleLevel",
+    "GenerationChunk",
+    "ImagePlaceholder",
+    "JobStatus",
+    "NoteLanguage",
+    "PageIndexPage",
+    "SectionEvidence",
+    "SectionPlan",
+    "SectionTreeNode",
+    "StructureNoteArtifact",
+    "StructureNoteManager",
+    "StructureNoteProject",
+    "StructureNoteStorage",
+    "get_difficulty_preset",
+]
diff --git a/deeptutor/agents/structure_note/difficulty.py b/deeptutor/agents/structure_note/difficulty.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from .models import DifficultyLevel
+
+
+@dataclass(frozen=True)  # frozen: attribute assignment on an instance raises after construction
+class DifficultyPreset:  # settings bundle for one difficulty level; PRESETS maps levels to instances
+    level: DifficultyLevel  # the difficulty level this preset belongs to
+    page_window: int  # NOTE(review): presumably a pages-per-window size; confirm against callers
+    depth_instruction: str  # instruction text describing how much material to cover
+    compression_instruction: str  # instruction text describing what to condense and what to keep
+    placeholder_purpose: str  # short tag, e.g. "key_figure"; NOTE(review): consumer not visible here
+
+
+PRESETS: dict[DifficultyLevel, DifficultyPreset] = {  # lookup table read by get_difficulty_preset
+    DifficultyLevel.SIMPLE: DifficultyPreset(
+        level=DifficultyLevel.SIMPLE,  # each entry repeats its own dict key as `level`
+        page_window=10,
+        depth_instruction=(
+            "Simple controls how much to cover: keep only the core thread, key concepts, "
+            "essential conclusions, and any indispensable bridge needed to understand them. "
+            "Short does not mean shallow."
+        ),
+        compression_instruction=(
+            "Compress by deleting repeated background, template transitions, low-information summaries, "
+            "and meta commentary. Preserve precise definitions, key mechanisms, critical formulas or "
+            "arguments, and the shortest logical bridge between ideas."
+        ),
+        placeholder_purpose="key_figure",
+    ),
+    DifficultyLevel.MEDIUM: DifficultyPreset(
+        level=DifficultyLevel.MEDIUM,
+        page_window=10,  # same value as SIMPLE
+        depth_instruction=(
+            "Medium controls how much to cover: include the main knowledge points and the core logic chain "
+            "needed for a normal classroom handout."
+        ),
+        compression_instruction=(
+            "Compress by merging duplicated examples and background while retaining the main concepts, "
+            "mechanisms, evidence, and topic-to-topic reasoning."
+        ),
+        placeholder_purpose="supporting_figure",
+    ),
+    DifficultyLevel.DETAILED: DifficultyPreset(
+        level=DifficultyLevel.DETAILED,
+        page_window=6,  # the only preset that differs from 10
+        depth_instruction=(
+            "Detailed controls how much to cover: preserve a fuller knowledge structure, including "
+            "intermediate steps, boundary cases, supporting examples, and derivation or argument details "
+            "when they are present in the evidence."
+        ),
+        compression_instruction=(
+            "Compress only low-value repetition and boilerplate. Keep the complete conceptual chain, "
+            "important qualifications, examples, mechanisms, and source-supported derivation details."
+        ),
+        placeholder_purpose="detailed_figure",
+    ),
+}
+
+
+def get_difficulty_preset(level: DifficultyLevel) -> DifficultyPreset:  # return the preset stored for `level`
+    return PRESETS[level]  # plain dict subscript: a level with no PRESETS entry raises KeyError