Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-${{ matrix.python-version }}-

- name: Install WeasyPrint system dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libcairo2 \
libpango-1.0-0 \
libpangoft2-1.0-0 \
libgdk-pixbuf-2.0-0 \
libharfbuzz0b \
libharfbuzz-subset0 \
shared-mime-info

- name: Install minimal dependencies for import check
run: |
python -m pip install --upgrade pip
Expand Down Expand Up @@ -92,6 +104,18 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-3.11-

- name: Install WeasyPrint system dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libcairo2 \
libpango-1.0-0 \
libpangoft2-1.0-0 \
libgdk-pixbuf-2.0-0 \
libharfbuzz0b \
libharfbuzz-subset0 \
shared-mime-info

- name: Install smoke test dependencies
run: |
python -m pip install --upgrade pip
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "third_party/PageIndex"]
path = third_party/PageIndex
url = https://github.com/VectifyAI/PageIndex.git
21 changes: 19 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
WORKDIR /app

# Install system dependencies
# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru)
# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru).
# Pango/Cairo/GDK-PixBuf libraries are required by WeasyPrint PDF export.
# Rust is required for building tiktoken and other packages without pre-built wheels
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
Expand All @@ -78,7 +79,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libsm6 \
libxext6 \
libxrender1 \
libcairo2 \
libpango-1.0-0 \
libpangoft2-1.0-0 \
libgdk-pixbuf-2.0-0 \
libharfbuzz0b \
libharfbuzz-subset0 \
shared-mime-info \
pkg-config \
libffi-dev \
libssl-dev \
&& rm -rf /var/lib/apt/lists/* \
&& curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
Expand Down Expand Up @@ -114,7 +123,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
WORKDIR /app

# Install system dependencies
# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru)
# Note: libgl1 and libglib2.0-0 are required for OpenCV (used by mineru).
# Pango/Cairo/GDK-PixBuf libraries are required by WeasyPrint PDF export.
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
ca-certificates \
Expand All @@ -125,6 +135,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libsm6 \
libxext6 \
libxrender1 \
libcairo2 \
libpango-1.0-0 \
libpangoft2-1.0-0 \
libgdk-pixbuf-2.0-0 \
libharfbuzz0b \
libharfbuzz-subset0 \
shared-mime-info \
&& rm -rf /var/lib/apt/lists/*

# Copy Node.js from node-runtime stage (platform-matched binary)
Expand Down
1 change: 1 addition & 0 deletions deeptutor/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- co_writer: Co-writing agents (EditAgent, NarratorAgent)
- question: Question generation agents (ReAct architecture, separate base)
- chat: Lightweight conversational agent with session management
- structure_note: Structure Note generation workflow and artifact management

Usage:
from deeptutor.agents.base_agent import BaseAgent
Expand Down
40 changes: 40 additions & 0 deletions deeptutor/agents/structure_note/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from .difficulty import DifficultyPreset, get_difficulty_preset
from .manager import StructureNoteManager
from .models import (
CitationEntry,
DifficultyLevel,
DocumentPlan,
ExplanationStyleLevel,
GenerationChunk,
ImagePlaceholder,
JobStatus,
NoteLanguage,
PageIndexPage,
SectionEvidence,
SectionPlan,
SectionTreeNode,
StructureNoteArtifact,
StructureNoteProject,
)
from .storage import StructureNoteStorage

# Public API of the structure_note package: every name imported above is
# re-exported here, and this list is what ``from ... import *`` exposes.
# Entries are kept in sorted order (uppercase names before lowercase).
__all__ = [
"CitationEntry",
"DifficultyLevel",
"DifficultyPreset",
"DocumentPlan",
"ExplanationStyleLevel",
"GenerationChunk",
"ImagePlaceholder",
"JobStatus",
"NoteLanguage",
"PageIndexPage",
"SectionEvidence",
"SectionPlan",
"SectionTreeNode",
"StructureNoteArtifact",
"StructureNoteManager",
"StructureNoteProject",
"StructureNoteStorage",
"get_difficulty_preset",
]
64 changes: 64 additions & 0 deletions deeptutor/agents/structure_note/difficulty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from __future__ import annotations

from dataclasses import dataclass

from .models import DifficultyLevel


@dataclass(frozen=True)
class DifficultyPreset:
    """Immutable bundle of generation settings tied to one difficulty level.

    Instances are frozen: fields cannot be reassigned after construction, and
    the generated ``__eq__``/``__hash__`` compare all five fields, so presets
    can safely be shared as module-level constants.
    """

    # Difficulty level this preset belongs to.
    level: DifficultyLevel
    # Integer page-window size; presumably the number of source pages handled
    # per generation chunk -- TODO confirm against the consumer of this value.
    page_window: int
    # Free-text instruction describing how much material to cover.
    depth_instruction: str
    # Free-text instruction describing what may be compressed or removed.
    compression_instruction: str
    # Short tag for image placeholders (e.g. "key_figure"); how it is
    # interpreted is decided by the caller -- not visible from this module.
    placeholder_purpose: str


# Registry of the built-in presets. Each entry is keyed by the preset's own
# ``level`` so the dictionary key and the stored level cannot drift apart.
# Insertion order is SIMPLE, MEDIUM, DETAILED.
PRESETS: dict[DifficultyLevel, DifficultyPreset] = {
    preset.level: preset
    for preset in (
        DifficultyPreset(
            level=DifficultyLevel.SIMPLE,
            page_window=10,
            depth_instruction=(
                "Simple controls how much to cover: keep only the core thread, key concepts, "
                "essential conclusions, and any indispensable bridge needed to understand them. "
                "Short does not mean shallow."
            ),
            compression_instruction=(
                "Compress by deleting repeated background, template transitions, low-information summaries, "
                "and meta commentary. Preserve precise definitions, key mechanisms, critical formulas or "
                "arguments, and the shortest logical bridge between ideas."
            ),
            placeholder_purpose="key_figure",
        ),
        DifficultyPreset(
            level=DifficultyLevel.MEDIUM,
            page_window=10,
            depth_instruction=(
                "Medium controls how much to cover: include the main knowledge points and the core logic chain "
                "needed for a normal classroom handout."
            ),
            compression_instruction=(
                "Compress by merging duplicated examples and background while retaining the main concepts, "
                "mechanisms, evidence, and topic-to-topic reasoning."
            ),
            placeholder_purpose="supporting_figure",
        ),
        DifficultyPreset(
            level=DifficultyLevel.DETAILED,
            page_window=6,
            depth_instruction=(
                "Detailed controls how much to cover: preserve a fuller knowledge structure, including "
                "intermediate steps, boundary cases, supporting examples, and derivation or argument details "
                "when they are present in the evidence."
            ),
            compression_instruction=(
                "Compress only low-value repetition and boilerplate. Keep the complete conceptual chain, "
                "important qualifications, examples, mechanisms, and source-supported derivation details."
            ),
            placeholder_purpose="detailed_figure",
        ),
    )
}


def get_difficulty_preset(level: DifficultyLevel) -> DifficultyPreset:
    """Return the preset registered in ``PRESETS`` for ``level``.

    Raises:
        KeyError: If ``level`` has no entry in ``PRESETS``.
    """
    preset = PRESETS[level]
    return preset
Loading