Skip to content

Commit eec9981

Browse files
Bingxi Zhao
authored and committed
doc: update
1 parent 1e9cc6b commit eec9981

File tree

9 files changed

+96
-24
lines changed

9 files changed

+96
-24
lines changed

.github/workflows/linting.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ jobs:
3838
- name: Install pre-commit
3939
run: |
4040
python -m pip install --upgrade pip
41-
pip install pre-commit
41+
# Install pre-commit and tomli (required for bandit to read pyproject.toml)
42+
pip install pre-commit tomli
4243
4344
- name: Run pre-commit on Python files
4445
run: |

.github/workflows/tests.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,14 @@ jobs:
7373
- name: Check module imports
7474
run: |
7575
echo "🐍 Testing with Python ${{ matrix.python-version }}"
76-
python -c "from src.services.llm import complete, stream, fetch_models, sanitize_url; print('✅ LLM service imports OK')"
76+
# Core service imports (no heavy RAG dependencies)
7777
python -c "from src.services.llm.config import get_llm_config; print('✅ LLM config imports OK')"
78+
python -c "from src.services.llm.factory import complete, stream; print('✅ LLM factory imports OK')"
79+
python -c "from src.services.llm.utils import sanitize_url; print('✅ LLM utils imports OK')"
7880
python -c "from src.services.config.loader import load_config_with_main; print('✅ Config loader imports OK')"
7981
python -c "from src.services.config.unified_config import UnifiedConfigManager; print('✅ Unified config imports OK')"
8082
python -c "from src.logging import get_logger; print('✅ Logging imports OK')"
81-
python -c "from src.services.prompt import get_prompt_manager; print('✅ Prompt service imports OK')"
82-
python -c "from src.services.search import web_search; print('✅ Search service imports OK')"
83+
python -c "from src.services.prompt.manager import PromptManager; print('✅ Prompt manager imports OK')"
8384
env:
8485
PYTHONPATH: ${{ github.workspace }}
8586

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ repos:
8787
- id: bandit
8888
args: [-c, pyproject.toml, -q]
8989
exclude: ^tests/
90+
additional_dependencies: ["bandit[toml]"]
9091

9192
# ============================================
9293
# Type checking

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
4646
### 📦 Releases
4747

48-
> **[2026.1.18]** Release [v0.5.1](https://github.com/HKUDS/DeepTutor/releases/tag/v0.5.1) - Enhance RAG pipeline with Docling support and improve CI/CD workflows with several minor bugs fixed -- Thanks to all the feedbacks!
48+
> **[2026.1.18]** Release [v0.5.2](https://github.com/HKUDS/DeepTutor/releases/tag/v0.5.2) - Enhance RAG pipeline with Docling support and improve CI/CD workflows with several minor bugs fixed -- Thanks for all the feedback!
4949
5050
<details>
5151
<summary>History releases</summary>

src/services/__init__.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@
4343
result = web_search("What is AI?")
4444
"""
4545

46-
from . import config, embedding, llm, prompt, rag, search, setup, tts
46+
# Note: rag and embedding modules are lazy-loaded via __getattr__
47+
# to avoid importing heavy dependencies (lightrag, llama_index) at module load time
48+
from . import config, llm, prompt, search, setup, tts
4749

4850
__all__ = [
4951
"llm",
@@ -55,3 +57,16 @@
5557
"setup",
5658
"config",
5759
]
60+
61+
62+
def __getattr__(name: str):
63+
"""Lazy import for modules that depend on heavy libraries."""
64+
if name == "rag":
65+
from . import rag
66+
67+
return rag
68+
if name == "embedding":
69+
from . import embedding
70+
71+
return embedding
72+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

src/services/llm/__init__.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@
4848
from src.services.llm import sanitize_url, is_local_llm_server
4949
"""
5050

51-
# Also expose the providers for direct access if needed
52-
from . import cloud_provider, local_provider
51+
# Note: cloud_provider and local_provider are lazy-loaded via __getattr__
52+
# to avoid importing lightrag at module load time
5353
from .capabilities import (
5454
DEFAULT_CAPABILITIES,
5555
MODEL_OVERRIDES,
@@ -140,7 +140,7 @@
140140
"DEFAULT_MAX_RETRIES",
141141
"DEFAULT_RETRY_DELAY",
142142
"DEFAULT_EXPONENTIAL_BACKOFF",
143-
# Providers
143+
# Providers (lazy loaded)
144144
"cloud_provider",
145145
"local_provider",
146146
# Utils
@@ -151,3 +151,16 @@
151151
"clean_thinking_tags",
152152
"extract_response_content",
153153
]
154+
155+
156+
def __getattr__(name: str):
157+
"""Lazy import for provider modules that depend on heavy libraries."""
158+
if name == "cloud_provider":
159+
from . import cloud_provider
160+
161+
return cloud_provider
162+
if name == "local_provider":
163+
from . import local_provider
164+
165+
return local_provider
166+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

src/services/llm/cloud_provider.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,26 @@
1212
from typing import AsyncGenerator, Dict, List, Optional
1313

1414
import aiohttp
15-
from lightrag.llm.openai import openai_complete_if_cache
1615

1716
# Get loggers for suppression during fallback scenarios
1817
# (lightrag logs errors internally before raising exceptions)
1918
_lightrag_logger = logging.getLogger("lightrag")
2019
_openai_logger = logging.getLogger("openai")
2120

21+
# Lazy import for lightrag to avoid import errors when not installed
22+
_openai_complete_if_cache = None
23+
24+
25+
def _get_openai_complete_if_cache():
26+
"""Lazy load openai_complete_if_cache from lightrag."""
27+
global _openai_complete_if_cache
28+
if _openai_complete_if_cache is None:
29+
from lightrag.llm.openai import openai_complete_if_cache
30+
31+
_openai_complete_if_cache = openai_complete_if_cache
32+
return _openai_complete_if_cache
33+
34+
2235
from .capabilities import supports_response_format
2336
from .config import get_token_limit_kwargs
2437
from .exceptions import LLMAPIError, LLMAuthenticationError, LLMConfigError
@@ -183,6 +196,7 @@ async def _openai_complete(
183196
_openai_logger.setLevel(logging.CRITICAL)
184197
try:
185198
# model and prompt must be positional arguments
199+
openai_complete_if_cache = _get_openai_complete_if_cache()
186200
content = await openai_complete_if_cache(model, prompt, **lightrag_kwargs)
187201
finally:
188202
_lightrag_logger.setLevel(original_lightrag_level)

src/services/rag/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,20 @@
4040

4141
from .factory import get_pipeline, has_pipeline, list_pipelines, register_pipeline
4242
from .pipeline import RAGPipeline
43-
44-
# Import pipeline classes for convenience
45-
from .pipelines.raganything import RAGAnythingPipeline
4643
from .service import RAGService
4744
from .types import Chunk, Document, SearchResult
4845

46+
47+
# Lazy import for RAGAnythingPipeline to avoid importing heavy dependencies at module load time
48+
def __getattr__(name: str):
49+
"""Lazy import for pipeline classes that depend on heavy libraries."""
50+
if name == "RAGAnythingPipeline":
51+
from .pipelines.raganything import RAGAnythingPipeline
52+
53+
return RAGAnythingPipeline
54+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
55+
56+
4957
__all__ = [
5058
# Service (recommended entry point)
5159
"RAGService",
@@ -60,6 +68,6 @@
6068
"list_pipelines",
6169
"register_pipeline",
6270
"has_pipeline",
63-
# Pipeline implementations
71+
# Pipeline implementations (lazy loaded)
6472
"RAGAnythingPipeline",
6573
]

src/services/rag/factory.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,38 @@
44
================
55
66
Factory for creating and managing RAG pipelines.
7+
8+
Note: Pipeline imports are lazy to avoid importing heavy dependencies (lightrag, llama_index, etc.)
9+
at module load time. This allows the core services to be imported without RAG dependencies.
710
"""
811

912
from typing import Callable, Dict, List, Optional
1013
import warnings
1114

12-
from .pipelines import lightrag, llamaindex
13-
from .pipelines.raganything import RAGAnythingPipeline
14-
from .pipelines.raganything_docling import RAGAnythingDoclingPipeline
15+
# Pipeline registry - populated lazily
16+
_PIPELINES: Dict[str, Callable] = {}
17+
_PIPELINES_INITIALIZED = False
18+
19+
20+
def _init_pipelines():
21+
"""Lazily initialize pipeline registry to avoid import errors when RAG deps not installed."""
22+
global _PIPELINES, _PIPELINES_INITIALIZED
23+
if _PIPELINES_INITIALIZED:
24+
return
1525

16-
# Pipeline registry
17-
_PIPELINES: Dict[str, Callable] = {
18-
"raganything": RAGAnythingPipeline, # Full multimodal: MinerU parser, deep analysis (slow, thorough)
19-
"raganything_docling": RAGAnythingDoclingPipeline, # Docling parser: Office/HTML friendly, easier setup
20-
"lightrag": lightrag.LightRAGPipeline, # Knowledge graph: PDFParser, fast text-only (medium speed)
21-
"llamaindex": llamaindex.LlamaIndexPipeline, # Vector-only: Simple chunking, fast (fastest)
22-
}
26+
from .pipelines import lightrag, llamaindex
27+
from .pipelines.raganything import RAGAnythingPipeline
28+
from .pipelines.raganything_docling import RAGAnythingDoclingPipeline
29+
30+
_PIPELINES.update(
31+
{
32+
"raganything": RAGAnythingPipeline, # Full multimodal: MinerU parser, deep analysis (slow, thorough)
33+
"raganything_docling": RAGAnythingDoclingPipeline, # Docling parser: Office/HTML friendly, easier setup
34+
"lightrag": lightrag.LightRAGPipeline, # Knowledge graph: PDFParser, fast text-only (medium speed)
35+
"llamaindex": llamaindex.LlamaIndexPipeline, # Vector-only: Simple chunking, fast (fastest)
36+
}
37+
)
38+
_PIPELINES_INITIALIZED = True
2339

2440

2541
def get_pipeline(name: str = "raganything", kb_base_dir: Optional[str] = None, **kwargs):
@@ -37,6 +53,7 @@ def get_pipeline(name: str = "raganything", kb_base_dir: Optional[str] = None, *
3753
Raises:
3854
ValueError: If pipeline name is not found
3955
"""
56+
_init_pipelines()
4057
if name not in _PIPELINES:
4158
available = list(_PIPELINES.keys())
4259
raise ValueError(f"Unknown pipeline: {name}. Available: {available}")
@@ -98,6 +115,7 @@ def register_pipeline(name: str, factory: Callable):
98115
name: Pipeline name
99116
factory: Factory function or class that creates the pipeline
100117
"""
118+
_init_pipelines()
101119
_PIPELINES[name] = factory
102120

103121

@@ -111,6 +129,7 @@ def has_pipeline(name: str) -> bool:
111129
Returns:
112130
True if pipeline exists
113131
"""
132+
_init_pipelines()
114133
return name in _PIPELINES
115134

116135

0 commit comments

Comments
 (0)