# Source file: main.py (web-page navigation text and line-number gutter removed).
"""
ForgeRAG one-shot launcher.

Running `python main.py` in the project root does the right thing
in three cases:

  1. ./forgerag.yaml exists   -> use it, start uvicorn.
  2. $FORGERAG_CONFIG set     -> use it, start uvicorn.
  3. Nothing configured       -> write a minimal default config
                                 to ./forgerag.yaml, then start.

When more than one source is available, precedence is:
--config, then $FORGERAG_CONFIG, then ./forgerag.yaml.

Overrides:
    python main.py --host 127.0.0.1 --port 9000 --reload
    python main.py --config path/to/my.yaml
    python main.py --init-only      # write default config and exit

The generated default uses zero external services:
  - SQLite at ./storage/forgerag.db
  - ChromaDB at ./storage/chroma
  - Local blob store at ./storage/blobs
  - Embedder / LLM = litellm with OPENAI_API_KEY (override via setup wizard)

For a richer wizard, run: python scripts/setup.py
"""
from __future__ import annotations
import argparse
import logging
import os
import sys
from pathlib import Path
from typing import Any
# Make the directory containing this file importable, so that project-local
# imports (`import config` etc.) resolve when the user runs
# `python main.py` directly.
_ROOT = Path(__file__).resolve().parent
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

# Logger used by this launcher module.
log = logging.getLogger("forgerag.main")
# ---------------------------------------------------------------------------
# Default minimal config
# ---------------------------------------------------------------------------
# Fallback config location: forgerag.yaml in the same directory as this file
# (_ROOT), which is not necessarily the current working directory.
DEFAULT_CONFIG_PATH = _ROOT / "forgerag.yaml"
def _minimal_default_config() -> dict[str, Any]:
    """
    Build the smallest configuration that runs out of the box.

    Only infrastructure is described here: parser backend, blob storage,
    file limits, relational and vector persistence, and the embedding
    dimension. LLM models, API keys, retrieval strategy, and parsing
    options are managed via /settings (DB-backed, frontend-editable);
    their defaults are seeded on first startup.

    Returns:
        A freshly built nested dict of plain Python values. Sections are
        inserted in the order parser, storage, files, persistence,
        embedder, which is the order they appear in when the dict is
        dumped without key sorting.
    """
    # The vector collection and the embedder are configured with the same
    # dimension, so keep the number in one place.
    embedding_dim = 1536

    parser_section = {
        "backends": {"pymupdf": {"enabled": True}},
    }
    storage_section = {
        "mode": "local",
        "local": {"root": "./storage/blobs"},
    }
    files_section = {
        "hash_algorithm": "sha256",
        "max_bytes": 500 * 1024 * 1024,  # 524288000 bytes
    }
    relational_section = {
        "backend": "sqlite",
        "sqlite": {
            "path": "./storage/forgerag.db",
            "journal_mode": "wal",
        },
    }
    vector_section = {
        "backend": "chromadb",
        "chromadb": {
            "mode": "persistent",
            "persist_directory": "./storage/chroma",
            "collection_name": "forgerag",
            "dimension": embedding_dim,
            "distance": "cosine",
        },
    }

    config: dict[str, Any] = {}
    config["parser"] = parser_section
    config["storage"] = storage_section
    config["files"] = files_section
    config["persistence"] = {
        "relational": relational_section,
        "vector": vector_section,
    }
    config["embedder"] = {"dimension": embedding_dim}
    return config
def _write_default_config(path: Path) -> None:
    """
    Write the minimal default config to *path* as YAML with a comment header.

    Missing parent directories are created. The file is opened in "w" mode,
    so any existing content at *path* is replaced.

    Raises:
        RuntimeError: if the `yaml` module (PyYAML) cannot be imported.
    """
    try:
        import yaml
    except ImportError as exc:
        raise RuntimeError("pyyaml not installed. pip install pyyaml") from exc

    # Comment banner placed above the YAML document; the trailing empty
    # entry yields one blank line between the banner and the data.
    banner_lines = [
        "# ForgeRAG config (auto-generated by main.py).",
        "# Infrastructure only — LLM models, API keys, retrieval strategy",
        "# are managed via /settings API after startup.",
        "#",
        "# For an interactive wizard: python scripts/setup.py",
        "# To configure models/keys: visit http://localhost:8000/settings",
        "",
    ]
    banner = "".join(line + "\n" for line in banner_lines)

    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as out:
        out.write(banner)
        # sort_keys=False keeps the section order chosen by
        # _minimal_default_config().
        yaml.safe_dump(
            _minimal_default_config(),
            out,
            sort_keys=False,
            default_flow_style=False,
            allow_unicode=True,
        )
# ---------------------------------------------------------------------------
# Config resolution
# ---------------------------------------------------------------------------
def resolve_config_path(cli_path: Path | None) -> Path:
    """
    Decide which config file to use and return it as an absolute path.

    Precedence (highest first):
      1. --config argument (``cli_path``)
      2. FORGERAG_CONFIG env var (ignored when unset or empty)
      3. ./forgerag.yaml in the current working directory, if it exists
      4. DEFAULT_CONFIG_PATH (forgerag.yaml next to main.py)

    A leading ``~`` in the CLI or env value is expanded to the user's home
    directory before resolving. Without this, a value such as
    ``FORGERAG_CONFIG=~/forgerag.yaml`` (or ``--config=~/x.yaml``, which
    shells do not expand) would resolve to ``<cwd>/~/...``.

    Args:
        cli_path: Value of --config, or None when it was not given.

    Returns:
        The resolved path. Only case 3 checks for existence; in the other
        cases the returned path may not exist and the caller is expected
        to handle that (main() writes a default config there).
    """
    if cli_path is not None:
        return Path(cli_path).expanduser().resolve()

    env_path = os.environ.get("FORGERAG_CONFIG")
    if env_path:
        return Path(env_path).expanduser().resolve()

    cwd_candidate = Path.cwd() / "forgerag.yaml"
    if cwd_candidate.exists():
        return cwd_candidate.resolve()

    return DEFAULT_CONFIG_PATH.resolve()
# ---------------------------------------------------------------------------
# Pre-flight checks on the resolved config
# ---------------------------------------------------------------------------
def _preflight(cfg_path: Path) -> None:
    """
    Load the config at *cfg_path* and print a concise startup banner.

    Raises:
        SystemExit: with code 2 when the `config` package cannot be
            imported, or code 1 when load_config() raises for this file.
            In both cases an error message is written to stderr first.
    """
    try:
        from config import load_config
    except ImportError as exc:
        print(f"error: cannot import config package: {exc}", file=sys.stderr)
        print(
            "hint: did you install dependencies? pip install sqlalchemy pydantic pyyaml",
            file=sys.stderr,
        )
        raise SystemExit(2)

    try:
        cfg = load_config(cfg_path)
    except Exception as exc:
        # Any loader failure is reported as a one-line error rather than a
        # traceback.
        print(f"error: failed to load {cfg_path}: {exc}", file=sys.stderr)
        raise SystemExit(1)

    def banner_rows():
        # Lazily generated: each config attribute is read only when its
        # row is about to be printed.
        yield ""
        yield f" config {cfg_path}"
        yield f" relational {cfg.persistence.relational.backend}"
        yield f" vector {cfg.persistence.vector.backend} (dim={cfg.embedder.dimension})"
        yield f" blob {cfg.storage.mode}"
        yield f" embedder {cfg.embedder.backend}"
        yield f" generator {cfg.answering.generator.backend} / {cfg.answering.generator.model}"
        yield ""

    for row in banner_rows():
        print(row)

    print(" note: LLM models and API keys are configured via /settings")
    # The embedded newline plus print's own newline leaves a blank line
    # after the note.
    print(" Visit http://localhost:8000/settings after startup.\n")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """
    Parse the launcher's command-line options.

    Args:
        argv: Argument list to parse. None (the default) makes argparse
            read sys.argv[1:], which is what a plain ``parse_args()``
            call did before this parameter existed.

    Returns:
        Namespace with: config (Path | None), host (str), port (int),
        reload (bool), workers (int), log_level (str), init_only (bool).

    Environment:
        FORGERAG_HOST supplies the default for --host.
        FORGERAG_PORT supplies the default for --port; an empty value is
        treated as unset.

    Notes:
        The port default is handed to argparse as a *string* so argparse
        converts it with ``type=int`` itself, and only when --port is not
        given. A malformed FORGERAG_PORT therefore produces a normal
        argparse usage error (exit code 2) instead of a ValueError
        traceback while the parser is being built, and it no longer
        breaks ``--help`` or an explicit ``--port``.
    """
    p = argparse.ArgumentParser(
        prog="main.py",
        description=(
            "Launch ForgeRAG. If forgerag.yaml exists in the current "
            "directory (or FORGERAG_CONFIG points at one), it is used. "
            "Otherwise a minimal default config is written to ./forgerag.yaml "
            "and the server starts."
        ),
    )
    p.add_argument(
        "--config",
        type=Path,
        default=None,
        help="Path to forgerag.yaml. Overrides env var and cwd discovery.",
    )
    p.add_argument(
        "--host",
        type=str,
        default=os.environ.get("FORGERAG_HOST", "0.0.0.0"),
        help="Bind host (default: 0.0.0.0, or $FORGERAG_HOST)",
    )
    p.add_argument(
        "--port",
        type=int,
        # String default: argparse runs it through type=int lazily.
        default=os.environ.get("FORGERAG_PORT") or "8000",
        help="Bind port (default: 8000, or $FORGERAG_PORT)",
    )
    p.add_argument(
        "--reload",
        action="store_true",
        help="Enable uvicorn --reload for development.",
    )
    p.add_argument(
        "--workers",
        type=int,
        default=4,
        help="Uvicorn worker count (default: 4). Ignored when --reload is set.",
    )
    p.add_argument(
        "--log-level",
        type=str,
        default="info",
        help="uvicorn log level (critical / error / warning / info / debug / trace).",
    )
    p.add_argument(
        "--init-only",
        action="store_true",
        help="Write the default config (if missing) and exit without starting the server.",
    )
    return p.parse_args(argv)
def main() -> int:
    """
    Resolve the config, bootstrap logging, and run the API server.

    Steps, in order:
      1. Parse CLI arguments and resolve the config path.
      2. If that path does not exist, write the minimal default config there.
      3. With --init-only, stop here.
      4. Export the resolved path as FORGERAG_CONFIG for downstream code.
      5. Set up logging from the config; on any failure fall back to
         logging.basicConfig and log a warning.
      6. Run the pre-flight check (which may raise SystemExit), then start
         uvicorn on "api.app:app".

    Returns:
        0 after --init-only or once uvicorn.run() returns;
        2 when uvicorn is not installed.
    """
    args = parse_args()
    cfg_path = resolve_config_path(args.config)

    config_missing = not cfg_path.exists()
    if config_missing:
        print(f"no config found, writing default to {cfg_path}")
        _write_default_config(cfg_path)
        print("default config ready. edit it to customize.")
        print("for an interactive wizard: python scripts/setup.py")

    if args.init_only:
        return 0

    # Point everything downstream at the resolved config.
    os.environ["FORGERAG_CONFIG"] = str(cfg_path)

    # Initialise logging before preflight so that it and all downstream
    # modules log through the project's logging setup (the original author
    # notes this is a daily-rotated file under logs/).
    try:
        from config import load_config as _load_cfg
        from config.logging import setup_logging

        setup_logging(_load_cfg(cfg_path).logging)
    except Exception as logging_error:
        # Deliberately best-effort: fall back to basic stderr logging so
        # the server still starts.
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
        )
        log.warning(
            "Failed to initialise structured logging, using basicConfig: %s",
            logging_error,
        )

    _preflight(cfg_path)

    try:
        import uvicorn
    except ImportError:
        print(
            "error: uvicorn not installed. pip install 'uvicorn[standard]' fastapi python-multipart",
            file=sys.stderr,
        )
        return 2

    server_options: dict[str, Any] = {
        "host": args.host,
        "port": args.port,
        "log_level": args.log_level,
    }
    if args.reload:
        # --reload is incompatible with --workers, so workers is not passed.
        server_options["reload"] = True
    elif args.workers > 1:
        server_options["workers"] = args.workers

    # Pass the app as an import string (not an object) so uvicorn's
    # --reload watcher can re-import it after file changes.
    uvicorn.run("api.app:app", **server_options)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())