# env.example
# ─────────────────────────────────────────────────────────────
# DocMind — Document Q&A RAG Assistant
# Copy this file to .env and fill in your values.
# Never commit .env to version control.
# ─────────────────────────────────────────────────────────────
# ── LLM Provider ─────────────────────────────────────────────
# Required. Get your key at https://platform.openai.com/api-keys
OPENAI_API_KEY=sk-...
# ── Chunking ─────────────────────────────────────────────────
# chunk_size: target characters per chunk. Larger = more context
# per retrieval call, but higher token cost. 800 works well for
# dense policy/regulatory documents.
RAG_CHUNK_SIZE=800
# chunk_overlap: characters shared between adjacent chunks.
# Prevents answers from being cut off at chunk boundaries.
# ~15-20% of chunk_size is a reasonable default.
RAG_CHUNK_OVERLAP=150
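For illustration, fixed-size character chunking with overlap can be sketched as below. This is a minimal sketch of how `RAG_CHUNK_SIZE` and `RAG_CHUNK_OVERLAP` interact; the function name is hypothetical, and DocMind's real splitter may be sentence- or token-aware rather than purely character-based.

```python
def chunk_text(text: str, chunk_size: int = 800, chunk_overlap: int = 150) -> list[str]:
    """Split text into fixed-size character chunks with overlap.

    Hypothetical helper: with the defaults above, each chunk is up to 800
    characters and shares its last 150 characters with the next chunk.
    """
    if chunk_overlap >= chunk_size:
        raise ValueError("chunk_overlap must be smaller than chunk_size")
    step = chunk_size - chunk_overlap  # advance 650 characters per chunk by default
    return [
        text[i:i + chunk_size]
        for i in range(0, max(len(text) - chunk_overlap, 1), step)
    ]
```

Note the trade-off the comments describe: a larger `chunk_size` means fewer, bigger chunks (more context per retrieval hit, more tokens billed), while the overlap guarantees that a sentence straddling a boundary appears whole in at least one chunk.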
# ── Retrieval ─────────────────────────────────────────────────
# top_k: number of chunks passed to the LLM as context.
# Increasing this improves recall but raises token cost and
# may dilute answer quality with low-relevance content.
RAG_TOP_K=5
# score_threshold: minimum cosine similarity (0–1) for a chunk
# to be included. 0.30 is permissive; raise to 0.50+ for
# high-precision use cases (e.g. regulatory Q&A).
RAG_SCORE_THRESHOLD=0.30
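How `RAG_TOP_K` and `RAG_SCORE_THRESHOLD` combine can be sketched as a post-processing step over scored search hits. This is an assumed illustration (the function name is hypothetical, and the real FAISS query path will differ), but the filtering logic is the one the comments describe: threshold first, then keep the best `top_k`.

```python
def select_context(scored_chunks: list[tuple[str, float]],
                   top_k: int = 5,
                   score_threshold: float = 0.30) -> list[str]:
    """Keep chunks at or above the similarity threshold, then take the top_k best.

    Hypothetical helper: `scored_chunks` pairs chunk text with a cosine
    similarity in [0, 1], as returned by a vector search.
    """
    eligible = [(text, score) for text, score in scored_chunks
                if score >= score_threshold]
    eligible.sort(key=lambda pair: pair[1], reverse=True)  # best matches first
    return [text for text, _ in eligible[:top_k]]
```

Raising the threshold shrinks the candidate pool before `top_k` is applied, which is why 0.50+ trades recall for precision in regulatory Q&A.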
# ── Models ───────────────────────────────────────────────────
# LLM used for answer generation. gpt-4o-mini offers a strong
# cost/quality ratio for RAG. Switch to gpt-4o for higher
# reasoning quality on complex documents.
RAG_LLM_MODEL=gpt-4o-mini
# Embedding model. text-embedding-3-small is fast and cheap.
# Use text-embedding-3-large if retrieval quality is critical.
# Must match the model used when the index was originally built —
# changing this requires re-indexing all documents.
RAG_EMBEDDING_MODEL=text-embedding-3-small
# ── Storage ───────────────────────────────────────────────────
# Directory where the FAISS index is persisted between runs.
RAG_INDEX_DIR=data/faiss_index
# JSON file tracking which documents have been indexed
# (used for deduplication by SHA-256 hash).
RAG_METADATA_PATH=data/doc_metadata.json
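The SHA-256 deduplication behind `RAG_METADATA_PATH` can be sketched as below. The helper names and the hash-to-filename JSON layout are assumptions for illustration; the real metadata file may store additional fields.

```python
import hashlib
import json
from pathlib import Path

def is_already_indexed(doc_bytes: bytes, metadata_path: Path) -> bool:
    """Return True if a document with the same SHA-256 hash was indexed before.

    Hypothetical sketch of the dedup check: identical bytes always produce
    the identical digest, so re-uploads of the same file are detected.
    """
    digest = hashlib.sha256(doc_bytes).hexdigest()
    if not metadata_path.exists():
        return False
    return digest in json.loads(metadata_path.read_text())

def record_indexed(doc_bytes: bytes, filename: str, metadata_path: Path) -> None:
    """Record a document's hash after indexing so later re-uploads are skipped."""
    digest = hashlib.sha256(doc_bytes).hexdigest()
    indexed = json.loads(metadata_path.read_text()) if metadata_path.exists() else {}
    indexed[digest] = filename
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    metadata_path.write_text(json.dumps(indexed, indent=2))
```

Hashing the raw bytes (rather than the filename) means a renamed copy of an already-indexed PDF is still skipped, while any edit to the content produces a new digest and triggers re-indexing.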
# ── API Server ────────────────────────────────────────────────
# Base URL of the FastAPI backend, consumed by the Streamlit UI.
RAG_API_URL=http://localhost:8000