code-indexer/.env.example at master · vinikjkkj/code-indexer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# ------------------------------------------------------------------------------
# Host mounts (Docker)
# ------------------------------------------------------------------------------
# Optional: host folder mounted at /workspaces (read-only) inside the container.
# Set this when static codebases listed in config/codebases.yaml use paths under
# /workspaces (for example /workspaces/my-repo).
HOST_CODEBASES_ROOT=C:/path/to/projects

# Host filesystem root mounted at /host (read-only). This enables dynamic indexing
# of repos outside /workspaces via MCP register_codebase (example: /host/Users/...).
# NOTE(review): the example value exposes the entire C: drive to the container
# (read-only); point it at a narrower directory if that is all you need.
HOST_FILESYSTEM_ROOT=C:/

# Dockerfile used for the mcp-server build.
# Use "Dockerfile" for the default image, or "Dockerfile.rocm" for AMD ROCm hosts.
MCP_DOCKERFILE=Dockerfile

# ------------------------------------------------------------------------------
# Models and retrieval pipeline
# ------------------------------------------------------------------------------
# Dense embedding model used for semantic code retrieval.
EMBEDDING_MODEL=jinaai/jina-embeddings-v2-base-code

# Sparse encoder model used for hybrid dense+sparse retrieval.
SPARSE_MODEL=naver/splade-cocondenser-ensembledistil

# Enable the sparse branch in retrieval (recommended for recall).
# For a very large first-time indexing run, set this to false to speed up ingestion.
USE_SPARSE=true

# Cross-encoder reranker model applied after retrieval candidates are fetched.
RERANKER_MODEL=BAAI/bge-reranker-v2-m3

# Enable the reranking stage (recommended for precision).
USE_RERANKER=true

# Candidate pool sizing before reranking:
#   final_candidates = clamp(limit * multiplier, min, max)
# where multiplier = RERANK_CANDIDATE_MULTIPLIER, min = RERANK_MIN_CANDIDATES and
# max = RERANK_MAX_CANDIDATES. With the values below: limit=10 -> 80,
# limit=2 -> 30 (raised to min), limit=25 -> 150 (capped at max).
# NOTE(review): "limit" is presumably the result count requested by the search
# call — confirm against the server code.
RERANK_CANDIDATE_MULTIPLIER=8
RERANK_MIN_CANDIDATES=30
RERANK_MAX_CANDIDATES=150

# ------------------------------------------------------------------------------
# Devices and batch sizes
# ------------------------------------------------------------------------------
# Compute device for the embedding, sparse, and rerank stages.
# Accepted values: auto, cuda, or cpu.
EMBEDDING_DEVICE=auto
SPARSE_DEVICE=auto
RERANK_DEVICE=auto

# Batch sizes for the same three stages; throughput tuning
# (increase when more VRAM/RAM is available).
EMBEDDING_BATCH_SIZE=32
SPARSE_BATCH_SIZE=16
RERANK_BATCH_SIZE=16

# The two settings below control Qdrant behavior rather than devices or batching.

# If true, recreate the Qdrant collection when the vector schema/dimension mismatches.
# Warning: this can drop existing indexed vectors in that collection.
RECREATE_COLLECTION_ON_DIM_MISMATCH=true

# If true, each Qdrant write waits for persistence/ack, which increases indexing latency.
# For faster bulk indexing, keep this false.
QDRANT_WRITE_WAIT=false

# ------------------------------------------------------------------------------
# Runtime persistence
# ------------------------------------------------------------------------------
# NOTE(review): both paths below live under /models; they only survive container
# restarts if that directory is backed by a volume — confirm in the compose file.

# File used to persist codebases registered at runtime via MCP.
RUNTIME_CODEBASES_FILE=/models/runtime_codebases.yaml

# Directory used to persist incremental indexing state, per codebase.
INDEX_STATE_DIR=/models/index_state

# ------------------------------------------------------------------------------
# Search behavior tuning
# ------------------------------------------------------------------------------
# Light score multipliers applied by file type after retrieval/rerank.
# As multipliers, a value above 1 boosts matching files and a value below 1
# demotes them (here: code is boosted slightly, docs are demoted slightly).
CODE_EXTENSION_BOOST=1.08
DOC_EXTENSION_BOOST=0.90

# Default search mode. Accepted values: auto, code_only, mixed, docs_only.
SEARCH_MODE_DEFAULT=auto

# Max chunks kept per file before reranking (dedupe control).
MAX_CANDIDATES_PER_FILE=6

# Line-overlap ratio threshold for treating two chunks as duplicates.
DEDUPE_LINE_OVERLAP_THRESHOLD=0.65

# Character-range overlap threshold, checked before the line-based overlap in dedupe.
# Helps avoid collapsing many chunks from single-line/minified files.
DEDUPE_CHAR_OVERLAP_THRESHOLD=0.65

# ------------------------------------------------------------------------------
# MCP server
# ------------------------------------------------------------------------------
# MCP HTTP port (used both inside the container and for the host publish).
MCP_PORT=8000
# Transport the MCP server runs with.
# NOTE(review): accepted values other than streamable-http are not listed in
# this file — confirm against the server code before changing.
MCP_TRANSPORT=streamable-http
# URL path of the MCP HTTP endpoint.
# NOTE(review): presumably clients connect to http://<host>:<MCP_PORT>/mcp — verify.
MCP_HTTP_PATH=/mcp

# If true, the server accepts stateless MCP HTTP calls (no session ID required).
# Enable this if your MCP client reports "Missing session ID".
MCP_STATELESS_HTTP=false

# Worker count for the async indexing job executor.
# Keep at 1 for predictable resource usage; increase it if you want queued jobs
# to run in parallel.
INDEX_JOB_MAX_WORKERS=1

# Max number of recent indexing jobs kept in memory for status/history lookup.
INDEX_JOB_MAX_HISTORY=200

# ------------------------------------------------------------------------------
# Indexing behavior
# ------------------------------------------------------------------------------
# If true, index the configured codebases on startup.
# Startup indexing runs in the background (non-blocking), so the MCP endpoint can
# accept requests immediately.
AUTO_INDEX_ON_STARTUP=true

# If true, startup indexing forces a full rebuild instead of an incremental update.
AUTO_INDEX_ON_STARTUP_FULL_REINDEX=false

# Enable tree-sitter chunking, with a textual fallback.
USE_TREE_SITTER=true

# Number of chunks buffered across multiple files before an embedding/upsert flush.
# Higher values reduce round trips and improve throughput on large repos.
INDEX_CHUNK_BUFFER_SIZE=512

# Qdrant upsert batch size per request.
QDRANT_UPSERT_BATCH_SIZE=256

# Print indexing progress every N scanned files; 0 disables progress logs.
INDEX_PROGRESS_EVERY_FILES=250

# Parallel workers for file read/hash/chunk preparation.
# Embedding/upsert stays centralized; this only parallelizes per-file preprocessing.
INDEX_FILE_WORKERS=4

# Max number of queued file tasks; once reached, backpressure is applied to the
# scanner loop.
INDEX_MAX_PENDING_FUTURES=16