Skip to content

Commit 49c2419

Browse files
committed
docs: add feature sections
Document redaction, RAG, skeleton mode, few-shot examples, and CI usage. Add --redact/--no-redact flag to match documentation. Made-with: Cursor
1 parent c5ae94a commit 49c2419

4 files changed

Lines changed: 103 additions & 15 deletions

File tree

README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,58 @@ patterns = parse_ctxengignore(Path("."))
199199
# → list of pattern strings, or [] if no file
200200
```
201201

202+
### Secrets & PII Redaction
203+
204+
ctxeng automatically redacts common secrets and PII from file contents before sending to any LLM:
205+
206+
- API keys and tokens
207+
- Passwords and credentials
208+
- Email addresses
209+
- Private keys
210+
211+
Redaction happens before token counting, tracing, and output, so any secrets it detects are removed before the generated context leaves your machine.
212+
213+
To disable redaction, pass `--no-redact`:
214+
215+
```bash
216+
ctxeng build "Your query" --no-redact
217+
```
218+
219+
### RAG (Intelligent Chunk Retrieval)
220+
221+
For large repositories, `--rag` switches from whole-file inclusion to **chunk-level retrieval**. ctxeng splits top-ranked files into overlapping chunks, then selects the most relevant chunks for your query:
222+
223+
- Uses **embeddings** when `sentence-transformers` is installed
224+
- Falls back to **lexical retrieval** when embeddings aren’t available
225+
226+
```bash
227+
ctxeng build "Explain the login flow" --rag
228+
```
229+
230+
### AST Skeleton (Python)
231+
232+
`--skeleton` replaces Python file bodies with an AST-derived outline (imports, classes, methods, function signatures). This is useful when you want a high-level overview within a tight token budget:
233+
234+
```bash
235+
ctxeng build "Give me a high-level architecture overview" --skeleton
236+
```
237+
238+
### Few-shot Examples
239+
240+
You can inject a small “style guide” / best-practice library into the context with `--fewshot`. Put markdown/text files under `.ctxeng/examples/` and enable the flag:
241+
242+
```bash
243+
ctxeng build "Refactor this module" --fewshot
244+
```
245+
246+
### CI subcommand
247+
248+
Use `ctxeng ci` for pipeline-friendly context generation. It always writes to a file and is designed for non-interactive runs:
249+
250+
```bash
251+
ctxeng ci "Generate release notes" --output context.md --fmt markdown --trace
252+
```
253+
202254
### Import graph (Python)
203255

204256
After files are scored, **ctxeng** parses static ``import`` / ``from … import`` statements in each discovered ``.py`` file, resolves **relative imports** from the file’s location, and can **pull in imported modules** from the same collection set before the token budget is applied.

ctxeng/builder.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def __init__(self, root: str | Path = ".") -> None:
5656
self._rag_chunk_overlap = 20
5757
self._rag_embedding_model = "all-MiniLM-L6-v2"
5858
self._skeleton = False
59+
self._redact = True
5960
self._fewshot = False
6061
self._fewshot_dir: str | Path = ".ctxeng/examples"
6162
self._fewshot_max_files = 5
@@ -167,6 +168,11 @@ def skeleton(self, enabled: bool = True) -> ContextBuilder:
167168
self._skeleton = enabled
168169
return self
169170

171+
def redact(self, enabled: bool = True) -> ContextBuilder:
172+
"""Enable/disable secrets & PII redaction before output."""
173+
self._redact = enabled
174+
return self
175+
170176
def fewshot(
171177
self,
172178
enabled: bool = True,
@@ -225,6 +231,7 @@ def _build_engine(self) -> ContextEngine:
225231
rag_chunk_overlap=self._rag_chunk_overlap,
226232
rag_embedding_model=self._rag_embedding_model,
227233
skeleton=self._skeleton,
234+
redact=self._redact,
228235
fewshot=self._fewshot,
229236
fewshot_dir=self._fewshot_dir,
230237
fewshot_max_files=self._fewshot_max_files,

ctxeng/cli.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ def _cmd_ci(args: argparse.Namespace) -> None:
3737
builder = builder.no_gitignore()
3838
if args.trace:
3939
builder = builder.trace(True, trace_dir=args.trace_dir, trace_id=args.trace_id)
40+
if not args.redact:
41+
builder = builder.redact(False)
4042
if args.rag:
4143
builder = builder.rag(
4244
True,
@@ -95,6 +97,8 @@ def cmd_build(args: argparse.Namespace) -> None:
9597
builder = builder.deny(*args.deny)
9698
if args.trace:
9799
builder = builder.trace(True, trace_dir=args.trace_dir, trace_id=args.trace_id)
100+
if not args.redact:
101+
builder = builder.redact(False)
98102
if args.rag:
99103
builder = builder.rag(
100104
True,
@@ -249,6 +253,8 @@ def cmd_watch(args: argparse.Namespace) -> None:
249253
builder = builder.use_semantic(model=args.semantic_model)
250254
if args.budget:
251255
builder = builder.with_budget(args.budget)
256+
if not args.redact:
257+
builder = builder.redact(False)
252258

253259
query = " ".join(args.query) if args.query else ""
254260

@@ -334,6 +340,12 @@ def main() -> None:
334340
"--trace-id",
335341
help="Provide a custom trace id (default: random)",
336342
)
343+
build_p.add_argument(
344+
"--redact",
345+
action=argparse.BooleanOptionalAction,
346+
default=True,
347+
help="Redact secrets and PII before output (default: on)",
348+
)
337349
build_p.add_argument(
338350
"--rag",
339351
action="store_true",
@@ -471,6 +483,12 @@ def main() -> None:
471483
"--trace-id",
472484
help="Provide a custom trace id (default: random)",
473485
)
486+
watch_p.add_argument(
487+
"--redact",
488+
action=argparse.BooleanOptionalAction,
489+
default=True,
490+
help="Redact secrets and PII before output (default: on)",
491+
)
474492
watch_p.add_argument(
475493
"--rag",
476494
action="store_true",
@@ -576,6 +594,12 @@ def main() -> None:
576594
ci_p.add_argument("--trace", action="store_true", help="Write JSONL trace under .ctxeng/traces/")
577595
ci_p.add_argument("--trace-dir", help="Override trace output directory")
578596
ci_p.add_argument("--trace-id", help="Provide a custom trace id")
597+
ci_p.add_argument(
598+
"--redact",
599+
action=argparse.BooleanOptionalAction,
600+
default=True,
601+
help="Redact secrets and PII before output (default: on)",
602+
)
579603
ci_p.add_argument("--rag", action="store_true", help="Enable chunk-level retrieval (RAG)")
580604
ci_p.add_argument("--rag-max-chunks", type=int, default=20)
581605
ci_p.add_argument("--rag-chunk-max-lines", type=int, default=120)

ctxeng/core.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def __init__(
6969
rag_chunk_overlap: int = 20,
7070
rag_embedding_model: str = "all-MiniLM-L6-v2",
7171
skeleton: bool = False,
72+
redact: bool = True,
7273
fewshot: bool = False,
7374
fewshot_dir: str | Path = ".ctxeng/examples",
7475
fewshot_max_files: int = 5,
@@ -98,6 +99,7 @@ def __init__(
9899
self.rag_chunk_overlap = rag_chunk_overlap
99100
self.rag_embedding_model = rag_embedding_model
100101
self.skeleton = skeleton
102+
self.redact = redact
101103
self.fewshot = fewshot
102104
self.fewshot_dir = fewshot_dir
103105
self.fewshot_max_files = fewshot_max_files
@@ -286,22 +288,25 @@ def build(
286288
)
287289

288290
# 4. Redact sensitive info before budgeting/output (skipped when self.redact is False)
289-
redacted_files = 0
290-
redacted_total = 0
291-
for f in context_files:
292-
r = redact_text(f.content, redact_secrets=True, redact_pii=True)
293-
if r.total:
294-
f.content = r.text
295-
f.redaction_count = r.total
296-
redacted_files += 1
297-
redacted_total += r.total
291+
if self.redact:
292+
redacted_files = 0
293+
redacted_total = 0
294+
for f in context_files:
295+
r = redact_text(f.content, redact_secrets=True, redact_pii=True)
296+
if r.total:
297+
f.content = r.text
298+
f.redaction_count = r.total
299+
redacted_files += 1
300+
redacted_total += r.total
298301

299-
if trace_writer:
300-
trace_writer.emit(
301-
"redaction_summary",
302-
files_with_redactions=redacted_files,
303-
total_redactions=redacted_total,
304-
)
302+
if trace_writer:
303+
trace_writer.emit(
304+
"redaction_summary",
305+
files_with_redactions=redacted_files,
306+
total_redactions=redacted_total,
307+
)
308+
elif trace_writer:
309+
trace_writer.emit("redaction_summary", disabled=True)
305310

306311
# 5. Optimize for token budget
307312
query_tokens = count_tokens(query, self.model) if query else 0

0 commit comments

Comments
 (0)