rtk-ai
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 2 additions & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/install.sh b/install.sh
Lines changed: 7 additions & 0 deletions
diff --git a/scripts/test-install.sh b/scripts/test-install.sh
Lines changed: 98 additions & 0 deletions
diff --git a/src/cmds/README.md b/src/cmds/README.md
Lines changed: 16 additions & 0 deletions
diff --git a/src/cmds/cloud/aws_cmd.rs b/src/cmds/cloud/aws_cmd.rs
Lines changed: 11 additions & 10 deletions
@@ -50,7 +50,7 @@ Don't invent new output formats. Don't add RTK-specific headers or markers in th
 
 If a filter fails, fall back to raw output. RTK should never prevent a command from executing or producing output. Better to pass through unfiltered than to error out. Same for hooks: exit 0 on all error paths so the agent's command runs unmodified.
 
-Every filter needs a fallback path. Every hook must handle malformed input gracefully.
+Every filter needs a fallback path. Every hook must handle malformed input gracefully. Truncation follows the same rule: capping output at N items is only acceptable if accompanied by a hint that lets the agent recover the hidden data.
 
 ### Zero Overhead
 
@@ -262,6 +262,7 @@ cargo fmt --all --check && cargo clippy --all-targets && cargo test
 - [ ] Unit tests added/updated for changed code
 - [ ] Snapshot tests reviewed (`cargo insta review`)
 - [ ] Token savings >=60% verified
+- [ ] Any truncated list has a recovery hint (`force_tee_tail_hint` or `force_tee_hint`) and uses a `CAP_*` from `src/core/truncate.rs`
 - [ ] Edge cases covered
 - [ ] `cargo fmt --all --check && cargo clippy --all-targets && cargo test` passes
 - [ ] Manual test: run `rtk <cmd>` and inspect output
 
@@ -9,7 +9,7 @@
 <p align="center">
   <a href="https://github.com/rtk-ai/rtk/actions"><img src="https://github.com/rtk-ai/rtk/workflows/Security%20Check/badge.svg" alt="CI"></a>
   <a href="https://github.com/rtk-ai/rtk/releases"><img src="https://img.shields.io/github/v/release/rtk-ai/rtk" alt="Release"></a>
-  <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a>
+  <a href="https://opensource.org/licenses/Apache-2.0"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License: Apache 2.0"></a>
   <a href="https://discord.gg/RySmvNF5kF"><img src="https://img.shields.io/discord/1470188214710046894?label=Discord&logo=discord" alt="Discord"></a>
   <a href="https://formulae.brew.sh/formula/rtk"><img src="https://img.shields.io/homebrew/v/rtk" alt="Homebrew"></a>
 </p>
@@ -483,7 +483,7 @@ Join the community on [Discord](https://discord.gg/RySmvNF5kF).
 
 ## License
 
-MIT License - see [LICENSE](LICENSE) for details.
+Apache License 2.0 - see [LICENSE](LICENSE) for details.
 
 ## Disclaimer
 
 
@@ -98,6 +98,13 @@ install() {
         error "Failed to download binary"
     fi
 
+    # Verify archive contents before extraction (CWE-22 path traversal).
+    # Reject any entry with an absolute path or a ".." component.
+    info "Verifying archive..."
+    if tar -tzf "$ARCHIVE" | grep -qE '^/|(^|/)\.\.(/|$)'; then
+        error "Archive contains unsafe paths (absolute or directory traversal) — refusing to extract"
+    fi
+
     info "Extracting..."
     tar -xzf "$ARCHIVE" -C "$TEMP_DIR"
 
 
@@ -0,0 +1,98 @@
+#!/usr/bin/env sh
+# Tests for install.sh path traversal check (issue #1250, CWE-22).
+#
+# Verifies:
+#   1. A safe archive (a single top-level binary) is accepted.
+#   2. Archives with absolute paths are rejected pre-extraction.
+#   3. Archives with ".." components are rejected pre-extraction.
+#   4. The check is still present in install.sh (regression guard).
+
+set -eu
+
+REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
+INSTALL_SH="$REPO_ROOT/install.sh"
+
+if [ ! -f "$INSTALL_SH" ]; then
+    echo "FAIL: install.sh not found at $INSTALL_SH"
+    exit 1
+fi
+
+if ! command -v python3 >/dev/null 2>&1; then
+    echo "SKIP: python3 not available — crafted tarball tests require python3"
+    exit 0
+fi
+
+TMPDIR=$(mktemp -d)
+trap 'rm -rf "$TMPDIR"' EXIT
+
+# Replica of the check in install.sh (keep the two in sync).
+# Returns 1 when an entry is absolute or has a ".." component, else 0 (also 0 if tar cannot list the archive, since only grep's status is tested).
+check_archive() {
+    if tar -tzf "$1" | grep -qE '^/|(^|/)\.\.(/|$)'; then
+        return 1
+    fi
+    return 0
+}
+
+# --- Build safe archive using standard tar ---
+mkdir -p "$TMPDIR/safe_src"
+printf '#!/bin/sh\necho rtk\n' > "$TMPDIR/safe_src/rtk"
+(cd "$TMPDIR/safe_src" && tar -czf "$TMPDIR/safe.tgz" rtk)
+
+# --- Build crafted malicious archives with python ---
+python3 - "$TMPDIR" <<'PY'
+import sys, tarfile, io
+
+base = sys.argv[1]
+
+
+def make(name, entry):
+    with tarfile.open(f"{base}/{name}", "w:gz") as t:
+        info = tarfile.TarInfo(name=entry)
+        data = b"pwned"
+        info.size = len(data)
+        t.addfile(info, io.BytesIO(data))
+
+
+make("traversal.tgz", "../etc/evil")
+make("absolute.tgz", "/tmp/evil_abs")
+make("middle.tgz", "rtk/../../../etc/evil")
+make("end_dotdot.tgz", "rtk/..")
+PY
+
+FAIL=0
+pass() { printf '  PASS: %s\n' "$1"; }
+fail() { printf '  FAIL: %s\n' "$1"; FAIL=1; }
+
+echo "==> Functional checks"
+
+if check_archive "$TMPDIR/safe.tgz"; then
+    pass "safe archive accepted"
+else
+    fail "safe archive rejected (false positive)"
+fi
+
+for bad in traversal absolute middle end_dotdot; do
+    if check_archive "$TMPDIR/$bad.tgz"; then
+        fail "$bad archive accepted (should be rejected)"
+    else
+        pass "$bad archive rejected"
+    fi
+done
+
+echo "==> Regression guard"
+
+if grep -qF 'tar -tzf' "$INSTALL_SH" && grep -qF '\.\.' "$INSTALL_SH"; then
+    pass "install.sh still contains the path-traversal check"
+else
+    fail "install.sh is missing the path-traversal check — was it removed?"
+fi
+
+echo ""
+if [ "$FAIL" -eq 0 ]; then
+    echo "All install.sh path traversal tests passed"
+    exit 0
+else
+    echo "Some tests failed"
+    exit 1
+fi
@@ -252,6 +252,21 @@ When filtering fails, fall back to raw output and warn on stderr. Never block th
 
 Modules that parse structured output (JSON, NDJSON, state machines) must call `tee::tee_and_hint()` so users can recover full output on failure.
 
+### Internal Truncation Recovery
+
+When a filter caps a list at N items (e.g. `take(20)`), the remaining items must be accessible via a tee hint. **Never show `"… +N more"` without a recovery path** — without one, the agent has no way to retrieve the hidden content.
+
+**Choosing the right hint:**
+
+| Content type | Function | Condition |
+|---|---|---|
+| Flat list — one item = one line in the tee | `force_tee_tail_hint(content, slug, MAX + 1)` | PR lists, error lines, file paths — anything where each item is a single-line string |
+| Multi-line blocks | `force_tee_hint(content, slug)` | Test failures, build error blocks — items that span multiple lines so a line offset is meaningless |
+
+**Cap values come from `src/core/truncate.rs`.** Pick the `CAP_*` matching your data class (`CAP_ERRORS`, `CAP_WARNINGS`, `CAP_LIST`, `CAP_INVENTORY`) and bind it to a local `const MAX_XXX: usize = CAP_Y;`. Derive `take(MAX_XXX)`, `> MAX_XXX`, and the offset `MAX_XXX + 1` from that local constant. These CAPs will later become the configuration surface for per-filter cap tuning (user-overridable via config), so keep every truncation value routed through them: making caps configurable then becomes a single change rather than a codebase-wide hunt for literals. A filter that genuinely needs to deviate uses **`truncate::reduced(CAP_Y, n)`** (e.g. `reduced(CAP_WARNINGS, 5)`) so it still tracks the global cap when that cap is reconfigured — never a bare literal, never `cap - n` (which underflows once caps are runtime-configurable), and never `*`/`/` (which scale without bound). `reduced` falls back to the full cap if the reduction would empty the list. Each deviation needs a one-line comment stating why; if there's no real reason, just use the plain CAP. See `src/core/README.md` ("Truncation Caps") for the full rationale.
+
+**The tee content must match what `tail` produces.** For `force_tee_tail_hint`, build the tee from the same formatted values shown in the output — not raw/intermediate data. If the filter reformats items before displaying them, pre-build a `Vec<String>` of formatted lines and use it for both the display loop and the tee.
+
 ### Stderr Handling
 
 Modules must capture stderr and include it in the raw string passed to `timer.track()`, so token savings reflect total output.
@@ -278,6 +293,7 @@ Adding a new filter or command requires changes in multiple places. For TOML-vs-
    - Use `RunOptions::default()` when filtering combined text output
    - Add `.tee("label")` when the filter parses structured output (enables raw output recovery on failure)
    - **Exit codes**: handled automatically by `run_filtered()` — just return its result
+   - **Truncation**: if the filter caps any list at N items, emit `force_tee_tail_hint` (flat lists) or `force_tee_hint` (multi-line blocks) so the agent can recover hidden items — see [Internal Truncation Recovery](#internal-truncation-recovery). Bind the cap to a named constant taken from a `CAP_*` in `src/core/truncate.rs`, and derive the offset from it (`MAX_XXX + 1`)
 2. **Register module**:
    - Ecosystem `mod.rs` files use `automod::dir!()` — any `.rs` file in the directory becomes a public module automatically. No manual `pub mod` needed, but be aware: WIP or helper files will also be exposed. Only commit command-ready modules.
    - Add variant to `Commands` enum in `main.rs` with `#[arg(trailing_var_arg = true, allow_hyphen_values = true)]`
 
@@ -5,6 +5,7 @@
 
 use crate::core::tee::force_tee_hint;
 use crate::core::tracking;
+use crate::core::truncate::{CAP_INVENTORY, CAP_LIST};
 use crate::core::utils::{
     exit_code_from_output, exit_code_from_status, human_bytes, join_with_overflow,
     resolved_command, shorten_arn, truncate_iso_date,
@@ -15,7 +16,7 @@ use lazy_static::lazy_static;
 use regex::Regex;
 use serde_json::Value;
 
-const MAX_ITEMS: usize = 20;
+const MAX_ITEMS: usize = CAP_LIST;
 const JSON_COMPRESS_DEPTH: usize = 4;
 
 /// Result of a filter function: filtered text + whether items were truncated.
@@ -494,7 +495,7 @@ fn filter_s3_ls(output: &str) -> FilterResult {
 
     if total > limit {
         let text = format!(
-            "{}\n... +{} more items",
+            "{}\n… +{} more items",
             lines[..limit].join("\n"),
             total - limit
         );
@@ -553,7 +554,7 @@ fn filter_ec2_instances(json_str: &str) -> Option<FilterResult> {
     }
 
     if truncated {
-        result.push_str(&format!("  ... +{} more\n", total - MAX_ITEMS));
+        result.push_str(&format!("  … +{} more\n", total - MAX_ITEMS));
     }
 
     let text = result.trim_end().to_string();
@@ -700,7 +701,7 @@ fn filter_cfn_describe_stacks(json_str: &str) -> Option<FilterResult> {
 
 // --- P0 filters: CloudWatch Logs, CloudFormation Events, Lambda ---
 
-const MAX_LOG_EVENTS: usize = 50;
+const MAX_LOG_EVENTS: usize = CAP_INVENTORY;
 
 /// Convert days since Unix epoch to (year, month, day). Civil calendar, UTC.
 fn days_to_ymd(days: i64) -> (i64, i64, i64) {
@@ -759,7 +760,7 @@ fn filter_logs_events(json_str: &str) -> Option<FilterResult> {
     }
 
     if truncated {
-        lines.push(format!("... +{} more events", total - MAX_LOG_EVENTS));
+        lines.push(format!("… +{} more events", total - MAX_LOG_EVENTS));
     }
 
     let text = lines.join("\n");
@@ -1132,7 +1133,7 @@ fn filter_dynamodb_items(json_str: &str) -> Option<FilterResult> {
     }
 
     if truncated {
-        lines.push(format!("... +{} more items", total - MAX_ITEMS));
+        lines.push(format!("… +{} more items", total - MAX_ITEMS));
     }
 
     let text = lines.join("\n");
@@ -1426,7 +1427,7 @@ fn filter_logs_query_results(json_str: &str) -> Option<FilterResult> {
         }
 
         if truncated {
-            lines.push(format!("... +{} more rows", total - MAX_ITEMS));
+            lines.push(format!("… +{} more rows", total - MAX_ITEMS));
         }
 
         let text = lines.join("\n");
@@ -1616,7 +1617,7 @@ mod tests {
         }
         let input = lines.join("\n");
         let result = filter_s3_ls(&input);
-        assert!(result.text.contains("... +20 more items"));
+        assert!(result.text.contains("… +20 more items"));
         assert!(result.truncated);
     }
 
@@ -1852,7 +1853,7 @@ mod tests {
         }
         let json = format!(r#"{{"DBInstances": [{}]}}"#, dbs.join(","));
         let result = filter_rds_instances(&json).unwrap();
-        assert!(result.text.contains("... +5 more instances"));
+        assert!(result.text.contains("… +5 more instances"));
         assert!(result.truncated);
     }
 
@@ -1893,7 +1894,7 @@ mod tests {
         }
         let json = format!(r#"{{"events": [{}]}}"#, events.join(","));
         let result = filter_logs_events(&json).unwrap();
-        assert!(result.text.contains("... +10 more events"));
+        assert!(result.text.contains("… +10 more events"));
         assert!(result.truncated);
     }
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@`
`5`	`5`
`6`	`6`	`use crate::core::tee::force_tee_hint;`
`7`	`7`	`use crate::core::tracking;`
	`8`	`+use crate::core::truncate::{CAP_INVENTORY, CAP_LIST};`
`8`	`9`	`use crate::core::utils::{`
`9`	`10`	`exit_code_from_output, exit_code_from_status, human_bytes, join_with_overflow,`
`10`	`11`	`resolved_command, shorten_arn, truncate_iso_date,`
`@@ -15,7 +16,7 @@ use lazy_static::lazy_static;`
`15`	`16`	`use regex::Regex;`
`16`	`17`	`use serde_json::Value;`
`17`	`18`
`18`		`-const MAX_ITEMS: usize = 20;`
	`19`	`+const MAX_ITEMS: usize = CAP_LIST;`
`19`	`20`	`const JSON_COMPRESS_DEPTH: usize = 4;`
`20`	`21`
`21`	`22`	`/// Result of a filter function: filtered text + whether items were truncated.`
`@@ -494,7 +495,7 @@ fn filter_s3_ls(output: &str) -> FilterResult {`
`494`	`495`
`495`	`496`	`if total > limit {`
`496`	`497`	`let text = format!(`
`497`		`- "{}\n... +{} more items",`
	`498`	`+ "{}\n… +{} more items",`
`498`	`499`	`lines[..limit].join("\n"),`
`499`	`500`	`total - limit`
`500`	`501`	`);`
`@@ -553,7 +554,7 @@ fn filter_ec2_instances(json_str: &str) -> Option<FilterResult> {`
`553`	`554`	`}`
`554`	`555`
`555`	`556`	`if truncated {`
`556`		`- result.push_str(&format!(" ... +{} more\n", total - MAX_ITEMS));`
	`557`	`+ result.push_str(&format!(" … +{} more\n", total - MAX_ITEMS));`
`557`	`558`	`}`
`558`	`559`
`559`	`560`	`let text = result.trim_end().to_string();`
`@@ -700,7 +701,7 @@ fn filter_cfn_describe_stacks(json_str: &str) -> Option<FilterResult> {`
`700`	`701`
`701`	`702`	`// --- P0 filters: CloudWatch Logs, CloudFormation Events, Lambda ---`
`702`	`703`
`703`		`-const MAX_LOG_EVENTS: usize = 50;`
	`704`	`+const MAX_LOG_EVENTS: usize = CAP_INVENTORY;`
`704`	`705`
`705`	`706`	`/// Convert days since Unix epoch to (year, month, day). Civil calendar, UTC.`
`706`	`707`	`fn days_to_ymd(days: i64) -> (i64, i64, i64) {`
`@@ -759,7 +760,7 @@ fn filter_logs_events(json_str: &str) -> Option<FilterResult> {`
`759`	`760`	`}`
`760`	`761`
`761`	`762`	`if truncated {`
`762`		`- lines.push(format!("... +{} more events", total - MAX_LOG_EVENTS));`
	`763`	`+ lines.push(format!("… +{} more events", total - MAX_LOG_EVENTS));`
`763`	`764`	`}`
`764`	`765`
`765`	`766`	`let text = lines.join("\n");`
`@@ -1132,7 +1133,7 @@ fn filter_dynamodb_items(json_str: &str) -> Option<FilterResult> {`
`1132`	`1133`	`}`
`1133`	`1134`
`1134`	`1135`	`if truncated {`
`1135`		`- lines.push(format!("... +{} more items", total - MAX_ITEMS));`
	`1136`	`+ lines.push(format!("… +{} more items", total - MAX_ITEMS));`
`1136`	`1137`	`}`
`1137`	`1138`
`1138`	`1139`	`let text = lines.join("\n");`
`@@ -1426,7 +1427,7 @@ fn filter_logs_query_results(json_str: &str) -> Option<FilterResult> {`
`1426`	`1427`	`}`
`1427`	`1428`
`1428`	`1429`	`if truncated {`
`1429`		`- lines.push(format!("... +{} more rows", total - MAX_ITEMS));`
	`1430`	`+ lines.push(format!("… +{} more rows", total - MAX_ITEMS));`
`1430`	`1431`	`}`
`1431`	`1432`
`1432`	`1433`	`let text = lines.join("\n");`
`@@ -1616,7 +1617,7 @@ mod tests {`
`1616`	`1617`	`}`
`1617`	`1618`	`let input = lines.join("\n");`
`1618`	`1619`	`let result = filter_s3_ls(&input);`
`1619`		`- assert!(result.text.contains("... +20 more items"));`
	`1620`	`+ assert!(result.text.contains("… +20 more items"));`
`1620`	`1621`	`assert!(result.truncated);`
`1621`	`1622`	`}`
`1622`	`1623`
`@@ -1852,7 +1853,7 @@ mod tests {`
`1852`	`1853`	`}`
`1853`	`1854`	`let json = format!(r#"{{"DBInstances": [{}]}}"#, dbs.join(","));`
`1854`	`1855`	`let result = filter_rds_instances(&json).unwrap();`
`1855`		`- assert!(result.text.contains("... +5 more instances"));`
	`1856`	`+ assert!(result.text.contains("… +5 more instances"));`
`1856`	`1857`	`assert!(result.truncated);`
`1857`	`1858`	`}`
`1858`	`1859`
`@@ -1893,7 +1894,7 @@ mod tests {`
`1893`	`1894`	`}`
`1894`	`1895`	`let json = format!(r#"{{"events": [{}]}}"#, events.join(","));`
`1895`	`1896`	`let result = filter_logs_events(&json).unwrap();`
`1896`		`- assert!(result.text.contains("... +10 more events"));`
	`1897`	`+ assert!(result.text.contains("… +10 more events"));`
`1897`	`1898`	`assert!(result.truncated);`
`1898`	`1899`	`}`
`1899`	`1900`