Skip to content

Commit edaaa2b

Browse files
perf(trufflehog): single process for PR changed-file scan (#151)
* perf(trufflehog): batch PR changed-path scans. Feed filtered paths through GNU `xargs -0` so that a typical PR runs a single TruffleHog process (avoiding per-file startup cost) while argv stays under OS limits on very large diffs. Paths are still filtered for excludes and for missing files (e.g. deletions in the diff).
* perf(trufflehog): PR scan via `--include-paths` regex file. Use one `trufflehog filesystem` invocation over `.` with `--include-paths` (one anchored, `re.escape`-escaped regex per path) to avoid argv limits; this addresses review feedback. Revert the get-vault-secrets action pin to match main (f1614b2).
1 parent 07958e8 commit edaaa2b

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

.github/workflows/reusable-trufflehog.yml

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -147,17 +147,35 @@ jobs:
147147
fi
148148
149149
if [[ -s changed-files.txt ]]; then
150+
# One TruffleHog process over repo root; --include-paths file lists anchored regexes
151+
# (TruffleHog expects regex lines, not raw paths — see trufflesecurity docs).
152+
INCLUDE_REGEXES=/tmp/trufflehog-pr-include-regexes.txt
153+
: > "${INCLUDE_REGEXES}"
150154
while IFS= read -r file; do
151155
if [[ -s /tmp/exclude-regexes.txt ]] && echo "$file" | grep -qEf /tmp/exclude-regexes.txt 2>/dev/null; then
152156
echo "Skipping: ${file} (matches exclude pattern)"
153157
continue
154158
fi
155-
156159
if [[ -f "${file}" ]]; then
157-
echo "Scanning: ${file}"
158-
trufflehog filesystem "${file}" --exclude-paths /tmp/trufflehog-exclude.txt --concurrency 16 --json --no-update --results=verified,unverified >> results.ndjson || true
160+
python3 -c 'import re, sys; print("^" + re.escape(sys.argv[1]) + "$")' "$file" >> "${INCLUDE_REGEXES}"
159161
fi
160162
done < changed-files.txt
163+
164+
if [[ -s "${INCLUDE_REGEXES}" ]]; then
165+
sort -u -o "${INCLUDE_REGEXES}" "${INCLUDE_REGEXES}"
166+
n_inc=$(wc -l < "${INCLUDE_REGEXES}")
167+
echo "TruffleHog: ${n_inc} path(s) via --include-paths (anchored regexes)"
168+
: > results.ndjson
169+
trufflehog filesystem . \
170+
--include-paths "${INCLUDE_REGEXES}" \
171+
--exclude-paths /tmp/trufflehog-exclude.txt \
172+
--concurrency 16 \
173+
--json \
174+
--no-update \
175+
--results=verified,unverified > results.ndjson || true
176+
else
177+
echo "No files to scan after excludes (only deletions or excluded paths)"
178+
fi
161179
else
162180
echo "No files changed"
163181
fi

0 commit comments

Comments
 (0)