@@ -160,55 +160,44 @@ jobs:
160160 [[ -n "${PID_MAIN_BUILD:-}" ]] && { wait $PID_MAIN_BUILD || { echo "Main binary build failed" >&2; exit 1; }; }
161161 wait $PID_PR_BUILD || { echo "PR binary build failed" >&2; exit 1; }
162162
163- # TODO: remove before merging — fake results for testing the diff/comment steps without a full scan.
164- # Restore the real step below once comment rendering is verified.
163+ # PR and main scans share a single S3 stream per dataset file, teed to
164+ # both binaries simultaneously. The main side is skipped on a cache hit
165+ # (results already in /tmp/results-main.jsonl) or when main_csv is empty
166+ # (PR adds only new detectors — no overlap with main).
165167 - name : Run corpora tests
166168 if : steps.detect.outputs.any_changed == 'true'
167169 shell : bash
170+ env : # outputs of earlier steps, handed to the script below as environment variables
171+ PR_CSV : ${{ steps.detect.outputs.pr_csv }}
172+ MAIN_CSV : ${{ steps.detect.outputs.main_csv }}
173+ MAIN_SCAN_CACHE_HIT : ${{ steps.main_scan_cache.outputs.cache-hit }} # the script compares this against the string 'true'
168174 run : |
169- echo '{"DetectorName":"JDBC","Raw":"jdbc:mysql://user:pass@host/db","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' > /tmp/results-pr.jsonl
170- echo '{"DetectorName":"JDBC","Raw":"jdbc:mysql://user:pass@host/db","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' >> /tmp/results-pr.jsonl
171- echo '{"DetectorName":"JDBC","Raw":"jdbc:postgresql://admin:secret@db.example.com/prod","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' >> /tmp/results-pr.jsonl
172- echo '{"DetectorName":"JDBC","Raw":"jdbc:mysql://user:pass@host/db","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' > /tmp/results-main.jsonl
175+ set -o pipefail # a pipeline fails if any of its stages fails
176+ files=() # dataset entries collected from $DATASETS, one per line
177+ while IFS= read -r dataset; do # read each line verbatim: no whitespace trimming, no backslash processing
178+ [[ -z "$dataset" ]] && continue # skip blank lines
179+ files+=("$dataset")
180+ done <<< "$DATASETS" # NOTE(review): DATASETS is not in this step's env block; presumably set at job/workflow level — confirm
173181
174- # PR and main scans share a single S3 stream per dataset file, teed to
175- # both binaries simultaneously. The main side is skipped on a cache hit
176- # (results already in /tmp/results-main.jsonl) or when main_csv is empty
177- # (PR adds only new detectors — no overlap with main).
178- # - name: Run corpora tests
179- # if: steps.detect.outputs.any_changed == 'true'
180- # shell: bash
181- # env:
182- # PR_CSV: ${{ steps.detect.outputs.pr_csv }}
183- # MAIN_CSV: ${{ steps.detect.outputs.main_csv }}
184- # MAIN_SCAN_CACHE_HIT: ${{ steps.main_scan_cache.outputs.cache-hit }}
185- # run: |
186- # set -o pipefail
187- # files=()
188- # while IFS= read -r dataset; do
189- # [[ -z "$dataset" ]] && continue
190- # files+=("$dataset")
191- # done <<< "$DATASETS"
192- #
193- # export TRUFFLEHOG_BIN=/tmp/trufflehog-pr
194- # export OUTPUT_JSONL=/tmp/results-pr.jsonl
195- # export STDERR_FILE=/tmp/corpora-stderr-pr.txt
196- # export INCLUDE_DETECTORS="$PR_CSV"
197- #
198- # if [[ -n "$MAIN_CSV" && "$MAIN_SCAN_CACHE_HIT" != 'true' ]]; then
199- # # Dual-binary: single S3 download teed to both PR and main binaries.
200- # export TRUFFLEHOG_BIN_MAIN=/tmp/trufflehog-main
201- # export OUTPUT_JSONL_MAIN=/tmp/results-main.jsonl
202- # export INCLUDE_DETECTORS_MAIN="$MAIN_CSV"
203- # elif [[ -z "$MAIN_CSV" ]]; then
204- # echo "No overlapping detectors in main; skipping main scan."
205- # : > /tmp/results-main.jsonl
206- # else
207- # echo "Main scan cache hit; skipping main scan."
208- # fi
209- #
210- # ./scripts/test/detector_corpora_test.sh "${files[@]}" \
211- # || { echo "Corpora scan failed" >&2; exit 1; }
182+ export TRUFFLEHOG_BIN=/tmp/trufflehog-pr # PR-side settings; presumably read by detector_corpora_test.sh — confirm
183+ export OUTPUT_JSONL=/tmp/results-pr.jsonl
184+ export STDERR_FILE=/tmp/corpora-stderr-pr.txt
185+ export INCLUDE_DETECTORS="$PR_CSV"
186+
187+ if [[ -n "$MAIN_CSV" && "$MAIN_SCAN_CACHE_HIT" != 'true' ]]; then # main has detectors to scan and no cache hit was reported
188+ # Dual-binary: single S3 download teed to both PR and main binaries.
189+ export TRUFFLEHOG_BIN_MAIN=/tmp/trufflehog-main
190+ export OUTPUT_JSONL_MAIN=/tmp/results-main.jsonl
191+ export INCLUDE_DETECTORS_MAIN="$MAIN_CSV"
192+ elif [[ -z "$MAIN_CSV" ]]; then
193+ echo "No overlapping detectors in main; skipping main scan."
194+ : > /tmp/results-main.jsonl # create or truncate, leaving an empty main results file
195+ else # MAIN_CSV is non-empty and MAIN_SCAN_CACHE_HIT is 'true'; the *_MAIN variables stay unset
196+ echo "Main scan cache hit; skipping main scan."
197+ fi
198+
199+ ./scripts/test/detector_corpora_test.sh "${files[@]}" \
200+ || { echo "Corpora scan failed" >&2; exit 1; } # fail the step with an explicit message if the scan script exits non-zero
212201
213202 - name : Save main scan cache
214203 if : steps.detect.outputs.any_changed == 'true' && steps.detect.outputs.main_csv != '' && steps.main_scan_cache.outputs.cache-hit != 'true'
0 commit comments