Skip to content

Commit 5b722a2

Browse files
committed
revert test changes
1 parent 5a6e1a2 commit 5b722a2

2 files changed

Lines changed: 34 additions & 46 deletions

File tree

.github/workflows/detector-corpora-test.yml

Lines changed: 33 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -160,55 +160,44 @@ jobs:
160160
[[ -n "${PID_MAIN_BUILD:-}" ]] && { wait $PID_MAIN_BUILD || { echo "Main binary build failed" >&2; exit 1; }; }
161161
wait $PID_PR_BUILD || { echo "PR binary build failed" >&2; exit 1; }
162162
163-
# TODO: remove before merging — fake results for testing the diff/comment steps without a full scan.
164-
# Restore the real step below once comment rendering is verified.
163+
# PR and main scans share a single S3 stream per dataset file, teed to
164+
# both binaries simultaneously. The main side is skipped on a cache hit
165+
# (results already in /tmp/results-main.jsonl) or when main_csv is empty
166+
# (PR adds only new detectors — no overlap with main).
165167
- name: Run corpora tests
166168
if: steps.detect.outputs.any_changed == 'true'
167169
shell: bash
170+
env:
171+
PR_CSV: ${{ steps.detect.outputs.pr_csv }}
172+
MAIN_CSV: ${{ steps.detect.outputs.main_csv }}
173+
MAIN_SCAN_CACHE_HIT: ${{ steps.main_scan_cache.outputs.cache-hit }}
168174
run: |
169-
echo '{"DetectorName":"JDBC","Raw":"jdbc:mysql://user:pass@host/db","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' > /tmp/results-pr.jsonl
170-
echo '{"DetectorName":"JDBC","Raw":"jdbc:mysql://user:pass@host/db","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' >> /tmp/results-pr.jsonl
171-
echo '{"DetectorName":"JDBC","Raw":"jdbc:postgresql://admin:secret@db.example.com/prod","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' >> /tmp/results-pr.jsonl
172-
echo '{"DetectorName":"JDBC","Raw":"jdbc:mysql://user:pass@host/db","RawV2":"","Redacted":"","ExtraData":null,"StructuredData":null,"Verified":false,"VerificationError":null}' > /tmp/results-main.jsonl
175+
set -o pipefail
176+
files=()
177+
while IFS= read -r dataset; do
178+
[[ -z "$dataset" ]] && continue
179+
files+=("$dataset")
180+
done <<< "$DATASETS"
173181
174-
# PR and main scans share a single S3 stream per dataset file, teed to
175-
# both binaries simultaneously. The main side is skipped on a cache hit
176-
# (results already in /tmp/results-main.jsonl) or when main_csv is empty
177-
# (PR adds only new detectors — no overlap with main).
178-
# - name: Run corpora tests
179-
# if: steps.detect.outputs.any_changed == 'true'
180-
# shell: bash
181-
# env:
182-
# PR_CSV: ${{ steps.detect.outputs.pr_csv }}
183-
# MAIN_CSV: ${{ steps.detect.outputs.main_csv }}
184-
# MAIN_SCAN_CACHE_HIT: ${{ steps.main_scan_cache.outputs.cache-hit }}
185-
# run: |
186-
# set -o pipefail
187-
# files=()
188-
# while IFS= read -r dataset; do
189-
# [[ -z "$dataset" ]] && continue
190-
# files+=("$dataset")
191-
# done <<< "$DATASETS"
192-
#
193-
# export TRUFFLEHOG_BIN=/tmp/trufflehog-pr
194-
# export OUTPUT_JSONL=/tmp/results-pr.jsonl
195-
# export STDERR_FILE=/tmp/corpora-stderr-pr.txt
196-
# export INCLUDE_DETECTORS="$PR_CSV"
197-
#
198-
# if [[ -n "$MAIN_CSV" && "$MAIN_SCAN_CACHE_HIT" != 'true' ]]; then
199-
# # Dual-binary: single S3 download teed to both PR and main binaries.
200-
# export TRUFFLEHOG_BIN_MAIN=/tmp/trufflehog-main
201-
# export OUTPUT_JSONL_MAIN=/tmp/results-main.jsonl
202-
# export INCLUDE_DETECTORS_MAIN="$MAIN_CSV"
203-
# elif [[ -z "$MAIN_CSV" ]]; then
204-
# echo "No overlapping detectors in main; skipping main scan."
205-
# : > /tmp/results-main.jsonl
206-
# else
207-
# echo "Main scan cache hit; skipping main scan."
208-
# fi
209-
#
210-
# ./scripts/test/detector_corpora_test.sh "${files[@]}" \
211-
# || { echo "Corpora scan failed" >&2; exit 1; }
182+
export TRUFFLEHOG_BIN=/tmp/trufflehog-pr
183+
export OUTPUT_JSONL=/tmp/results-pr.jsonl
184+
export STDERR_FILE=/tmp/corpora-stderr-pr.txt
185+
export INCLUDE_DETECTORS="$PR_CSV"
186+
187+
if [[ -n "$MAIN_CSV" && "$MAIN_SCAN_CACHE_HIT" != 'true' ]]; then
188+
# Dual-binary: single S3 download teed to both PR and main binaries.
189+
export TRUFFLEHOG_BIN_MAIN=/tmp/trufflehog-main
190+
export OUTPUT_JSONL_MAIN=/tmp/results-main.jsonl
191+
export INCLUDE_DETECTORS_MAIN="$MAIN_CSV"
192+
elif [[ -z "$MAIN_CSV" ]]; then
193+
echo "No overlapping detectors in main; skipping main scan."
194+
: > /tmp/results-main.jsonl
195+
else
196+
echo "Main scan cache hit; skipping main scan."
197+
fi
198+
199+
./scripts/test/detector_corpora_test.sh "${files[@]}" \
200+
|| { echo "Corpora scan failed" >&2; exit 1; }
212201
213202
- name: Save main scan cache
214203
if: steps.detect.outputs.any_changed == 'true' && steps.detect.outputs.main_csv != '' && steps.main_scan_cache.outputs.cache-hit != 'true'

pkg/detectors/jdbc/jdbc.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@ var (
5353
// Matches typical JDBC connection strings.
5454
// The terminal character class additionally excludes () and & to avoid
5555
// capturing surrounding delimiters (e.g. "(jdbc:…)" or "…&user=x&").
56-
// TODO: revert before merging — regex intentionally loosened to trigger corpora test CI.
57-
keyPat = regexp.MustCompile(`(?i)[\w]{3,10}:[^\s"'<>,{}[\]]{10,511}[^\s"'<>,{}[\]()&]`)
56+
keyPat = regexp.MustCompile(`(?i)jdbc:[\w]{3,10}:[^\s"'<>,{}[\]]{10,511}[^\s"'<>,{}[\]()&]`)
5857
)
5958

6059
// Keywords are used for efficiently pre-filtering chunks.

0 commit comments

Comments
 (0)