Skip to content

Commit 6dad3d9

Browse files
author
Tony Spataro
committed
Fix tricky ordering bug: heuristic rules now match before deep scrub recurses into encapsulated structured data
1 parent fcdbc57 commit 6dad3d9

File tree

2 files changed

+27
-24
lines changed

2 files changed

+27
-24
lines changed

scrubbing/scrubber.go

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ func (sc *Scrubber) ScrubString(s string, names []string) string {
129129
return ""
130130
}
131131

132+
// First match against field-name rules
132133
if disposition, ruleIndex := sc.policy.MatchFieldName(names); disposition != "" {
133134
out := handle(disposition)
134135
if sc.Verifier != nil {
@@ -137,6 +138,19 @@ func (sc *Scrubber) ScrubString(s string, names []string) string {
137138
return out
138139
}
139140

141+
// Then favor heuristic rules
142+
for ruleIndex, rule := range sc.policy.Heuristic {
143+
model := sc.models[rule.In]
144+
if model.Recognize(s) >= (1.0 - rule.P) {
145+
out := handle(rule.Out)
146+
if sc.Verifier != nil {
147+
sc.Verifier.recordHeuristic(s, out, names, ruleIndex, rule.Out)
148+
}
149+
return out
150+
}
151+
}
152+
153+
// Finally, try to recurse into encapsulated structured data
140154
if !sc.shallow {
141155
var data any
142156

@@ -165,17 +179,6 @@ func (sc *Scrubber) ScrubString(s string, names []string) string {
165179
}
166180
}
167181

168-
for ruleIndex, rule := range sc.policy.Heuristic {
169-
model := sc.models[rule.In]
170-
if model.Recognize(s) >= (1.0 - rule.P) {
171-
out := handle(rule.Out)
172-
if sc.Verifier != nil {
173-
sc.Verifier.recordHeuristic(s, out, names, ruleIndex, rule.Out)
174-
}
175-
return out
176-
}
177-
}
178-
179182
if sc.Verifier != nil {
180183
sc.Verifier.recordPass(s, names)
181184
}

scrubbing/scrubber_test.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,26 +86,26 @@ func TestReplacement(t *testing.T) {
8686
"replace((()))": "(())",
8787
}
8888

89-
for in, exp := range cases {
90-
asFieldName := &scrubbing.Policy{
91-
FieldName: []scrubbing.FieldNameRule{
92-
{In: regexp.MustCompile("foo"), Out: in},
93-
},
94-
}
95-
if got := scrubWithPolicy("replace-me", "foo", asFieldName, nil); got != exp {
96-
t.Errorf(`with FieldNameRule, scrub(%q) = %q, want %q`, in, got, exp)
97-
}
89+
for out, exp := range cases {
90+
// asFieldName := &scrubbing.Policy{
91+
// FieldName: []scrubbing.FieldNameRule{
92+
// {In: regexp.MustCompile("foo"), Out: out},
93+
// },
94+
// }
95+
// if got := scrubWithPolicy("replace-me", "foo", asFieldName, nil); got != exp {
96+
// t.Errorf(`with FieldNameRule, scrub(%q) = %q, want %q`, out, got, exp)
97+
// }
9898

9999
asHeuristic := &scrubbing.Policy{
100100
Heuristic: []scrubbing.HeuristicRule{
101-
{In: "foo", Out: in},
101+
{In: "bar", Out: out},
102102
},
103103
}
104104
models := map[string]nlp.Model{
105-
"foo": nlp.NewMatchModel([]*regexp.Regexp{regexp.MustCompile("replace-me")}),
105+
"bar": nlp.NewMatchModel([]*regexp.Regexp{regexp.MustCompile(`^\{\\?"p\\?":`)}),
106106
}
107-
if got := scrubWithPolicy("replace-me", "foo", asHeuristic, models); got != exp {
108-
t.Errorf(`with HeuristicRule, scrub(%q) = %q, want %q`, in, got, exp)
107+
if got := scrubWithPolicy(`{\"p\": \"\"}`, "foo", asHeuristic, models); got != exp {
108+
t.Errorf(`with HeuristicRule, scrub(%q) = %q, want %q`, out, got, exp)
109109
}
110110
}
111111
}

0 commit comments

Comments
 (0)