Skip to content

Commit 6b724e3

Browse files
committed
Used a regular for loop for small instances to minimize overhead
1 parent 8fb46f3 commit 6b724e3

1 file changed

Lines changed: 99 additions & 41 deletions

File tree

Apps/TyposquattingDetector/TyposquattingDetector.cs

Lines changed: 99 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -176,52 +176,23 @@ static bool PassesPrefilter(string q, string d, int threshold)
176176
var bucket = buckets[i];
177177
if (bucket is null) continue;
178178

179-
var locals = new System.Collections.Concurrent.ConcurrentBag<(int score, string dom)>();
179+
// Tuneable knobs
180+
const int SequentialCutoff = 256;
181+
int maxDop = Math.Max(1, Environment.ProcessorCount / 2);
180182

181-
Parallel.ForEach(
182-
bucket,
183-
() => (score: 0, dom: (string?)null),
184-
185-
(domain, state, local) =>
186-
{
187-
if (bestScore >= 98)
188-
{
189-
state.Stop();
190-
return local;
191-
}
192-
193-
if (!PassesPrefilter(query, domain, _threshold))
194-
return local;
195-
196-
int score = Fuzz.WeightedRatio(query, domain);
197-
198-
if (score > local.score)
199-
local = (score, domain);
200-
201-
if (score >= 95)
202-
state.Stop();
203-
204-
return local;
205-
},
206-
207-
local =>
208-
{
209-
if (local.score > 0 && local.dom is not null)
210-
locals.Add((local.score, local.dom));
211-
}
212-
);
213-
214-
// serial reduction (no races)
215-
foreach (var l in locals)
183+
if (bucket.Count <= SequentialCutoff)
184+
{
185+
// Sequential fast-path for small buckets
186+
SequentialMatch(query, ref bestDomain, ref bestScore, bucket);
187+
}
188+
else
216189
{
217-
if (l.score > bestScore)
190+
(bool flowControl, (bestDomain, bestScore)) = ParallelMatch(query, bestDomain, bestScore, bucket, maxDop);
191+
if (!flowControl)
218192
{
219-
bestScore = l.score;
220-
bestDomain = l.dom;
193+
break;
221194
}
222195
}
223-
if (bestScore >= 98)
224-
break;
225196
}
226197

227198
if (bestDomain != null)
@@ -239,6 +210,93 @@ static bool PassesPrefilter(string q, string d, int threshold)
239210
}
240211

241212
return result;
213+
214+
// Scores the domains of a large bucket in parallel and folds the best hit into the
// running best match supplied by the caller.
//
// query      - the string being compared against each domain.
// bestDomain - best-matching domain found so far (null when there is none yet).
// bestScore  - score of bestDomain; 98 or higher is treated as decisive.
// bucket     - candidate domains to score.
// maxDop     - upper bound on concurrent workers, passed to ParallelOptions.
//
// Returns:
//   flowControl - false when the running best has reached 98 or more and the caller
//                 should stop scanning further buckets; true otherwise.
//   value       - the updated (bestDomain, bestScore) pair. It is always populated so
//                 the caller can deconstruct it straight back into its own variables
//                 without losing the best match found so far.
(bool flowControl, (string? bestDomain, int bestScore) value) ParallelMatch(string query, string? bestDomain, int bestScore, List<string> bucket, int maxDop)
{
    // The incoming best is already decisive: skip the parallel loop entirely and
    // tell the caller to stop, handing back the values it passed in.
    if (bestScore >= 98)
    {
        return (flowControl: false, value: (bestDomain, bestScore));
    }

    // Bounded parallelism for large buckets
    var locals = new System.Collections.Concurrent.ConcurrentBag<(int score, string dom)>();
    var po = new ParallelOptions { MaxDegreeOfParallelism = maxDop };

    Parallel.ForEach(
        bucket,
        po,
        // Per-worker accumulator: best (score, domain) seen by that worker.
        () => (score: 0, dom: (string?)null),

        (domain, state, local) =>
        {
            if (!PassesPrefilter(query, domain, _threshold))
            {
                return local;
            }

            int score = Fuzz.WeightedRatio(query, domain);

            if (score > local.score)
            {
                local = (score, domain);
            }

            // A score of 95+ is good enough: ask the loop to stop scheduling new iterations.
            if (score >= 95)
            {
                state.Stop();
            }

            return local;
        },

        // Publish each worker's best candidate; workers that matched nothing are skipped.
        local =>
        {
            if (local.score > 0 && local.dom is not null)
            {
                locals.Add((local.score, local.dom));
            }
        }
    );

    // serial reduction (no races)
    foreach (var l in locals)
    {
        if (l.score > bestScore)
        {
            bestScore = l.score;
            bestDomain = l.dom;
        }
    }

    // Continue with the next bucket only while the best match is still below 98.
    return (flowControl: bestScore < 98, value: (bestDomain, bestScore));
}
278+
279+
// Single-threaded scan of a small bucket. Updates bestDomain/bestScore in place whenever
// a candidate scores strictly higher than the current best, and stops early once the
// running best is at least 98 or a candidate scores 95 or more.
void SequentialMatch(string query, ref string? bestDomain, ref int bestScore, List<string> bucket)
{
    foreach (var candidate in bucket)
    {
        // Nothing left to gain once the running best is already at 98 or above.
        if (bestScore >= 98)
        {
            return;
        }

        if (PassesPrefilter(query, candidate, _threshold))
        {
            int similarity = Fuzz.WeightedRatio(query, candidate);

            if (similarity > bestScore)
            {
                bestDomain = candidate;
                bestScore = similarity;
            }

            // A candidate at 95+ ends the scan of this bucket.
            if (similarity >= 95)
            {
                return;
            }
        }
    }
}
242300
}
243301

244302
private static string? ExtractDomain(string line)

0 commit comments

Comments
 (0)