Skip to content

Commit 6ac22b0

Browse files
committed
feat(search): implement advanced query builder and parser
- Added a new `QueryBuilder` component for constructing complex search queries with AND/OR logic.
- Introduced a `parseSearchQuery` function to parse raw search strings into structured terms and groups.
- Created a `search-query-builder` module to generate SQL conditions for ClickHouse based on parsed queries.
- Enhanced the `useSearch` hook to utilize the new query parsing logic for detecting search types.
- Implemented support for field prefixes (e.g., domain:, user:) and various match types (exact, contains, wildcard).
- Added UI components for managing search terms and exclusions, including term pills and inline input fields.
- Updated the overall search functionality to accommodate the new query structure and improve user experience.
1 parent d13755c commit 6ac22b0

File tree

13 files changed

+1501
-420
lines changed

13 files changed

+1501
-420
lines changed

app/api/domain-recon/credentials/route.ts

Lines changed: 16 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ import { NextRequest, NextResponse } from "next/server"
22
import { executeQuery as executeClickHouseQuery } from "@/lib/clickhouse"
33
import { validateRequest } from "@/lib/auth"
44
import { throwIfAborted, getRequestSignal, handleAbortError } from "@/lib/api-helpers"
5+
import { parseSearchQuery } from "@/lib/query-parser"
6+
import { buildDomainReconCondition, buildKeywordReconCondition } from "@/lib/search-query-builder"
57

68
export async function POST(request: NextRequest) {
79
// ✅ Check abort VERY EARLY - before validateRequest
@@ -24,11 +26,13 @@ export async function POST(request: NextRequest) {
2426
}
2527

2628
// Normalize Domain
27-
let cleanDomain = targetDomain.trim().toLowerCase()
28-
cleanDomain = cleanDomain.replace(/^https?:\/\//, '').replace(/^www\./, '').split('/')[0].split(':')[0]
29+
const cleanDomain = targetDomain.trim().toLowerCase()
30+
31+
// Parse query for operator support (OR, NOT, wildcard, exact)
32+
const parsed = parseSearchQuery(cleanDomain)
2933

3034
// Cleaner log: only show search if present
31-
const logData: any = { type: searchType, domain: cleanDomain }
35+
const logData: any = { type: searchType, domain: cleanDomain, terms: parsed.terms.length }
3236
if (searchQuery && searchQuery.trim()) {
3337
logData.search = searchQuery.trim()
3438
}
@@ -41,7 +45,7 @@ export async function POST(request: NextRequest) {
4145
throwIfAborted(request)
4246

4347
// Call the new data getter function
44-
const credentialsData = await getCredentialsDataOptimized(cleanDomain, filters, pagination, searchQuery, searchType, body.keywordMode, signal)
48+
const credentialsData = await getCredentialsDataOptimized(parsed, filters, pagination, searchQuery, searchType, body.keywordMode, signal)
4549

4650
// Check abort after operations
4751
throwIfAborted(request)
@@ -71,7 +75,7 @@ export async function POST(request: NextRequest) {
7175
}
7276

7377
async function getCredentialsDataOptimized(
74-
query: string,
78+
parsed: import("@/lib/query-parser").ParsedQuery,
7579
filters?: any,
7680
pagination?: any,
7781
searchQuery?: string,
@@ -99,44 +103,19 @@ async function getCredentialsDataOptimized(
99103
// ==========================================
100104
// 1. BUILD PREWHERE (Main Table Filters)
101105
// ==========================================
102-
// PREWHERE is the key to speed in ClickHouse.
103-
// It filters before JOIN and before reading heavy columns.
106+
// Use the shared query builder for the main domain/keyword condition
104107

105108
const prewhereConditions: string[] = []
106109
const params: Record<string, any> = {}
107110

108111
if (searchType === 'domain') {
109-
// DOMAIN OPTIMIZATION:
110-
// 1. Check Exact Match domain
111-
// 2. Check Subdomain using endsWith (much faster than ilike/regex)
112-
// 3. Fallback to URL pattern match only if needed
113-
114-
params['targetDomain'] = query
115-
params['dotTargetDomain'] = '.' + query
116-
117-
// Logic: Domain column exact match OR Domain column ends with .target.com
118-
// This leverages suffix index if available, or at least fast string scan
119-
prewhereConditions.push(`(
120-
c.domain = {targetDomain:String} OR
121-
endsWith(c.domain, {dotTargetDomain:String}) OR
122-
c.url ilike {urlPattern:String}
123-
)`)
124-
// Fallback URL pattern for catch-all
125-
params['urlPattern'] = `%${query}%`
126-
112+
const built = buildDomainReconCondition(parsed, { notNullCheck: false })
113+
prewhereConditions.push(`(${built.condition})`)
114+
Object.assign(params, built.params)
127115
} else {
128-
// KEYWORD SEARCH
129-
params['keyword'] = query
130-
params['likeKeyword'] = `%${query}%`
131-
132-
if (keywordMode === 'domain-only') {
133-
prewhereConditions.push(`(c.domain ilike {likeKeyword:String})`)
134-
} else {
135-
// Optimization: multiSearchAnyCase is faster than OR OR OR
136-
// But for simplicity and param binding, we use ilike in PREWHERE
137-
// because PREWHERE already significantly reduces cost.
138-
prewhereConditions.push(`(c.url ilike {likeKeyword:String} OR c.domain ilike {likeKeyword:String})`)
139-
}
116+
const built = buildKeywordReconCondition(parsed, keywordMode)
117+
prewhereConditions.push(`(${built.condition})`)
118+
Object.assign(params, built.params)
140119
}
141120

142121
// Additional Filters to PREWHERE (To filter faster at the start)

app/api/domain-recon/overview/route.ts

Lines changed: 12 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2,59 +2,8 @@ import { NextRequest, NextResponse } from "next/server"
22
import { executeQuery as executeClickHouseQuery } from "@/lib/clickhouse"
33
import { validateRequest } from "@/lib/auth"
44
import { throwIfAborted, getRequestSignal, handleAbortError } from "@/lib/api-helpers"
5-
6-
/**
7-
* Build WHERE clause for domain matching (ClickHouse version)
8-
* Uses named parameters
9-
*/
10-
function buildDomainWhereClause(targetDomain: string): { whereClause: string; params: Record<string, string> } {
11-
const whereClause = `WHERE (
12-
c.domain = {domain:String} OR
13-
c.domain ilike concat('%.', {domain:String}) OR
14-
c.url ilike {pattern1:String} OR
15-
c.url ilike {pattern2:String} OR
16-
c.url ilike {pattern3:String} OR
17-
c.url ilike {pattern4:String}
18-
) AND c.domain IS NOT NULL`
19-
20-
return {
21-
whereClause,
22-
params: {
23-
domain: targetDomain,
24-
pattern1: `%://${targetDomain}/%`,
25-
pattern2: `%://${targetDomain}:%`,
26-
pattern3: `%://%.${targetDomain}/%`,
27-
pattern4: `%://%.${targetDomain}:%`
28-
}
29-
}
30-
}
31-
32-
/**
33-
* Build WHERE clause for keyword search (ClickHouse version)
34-
*/
35-
function buildKeywordWhereClause(keyword: string, mode: 'domain-only' | 'full-url' = 'full-url'): { whereClause: string; params: Record<string, string> } {
36-
if (mode === 'domain-only') {
37-
// Extract hostname safe logic without Arrays
38-
// IMPORTANT: Use domain() native function with fallback extract() regex
39-
const hostnameExpr = `if(
40-
length(domain(c.url)) > 0,
41-
domain(c.url),
42-
extract(c.url, '^(?:https?://)?([^/:]+)')
43-
)`
44-
45-
const whereClause = `WHERE ${hostnameExpr} ilike {keyword:String} AND c.url IS NOT NULL`
46-
return {
47-
whereClause,
48-
params: { keyword: `%${keyword}%` }
49-
}
50-
} else {
51-
const whereClause = `WHERE c.url ilike {keyword:String} AND c.url IS NOT NULL`
52-
return {
53-
whereClause,
54-
params: { keyword: `%${keyword}%` }
55-
}
56-
}
57-
}
5+
import { parseSearchQuery } from "@/lib/query-parser"
6+
import { buildDomainReconCondition, buildKeywordReconCondition } from "@/lib/search-query-builder"
587

598
export async function POST(request: NextRequest) {
609
// ✅ Check abort VERY EARLY - before validateRequest
@@ -80,19 +29,19 @@ export async function POST(request: NextRequest) {
8029
const signal = getRequestSignal(request)
8130

8231
let whereClause = ''
83-
let params: Record<string, string> = {}
32+
let params: Record<string, unknown> = {}
8433

8534
if (searchType === 'keyword') {
8635
const keyword = targetDomain.trim()
8736
const keywordMode = body.keywordMode || 'full-url'
88-
const built = buildKeywordWhereClause(keyword, keywordMode)
89-
whereClause = built.whereClause
37+
const parsed = parseSearchQuery(keyword)
38+
const built = buildKeywordReconCondition(parsed, keywordMode)
39+
whereClause = `WHERE ${built.condition}`
9040
params = built.params
9141
} else {
92-
let normalizedDomain = targetDomain.trim().toLowerCase()
93-
normalizedDomain = normalizedDomain.replace(/^https?:\/\//, '').replace(/^www\./, '').split('/')[0].split(':')[0]
94-
const built = buildDomainWhereClause(normalizedDomain)
95-
whereClause = built.whereClause
42+
const parsed = parseSearchQuery(targetDomain)
43+
const built = buildDomainReconCondition(parsed, { notNullCheck: true })
44+
whereClause = `WHERE ${built.condition}`
9645
params = built.params
9746
}
9847

@@ -229,7 +178,7 @@ export async function POST(request: NextRequest) {
229178
}
230179
}
231180

232-
async function getTimelineData(whereClause: string, params: Record<string, string>, granularity: string, signal?: AbortSignal) {
181+
async function getTimelineData(whereClause: string, params: Record<string, unknown>, granularity: string, signal?: AbortSignal) {
233182
// OPTIMIZED DATE PARSING STRATEGY (POST-NORMALIZATION)
234183
// After normalization, log_date is already in standard YYYY-MM-DD format
235184
// Query becomes very simple and fast - directly toDate() without complex parsing
@@ -315,7 +264,7 @@ async function getTimelineData(whereClause: string, params: Record<string, strin
315264

316265
async function getTopSubdomains(
317266
whereClause: string,
318-
params: Record<string, string>,
267+
params: Record<string, unknown>,
319268
limit: number,
320269
searchType: string,
321270
keywordMode: string,
@@ -355,7 +304,7 @@ async function getTopSubdomains(
355304
}))
356305
}
357306

358-
async function getTopPaths(whereClause: string, params: Record<string, string>, limit: number, signal?: AbortSignal) {
307+
async function getTopPaths(whereClause: string, params: Record<string, unknown>, limit: number, signal?: AbortSignal) {
359308
// SECURITY: Validate limit parameter
360309
const safeLimit = Math.min(1000, Math.max(1, Math.floor(Number(limit)) || 10))
361310

app/api/domain-recon/passwords/route.ts

Lines changed: 11 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -2,61 +2,8 @@ import { NextRequest, NextResponse } from "next/server"
22
import { executeQuery as executeClickHouseQuery } from "@/lib/clickhouse"
33
import { validateRequest } from "@/lib/auth"
44
import { throwIfAborted, getRequestSignal, handleAbortError } from "@/lib/api-helpers"
5-
6-
/**
7-
* Build WHERE clause for domain matching that supports subdomains (ClickHouse version)
8-
* OPTIMIZED: Avoid heavy string manipulation in SQL
9-
* Uses named parameters for ClickHouse
10-
*/
11-
function buildDomainWhereClause(targetDomain: string): { whereClause: string; params: Record<string, string> } {
12-
// Use ilike for case-insensitive matching (data in DB might be mixed case)
13-
const whereClause = `WHERE (
14-
c.domain = {domain:String} OR
15-
c.domain ilike concat('%.', {domain:String}) OR
16-
c.url ilike {pattern1:String} OR
17-
c.url ilike {pattern2:String}
18-
) AND c.domain IS NOT NULL`
19-
20-
return {
21-
whereClause,
22-
params: {
23-
domain: targetDomain, // Exact domain match
24-
pattern1: `%://${targetDomain}/%`, // Match: https://target.com/
25-
pattern2: `%://${targetDomain}:%` // Match: https://target.com:8080/
26-
}
27-
}
28-
}
29-
30-
/**
31-
* Build WHERE clause for keyword search (ClickHouse version)
32-
* OPTIMIZED: Use simple LIKE instead of heavy string manipulation
33-
* Uses ilike for case-insensitive search
34-
*/
35-
function buildKeywordWhereClause(keyword: string, mode: 'domain-only' | 'full-url' = 'full-url'): { whereClause: string; params: Record<string, string> } {
36-
if (mode === 'domain-only') {
37-
// For domain-only, check both domain column and URL (ClickHouse: use ilike)
38-
const whereClause = `WHERE (
39-
c.domain ilike {keyword:String} OR
40-
c.url ilike {pattern1:String} OR
41-
c.url ilike {pattern2:String}
42-
) AND c.url IS NOT NULL`
43-
return {
44-
whereClause,
45-
params: {
46-
keyword: `%${keyword}%`, // Domain column contains keyword
47-
pattern1: `%://%${keyword}%/%`, // URL contains keyword in hostname
48-
pattern2: `%://%${keyword}%:%` // URL contains keyword in hostname with port
49-
}
50-
}
51-
} else {
52-
// Full URL mode: search keyword anywhere in URL (ClickHouse: use ilike)
53-
const whereClause = `WHERE c.url ilike {keyword:String} AND c.url IS NOT NULL`
54-
return {
55-
whereClause,
56-
params: { keyword: `%${keyword}%` }
57-
}
58-
}
59-
}
5+
import { parseSearchQuery } from "@/lib/query-parser"
6+
import { buildDomainReconCondition, buildKeywordReconCondition } from "@/lib/search-query-builder"
607

618
export async function POST(request: NextRequest) {
629
// ✅ Check abort VERY EARLY - before validateRequest
@@ -82,24 +29,20 @@ export async function POST(request: NextRequest) {
8229
const signal = getRequestSignal(request)
8330

8431
let whereClause: string
85-
let params: Record<string, string>
32+
let params: Record<string, unknown>
8633

8734
if (searchType === 'keyword') {
8835
const keyword = targetDomain.trim()
8936
const mode = keywordMode || 'full-url'
90-
const result = buildKeywordWhereClause(keyword, mode)
91-
whereClause = result.whereClause
92-
params = result.params
37+
const parsed = parseSearchQuery(keyword)
38+
const built = buildKeywordReconCondition(parsed, mode)
39+
whereClause = `WHERE ${built.condition}`
40+
params = built.params
9341
} else {
94-
let normalizedDomain = targetDomain.trim().toLowerCase()
95-
normalizedDomain = normalizedDomain.replace(/^https?:\/\//, '')
96-
normalizedDomain = normalizedDomain.replace(/^www\./, '')
97-
normalizedDomain = normalizedDomain.replace(/\/$/, '')
98-
normalizedDomain = normalizedDomain.split('/')[0].split(':')[0]
99-
100-
const result = buildDomainWhereClause(normalizedDomain)
101-
whereClause = result.whereClause
102-
params = result.params
42+
const parsed = parseSearchQuery(targetDomain)
43+
const built = buildDomainReconCondition(parsed, { notNullCheck: true })
44+
whereClause = `WHERE ${built.condition}`
45+
params = built.params
10346
}
10447

10548
// Check abort before expensive operations

0 commit comments

Comments
 (0)