Skip to content

Commit 1397717

Browse files
authored
Merge pull request #14 from wolfeidau/feat_add_regex_search
feat: added regex search operation with context lines
2 parents a7bf97c + 09b8be4 commit 1397717

File tree

4 files changed

+356
-6
lines changed

4 files changed

+356
-6
lines changed

README.md

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,67 @@ Matched entries: 2
240240
Query time: 0.36 ms
241241
```
242242

243+
**Search entries using regex patterns:**
244+
```bash
245+
./build/bklog query -file output.parquet -op search -pattern "git clone"
246+
```
247+
Output:
248+
```
249+
Matches found: 1
250+
251+
[2025-04-22 21:43:29.975] [~~~ Preparing working directory] MATCH: $ git clone -v -- https://github.com/buildkite/bash-example.git .
252+
253+
--- Search Statistics (Streaming) ---
254+
Total entries: 212
255+
Matches found: 1
256+
Query time: 0.65 ms
257+
```
258+
259+
**Search with context lines (ripgrep-style):**
260+
```bash
261+
./build/bklog query -file output.parquet -op search -pattern "error|failed" -C 3
262+
```
263+
Output:
264+
```
265+
Matches found: 2
266+
267+
[2025-04-22 21:43:30.690] [~~~ Running script] Running tests...
268+
[2025-04-22 21:43:30.691] [~~~ Running script] Test suite started
269+
[2025-04-22 21:43:30.692] [~~~ Running script] Running unit tests
270+
[2025-04-22 21:43:30.693] [~~~ Running script] MATCH: Test failed: authentication error
271+
[2025-04-22 21:43:30.694] [~~~ Running script] Cleaning up test files
272+
[2025-04-22 21:43:30.695] [~~~ Running script] Test run completed
273+
[2025-04-22 21:43:30.696] [~~~ Running script] Generating report
274+
--
275+
[2025-04-22 21:43:30.750] [~~~ Post-processing] Validating results
276+
[2025-04-22 21:43:30.751] [~~~ Post-processing] Checking exit codes
277+
[2025-04-22 21:43:30.752] [~~~ Post-processing] Build status: some tests did not pass
278+
[2025-04-22 21:43:30.753] [~~~ Post-processing] MATCH: Build failed due to test failures
279+
[2025-04-22 21:43:30.754] [~~~ Post-processing] Uploading logs
280+
[2025-04-22 21:43:30.755] [~~~ Post-processing] Notifying team
281+
[2025-04-22 21:43:30.756] [~~~ Post-processing] Cleanup completed
282+
```
283+
284+
**Search with separate before/after context:**
285+
```bash
286+
./build/bklog query -file output.parquet -op search -pattern "npm install" -B 2 -A 5
287+
```
288+
289+
**Case-sensitive search:**
290+
```bash
291+
./build/bklog query -file output.parquet -op search -pattern "ERROR" --case-sensitive
292+
```
293+
294+
**Invert match (show non-matching lines):**
295+
```bash
296+
./build/bklog query -file output.parquet -op search -pattern "buildkite" --invert-match -limit 5
297+
```
298+
299+
**Search with JSON output:**
300+
```bash
301+
./build/bklog query -file output.parquet -op search -pattern "git clone" -format json -C 1
302+
```
303+
243304
**JSON output for programmatic use:**
244305
```bash
245306
./build/bklog query -file output.parquet -op list-groups -format json
@@ -307,6 +368,18 @@ export BUILDKITE_API_TOKEN="bkua_your_token_here"
307368
./build/bklog query -org myorg -pipeline mypipeline -build 123 -job abc-def-456 -op by-group -group "tests"
308369
```
309370

371+
**Search API logs with regex patterns:**
372+
```bash
373+
export BUILDKITE_API_TOKEN="bkua_your_token_here"
374+
./build/bklog query -org myorg -pipeline mypipeline -build 123 -job abc-def-456 -op search -pattern "error|failed" -C 2
375+
```
376+
377+
**Search API logs with case sensitivity:**
378+
```bash
379+
export BUILDKITE_API_TOKEN="bkua_your_token_here"
380+
./build/bklog query -org myorg -pipeline mypipeline -build 123 -job abc-def-456 -op search -pattern "ERROR" --case-sensitive
381+
```
382+
310383
**Query last 10 entries from API logs:**
311384
```bash
312385
export BUILDKITE_API_TOKEN="bkua_your_token_here"
@@ -416,14 +489,22 @@ Output:
416489
- `-job <id>`: Buildkite job ID (for API access)
417490

418491
**Query Options:**
419-
- `-op <operation>`: Query operation (`list-groups`, `list-commands`, `by-group`, `info`, `tail`, `seek`, `dump`) (default: `list-groups`)
492+
- `-op <operation>`: Query operation (`list-groups`, `list-commands`, `by-group`, `search`, `info`, `tail`, `seek`, `dump`) (default: `list-groups`)
420493
- `-group <pattern>`: Group name pattern to filter by (for `by-group` operation)
421494
- `-format <format>`: Output format (`text`, `json`) (default: `text`)
422495
- `-stats`: Show query statistics (default: `true`)
423496
- `-limit <number>`: Limit number of entries returned (0 = no limit, enables early termination)
424497
- `-tail <number>`: Number of lines to show from end (for `tail` operation, default: 10)
425498
- `-seek <row>`: Row number to seek to (0-based, for `seek` operation)
426499

500+
**Search Options:**
501+
- `-pattern <regex>`: Regex pattern to search for (for `search` operation)
502+
- `-A <num>`: Show NUM lines after each match (ripgrep-style)
503+
- `-B <num>`: Show NUM lines before each match (ripgrep-style)
504+
- `-C <num>`: Show NUM lines before and after each match (ripgrep-style)
505+
- `-case-sensitive`: Enable case-sensitive search (default: case-insensitive)
506+
- `-invert-match`: Show non-matching lines instead of matching ones
507+
427508
**Note:** For API usage, set `BUILDKITE_API_TOKEN` environment variable. Logs are automatically downloaded and cached in `~/.bklog/`.
428509

429510
## Log Entry Types

cmd/bklog/main.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,20 @@ func handleQueryCommand() {
151151

152152
queryFlags := flag.NewFlagSet("query", flag.ExitOnError)
153153
queryFlags.StringVar(&config.ParquetFile, "file", "", "Path to Parquet log file (use this OR API parameters)")
154-
queryFlags.StringVar(&config.Operation, "op", "list-groups", "Query operation: list-groups, list-commands, by-group, info, tail, seek, dump")
154+
queryFlags.StringVar(&config.Operation, "op", "list-groups", "Query operation: list-groups, list-commands, by-group, info, tail, seek, dump, search")
155155
queryFlags.StringVar(&config.GroupName, "group", "", "Group name to filter by (for by-group operation)")
156156
queryFlags.StringVar(&config.Format, "format", "text", "Output format: text, json")
157157
queryFlags.BoolVar(&config.ShowStats, "stats", true, "Show query statistics")
158158
queryFlags.IntVar(&config.LimitEntries, "limit", 0, "Limit number of entries returned (0 = no limit, enables early termination)")
159159
queryFlags.IntVar(&config.TailLines, "tail", 10, "Number of lines to show from end (for tail operation)")
160160
queryFlags.Int64Var(&config.SeekToRow, "seek", 0, "Row number to seek to (0-based, for seek operation)")
161+
// Search operation parameters
162+
queryFlags.StringVar(&config.SearchPattern, "pattern", "", "Regex pattern to search for (for search operation)")
163+
queryFlags.IntVar(&config.AfterContext, "A", 0, "Show NUM lines after each match")
164+
queryFlags.IntVar(&config.BeforeContext, "B", 0, "Show NUM lines before each match")
165+
queryFlags.IntVar(&config.Context, "C", 0, "Show NUM lines before and after each match")
166+
queryFlags.BoolVar(&config.CaseSensitive, "case-sensitive", false, "Case-sensitive search")
167+
queryFlags.BoolVar(&config.InvertMatch, "invert-match", false, "Show non-matching lines")
161168
// Buildkite API parameters
162169
queryFlags.StringVar(&config.Organization, "org", "", "Buildkite organization slug (for API)")
163170
queryFlags.StringVar(&config.Pipeline, "pipeline", "", "Buildkite pipeline slug (for API)")
@@ -178,15 +185,17 @@ func handleQueryCommand() {
178185
fmt.Println(" list-groups List all groups with statistics")
179186
fmt.Println(" list-commands List all command entries")
180187
fmt.Println(" by-group Show entries for a specific group")
181-
fmt.Println(" info Show file metadata (row count, file size, etc.)")
182-
fmt.Println(" tail Show last N entries from the file")
183-
fmt.Println(" seek Start reading from a specific row number")
184-
fmt.Println(" dump Output all entries from the file")
188+
fmt.Println(" search Search entries using regex pattern with context")
189+
fmt.Println(" info Show file metadata (row count, file size, etc.)")
190+
fmt.Println(" tail Show last N entries from the file")
191+
fmt.Println(" seek Start reading from a specific row number")
192+
fmt.Println(" dump Output all entries from the file")
185193
fmt.Println("\nExamples:")
186194
fmt.Printf(" # Local file:\n")
187195
fmt.Printf(" %s query -file logs.parquet -op list-groups\n", os.Args[0])
188196
fmt.Printf(" %s query -file logs.parquet -op list-commands\n", os.Args[0])
189197
fmt.Printf(" %s query -file logs.parquet -op by-group -group \"Running tests\"\n", os.Args[0])
198+
fmt.Printf(" %s query -file logs.parquet -op search -pattern \"error|failed\" -C 3\n", os.Args[0])
190199
fmt.Printf(" %s query -file logs.parquet -op info\n", os.Args[0])
191200
fmt.Printf(" %s query -file logs.parquet -op tail -tail 20\n", os.Args[0])
192201
fmt.Printf(" %s query -file logs.parquet -op seek -seek 1000 -limit 50\n", os.Args[0])

cmd/bklog/query_cli.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ type QueryConfig struct {
2020
LimitEntries int // Limit output entries (0 = no limit)
2121
TailLines int // Number of lines to show from end (for tail operation)
2222
SeekToRow int64 // Row number to seek to (0-based)
23+
// Search operation parameters
24+
SearchPattern string // Regex pattern to search for
25+
AfterContext int // Lines to show after match
26+
BeforeContext int // Lines to show before match
27+
Context int // Lines to show before and after match
28+
CaseSensitive bool // Case-sensitive search
29+
InvertMatch bool // Show non-matching lines
2330
// Buildkite API parameters
2431
Organization string
2532
Pipeline string
@@ -78,6 +85,11 @@ func runStreamingQuery(reader *buildkitelogs.ParquetReader, config *QueryConfig)
7885
return fmt.Errorf("group pattern is required for by-group operation")
7986
}
8087
return streamByGroup(reader, config, start)
88+
case "search":
89+
if config.SearchPattern == "" {
90+
return fmt.Errorf("pattern is required for search operation")
91+
}
92+
return streamSearch(reader, config, start)
8193
case "info":
8294
return showFileInfo(reader, config)
8395
case "tail":
@@ -188,6 +200,40 @@ func streamListCommands(reader *buildkitelogs.ParquetReader, config *QueryConfig
188200
return formatStreamingCommandsResult(commands, totalEntries, commandCount, queryTime, config)
189201
}
190202

203+
// streamSearch handles search operation using streaming with regex pattern matching and context lines
204+
func streamSearch(reader *buildkitelogs.ParquetReader, config *QueryConfig, start time.Time) error {
205+
// Create search options
206+
options := buildkitelogs.SearchOptions{
207+
Pattern: config.SearchPattern,
208+
CaseSensitive: config.CaseSensitive,
209+
InvertMatch: config.InvertMatch,
210+
BeforeContext: config.BeforeContext,
211+
AfterContext: config.AfterContext,
212+
Context: config.Context,
213+
}
214+
215+
var results []buildkitelogs.SearchResult
216+
matchesFound := 0
217+
218+
for result, err := range reader.SearchEntriesIter(options) {
219+
if err != nil {
220+
return fmt.Errorf("error during search: %w", err)
221+
}
222+
223+
matchesFound++
224+
results = append(results, result)
225+
226+
// Apply limit if specified
227+
if config.LimitEntries > 0 && matchesFound >= config.LimitEntries {
228+
break
229+
}
230+
}
231+
232+
// Format output
233+
queryTime := float64(time.Since(start).Nanoseconds()) / 1e6
234+
return formatSearchResultsLibrary(results, matchesFound, queryTime, config)
235+
}
236+
191237
// streamByGroup handles by-group operation using streaming with optional limiting
192238
func streamByGroup(reader *buildkitelogs.ParquetReader, config *QueryConfig, start time.Time) error {
193239
var entries []buildkitelogs.ParquetLogEntry
@@ -341,6 +387,99 @@ func formatStreamingCommandsResult(commands []buildkitelogs.ParquetLogEntry, tot
341387
return nil
342388
}
343389

390+
// formatSearchResultsLibrary formats search results with context lines using library types
391+
func formatSearchResultsLibrary(results []buildkitelogs.SearchResult, matchesFound int, queryTime float64, config *QueryConfig) error {
392+
if config.Format == "json" {
393+
result := struct {
394+
Matches []buildkitelogs.SearchResult `json:"matches"`
395+
Stats struct {
396+
MatchesFound int `json:"matches_found"`
397+
QueryTime float64 `json:"query_time_ms"`
398+
} `json:"stats,omitempty"`
399+
}{
400+
Matches: results,
401+
}
402+
403+
if config.ShowStats {
404+
result.Stats.MatchesFound = matchesFound
405+
result.Stats.QueryTime = queryTime
406+
}
407+
408+
encoder := json.NewEncoder(os.Stdout)
409+
encoder.SetIndent("", " ")
410+
return encoder.Encode(result)
411+
}
412+
413+
// Text format
414+
limitText := ""
415+
if config.LimitEntries > 0 && matchesFound >= config.LimitEntries {
416+
limitText = fmt.Sprintf(" (limited to %d)", config.LimitEntries)
417+
}
418+
fmt.Fprintf(os.Stderr, "Matches found: %d%s\n\n", matchesFound, limitText)
419+
420+
if len(results) == 0 {
421+
fmt.Fprintln(os.Stderr, "No matches found.")
422+
return nil
423+
}
424+
425+
for i, result := range results {
426+
if i > 0 {
427+
fmt.Println("--")
428+
}
429+
430+
// Print before context
431+
for _, entry := range result.BeforeContext {
432+
timestamp := time.Unix(0, entry.Timestamp*int64(time.Millisecond))
433+
if entry.Group != "" {
434+
fmt.Printf("[%s] [%s] %s\n",
435+
timestamp.Format("2006-01-02 15:04:05.000"),
436+
entry.Group,
437+
entry.Content)
438+
} else {
439+
fmt.Printf("[%s] %s\n",
440+
timestamp.Format("2006-01-02 15:04:05.000"),
441+
entry.Content)
442+
}
443+
}
444+
445+
// Print match line (highlighted)
446+
timestamp := time.Unix(0, result.Match.Timestamp*int64(time.Millisecond))
447+
if result.Match.Group != "" {
448+
fmt.Printf("[%s] [%s] MATCH: %s\n",
449+
timestamp.Format("2006-01-02 15:04:05.000"),
450+
result.Match.Group,
451+
result.Match.Content)
452+
} else {
453+
fmt.Printf("[%s] MATCH: %s\n",
454+
timestamp.Format("2006-01-02 15:04:05.000"),
455+
result.Match.Content)
456+
}
457+
458+
// Print after context
459+
for _, entry := range result.AfterContext {
460+
timestamp := time.Unix(0, entry.Timestamp*int64(time.Millisecond))
461+
if entry.Group != "" {
462+
fmt.Printf("[%s] [%s] %s\n",
463+
timestamp.Format("2006-01-02 15:04:05.000"),
464+
entry.Group,
465+
entry.Content)
466+
} else {
467+
fmt.Printf("[%s] %s\n",
468+
timestamp.Format("2006-01-02 15:04:05.000"),
469+
entry.Content)
470+
}
471+
}
472+
}
473+
474+
if config.ShowStats {
475+
fmt.Fprintf(os.Stderr, "\n--- Search Statistics (Streaming) ---\n")
476+
fmt.Fprintf(os.Stderr, "Matches found: %d\n", matchesFound)
477+
fmt.Fprintf(os.Stderr, "Query time: %.2f ms\n", queryTime)
478+
}
479+
480+
return nil
481+
}
482+
344483
// formatStreamingEntriesResult formats entries output from streaming query
345484
func formatStreamingEntriesResult(entries []buildkitelogs.ParquetLogEntry, totalEntries, matchedEntries int, queryTime float64, config *QueryConfig) error {
346485
if config.Format == "json" {

0 commit comments

Comments
 (0)