Skip to content

Commit 921e30f

Browse files
authored
fix: skip binaries using mime sniffing (#13)
- add gofile-based BinaryCheck to classify binary magic headers
- propagate binary check through discovery and eligibility reasons
- exclude .git directories with recursive pattern default
- add pdf fixture and broaden coverage across app, filesystem, detector, finding, report
- bump dependencies including gofile and golang.org/x/sys
1 parent 2b80893 commit 921e30f

17 files changed

Lines changed: 1915 additions & 48 deletions

go.mod

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@ require github.com/fatih/color v1.19.0
66

77
require github.com/rs/zerolog v1.35.0
88

9-
require github.com/spf13/pflag v1.0.10
9+
require (
10+
github.com/shirou/gofile v0.0.0-20260314143841-7dc06be96404
11+
github.com/spf13/pflag v1.0.10
12+
)
1013

1114
require (
1215
github.com/mattn/go-colorable v0.1.14 // indirect
13-
github.com/mattn/go-isatty v0.0.20 // indirect
14-
golang.org/x/sys v0.42.0 // indirect
16+
github.com/mattn/go-isatty v0.0.21 // indirect
17+
golang.org/x/sys v0.43.0 // indirect
1518
)

go.sum

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@ github.com/fatih/color v1.19.0 h1:Zp3PiM21/9Ld6FzSKyL5c/BULoe/ONr9KlbYVOfG8+w=
22
github.com/fatih/color v1.19.0/go.mod h1:zNk67I0ZUT1bEGsSGyCZYZNrHuTkJJB+r6Q9VuMi0LE=
33
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
44
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
5-
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
6-
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
5+
github.com/mattn/go-isatty v0.0.21 h1:xYae+lCNBP7QuW4PUnNG61ffM4hVIfm+zUzDuSzYLGs=
6+
github.com/mattn/go-isatty v0.0.21/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
77
github.com/rs/zerolog v1.35.0 h1:VD0ykx7HMiMJytqINBsKcbLS+BJ4WYjz+05us+LRTdI=
88
github.com/rs/zerolog v1.35.0/go.mod h1:EjML9kdfa/RMA7h/6z6pYmq1ykOuA8/mjWaEvGI+jcw=
9+
github.com/shirou/gofile v0.0.0-20260314143841-7dc06be96404 h1:FSesQzaVDGqNiF1pyQ8QTNpRZrBjXq+AQijCoEqY1Gw=
10+
github.com/shirou/gofile v0.0.0-20260314143841-7dc06be96404/go.mod h1:5R2Lq6rCwzvUkQLMyzYn2lkwhkXgrlgz803ZRWViHdw=
911
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
1012
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
11-
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
12-
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
13-
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
13+
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
14+
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=

internal/app/coverage_test.go

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// Copyright 2026 Jean-Philippe Couture
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in all
11+
// copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19+
// SOFTWARE.
20+
21+
package app
22+
23+
import (
24+
"context"
25+
"io"
26+
"strings"
27+
"testing"
28+
)
29+
30+
// --- isBinaryMIME ---
31+
32+
func TestIsBinaryMIME(t *testing.T) {
33+
t.Parallel()
34+
35+
tests := []struct {
36+
mime string
37+
want bool
38+
}{
39+
{"", false},
40+
{"text/plain", false},
41+
{"text/html", false},
42+
{"application/json", false},
43+
{"application/javascript", false},
44+
{"application/pdf", true},
45+
{"application/octet-stream", true},
46+
{"image/png", true},
47+
{"image/jpeg", true},
48+
{"image/gif", true},
49+
{"image/svg+xml", true},
50+
{"audio/mpeg", true},
51+
{"video/mp4", true},
52+
{"font/ttf", true},
53+
{"application/zip", true},
54+
{"application/gzip", true},
55+
{"application/x-gzip", true},
56+
{"application/x-bzip2", true},
57+
{"application/x-tar", true},
58+
{"application/x-7z-compressed", true},
59+
{"application/vnd.rar", true},
60+
{"application/java-archive", true},
61+
{"application/x-java-class", true},
62+
{"application/wasm", true},
63+
{"application/x-sqlite3", true},
64+
}
65+
66+
for _, tt := range tests {
67+
t.Run(tt.mime, func(t *testing.T) {
68+
t.Parallel()
69+
if got := isBinaryMIME(tt.mime); got != tt.want {
70+
t.Fatalf("isBinaryMIME(%q) = %v, want %v", tt.mime, got, tt.want)
71+
}
72+
})
73+
}
74+
}
75+
76+
// --- Run with invalid format ---
77+
78+
func TestRunInvalidFormat(t *testing.T) {
79+
t.Parallel()
80+
81+
_, err := Run(context.Background(), Options{
82+
Path: ".",
83+
Stdout: io.Discard,
84+
Format: OutputFormat("invalid"),
85+
})
86+
if err == nil {
87+
t.Fatal("Run() error = nil, want error for invalid format")
88+
}
89+
if !strings.Contains(err.Error(), "unsupported --format") {
90+
t.Fatalf("Run() error = %q, want mention of unsupported format", err.Error())
91+
}
92+
}
93+
94+
// --- Run with invalid exclude pattern ---
95+
96+
func TestRunInvalidExcludePattern(t *testing.T) {
97+
t.Parallel()
98+
99+
_, err := Run(context.Background(), Options{
100+
Path: ".",
101+
Stdout: io.Discard,
102+
Excludes: []string{"bad["},
103+
})
104+
if err == nil {
105+
t.Fatal("Run() error = nil, want error for invalid exclude pattern")
106+
}
107+
if !strings.Contains(err.Error(), "configure excludes") {
108+
t.Fatalf("Run() error = %q, want configure excludes", err.Error())
109+
}
110+
}
111+
112+
// --- scanCandidates with empty paths ---
113+
114+
func TestScanCandidatesEmpty(t *testing.T) {
115+
t.Parallel()
116+
117+
results, errors := scanCandidates(context.Background(), nil, nil)
118+
if results != nil {
119+
t.Fatalf("scanCandidates(nil) results = %v, want nil", results)
120+
}
121+
if errors != nil {
122+
t.Fatalf("scanCandidates(nil) errors = %v, want nil", errors)
123+
}
124+
}
125+
126+
// --- Run with canceled context ---
127+
128+
func TestRunCanceledContext(t *testing.T) {
129+
t.Parallel()
130+
131+
ctx, cancel := context.WithCancel(context.Background())
132+
cancel()
133+
134+
_, err := Run(ctx, Options{
135+
Path: ".",
136+
Stdout: io.Discard,
137+
})
138+
if err == nil {
139+
t.Fatal("Run() error = nil, want error for canceled context")
140+
}
141+
}

internal/app/run.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@ import (
2727
"io"
2828
"runtime"
2929
"sort"
30+
"strings"
3031
"time"
3132

3233
"github.com/jcouture/ghostscan/internal/filesystem"
3334
"github.com/jcouture/ghostscan/internal/finding"
3435
"github.com/jcouture/ghostscan/internal/report"
3536
"github.com/jcouture/ghostscan/internal/scan"
37+
"github.com/shirou/gofile"
3638
)
3739

3840
type Options struct {
@@ -108,6 +110,14 @@ func Run(ctx context.Context, opts Options) (Result, error) {
108110
now = time.Now
109111
}
110112

113+
identifier, err := gofile.New(gofile.Options{MimeType: true})
114+
if err != nil {
115+
return Result{}, fmt.Errorf("initialize binary identifier: %w", err)
116+
}
117+
binaryCheck := func(buf []byte) bool {
118+
return isBinaryMIME(identifier.IdentifyBuffer(buf))
119+
}
120+
111121
runStart := now().UTC()
112122
walkStart := now()
113123
maxFileSize := opts.MaxFileSize
@@ -137,6 +147,7 @@ func Run(ctx context.Context, opts Options) (Result, error) {
137147
MaxFileSize: maxFileSize,
138148
Excluder: excluder,
139149
OnExclude: buildExcludeReporter(opts.Stdout, format == OutputFormatHuman && opts.Verbose),
150+
BinaryCheck: binaryCheck,
140151
})
141152
if err != nil {
142153
return Result{}, fmt.Errorf("discover files from %q: %w", path, err)
@@ -314,9 +325,32 @@ func reportErrors(scanErrors []scanError) []report.ErrorEntry {
314325
return items
315326
}
316327

328+
// isBinaryMIME reports whether mimeType names a media type that is treated as
// binary content.
//
// The input is normalised before matching: any parameters after the first ';'
// are dropped, surrounding whitespace is trimmed, and the remainder is
// lower-cased. Media type and subtype names are case-insensitive, so
// "Application/PDF" and "application/pdf; charset=binary" are both classified
// the same way as "application/pdf".
//
// An empty string (or one that is empty after normalisation) is reported as
// not binary, as is every type that is not matched below.
func isBinaryMIME(mimeType string) bool {
	// Keep only the "type/subtype" portion.
	base, _, _ := strings.Cut(mimeType, ";")
	base = strings.ToLower(strings.TrimSpace(base))

	switch base {
	case "":
		return false
	case "application/pdf",
		"application/octet-stream",
		"application/zip",
		"application/gzip",
		"application/x-gzip",
		"application/x-bzip2",
		"application/x-tar",
		"application/x-7z-compressed",
		"application/vnd.rar",
		"application/java-archive",
		"application/x-java-class",
		"application/wasm",
		"application/x-sqlite3":
		return true
	}

	// Whole top-level families that are always considered binary.
	for _, family := range [...]string{"image/", "audio/", "video/", "font/"} {
		if strings.HasPrefix(base, family) {
			return true
		}
	}
	return false
}
350+
317351
func mapSkipReason(reason filesystem.EligibilityReason) string {
318352
switch reason {
319-
case filesystem.EligibilityReasonBinaryNUL:
353+
case filesystem.EligibilityReasonBinaryNUL, filesystem.EligibilityReasonBinaryMagic:
320354
return "binary"
321355
case filesystem.EligibilityReasonTooLarge:
322356
return "max_file_size_exceeded"

0 commit comments

Comments
 (0)