Skip to content

Commit 93d2153

Browse files
authored
fix(security): detect GitHub stateless token format (~520 chars) (#530)
GitHub App installation tokens (ghs_) are moving to a stateless format that is much longer (~520 chars) than today's 40-char tokens. Two detection layers hardcoded the old length and silently break on it:

- internal/security/patterns/tokens.go: the ghp_/gho_/ghs_/ghr_ patterns were pinned to exactly {36} chars. FindAllString still substring-matched a longer token but truncated the capture to 40 chars, so the recorded detection dropped the ~480-char tail. Widened to {36,}.
- internal/logs/sanitizer.go: the github_token mask regex was bounded at {36,255} with \b anchors. In a ~520-char alphanumeric run there is no word boundary within that range, so the regex matched nothing and the token leaked into logs unmasked. Widened to {36,}.

Short-token rejection (ghp_12345) and 40-char tokens still behave as before. Added tests asserting full capture / masking of a 520-char token.
1 parent df9489c commit 93d2153

4 files changed

Lines changed: 94 additions & 5 deletions

File tree

internal/logs/sanitizer.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@ func NewSecretSanitizer(core zapcore.Core) *SecretSanitizer {
3939
// registerDefaultPatterns registers patterns for common secret formats
4040
func (s *SecretSanitizer) registerDefaultPatterns() {
4141
// GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_)
42+
// Open-ended length ({36,}): the new stateless token format can be ~520 chars,
43+
// and with a fixed upper bound the trailing \b can never match in the middle of a longer alphanumeric run, so the whole token would go unmasked.
4244
s.patterns = append(s.patterns, &secretPattern{
4345
name: "github_token",
44-
regex: regexp.MustCompile(`\b(gh[poushr]_[A-Za-z0-9]{36,255})\b`),
46+
regex: regexp.MustCompile(`\b(gh[poushr]_[A-Za-z0-9]{36,})\b`),
4547
maskFunc: func(token string) string {
4648
if len(token) <= 7 {
4749
return "****"

internal/logs/sanitizer_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package logs
2+
3+
import (
4+
"strings"
5+
"sync"
6+
"testing"
7+
)
8+
9+
// newTestSanitizer builds a SecretSanitizer with the default patterns registered
10+
// but no wrapped core, suitable for exercising sanitizeString directly.
11+
func newTestSanitizer() *SecretSanitizer {
12+
s := &SecretSanitizer{resolvedCache: &sync.Map{}}
13+
s.registerDefaultPatterns()
14+
return s
15+
}
16+
17+
func TestSanitizer_GitHubTokens(t *testing.T) {
18+
s := newTestSanitizer()
19+
20+
tests := []struct {
21+
name string
22+
token string
23+
}{
24+
{"classic ghp_ (40 chars)", "ghp_1234567890abcdefghijABCDEFGHIJ123456"},
25+
{"installation ghs_ (40 chars)", "ghs_1234567890abcdefghijABCDEFGHIJ123456"},
26+
}
27+
for _, tt := range tests {
28+
t.Run(tt.name, func(t *testing.T) {
29+
out := s.sanitizeString("token=" + tt.token)
30+
if strings.Contains(out, tt.token) {
31+
t.Fatalf("token leaked unmasked: %q", out)
32+
}
33+
})
34+
}
35+
}
36+
37+
// TestSanitizer_LongStatelessGitHubToken verifies the new ~520-char stateless
38+
// GitHub token format is masked. The previous {36,255} upper bound left these
39+
// tokens unmasked because the alphanumeric run had no \b boundary within range.
40+
func TestSanitizer_LongStatelessGitHubToken(t *testing.T) {
41+
s := newTestSanitizer()
42+
43+
const tail = 516 // total length 520 incl. "ghs_" prefix
44+
token := "ghs_" + strings.Repeat("aB3", (tail/3)+1)[:tail]
45+
46+
out := s.sanitizeString("Authorization context token=" + token)
47+
if strings.Contains(out, token) {
48+
t.Fatalf("long stateless token leaked unmasked (len %d)", len(token))
49+
}
50+
}

internal/security/patterns/tokens.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ func GetTokenPatterns() []*Pattern {
4242
func githubPATPattern() *Pattern {
4343
// ghp_ = classic PAT, github_pat_ = fine-grained PAT
4444
// Fine-grained format: github_pat_<base62>_<base62> (variable lengths)
45+
// Length is open-ended ({36,}): GitHub's new stateless token format can be ~520 chars.
4546
return NewPattern("github_pat").
46-
WithRegex(`(?:ghp_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]+_[a-zA-Z0-9]{30,})`).
47+
WithRegex(`(?:ghp_[a-zA-Z0-9]{36,}|github_pat_[a-zA-Z0-9]+_[a-zA-Z0-9]{30,})`).
4748
WithCategory(CategoryAPIToken).
4849
WithSeverity(SeverityCritical).
4950
WithDescription("GitHub Personal Access Token").
@@ -53,7 +54,7 @@ func githubPATPattern() *Pattern {
5354
// GitHub OAuth Token
5455
func githubOAuthPattern() *Pattern {
5556
return NewPattern("github_oauth").
56-
WithRegex(`gho_[a-zA-Z0-9]{36}`).
57+
WithRegex(`gho_[a-zA-Z0-9]{36,}`).
5758
WithCategory(CategoryAPIToken).
5859
WithSeverity(SeverityHigh).
5960
WithDescription("GitHub OAuth access token").
@@ -63,7 +64,7 @@ func githubOAuthPattern() *Pattern {
6364
// GitHub App Installation Token
6465
func githubAppPattern() *Pattern {
6566
return NewPattern("github_app").
66-
WithRegex(`ghs_[a-zA-Z0-9]{36}`).
67+
WithRegex(`ghs_[a-zA-Z0-9]{36,}`).
6768
WithCategory(CategoryAPIToken).
6869
WithSeverity(SeverityHigh).
6970
WithDescription("GitHub App installation access token").
@@ -73,7 +74,7 @@ func githubAppPattern() *Pattern {
7374
// GitHub App Refresh Token
7475
func githubRefreshPattern() *Pattern {
7576
return NewPattern("github_refresh").
76-
WithRegex(`ghr_[a-zA-Z0-9]{36}`).
77+
WithRegex(`ghr_[a-zA-Z0-9]{36,}`).
7778
WithCategory(CategoryAPIToken).
7879
WithSeverity(SeverityHigh).
7980
WithDescription("GitHub App refresh token").

internal/security/patterns/tokens_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,42 @@ func TestGitHubTokenPatterns(t *testing.T) {
8484
}
8585
}
8686

87+
// TestGitHubTokenPatterns_LongStatelessFormat verifies the new long stateless
88+
// GitHub token format (~520 chars, e.g. ghs_ App installation tokens) is matched
89+
// in full. A fixed {36} length truncates the match and leaks the token tail.
90+
func TestGitHubTokenPatterns_LongStatelessFormat(t *testing.T) {
91+
const tail = 516 // total length 520 incl. "ghs_"/"ghp_"/etc. prefix
92+
body := strings.Repeat("aB3", (tail/3)+1)[:tail]
93+
94+
tests := []struct {
95+
name string
96+
input string
97+
patternName string
98+
}{
99+
{"ghs_ stateless installation token", "ghs_" + body, "github_app"},
100+
{"ghp_ long PAT", "ghp_" + body, "github_pat"},
101+
{"gho_ long OAuth token", "gho_" + body, "github_oauth"},
102+
{"ghr_ long refresh token", "ghr_" + body, "github_refresh"},
103+
}
104+
105+
patterns := GetTokenPatterns()
106+
for _, tt := range tests {
107+
t.Run(tt.name, func(t *testing.T) {
108+
pattern := findPatternByName(patterns, tt.patternName)
109+
if pattern == nil {
110+
t.Fatalf("%s pattern not found", tt.patternName)
111+
}
112+
matches := pattern.Match(tt.input)
113+
assert.NotEmpty(t, matches, "expected match for long token: %s", tt.input)
114+
if len(matches) > 0 {
115+
assert.Equal(t, tt.input, matches[0],
116+
"expected full token captured, got truncated match (len %d of %d)",
117+
len(matches[0]), len(tt.input))
118+
}
119+
})
120+
}
121+
}
122+
87123
// Test GitLab Token patterns
88124
func TestGitLabTokenPatterns(t *testing.T) {
89125
tests := []struct {

0 commit comments

Comments
 (0)