Skip to content

Commit fae4b93

Browse files
committed
Add hmac hash support
1 parent da14a0e commit fae4b93

File tree

4 files changed

+328
-8
lines changed

4 files changed

+328
-8
lines changed

processor/redactionprocessor/README.md

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,74 @@ The value is then masked according to the configuration.
164164
`hash_function` defines the function for hashing values of matched keys or matches in values
165165
instead of masking them with a fixed string. By default, no hash function is used
166166
and masking with a fixed string is performed. The supported hash functions
167-
are `md5`, `sha1` and `sha3` (SHA-256).
167+
are `md5`, `sha1`, `sha3` (SHA3-256), `hmac-sha256`, and `hmac-sha512`.
168+
169+
### HMAC Hash Functions
170+
171+
For enhanced security, especially when dealing with low-entropy data like IP addresses, HMAC (Hash-based Message Authentication Code) hash functions are recommended over simple hash functions like MD5, SHA1, or SHA3.
172+
173+
**Why HMAC?**
174+
175+
Simple hash functions are vulnerable to rainbow table attacks for low-entropy data:
176+
- IPv4 address space: only 2^32 ≈ 4.3 billion possible values
177+
- Attackers can pre-compute all possible IPv4 hashes to reverse the hashing
178+
179+
HMAC uses a secret key, making it practically impossible to:
180+
- Reverse-engineer the original value without the key
181+
- Use pre-computed rainbow tables
182+
- Brute-force the input space (e.g. hashing every IPv4 address), even if the hash algorithm is known
183+
184+
**Benefits:**
185+
- ✅ Consistency: Same input + same key = same output (required for pattern analysis)
186+
- ✅ Irreversibility: Cannot reverse without the secret key
187+
- ✅ Rainbow table resistant: Pre-computed hash tables are useless
188+
- ✅ GDPR pseudonymization: Supports the pseudonymization definition in Article 4(5), provided the key is kept separately from the data
189+
190+
**Configuration Example:**
191+
192+
```yaml
193+
processors:
194+
redaction:
195+
allow_all_keys: true
196+
blocked_values:
197+
- "(?:[0-9]{1,3}\\.){3}[0-9]{1,3}" # IPv4 addresses
198+
- "(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}" # IPv6 addresses
199+
hash_function: hmac-sha256 # or hmac-sha512
200+
hmac_key: "${env:REDACTION_SECRET_KEY}" # Load from environment variable
201+
summary: silent
202+
```
203+
204+
**Key Management:**
205+
206+
```bash
207+
# Generate a strong random key (do this once and store securely)
208+
export REDACTION_SECRET_KEY=$(openssl rand -hex 32)
209+
210+
# Use the key when running the collector
211+
./otelcol-contrib --config=config.yaml
212+
213+
# For production, store keys in:
214+
# - Kubernetes Secrets
215+
# - HashiCorp Vault
216+
# - AWS Secrets Manager
217+
# - Azure Key Vault
218+
# Never commit keys to version control!
219+
```
220+
221+
**Security Notes:**
222+
- Use at least 256-bit (32-byte) random keys
223+
- Store keys separately from log data
224+
- Rotate keys periodically according to your security policy
225+
- Document which key version was used for each time period
226+
- HMAC-SHA256 provides sufficient security for most use cases
227+
- HMAC-SHA512 offers additional security margin with minimal performance cost (~10-20% CPU overhead vs simple hashes)
228+
229+
**GDPR Compliance:**
230+
231+
HMAC satisfies GDPR Article 4(5) pseudonymization requirements:
232+
- Without the key, personal data cannot be attributed to a specific data subject
233+
- Provides technical measures to ensure data protection
234+
- This holds only if the key is stored separately from the data
168235

169236
The `url_sanitizer` configuration enables sanitization of URLs in specified attributes by removing potentially sensitive information like UUIDs, timestamps, and other non-essential path segments. This is particularly useful for reducing cardinality in telemetry data while preserving the essential parts of URLs for troubleshooting.
170237

@@ -200,7 +267,7 @@ Example configuration with database sanitization:
200267
processors:
201268
redaction:
202269
# ... other redaction settings ...
203-
270+
204271
# Database sanitization configuration
205272
db_sanitizer:
206273
# sanitize_span_name controls whether span names should be sanitized for database queries (default: true)
@@ -215,7 +282,7 @@ processors:
215282
attributes: ["db.statement", "redis.command"]
216283
memcached:
217284
enabled: true
218-
attributes: ["db.statement", "memcached.command"]
285+
attributes: ["db.statement", "memcached.command"]
219286
mongo:
220287
enabled: true
221288
attributes: ["db.statement", "mongodb.query"]

processor/redactionprocessor/config.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ var _ encoding.TextUnmarshaler = (*HashFunction)(nil)
1818
type HashFunction string
1919

2020
const (
21-
None HashFunction = ""
22-
SHA1 HashFunction = "sha1"
23-
SHA3 HashFunction = "sha3"
24-
MD5 HashFunction = "md5"
21+
None HashFunction = ""
22+
SHA1 HashFunction = "sha1"
23+
SHA3 HashFunction = "sha3"
24+
MD5 HashFunction = "md5"
25+
HMACSHA256 HashFunction = "hmac-sha256"
26+
HMACSHA512 HashFunction = "hmac-sha512"
2527
)
2628

2729
type Config struct {
@@ -44,6 +46,10 @@ type Config struct {
4446
// and masking with a fixed string is performed.
4547
HashFunction HashFunction `mapstructure:"hash_function"`
4648

49+
// HMACKey is the secret key used for HMAC hashing when HashFunction is set to hmac-sha256 or hmac-sha512.
50+
// This should be loaded from a secure source like environment variables.
51+
HMACKey string `mapstructure:"hmac_key"`
52+
4753
// IgnoredKeys is a list of span attribute keys that are not redacted.
4854
// Span attributes in this list are allowed to pass through the filter
4955
// without being changed or removed.
@@ -101,9 +107,15 @@ func (u *HashFunction) UnmarshalText(text []byte) error {
101107
case strings.ToLower(SHA3.String()):
102108
*u = SHA3
103109
return nil
110+
case strings.ToLower(HMACSHA256.String()):
111+
*u = HMACSHA256
112+
return nil
113+
case strings.ToLower(HMACSHA512.String()):
114+
*u = HMACSHA512
115+
return nil
104116
case strings.ToLower(None.String()):
105117
*u = None
106118
return nil
107119
}
108-
return fmt.Errorf("unknown HashFunction %s, allowed functions are %s, %s and %s", str, SHA1, SHA3, MD5)
120+
return fmt.Errorf("unknown HashFunction %s, allowed functions are %s, %s, %s, %s and %s", str, SHA1, SHA3, MD5, HMACSHA256, HMACSHA512)
109121
}

processor/redactionprocessor/processor.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@ package redactionprocessor // import "github.com/open-telemetry/opentelemetry-co
66
//nolint:gosec
77
import (
88
"context"
9+
"crypto/hmac"
910
"crypto/md5"
1011
"crypto/sha1"
12+
"crypto/sha256"
13+
"crypto/sha512"
1114
"encoding/hex"
1215
"fmt"
1316
"hash"
@@ -398,6 +401,10 @@ func (s *redaction) maskValue(val string, regex *regexp.Regexp) string {
398401
return hashString(match, sha3.New256())
399402
case MD5:
400403
return hashString(match, md5.New())
404+
case HMACSHA256:
405+
return hashStringHMAC(match, s.config.HMACKey, sha256.New)
406+
case HMACSHA512:
407+
return hashStringHMAC(match, s.config.HMACKey, sha512.New)
401408
default:
402409
return "****"
403410
}
@@ -410,6 +417,12 @@ func hashString(input string, hasher hash.Hash) string {
410417
return hex.EncodeToString(hasher.Sum(nil))
411418
}
412419

420+
// hashStringHMAC computes the HMAC of input keyed with key, using the hash
// constructor newHash (maskValue passes sha256.New or sha512.New), and
// returns the resulting digest as a hex-encoded string.
func hashStringHMAC(input string, key string, newHash func() hash.Hash) string {
421+
h := hmac.New(newHash, []byte(key))
422+
// The error from Write is ignored: hash.Hash documents that Write never returns an error.
h.Write([]byte(input))
423+
return hex.EncodeToString(h.Sum(nil))
424+
}
425+
413426
// addMetaAttrs adds diagnostic information about redacted or masked attribute keys
414427
func (s *redaction) addMetaAttrs(redactedAttrs []string, attributes pcommon.Map, valuesAttr, countAttr string) {
415428
redactedCount := int64(len(redactedAttrs))

0 commit comments

Comments
 (0)