Skip to content

Commit 0b64ef9

Browse files
voidkeylyingbug
authored and committed
fix(security): support IPv6 in SSRF validation via whitelist mechanism
- Keep strict mode blocking all direct IPs (IPv4 and IPv6 uniformly)
- Unify all SSRF call sites to use ValidateURLForSSRF (whitelist-aware)
- Add Teredo (2001:0000::/32) and 6to4 (2002::/16) tunnel detection
- Make redirect handler and DNS pinning respect SSRF_WHITELIST
- Unexport isSSRFSafeURL to prevent future callers bypassing whitelist
- Add scheme validation for whitelisted redirect targets
- Document IPv6 whitelist syntax in .env.example
- Add comprehensive IPv6 test coverage
1 parent a5ca9a2 commit 0b64ef9

File tree

9 files changed

+231
-52
lines changed

9 files changed

+231
-52
lines changed

.env.example

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,9 @@ TENANT_AES_KEY=weknorarag-api-key-secret-secret
105105
SYSTEM_AES_KEY=weknora-system-aes-key-32bytes!!
106106

107107
# SSRF 校验白名单(可选)。逗号分隔;每条可为:精确域名(api.internal)、通配域名(*.example.com)、
108-
# IP(203.0.113.5)或 CIDR(10.0.0.0/8)。列入者会在 URL 校验等地方绕过常规 SSRF 规则,生产环境请谨慎配置。
109-
# SSRF_WHITELIST=internal.service,*.corp.example,172.16.0.0/12
108+
# IPv4(203.0.113.5)、IPv6(2001:db8::1,不带方括号)或 CIDR(10.0.0.0/8, 2001:db8::/32)。
109+
# 列入者会在 URL 校验等地方绕过常规 SSRF 规则,生产环境请谨慎配置。
110+
# SSRF_WHITELIST=internal.service,*.corp.example,172.16.0.0/12,2001:db8::1,fd00::/8
110111

111112
# 是否开启知识图谱构建和检索(构建阶段需调用大模型,耗时较长)
112113
ENABLE_GRAPH_RAG=false

internal/agent/tools/web_fetch.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,9 @@ func (t *WebFetchTool) validateAndResolve(p webFetchParams) (*validatedParams, e
262262
return nil, fmt.Errorf("invalid URL format")
263263
}
264264

265-
// SSRF protection: validate URL is safe (scheme, hostname, and that resolved IPs are not restricted)
266-
if safe, reason := utils.IsSSRFSafeURL(p.URL); !safe {
267-
return nil, fmt.Errorf("URL rejected for security reasons: %s", reason)
265+
// SSRF protection: validate URL is safe (uses centralised entry-point with whitelist support)
266+
if err := utils.ValidateURLForSSRF(p.URL); err != nil {
267+
return nil, fmt.Errorf("URL rejected for security reasons: %v", err)
268268
}
269269

270270
u, err := url.Parse(p.URL)
@@ -281,14 +281,16 @@ func (t *WebFetchTool) validateAndResolve(p webFetchParams) (*validatedParams, e
281281
}
282282
}
283283

284-
// Resolve and pin to the first public IP (same resolver as IsSSRFSafeURL; we pin so chromedp cannot re-resolve)
284+
// Resolve and pin to the first safe IP (same resolver as isSSRFSafeURL; we pin so chromedp cannot re-resolve).
285+
// Whitelisted hosts may resolve to private/restricted IPs, so we allow any IP for them.
285286
ips, err := net.DefaultResolver.LookupIP(context.Background(), "ip", hostname)
286287
if err != nil || len(ips) == 0 {
287288
return nil, fmt.Errorf("DNS lookup failed for %s: %w", hostname, err)
288289
}
290+
isWhitelisted := utils.IsSSRFWhitelisted(hostname)
289291
var pinnedIP net.IP
290292
for _, ip := range ips {
291-
if utils.IsPublicIP(ip) {
293+
if isWhitelisted || utils.IsPublicIP(ip) {
292294
pinnedIP = ip
293295
break
294296
}

internal/application/service/knowledge.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -473,9 +473,9 @@ func (s *knowledgeService) CreateKnowledgeFromURL(ctx context.Context,
473473
return nil, ErrInvalidURL
474474
}
475475

476-
// SSRF protection: validate URL is safe to fetch
477-
if safe, reason := secutils.IsSSRFSafeURL(url); !safe {
478-
logger.Errorf(ctx, "URL rejected for SSRF protection: %s, reason: %s", url, reason)
476+
// SSRF protection: validate URL is safe to fetch (uses centralised entry-point with whitelist support)
477+
if err := secutils.ValidateURLForSSRF(url); err != nil {
478+
logger.Errorf(ctx, "URL rejected for SSRF protection: %s, err: %v", url, err)
479479
return nil, ErrInvalidURL
480480
}
481481

@@ -659,8 +659,8 @@ func (s *knowledgeService) createKnowledgeFromFileURL(
659659
logger.Error(ctx, "Invalid or unsafe file URL format")
660660
return nil, ErrInvalidURL
661661
}
662-
if safe, reason := secutils.IsSSRFSafeURL(fileURL); !safe {
663-
logger.Errorf(ctx, "File URL rejected for SSRF protection: %s, reason: %s", fileURL, reason)
662+
if err := secutils.ValidateURLForSSRF(fileURL); err != nil {
663+
logger.Errorf(ctx, "File URL rejected for SSRF protection: %s, err: %v", fileURL, err)
664664
return nil, ErrInvalidURL
665665
}
666666

@@ -7458,8 +7458,8 @@ func (s *knowledgeService) ProcessDocument(ctx context.Context, t *asynq.Task) e
74587458

74597459
if payload.FileURL != "" {
74607460
// file_url import: SSRF re-check (防 DNS 重绑定), download, persist, then delegate to convert()
7461-
if safe, reason := secutils.IsSSRFSafeURL(payload.FileURL); !safe {
7462-
logger.Errorf(ctx, "File URL rejected for SSRF protection in ProcessDocument: %s, reason: %s", payload.FileURL, reason)
7461+
if err := secutils.ValidateURLForSSRF(payload.FileURL); err != nil {
7462+
logger.Errorf(ctx, "File URL rejected for SSRF protection in ProcessDocument: %s, err: %v", payload.FileURL, err)
74637463
knowledge.ParseStatus = "failed"
74647464
knowledge.ErrorMessage = "File URL is not allowed for security reasons"
74657465
knowledge.UpdatedAt = time.Now()
@@ -7668,8 +7668,8 @@ func (s *knowledgeService) convert(
76687668
overrides := s.getParserEngineOverridesFromContext(ctx)
76697669

76707670
if isURL {
7671-
if safe, reason := secutils.IsSSRFSafeURL(payload.URL); !safe {
7672-
logger.Errorf(ctx, "URL rejected for SSRF protection: %s, reason: %s", payload.URL, reason)
7671+
if err := secutils.ValidateURLForSSRF(payload.URL); err != nil {
7672+
logger.Errorf(ctx, "URL rejected for SSRF protection: %s, err: %v", payload.URL, err)
76737673
knowledge.ParseStatus = "failed"
76747674
knowledge.ErrorMessage = "URL is not allowed for security reasons"
76757675
knowledge.UpdatedAt = time.Now()

internal/handler/system.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ func sanitizeStorageCheckError(err error) string {
643643
}
644644

645645
// isBlockedStorageEndpoint checks whether a storage endpoint resolves to a dangerous
646-
// address (cloud metadata, loopback, link-local). Unlike the stricter IsSSRFSafeURL,
646+
// address (cloud metadata, loopback, link-local). Unlike the stricter isSSRFSafeURL,
647647
// this allows private IPs since MinIO is commonly deployed on internal networks.
648648
// It also respects the SSRF_WHITELIST environment variable for whitelisted hosts.
649649
func isBlockedStorageEndpoint(endpoint string) (bool, string) {

internal/im/wecom/webhook_adapter.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,8 @@ func (a *WebhookAdapter) DownloadFile(ctx context.Context, msg *im.IncomingMessa
503503
func downloadFromURL(ctx context.Context, rawURL, fileName string) (io.ReadCloser, string, error) {
504504
// SSRF protection: reject internal/private URLs unless on the WeCom API allowlist.
505505
if !isAllowedIMAPIHost(rawURL) {
506-
if safe, reason := secutils.IsSSRFSafeURL(rawURL); !safe {
507-
return nil, "", fmt.Errorf("URL rejected for security reasons: %s", reason)
506+
if err := secutils.ValidateURLForSSRF(rawURL); err != nil {
507+
return nil, "", fmt.Errorf("URL rejected for security reasons: %v", err)
508508
}
509509
}
510510

@@ -582,7 +582,7 @@ func downloadFromURL(ctx context.Context, rawURL, fileName string) (io.ReadClose
582582
}
583583

584584
// allowedIMAPIHosts lists IM platform API hosts that are trusted for file downloads.
585-
// URLs pointing to these hosts bypass IsSSRFSafeURL checks because the WeCom API
585+
// URLs pointing to these hosts bypass isSSRFSafeURL checks because the WeCom API
586586
// itself returns these URLs in callback payloads (e.g. temporary media links).
587587
var allowedIMAPIHosts = []string{
588588
"qyapi.weixin.qq.com",

internal/infrastructure/docparser/image_resolver.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -667,9 +667,9 @@ func (r *ImageResolver) ResolveRemoteImages(
667667
continue
668668
}
669669

670-
// --- SSRF check ---
671-
if safe, reason := secutils.IsSSRFSafeURL(imgURL); !safe {
672-
log.Printf("WARN: remote image blocked by SSRF check (%s): %s", reason, imgURL)
670+
// --- SSRF check (centralised entry-point with whitelist support) ---
671+
if err := secutils.ValidateURLForSSRF(imgURL); err != nil {
672+
log.Printf("WARN: remote image blocked by SSRF check (%v): %s", err, imgURL)
673673
continue
674674
}
675675

internal/infrastructure/docparser/mineru_cloud_converter.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,8 +345,8 @@ func (c *MinerUCloudReader) extractDoneResult(_ context.Context, item *extractRe
345345
var imgRefPattern = regexp.MustCompile(`!\[[^\]]*\]\(([^)]+)\)`)
346346

347347
func downloadAndExtractZip(zipURL string) (string, []types.ImageRef, error) {
348-
if safe, reason := utils.IsSSRFSafeURL(zipURL); !safe {
349-
return "", nil, fmt.Errorf("zip URL blocked by SSRF check: %s", reason)
348+
if err := utils.ValidateURLForSSRF(zipURL); err != nil {
349+
return "", nil, fmt.Errorf("zip URL blocked by SSRF check: %v", err)
350350
}
351351
client := utils.NewSSRFSafeHTTPClient(utils.SSRFSafeHTTPClientConfig{Timeout: 120 * time.Second, MaxRedirects: 5})
352352
resp, err := client.Get(zipURL)

internal/utils/security.go

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,19 @@ func isRestrictedIP(ip net.IP) (bool, string) {
302302
return true, fmt.Sprintf("IPv4-mapped %s", reason)
303303
}
304304
}
305+
// Teredo tunneling addresses: 2001:0000::/32
306+
// Embed arbitrary IPv4 in the payload; can reach internal hosts via relay.
307+
if ip[0] == 0x20 && ip[1] == 0x01 && ip[2] == 0x00 && ip[3] == 0x00 {
308+
return true, "Teredo tunneling address"
309+
}
310+
// 6to4 addresses: 2002::/16
311+
// Bits 16-47 carry an IPv4 address; block when embedded IPv4 is restricted.
312+
if ip[0] == 0x20 && ip[1] == 0x02 {
313+
embeddedIP := net.IP(ip[2:6])
314+
if restricted, reason := isRestrictedIP(embeddedIP); restricted {
315+
return true, fmt.Sprintf("6to4 embedded %s", reason)
316+
}
317+
}
305318
}
306319

307320
return false, ""
@@ -357,15 +370,15 @@ func isIPLikeHostname(hostname string) bool {
357370
return false
358371
}
359372

360-
// IsSSRFSafeURL validates a URL to prevent SSRF attacks
373+
// isSSRFSafeURL validates a URL to prevent SSRF attacks
361374
// It checks for:
362375
// - Valid http/https protocol
363376
// - Private IP addresses (10.x.x.x, 172.16-31.x.x, 192.168.x.x)
364377
// - Loopback addresses (127.x.x.x, ::1)
365378
// - Link-local addresses (169.254.x.x, fe80::)
366379
// - Cloud metadata endpoints
367380
// - Reserved hostnames (localhost, *.local, etc.)
368-
func IsSSRFSafeURL(rawURL string) (bool, string) {
381+
func isSSRFSafeURL(rawURL string) (bool, string) {
369382
if rawURL == "" {
370383
return false, "URL is empty"
371384
}
@@ -408,11 +421,14 @@ func IsSSRFSafeURL(rawURL string) (bool, string) {
408421
}
409422
}
410423

411-
// STRICT MODE: Completely block IP addresses in URLs
412-
// This prevents all IP-based SSRF attacks including edge cases and bypasses
424+
// STRICT MODE: Block all direct IP addresses in URLs (both IPv4 and IPv6).
425+
// This prevents IP-based SSRF attacks including obfuscation, tunneling, and
426+
// transition mechanism bypasses. Legitimate IPs should be whitelisted via
427+
// SSRF_WHITELIST env var; the whitelist is checked by ValidateURLForSSRF
428+
// before this function is called.
413429
ip := net.ParseIP(hostname)
414430
if ip != nil {
415-
return false, "direct IP address access is not allowed, use domain name instead"
431+
return false, "direct IP address access is not allowed, use domain name or add to SSRF_WHITELIST"
416432
}
417433

418434
// Also check for IP addresses in various formats that ParseIP might not catch
@@ -425,11 +441,6 @@ func IsSSRFSafeURL(rawURL string) (bool, string) {
425441
// This prevents DNS rebinding attacks where a domain resolves to internal IPs
426442
ips, err := net.LookupIP(hostname)
427443
if err != nil {
428-
// DNS resolution failed - reject the URL for security
429-
// This prevents attacks where:
430-
// 1. The domain is only resolvable within internal network (intranet domains)
431-
// 2. Different DNS servers between validation and actual request
432-
// 3. Attacker-controlled DNS that selectively responds
433444
return false, fmt.Sprintf("DNS resolution failed for hostname %s: cannot verify if it resolves to safe IP", hostname)
434445
}
435446

@@ -760,9 +771,18 @@ func NewSSRFSafeHTTPClient(config SSRFSafeHTTPClientConfig) *http.Client {
760771
return fmt.Errorf("stopped after %d redirects", config.MaxRedirects)
761772
}
762773

763-
// Validate the redirect target URL for SSRF
774+
// Validate the redirect target URL for SSRF (whitelist-aware).
775+
// Even whitelisted hosts must use http/https to prevent scheme-based attacks.
776+
redirectScheme := strings.ToLower(req.URL.Scheme)
777+
if redirectScheme != "http" && redirectScheme != "https" {
778+
return fmt.Errorf("%w: invalid scheme %s", ErrSSRFRedirectBlocked, redirectScheme)
779+
}
780+
redirectHost := req.URL.Hostname()
781+
if redirectHost != "" && IsSSRFWhitelisted(redirectHost) {
782+
return nil
783+
}
764784
redirectURL := req.URL.String()
765-
if safe, reason := IsSSRFSafeURL(redirectURL); !safe {
785+
if safe, reason := isSSRFSafeURL(redirectURL); !safe {
766786
return fmt.Errorf("%w: %s", ErrSSRFRedirectBlocked, reason)
767787
}
768788

@@ -782,7 +802,9 @@ func SSRFSafeDialContext(ctx context.Context, network, addr string) (net.Conn, e
782802
}
783803

784804
// Whitelisted hosts bypass all dial-time SSRF checks, consistent with
785-
// ValidateURLForSSRF which skips IsSSRFSafeURL for whitelisted hosts.
805+
// ValidateURLForSSRF which skips isSSRFSafeURL for whitelisted hosts.
806+
// NOTE: This intentionally relaxes DNS-rebinding protection for whitelisted
807+
// hosts. Admins must ensure whitelisted domains are under their control.
786808
if IsSystemProxy(addr) || IsSSRFWhitelisted(host) {
787809
dialer := &net.Dialer{
788810
Timeout: 30 * time.Second,
@@ -834,10 +856,11 @@ func SSRFSafeDialContext(ctx context.Context, network, addr string) (net.Conn, e
834856
// allowed host patterns. Each entry can be:
835857
// - An exact domain: "example.com"
836858
// - A wildcard domain: "*.example.com" (matches all subdomains)
837-
// - An IP address: "203.0.113.5"
838-
// - A CIDR range: "10.0.0.0/8"
859+
// - An IPv4 address: "203.0.113.5"
860+
// - An IPv6 address: "2001:db8::1"
861+
// - A CIDR range (v4 or v6): "10.0.0.0/8", "2001:db8::/32"
839862
//
840-
// Whitelisted entries bypass the normal SSRF checks performed by IsSSRFSafeURL.
863+
// Whitelisted entries bypass the normal SSRF checks performed by isSSRFSafeURL.
841864

842865
var (
843866
ssrfWhitelistOnce sync.Once
@@ -932,16 +955,16 @@ func IsSSRFWhitelisted(hostname string) bool {
932955
return false
933956
}
934957

935-
// ResetSSRFWhitelistForTest resets the whitelist singleton so tests can
958+
// resetSSRFWhitelistForTest resets the whitelist singleton so tests can
936959
// re-read the environment variable. NOT for production use.
937-
func ResetSSRFWhitelistForTest() {
960+
func resetSSRFWhitelistForTest() {
938961
ssrfWhitelistOnce = sync.Once{}
939962
ssrfWhitelist = nil
940963
}
941964

942965
// ValidateURLForSSRF is the centralised entry-point that all handlers should
943966
// call to validate a user-supplied URL. It first checks the SSRF_WHITELIST;
944-
// whitelisted hosts skip the full IsSSRFSafeURL check.
967+
// whitelisted hosts skip the full isSSRFSafeURL check.
945968
//
946969
// rawURL may be a full URL ("https://example.com/v1") or a bare host/host:port
947970
// (for cases like ReconnectDocReader). If a scheme is missing the function
@@ -975,7 +998,7 @@ func ValidateURLForSSRF(rawURL string) error {
975998
}
976999

9771000
// Delegate to the full SSRF validation (uses the normalised URL).
978-
if safe, reason := IsSSRFSafeURL(normalized); !safe {
1001+
if safe, reason := isSSRFSafeURL(normalized); !safe {
9791002
return fmt.Errorf("SSRF validation failed: %s", reason)
9801003
}
9811004
return nil

0 commit comments

Comments
 (0)