Skip to content
29 changes: 28 additions & 1 deletion packages/automl/bff/internal/api/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,34 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
s3ClientFactory = s3mocks.NewMockClientFactory()
} else {
logger.Info("Using real S3 client factory")
s3ClientFactory = s3int.NewRealClientFactory(s3int.S3ClientOptions{DevMode: cfg.DevMode})
// INTENTIONAL SECURITY DECISION: S3 TLS and network defaults are permissive.
//
// InsecureSkipVerify defaults to TRUE because RHOAI's managed MinIO and many
// customer S3-compatible stores use self-signed certificates. Requiring valid
// certs by default would break the most common deployment scenario. The BFF
// runs inside the cluster and communicates over the internal network, so the
// risk of MITM is low. Customers who use a CA-signed S3 endpoint can set
// S3_INSECURE_SKIP_VERIFY=false to enforce certificate verification.
//
// AllowInternalIPs defaults to TRUE because MinIO typically runs on the same
// cluster as the BFF, using RFC-1918 private IPs (e.g. 10.x service IPs).
// Blocking private IPs by default would prevent the most common S3 configuration.
// Loopback, link-local, and reserved ranges remain always blocked regardless
// of this setting. Customers can set S3_ALLOW_INTERNAL_IPS=false to also block
// private ranges if their S3 store is external.
//
// AllowHTTP defaults to TRUE because some in-cluster MinIO deployments expose
// HTTP-only endpoints. Traffic stays within the cluster network, so the risk
// of interception is low. Customers can set S3_ALLOW_HTTP=false to require HTTPS.
s3InsecureSkipVerify := os.Getenv("S3_INSECURE_SKIP_VERIFY") != "false"
s3AllowInternalIPs := os.Getenv("S3_ALLOW_INTERNAL_IPS") != "false"
s3AllowHTTP := os.Getenv("S3_ALLOW_HTTP") != "false"
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
s3ClientFactory = s3int.NewRealClientFactory(s3int.S3ClientOptions{
DevMode: cfg.DevMode,
InsecureSkipVerify: s3InsecureSkipVerify,
AllowInternalIPs: s3AllowInternalIPs,
AllowHTTP: s3AllowHTTP,
})
}

app := &App{
Expand Down
57 changes: 50 additions & 7 deletions packages/automl/bff/internal/integrations/s3/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ import (
"bufio"
"bytes"
"context"
"crypto/tls"
"encoding/csv"
"errors"
"fmt"
"io"
"log/slog"
"math"
"net"
"net/http"
"net/url"
"os"
"strconv"
Expand Down Expand Up @@ -97,6 +99,17 @@ func NewRealS3Client(creds *S3Credentials, opts S3ClientOptions) (*RealS3Client,
Credentials: credentials.NewStaticCredentialsProvider(creds.AccessKeyID, creds.SecretAccessKey, ""),
}

if opts.InsecureSkipVerify {
cfg.HTTPClient = &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: true, //nolint:gosec // user-configured for dev/self-signed certs
MinVersion: tls.VersionTLS12,
},
},
}
}

c.s3Client = awss3.NewFromConfig(cfg, func(o *awss3.Options) {
o.BaseEndpoint = aws.String(validatedEndpoint)
o.UsePathStyle = true
Expand Down Expand Up @@ -423,7 +436,13 @@ func (c *RealS3Client) GetCSVSchema(ctx context.Context, bucket, key string) (CS
}

// validateAndNormalizeEndpoint validates the S3 endpoint URL to prevent SSRF attacks.
// Requires HTTPS and blocks private/reserved IP ranges.
//
// Configurable behaviors (all default to permissive for in-cluster MinIO compatibility):
// - AllowHTTP (S3_ALLOW_HTTP, default: true) — permits plain HTTP endpoints
// - InsecureSkipVerify (S3_INSECURE_SKIP_VERIFY, default: true) — skips TLS cert verification
// - AllowInternalIPs (S3_ALLOW_INTERNAL_IPS, default: true) — permits RFC-1918 private IPs
//
// Loopback, link-local, and reserved IP ranges are always blocked.
func (c *RealS3Client) validateAndNormalizeEndpoint(endpoint string) (string, error) {
if endpoint == "" {
return "", fmt.Errorf("endpoint URL cannot be empty")
Expand All @@ -434,8 +453,11 @@ func (c *RealS3Client) validateAndNormalizeEndpoint(endpoint string) (string, er
return "", fmt.Errorf("invalid endpoint URL format: %w", err)
}

if parsedURL.Scheme != "https" {
return "", fmt.Errorf("endpoint URL must use HTTPS scheme, got: %s", parsedURL.Scheme)
if parsedURL.Scheme != "https" && parsedURL.Scheme != "http" {
return "", fmt.Errorf("endpoint URL must use HTTPS or HTTP scheme, got: %s", parsedURL.Scheme)
}
if parsedURL.Scheme == "http" && !c.options.AllowHTTP {
return "", fmt.Errorf("endpoint URL uses HTTP but S3_ALLOW_HTTP is not enabled; use HTTPS or set S3_ALLOW_HTTP=true")
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

hostname := parsedURL.Hostname()
Expand Down Expand Up @@ -472,21 +494,42 @@ func (c *RealS3Client) validateAndNormalizeEndpoint(endpoint string) (string, er
}

// validateIPAddress checks if an IP address is in a blocked range.
// When AllowInternalIPs is true, RFC-1918 private ranges and IPv6 unique local addresses
// are permitted (for in-cluster MinIO and similar services).
func (c *RealS3Client) validateIPAddress(ip net.IP) error {
// Always-blocked ranges (dangerous regardless of deployment context)
blockedRanges := []struct {
cidr string
description string
}{
{"0.0.0.0/8", "reserved 'this network' range (RFC 1122)"},
{"10.0.0.0/8", "RFC-1918 private range (10.0.0.0/8)"},
{"172.16.0.0/12", "RFC-1918 private range (172.16.0.0/12)"},
{"192.168.0.0/16", "RFC-1918 private range (192.168.0.0/16)"},
{"169.254.0.0/16", "link-local range (169.254.0.0/16)"},
{"127.0.0.0/8", "loopback range (127.0.0.0/8)"},
{"240.0.0.0/4", "reserved for future use (RFC 1112)"},
{"::1/128", "IPv6 loopback"},
{"fe80::/10", "IPv6 link-local"},
{"fc00::/7", "IPv6 unique local addresses"},
}

// Private/internal ranges — blocked unless AllowInternalIPs is set
if !c.options.AllowInternalIPs {
blockedRanges = append(blockedRanges,
struct {
cidr string
description string
}{"10.0.0.0/8", "RFC-1918 private range (10.0.0.0/8)"},
struct {
cidr string
description string
}{"172.16.0.0/12", "RFC-1918 private range (172.16.0.0/12)"},
struct {
cidr string
description string
}{"192.168.0.0/16", "RFC-1918 private range (192.168.0.0/16)"},
struct {
cidr string
description string
}{"fc00::/7", "IPv6 unique local addresses"},
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
}

for _, blocked := range blockedRanges {
Expand Down
16 changes: 16 additions & 0 deletions packages/automl/bff/internal/integrations/s3/client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@ const (
type S3ClientOptions struct {
DevMode bool // Controls whether ALLOW_UNRESOLVED_S3_ENDPOINTS is honoured

// InsecureSkipVerify skips TLS certificate verification for S3 endpoints.
// Required for MinIO or other S3-compatible stores using self-signed certificates.
// Controlled via S3_INSECURE_SKIP_VERIFY env var (default: true).
InsecureSkipVerify bool

// AllowInternalIPs permits connections to RFC-1918 private IP ranges
// (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16) and IPv6 unique local addresses.
// Required when the S3-compatible store (e.g. MinIO) runs on the same cluster.
// Controlled via S3_ALLOW_INTERNAL_IPS env var (default: true).
AllowInternalIPs bool

// AllowHTTP permits plain HTTP (non-TLS) S3 endpoint URLs.
// Some MinIO deployments use HTTP internally within the cluster.
// Controlled via S3_ALLOW_HTTP env var (default: true).
AllowHTTP bool

// Transfer manager tuning for GetObject. Zero values fall back to defaults.
Concurrency int
PartSizeBytes int64
Expand Down
29 changes: 28 additions & 1 deletion packages/autorag/bff/internal/api/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,34 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
s3ClientFactory = s3mocks.NewMockClientFactory()
} else {
logger.Info("Using real S3 client factory")
s3ClientOptions := s3int.S3ClientOptions{DevMode: cfg.DevMode}
// INTENTIONAL SECURITY DECISION: S3 TLS and network defaults are permissive.
//
// InsecureSkipVerify defaults to TRUE because RHOAI's managed MinIO and many
// customer S3-compatible stores use self-signed certificates. Requiring valid
// certs by default would break the most common deployment scenario. The BFF
// runs inside the cluster and communicates over the internal network, so the
// risk of MITM is low. Customers who use a CA-signed S3 endpoint can set
// S3_INSECURE_SKIP_VERIFY=false to enforce certificate verification.
//
// AllowInternalIPs defaults to TRUE because MinIO typically runs on the same
// cluster as the BFF, using RFC-1918 private IPs (e.g. 10.x service IPs).
// Blocking private IPs by default would prevent the most common S3 configuration.
// Loopback, link-local, and reserved ranges remain always blocked regardless
// of this setting. Customers can set S3_ALLOW_INTERNAL_IPS=false to also block
// private ranges if their S3 store is external.
//
// AllowHTTP defaults to TRUE because some in-cluster MinIO deployments expose
// HTTP-only endpoints. Traffic stays within the cluster network, so the risk
// of interception is low. Customers can set S3_ALLOW_HTTP=false to require HTTPS.
s3InsecureSkipVerify := os.Getenv("S3_INSECURE_SKIP_VERIFY") != "false"
s3AllowInternalIPs := os.Getenv("S3_ALLOW_INTERNAL_IPS") != "false"
s3AllowHTTP := os.Getenv("S3_ALLOW_HTTP") != "false"
s3ClientOptions := s3int.S3ClientOptions{
DevMode: cfg.DevMode,
InsecureSkipVerify: s3InsecureSkipVerify,
AllowInternalIPs: s3AllowInternalIPs,
AllowHTTP: s3AllowHTTP,
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
}
s3ClientFactory = s3int.NewRealClientFactory(s3ClientOptions)
}

Expand Down
67 changes: 55 additions & 12 deletions packages/autorag/bff/internal/integrations/s3/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package s3

import (
"context"
"crypto/tls"
"errors"
"fmt"
"io"
"log/slog"
"net"
"net/http"
"net/url"
"os"
"strings"
Expand Down Expand Up @@ -77,6 +79,17 @@ func NewRealS3Client(creds *S3Credentials, opts S3ClientOptions) (*RealS3Client,
Credentials: credentials.NewStaticCredentialsProvider(creds.AccessKeyID, creds.SecretAccessKey, ""),
}

if opts.InsecureSkipVerify {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
cfg.HTTPClient = &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: true, //nolint:gosec // user-configured for dev/self-signed certs
MinVersion: tls.VersionTLS12,
},
},
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
Comment thread
chrjones-rh marked this conversation as resolved.
}

c.s3Client = awss3.NewFromConfig(cfg, func(o *awss3.Options) {
o.BaseEndpoint = aws.String(validatedEndpoint)
// Enable path-style addressing for S3-compatible services like MinIO
Expand Down Expand Up @@ -247,8 +260,13 @@ func (c *RealS3Client) ObjectExists(ctx context.Context, bucket, key string) (bo
}

// validateAndNormalizeEndpoint validates the S3 endpoint URL to prevent SSRF attacks.
// It ensures the URL uses HTTPS, is properly formatted, and does not target private IP ranges.
// Returns the normalized URL string or an error if validation fails.
//
// Configurable behaviors (all default to permissive for in-cluster MinIO compatibility):
// - AllowHTTP (S3_ALLOW_HTTP, default: true) — permits plain HTTP endpoints
// - InsecureSkipVerify (S3_INSECURE_SKIP_VERIFY, default: true) — skips TLS cert verification
// - AllowInternalIPs (S3_ALLOW_INTERNAL_IPS, default: true) — permits RFC-1918 private IPs
//
// Loopback, link-local, and reserved IP ranges are always blocked.
func (c *RealS3Client) validateAndNormalizeEndpoint(endpoint string) (string, error) {
if endpoint == "" {
return "", fmt.Errorf("endpoint URL cannot be empty")
Expand All @@ -260,9 +278,11 @@ func (c *RealS3Client) validateAndNormalizeEndpoint(endpoint string) (string, er
return "", fmt.Errorf("invalid endpoint URL format: %w", err)
}

// Ensure scheme is HTTPS
if parsedURL.Scheme != "https" {
return "", fmt.Errorf("endpoint URL must use HTTPS scheme, got: %s", parsedURL.Scheme)
if parsedURL.Scheme != "https" && parsedURL.Scheme != "http" {
return "", fmt.Errorf("endpoint URL must use HTTPS or HTTP scheme, got: %s", parsedURL.Scheme)
}
if parsedURL.Scheme == "http" && !c.options.AllowHTTP {
return "", fmt.Errorf("endpoint URL uses HTTP but S3_ALLOW_HTTP is not enabled; use HTTPS or set S3_ALLOW_HTTP=true")
}

// Extract hostname (may be hostname or IP)
Expand Down Expand Up @@ -306,24 +326,47 @@ func (c *RealS3Client) validateAndNormalizeEndpoint(endpoint string) (string, er
return parsedURL.String(), nil
}

// validateIPAddress checks if an IP address is in a blocked range (private or link-local).
// Returns an error if the IP is blocked, nil otherwise.
// validateIPAddress checks if an IP address is in a blocked range.
// When AllowInternalIPs is true, RFC-1918 private ranges and IPv6 unique local addresses
// are permitted (for in-cluster MinIO and similar services).
func (c *RealS3Client) validateIPAddress(ip net.IP) error {
// Always-blocked ranges (dangerous regardless of deployment context)
blockedRanges := []struct {
cidr string
description string
}{
{"0.0.0.0/8", "reserved 'this network' range (RFC 1122)"},
{"10.0.0.0/8", "RFC-1918 private range (10.0.0.0/8)"},
{"100.64.0.0/10", "Carrier-Grade NAT range (RFC 6598)"},
{"172.16.0.0/12", "RFC-1918 private range (172.16.0.0/12)"},
{"192.168.0.0/16", "RFC-1918 private range (192.168.0.0/16)"},
{"169.254.0.0/16", "link-local range (169.254.0.0/16)"},
{"127.0.0.0/8", "loopback range (127.0.0.0/8)"},
{"240.0.0.0/4", "reserved for future use (RFC 1112)"},
{"::1/128", "IPv6 loopback"},
{"fe80::/10", "IPv6 link-local"},
{"fc00::/7", "IPv6 unique local addresses"},
}

// Private/internal ranges — blocked unless AllowInternalIPs is set
if !c.options.AllowInternalIPs {
blockedRanges = append(blockedRanges,
struct {
cidr string
description string
}{"10.0.0.0/8", "RFC-1918 private range (10.0.0.0/8)"},
struct {
cidr string
description string
}{"100.64.0.0/10", "Carrier-Grade NAT range (RFC 6598)"},
struct {
cidr string
description string
}{"172.16.0.0/12", "RFC-1918 private range (172.16.0.0/12)"},
struct {
cidr string
description string
}{"192.168.0.0/16", "RFC-1918 private range (192.168.0.0/16)"},
struct {
cidr string
description string
}{"fc00::/7", "IPv6 unique local addresses"},
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
}

for _, blocked := range blockedRanges {
Expand Down
16 changes: 16 additions & 0 deletions packages/autorag/bff/internal/integrations/s3/client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,22 @@ const (
type S3ClientOptions struct {
DevMode bool // Controls whether ALLOW_UNRESOLVED_S3_ENDPOINTS is honored during endpoint validation

// InsecureSkipVerify skips TLS certificate verification for S3 endpoints.
// Required for MinIO or other S3-compatible stores using self-signed certificates.
// Controlled via S3_INSECURE_SKIP_VERIFY env var (default: true).
InsecureSkipVerify bool

// AllowInternalIPs permits connections to RFC-1918 private IP ranges
// (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16) and IPv6 unique local addresses.
// Required when the S3-compatible store (e.g. MinIO) runs on the same cluster.
// Controlled via S3_ALLOW_INTERNAL_IPS env var (default: true).
AllowInternalIPs bool

// AllowHTTP permits plain HTTP (non-TLS) S3 endpoint URLs.
// Some MinIO deployments use HTTP internally within the cluster.
// Controlled via S3_ALLOW_HTTP env var (default: true).
AllowHTTP bool
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

// Transfer manager tuning for GetObject downloads.
// Zero values fall back to the conservative defaults above.
Concurrency int // Number of concurrent part downloads
Expand Down
Loading