Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions cmd/bd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,20 +515,31 @@ func validateSyncConfig(repoPath string) []string {
issues = append(issues, "federation.remote: required for Dolt sync")
}

// Validate remote URL format
// Strict security validation of remote URL
if federationRemote != "" {
if !isValidRemoteURL(federationRemote) {
issues = append(issues, fmt.Sprintf("federation.remote: %q is not a valid remote URL (expected dolthub://, gs://, s3://, az://, file://, or standard git URL)", federationRemote))
if err := remotecache.ValidateRemoteURL(federationRemote); err != nil {
issues = append(issues, fmt.Sprintf("federation.remote: %s", err))
}
}

// Validate against allowed-remote-patterns if configured
if federationRemote != "" {
patterns := v.GetStringSlice("federation.allowed-remote-patterns")
if len(patterns) > 0 {
if err := remotecache.ValidateRemoteURLWithPatterns(federationRemote, patterns); err != nil {
issues = append(issues, fmt.Sprintf("federation.remote: %s", err))
}
}
}

return issues
}

// isValidRemoteURL validates remote URL formats for sync configuration.
// Delegates to remotecache.IsRemoteURL for consistent URL classification.
func isValidRemoteURL(url string) bool {
return remotecache.IsRemoteURL(url)
// Uses strict security validation that checks structural correctness,
// rejects control characters, and validates per-scheme requirements.
func isValidRemoteURL(rawURL string) bool {
return remotecache.ValidateRemoteURL(rawURL) == nil
}

// findBeadsRepoRoot walks up from the given path to find the repo root (containing .beads)
Expand Down
62 changes: 61 additions & 1 deletion cmd/bd/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ federation:
issues := validateSyncConfig(tmpDir)
found := false
for _, issue := range issues {
if strings.Contains(issue, "federation.remote") && strings.Contains(issue, "not a valid remote URL") {
if strings.Contains(issue, "federation.remote") && (strings.Contains(issue, "not a valid remote URL") || strings.Contains(issue, "no scheme") || strings.Contains(issue, "not allowed")) {
found = true
break
}
Expand Down Expand Up @@ -420,6 +420,66 @@ federation:
t.Errorf("Expected no issues for valid config, got: %v", issues)
}
})

t.Run("remote URL with null byte", func(t *testing.T) {
configContent := "prefix: test\nfederation:\n remote: \"dolthub://org/repo\\x00evil\"\n"
if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil {
t.Fatalf("Failed to write config.yaml: %v", err)
}

issues := validateSyncConfig(tmpDir)
found := false
for _, issue := range issues {
if strings.Contains(issue, "federation.remote") {
found = true
break
}
}
if !found {
t.Errorf("Expected issue about invalid remote URL with null byte, got: %v", issues)
}
})

t.Run("allowed-remote-patterns enforcement", func(t *testing.T) {
configContent := `prefix: test
federation:
remote: "https://github.com/user/repo"
allowed-remote-patterns:
- "dolthub://myorg/*"
`
if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil {
t.Fatalf("Failed to write config.yaml: %v", err)
}

issues := validateSyncConfig(tmpDir)
found := false
for _, issue := range issues {
if strings.Contains(issue, "does not match") {
found = true
break
}
}
if !found {
t.Errorf("Expected issue about remote not matching allowed patterns, got: %v", issues)
}
})

t.Run("allowed-remote-patterns passes when matching", func(t *testing.T) {
configContent := `prefix: test
federation:
remote: "dolthub://myorg/myrepo"
allowed-remote-patterns:
- "dolthub://myorg/*"
`
if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil {
t.Fatalf("Failed to write config.yaml: %v", err)
}

issues := validateSyncConfig(tmpDir)
if len(issues) != 0 {
t.Errorf("Expected no issues when remote matches allowed pattern, got: %v", issues)
}
})
}

func TestResolvedConfigRepoRoot(t *testing.T) {
Expand Down
5 changes: 3 additions & 2 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,9 @@ func Initialize() error {
v.SetDefault("sync.require_confirmation_on_mass_delete", false)

// Federation configuration (optional Dolt remote)
v.SetDefault("federation.remote", "") // e.g., dolthub://org/beads, gs://bucket/beads, s3://bucket/beads, az://account.blob.core.windows.net/container/beads
v.SetDefault("federation.sovereignty", "") // T1 | T2 | T3 | T4 (empty = no restriction)
v.SetDefault("federation.remote", "") // e.g., dolthub://org/beads, gs://bucket/beads, s3://bucket/beads, az://account.blob.core.windows.net/container/beads
v.SetDefault("federation.sovereignty", "") // T1 | T2 | T3 | T4 (empty = no restriction)
v.SetDefault("federation.allowed-remote-patterns", []string{}) // glob patterns restricting allowed remote URLs (enterprise lockdown)

// Push configuration defaults
v.SetDefault("no-push", false)
Expand Down
3 changes: 3 additions & 0 deletions internal/remotecache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ func (c *Cache) lockPath(remoteURL string) string {
// DOLT_REMOTE_USER, DOLT_REMOTE_PASSWORD, or DoltHub credentials
// configured via `dolt creds`.
func (c *Cache) Ensure(ctx context.Context, remoteURL string) (string, error) {
if err := ValidateRemoteURL(remoteURL); err != nil {
return "", fmt.Errorf("invalid remote URL: %w", err)
}
if _, err := exec.LookPath("dolt"); err != nil {
return "", fmt.Errorf("dolt CLI not found (required for remote cache): %w", err)
}
Expand Down
187 changes: 186 additions & 1 deletion internal/remotecache/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package remotecache
import (
"crypto/sha256"
"fmt"
"net/url"
"path"
"regexp"
"strings"
)
Expand All @@ -21,8 +23,28 @@ var remoteSchemes = []string{
"git+https://",
}

// allowedSchemes is the set of recognized URL schemes for validation.
var allowedSchemes = map[string]bool{
"dolthub": true,
"gs": true,
"s3": true,
"az": true,
"file": true,
"https": true,
"http": true,
"ssh": true,
"git+ssh": true,
"git+https": true,
}

// gitSSHPattern matches SCP-style git remote URLs (user@host:path).
var gitSSHPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+@[a-zA-Z0-9][a-zA-Z0-9._-]*:.+$`)
// The path portion excludes control characters (0x00-0x1f, 0x7f).
var gitSSHPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+@[a-zA-Z0-9][a-zA-Z0-9._-]*:[^\x00-\x1f\x7f]+$`)

// validRemoteNameRegex matches valid remote names: starts with a letter,
// contains only alphanumeric characters, hyphens, and underscores.
// Aligned with peer-name validation in credentials.go.
var validRemoteNameRegex = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9_-]*$`)

// IsRemoteURL returns true if s looks like a dolt remote URL rather than
// a local filesystem path. Recognized schemes: dolthub://, https://, http://,
Expand All @@ -37,6 +59,169 @@ func IsRemoteURL(s string) bool {
return gitSSHPattern.MatchString(s)
}

// ValidateRemoteURL performs strict security validation on a remote URL.
// It rejects URLs containing control characters (including null bytes),
// validates structural correctness per scheme, and rejects leading dashes
// that could be interpreted as CLI flags.
//
// This is a security boundary — all remote URLs should pass through this
// before reaching exec.Command arguments or SQL parameters.
func ValidateRemoteURL(rawURL string) error {
if rawURL == "" {
return fmt.Errorf("remote URL cannot be empty")
}

// Reject control characters (null bytes, newlines, tabs, etc.)
for i, c := range rawURL {
if c < 0x20 || c == 0x7f {
return fmt.Errorf("remote URL contains control character at position %d (0x%02x)", i, c)
}
}

// Reject leading dash (CLI flag injection via exec.Command arguments)
if strings.HasPrefix(rawURL, "-") {
return fmt.Errorf("remote URL must not start with a dash")
}

// SCP-style URLs (user@host:path) are validated separately
if gitSSHPattern.MatchString(rawURL) {
return validateSCPURL(rawURL)
}

// Parse as standard URL
return validateSchemeURL(rawURL)
}

// validateSchemeURL validates a scheme-based URL (https://, dolthub://, etc.)
func validateSchemeURL(rawURL string) error {
// net/url doesn't understand git+ssh:// etc., so we normalize first
normalizedURL := rawURL
scheme := ""
if idx := strings.Index(rawURL, "://"); idx > 0 {
scheme = rawURL[:idx]
// For net/url parsing, replace git+ssh with a parseable scheme
if strings.HasPrefix(scheme, "git+") {
normalizedURL = rawURL[len(scheme)+3:] // strip scheme://
normalizedURL = "placeholder://" + normalizedURL
}
}

if scheme == "" {
return fmt.Errorf("remote URL has no scheme (expected one of: %s)", strings.Join(sortedSchemes(), ", "))
}

if !allowedSchemes[scheme] {
return fmt.Errorf("remote URL scheme %q is not allowed (expected one of: %s)", scheme, strings.Join(sortedSchemes(), ", "))
}

parsed, err := url.Parse(normalizedURL)
if err != nil {
return fmt.Errorf("remote URL is malformed: %w", err)
}

// Scheme-specific structural validation
switch scheme {
case "dolthub":
// dolthub://org/repo — requires org and repo
p := strings.TrimPrefix(parsed.Path, "/")
host := parsed.Host
combined := host
if p != "" {
combined = host + "/" + p
}
parts := strings.Split(combined, "/")
if len(parts) < 2 || parts[0] == "" || parts[1] == "" {
return fmt.Errorf("dolthub:// URL must have org/repo format (e.g., dolthub://myorg/myrepo)")
}
case "https", "http", "git+https":
if parsed.Host == "" {
return fmt.Errorf("%s:// URL must include a hostname", scheme)
}
case "ssh", "git+ssh":
if parsed.Host == "" {
return fmt.Errorf("%s:// URL must include a hostname", scheme)
}
case "s3", "gs":
// s3://bucket/path, gs://bucket/path — host is the bucket
if parsed.Host == "" {
return fmt.Errorf("%s:// URL must include a bucket name", scheme)
}
case "az":
// az://account.blob.core.windows.net/container/path
if parsed.Host == "" {
return fmt.Errorf("az:// URL must include a storage account hostname")
}
case "file":
// file:// is allowed with any path
}

return nil
}

// validateSCPURL validates an SCP-style URL (user@host:path)
func validateSCPURL(rawURL string) error {
// Already matched gitSSHPattern, so structure is valid.
// Extract host and verify no control chars (already checked above).
atIdx := strings.Index(rawURL, "@")
colonIdx := strings.Index(rawURL[atIdx:], ":")
if atIdx < 0 || colonIdx < 0 {
return fmt.Errorf("SCP-style URL must be in user@host:path format")
}
return nil
}

// ValidateRemoteName checks that a remote name is safe for use as a Dolt
// remote identifier. Names must start with a letter and contain only
// alphanumeric characters, hyphens, and underscores. Max 64 characters.
func ValidateRemoteName(name string) error {
if name == "" {
return fmt.Errorf("remote name cannot be empty")
}
if len(name) > 64 {
return fmt.Errorf("remote name too long (max 64 characters)")
}
if strings.HasPrefix(name, "-") {
return fmt.Errorf("remote name must not start with a dash")
}
if !validRemoteNameRegex.MatchString(name) {
return fmt.Errorf("remote name must start with a letter and contain only alphanumeric characters, hyphens, and underscores")
}
return nil
}

// MatchesRemotePattern checks whether a URL matches a glob-style pattern.
// Patterns use path.Match semantics (e.g., "dolthub://myorg/*").
func MatchesRemotePattern(rawURL, pattern string) bool {
matched, err := path.Match(pattern, rawURL)
if err != nil {
return false
}
return matched
}

// ValidateRemoteURLWithPatterns validates a URL and optionally checks it
// against an allowlist of glob patterns. If patterns is empty, only
// structural validation is performed.
func ValidateRemoteURLWithPatterns(rawURL string, patterns []string) error {
if err := ValidateRemoteURL(rawURL); err != nil {
return err
}
if len(patterns) == 0 {
return nil
}
for _, p := range patterns {
if MatchesRemotePattern(rawURL, p) {
return nil
}
}
return fmt.Errorf("remote URL %q does not match any allowed pattern", rawURL)
}

func sortedSchemes() []string {
// Return in a consistent display order
return []string{"dolthub", "https", "http", "ssh", "git+ssh", "git+https", "s3", "gs", "az", "file"}
}

// CacheKey returns a filesystem-safe identifier for a remote URL.
// It uses the first 16 hex characters (64 bits) of the SHA-256 hash.
// Birthday-bound collision risk is negligible for a local cache: 50% at
Expand Down
Loading
Loading