diff --git a/cmd/bd/config.go b/cmd/bd/config.go index 55c55e2a2e..d95ffb414c 100644 --- a/cmd/bd/config.go +++ b/cmd/bd/config.go @@ -515,10 +515,20 @@ func validateSyncConfig(repoPath string) []string { issues = append(issues, "federation.remote: required for Dolt sync") } - // Validate remote URL format + // Strict security validation of remote URL if federationRemote != "" { - if !isValidRemoteURL(federationRemote) { - issues = append(issues, fmt.Sprintf("federation.remote: %q is not a valid remote URL (expected dolthub://, gs://, s3://, az://, file://, or standard git URL)", federationRemote)) + if err := remotecache.ValidateRemoteURL(federationRemote); err != nil { + issues = append(issues, fmt.Sprintf("federation.remote: %s", err)) + } + } + + // Validate against allowed-remote-patterns if configured + if federationRemote != "" { + patterns := v.GetStringSlice("federation.allowed-remote-patterns") + if len(patterns) > 0 { + if err := remotecache.ValidateRemoteURLWithPatterns(federationRemote, patterns); err != nil { + issues = append(issues, fmt.Sprintf("federation.remote: %s", err)) + } } } @@ -526,9 +536,10 @@ func validateSyncConfig(repoPath string) []string { } // isValidRemoteURL validates remote URL formats for sync configuration. -// Delegates to remotecache.IsRemoteURL for consistent URL classification. -func isValidRemoteURL(url string) bool { - return remotecache.IsRemoteURL(url) +// Uses strict security validation that checks structural correctness, +// rejects control characters, and validates per-scheme requirements. +func isValidRemoteURL(rawURL string) bool { + return remotecache.ValidateRemoteURL(rawURL) == nil } // findBeadsRepoRoot walks up from the given path to find the repo root (containing .beads) diff --git a/cmd/bd/config_test.go b/cmd/bd/config_test.go index 54841ef622..6ed9332d8d 100644 --- a/cmd/bd/config_test.go +++ b/cmd/bd/config_test.go @@ -391,7 +391,7 @@ federation: issues := validateSyncConfig(tmpDir) found := false for _, issue := range issues { - if strings.Contains(issue, "federation.remote") && strings.Contains(issue, "not a valid remote URL") { + if strings.Contains(issue, "federation.remote") && (strings.Contains(issue, "not a valid remote URL") || strings.Contains(issue, "no scheme") || strings.Contains(issue, "not allowed")) { found = true break } @@ -420,6 +420,66 @@ federation: t.Errorf("Expected no issues for valid config, got: %v", issues) } }) + + t.Run("remote URL with null byte", func(t *testing.T) { + configContent := "prefix: test\nfederation:\n remote: \"dolthub://org/repo\\x00evil\"\n" + if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + issues := validateSyncConfig(tmpDir) + found := false + for _, issue := range issues { + if strings.Contains(issue, "federation.remote") { + found = true + break + } + } + if !found { + t.Errorf("Expected issue about invalid remote URL with null byte, got: %v", issues) + } + }) + + t.Run("allowed-remote-patterns enforcement", func(t *testing.T) { + configContent := `prefix: test +federation: + remote: "https://github.com/user/repo" + allowed-remote-patterns: + - "dolthub://myorg/*" +` + if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + issues := validateSyncConfig(tmpDir) + found := false + for _, issue := range issues { + if strings.Contains(issue, "does not match") { + found = true + break + } + } + if !found { + t.Errorf("Expected issue about remote not matching allowed patterns, got: %v", issues) + } + }) + + t.Run("allowed-remote-patterns passes when matching", func(t *testing.T) { + configContent := `prefix: test +federation: + remote: "dolthub://myorg/myrepo" + allowed-remote-patterns: + - "dolthub://myorg/*" +` + if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + issues := validateSyncConfig(tmpDir) + if len(issues) != 0 { + t.Errorf("Expected no issues when remote matches allowed pattern, got: %v", issues) + } + }) } func TestResolvedConfigRepoRoot(t *testing.T) { diff --git a/internal/config/config.go b/internal/config/config.go index f57c96a348..0c2d0dd05f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -144,8 +144,9 @@ func Initialize() error { v.SetDefault("sync.require_confirmation_on_mass_delete", false) // Federation configuration (optional Dolt remote) - v.SetDefault("federation.remote", "") // e.g., dolthub://org/beads, gs://bucket/beads, s3://bucket/beads, az://account.blob.core.windows.net/container/beads - v.SetDefault("federation.sovereignty", "") // T1 | T2 | T3 | T4 (empty = no restriction) + v.SetDefault("federation.remote", "") // e.g., dolthub://org/beads, gs://bucket/beads, s3://bucket/beads, az://account.blob.core.windows.net/container/beads + v.SetDefault("federation.sovereignty", "") // T1 | T2 | T3 | T4 (empty = no restriction) + v.SetDefault("federation.allowed-remote-patterns", []string{}) // glob patterns restricting allowed remote URLs (enterprise lockdown) // Push configuration defaults v.SetDefault("no-push", false) diff --git a/internal/remotecache/cache.go b/internal/remotecache/cache.go index 11ee1fdf92..6fd032b2d6 100644 --- a/internal/remotecache/cache.go +++ b/internal/remotecache/cache.go @@ -80,6 +80,9 @@ func (c *Cache) lockPath(remoteURL string) string { // DOLT_REMOTE_USER, DOLT_REMOTE_PASSWORD, or DoltHub credentials // configured via `dolt creds`. func (c *Cache) Ensure(ctx context.Context, remoteURL string) (string, error) { + if err := ValidateRemoteURL(remoteURL); err != nil { + return "", fmt.Errorf("invalid remote URL: %w", err) + } if _, err := exec.LookPath("dolt"); err != nil { return "", fmt.Errorf("dolt CLI not found (required for remote cache): %w", err) } diff --git a/internal/remotecache/url.go b/internal/remotecache/url.go index 2317e9350c..32f39c2331 100644 --- a/internal/remotecache/url.go +++ b/internal/remotecache/url.go @@ -3,6 +3,8 @@ package remotecache import ( "crypto/sha256" "fmt" + "net/url" + "path" "regexp" "strings" ) @@ -21,8 +23,28 @@ var remoteSchemes = []string{ "git+https://", } +// allowedSchemes is the set of recognized URL schemes for validation. +var allowedSchemes = map[string]bool{ + "dolthub": true, + "gs": true, + "s3": true, + "az": true, + "file": true, + "https": true, + "http": true, + "ssh": true, + "git+ssh": true, + "git+https": true, +} + // gitSSHPattern matches SCP-style git remote URLs (user@host:path). -var gitSSHPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+@[a-zA-Z0-9][a-zA-Z0-9._-]*:.+$`) +// The path portion excludes control characters (0x00-0x1f, 0x7f). +var gitSSHPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+@[a-zA-Z0-9][a-zA-Z0-9._-]*:[^\x00-\x1f\x7f]+$`) + +// validRemoteNameRegex matches valid remote names: starts with a letter, +// contains only alphanumeric characters, hyphens, and underscores. +// Aligned with peer-name validation in credentials.go. +var validRemoteNameRegex = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9_-]*$`) // IsRemoteURL returns true if s looks like a dolt remote URL rather than // a local filesystem path. Recognized schemes: dolthub://, https://, http://, @@ -37,6 +59,169 @@ func IsRemoteURL(s string) bool { return gitSSHPattern.MatchString(s) } +// ValidateRemoteURL performs strict security validation on a remote URL. +// It rejects URLs containing control characters (including null bytes), +// validates structural correctness per scheme, and rejects leading dashes +// that could be interpreted as CLI flags. +// +// This is a security boundary — all remote URLs should pass through this +// before reaching exec.Command arguments or SQL parameters. +func ValidateRemoteURL(rawURL string) error { + if rawURL == "" { + return fmt.Errorf("remote URL cannot be empty") + } + + // Reject control characters (null bytes, newlines, tabs, etc.) + for i, c := range rawURL { + if c < 0x20 || c == 0x7f { + return fmt.Errorf("remote URL contains control character at position %d (0x%02x)", i, c) + } + } + + // Reject leading dash (CLI flag injection via exec.Command arguments) + if strings.HasPrefix(rawURL, "-") { + return fmt.Errorf("remote URL must not start with a dash") + } + + // SCP-style URLs (user@host:path) are validated separately + if gitSSHPattern.MatchString(rawURL) { + return validateSCPURL(rawURL) + } + + // Parse as standard URL + return validateSchemeURL(rawURL) +} + +// validateSchemeURL validates a scheme-based URL (https://, dolthub://, etc.) +func validateSchemeURL(rawURL string) error { + // net/url doesn't understand git+ssh:// etc., so we normalize first + normalizedURL := rawURL + scheme := "" + if idx := strings.Index(rawURL, "://"); idx > 0 { + scheme = rawURL[:idx] + // For net/url parsing, replace git+ssh with a parseable scheme + if strings.HasPrefix(scheme, "git+") { + normalizedURL = rawURL[len(scheme)+3:] // strip scheme:// + normalizedURL = "placeholder://" + normalizedURL + } + } + + if scheme == "" { + return fmt.Errorf("remote URL has no scheme (expected one of: %s)", strings.Join(sortedSchemes(), ", ")) + } + + if !allowedSchemes[scheme] { + return fmt.Errorf("remote URL scheme %q is not allowed (expected one of: %s)", scheme, strings.Join(sortedSchemes(), ", ")) + } + + parsed, err := url.Parse(normalizedURL) + if err != nil { + return fmt.Errorf("remote URL is malformed: %w", err) + } + + // Scheme-specific structural validation + switch scheme { + case "dolthub": + // dolthub://org/repo — requires org and repo + p := strings.TrimPrefix(parsed.Path, "/") + host := parsed.Host + combined := host + if p != "" { + combined = host + "/" + p + } + parts := strings.Split(combined, "/") + if len(parts) < 2 || parts[0] == "" || parts[1] == "" { + return fmt.Errorf("dolthub:// URL must have org/repo format (e.g., dolthub://myorg/myrepo)") + } + case "https", "http", "git+https": + if parsed.Host == "" { + return fmt.Errorf("%s:// URL must include a hostname", scheme) + } + case "ssh", "git+ssh": + if parsed.Host == "" { + return fmt.Errorf("%s:// URL must include a hostname", scheme) + } + case "s3", "gs": + // s3://bucket/path, gs://bucket/path — host is the bucket + if parsed.Host == "" { + return fmt.Errorf("%s:// URL must include a bucket name", scheme) + } + case "az": + // az://account.blob.core.windows.net/container/path + if parsed.Host == "" { + return fmt.Errorf("az:// URL must include a storage account hostname") + } + case "file": + // file:// is allowed with any path + } + + return nil +} + +// validateSCPURL validates an SCP-style URL (user@host:path) +func validateSCPURL(rawURL string) error { + // Already matched gitSSHPattern, so structure is valid. + // Extract host and verify no control chars (already checked above). + atIdx := strings.Index(rawURL, "@") + colonIdx := strings.Index(rawURL[atIdx:], ":") + if atIdx < 0 || colonIdx < 0 { + return fmt.Errorf("SCP-style URL must be in user@host:path format") + } + return nil +} + +// ValidateRemoteName checks that a remote name is safe for use as a Dolt +// remote identifier. Names must start with a letter and contain only +// alphanumeric characters, hyphens, and underscores. Max 64 characters. +func ValidateRemoteName(name string) error { + if name == "" { + return fmt.Errorf("remote name cannot be empty") + } + if len(name) > 64 { + return fmt.Errorf("remote name too long (max 64 characters)") + } + if strings.HasPrefix(name, "-") { + return fmt.Errorf("remote name must not start with a dash") + } + if !validRemoteNameRegex.MatchString(name) { + return fmt.Errorf("remote name must start with a letter and contain only alphanumeric characters, hyphens, and underscores") + } + return nil +} + +// MatchesRemotePattern checks whether a URL matches a glob-style pattern. +// Patterns use path.Match semantics (e.g., "dolthub://myorg/*"). +func MatchesRemotePattern(rawURL, pattern string) bool { + matched, err := path.Match(pattern, rawURL) + if err != nil { + return false + } + return matched +} + +// ValidateRemoteURLWithPatterns validates a URL and optionally checks it +// against an allowlist of glob patterns. If patterns is empty, only +// structural validation is performed. +func ValidateRemoteURLWithPatterns(rawURL string, patterns []string) error { + if err := ValidateRemoteURL(rawURL); err != nil { + return err + } + if len(patterns) == 0 { + return nil + } + for _, p := range patterns { + if MatchesRemotePattern(rawURL, p) { + return nil + } + } + return fmt.Errorf("remote URL %q does not match any allowed pattern", rawURL) +} + +func sortedSchemes() []string { + // Return in a consistent display order + return []string{"dolthub", "https", "http", "ssh", "git+ssh", "git+https", "s3", "gs", "az", "file"} +} + // CacheKey returns a filesystem-safe identifier for a remote URL. // It uses the first 16 hex characters (64 bits) of the SHA-256 hash. // Birthday-bound collision risk is negligible for a local cache: 50% at diff --git a/internal/remotecache/url_test.go b/internal/remotecache/url_test.go index d3d805480d..28ece2f40e 100644 --- a/internal/remotecache/url_test.go +++ b/internal/remotecache/url_test.go @@ -1,6 +1,7 @@ package remotecache import ( + "strings" "testing" ) @@ -45,6 +46,193 @@ func TestIsRemoteURL(t *testing.T) { } } +func TestValidateRemoteURL(t *testing.T) { + tests := []struct { + name string + url string + wantErr bool + errMsg string // substring expected in error message + }{ + // === Valid URLs (should pass) === + {"dolthub basic", "dolthub://org/repo", false, ""}, + {"dolthub with dash", "dolthub://my-org/my-repo", false, ""}, + {"https dolthub", "https://doltremoteapi.dolthub.com/org/repo", false, ""}, + {"http localhost", "http://localhost:50051/mydb", false, ""}, + {"s3 bucket", "s3://my-bucket/beads", false, ""}, + {"gs bucket", "gs://my-bucket/beads", false, ""}, + {"az storage", "az://account.blob.core.windows.net/container/beads", false, ""}, + {"file URL", "file:///data/dolt-remote", false, ""}, + {"ssh URL", "ssh://git@github.com/org/repo", false, ""}, + {"git+ssh URL", "git+ssh://git@github.com/org/repo", false, ""}, + {"git+https URL", "git+https://github.com/org/repo", false, ""}, + {"SCP-style git", "git@github.com:org/repo.git", false, ""}, + {"SCP-style deploy", "deploy@myserver.com:beads/data", false, ""}, + {"https with port", "https://example.com:8443/repo", false, ""}, + {"https with path", "https://github.com/user/repo/path", false, ""}, + + // === Empty / missing === + {"empty string", "", true, "cannot be empty"}, + + // === Control character injection === + {"null byte", "dolthub://org/repo\x00malicious", true, "control character"}, + {"null in middle", "dolthub://org\x00/repo", true, "control character"}, + {"newline injection", "dolthub://org/repo\nmalicious", true, "control character"}, + {"carriage return", "dolthub://org/repo\rmalicious", true, "control character"}, + {"tab character", "dolthub://org/repo\tmalicious", true, "control character"}, + {"bell character", "dolthub://org/repo\x07", true, "control character"}, + {"escape character", "dolthub://org/repo\x1b[31m", true, "control character"}, + {"DEL character", "dolthub://org/repo\x7f", true, "control character"}, + + // === CLI flag injection === + {"leading dash", "-origin", true, "must not start with a dash"}, + {"double dash", "--force", true, "must not start with a dash"}, + {"dash flag URL", "-https://evil.com", true, "must not start with a dash"}, + + // === Invalid schemes === + {"ftp scheme", "ftp://server/path", true, "not allowed"}, + {"javascript scheme", "javascript://alert(1)", true, "not allowed"}, + {"data scheme", "data:text/html,