diff --git a/Cargo.lock b/Cargo.lock index 29dc40ff85f0..b42922fdcb4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2590,6 +2590,7 @@ dependencies = [ "tonic 0.14.2", "tracing", "typetag", + "url", "uuid", ] diff --git a/src/cli/src/common/store.rs b/src/cli/src/common/store.rs index 373c96a37a44..faa09d94a711 100644 --- a/src/cli/src/common/store.rs +++ b/src/cli/src/common/store.rs @@ -19,6 +19,7 @@ use common_error::ext::BoxedError; use common_meta::kv_backend::KvBackendRef; use common_meta::kv_backend::chroot::ChrootKvBackend; use common_meta::kv_backend::etcd::EtcdStore; +use common_meta::kv_backend::util::sanitize_connection_string; use meta_srv::metasrv::BackendClientOptions; use meta_srv::utils::etcd::create_etcd_client_with_tls; use serde::{Deserialize, Serialize}; @@ -134,7 +135,7 @@ impl StoreConfig { } else { common_telemetry::info!( "Building kvbackend with store addrs: {:?}, backend: {:?}", - store_addrs, + sanitize_store_addrs(store_addrs), self.backend ); let kvbackend = match self.backend { @@ -224,3 +225,35 @@ impl StoreConfig { } } } + +fn sanitize_store_addrs(store_addrs: &[String]) -> Vec { + store_addrs + .iter() + .map(|addr| sanitize_connection_string(addr)) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sanitize_store_addrs() { + let password = "sensitive-value"; + let store_addrs = vec![ + format!("mysql://user:{password}@localhost:3306/greptime_meta"), + format!("host=localhost port=5432 user=postgres password={password} dbname=postgres"), + ]; + + let sanitized = sanitize_store_addrs(&store_addrs); + + assert_eq!( + sanitized, + vec![ + "localhost:3306/greptime_meta".to_string(), + "host=localhost port=5432 user=postgres password=*** dbname=postgres".to_string() + ] + ); + assert!(!format!("{sanitized:?}").contains(password)); + } +} diff --git a/src/cmd/src/metasrv.rs b/src/cmd/src/metasrv.rs index bf3cb2f5e756..4d3a57bd1808 100644 --- a/src/cmd/src/metasrv.rs +++ b/src/cmd/src/metasrv.rs @@ -466,6 +466,26 @@ mod tests { assert_eq!("debug", logging_opt.level.as_ref().unwrap()); } + #[test] + fn test_start_command_debug_sanitizes_store_addrs() { + let password = "sensitive-value"; + let cmd = StartCommand { + store_addrs: Some(vec![ + format!("mysql://user:{password}@localhost:3306/greptime_meta"), + format!( + "host=localhost port=5432 user=postgres password={password} dbname=postgres" + ), + ]), + ..Default::default() + }; + + let debug = format!("{cmd:?}"); + + assert!(!debug.contains(password)); + assert!(debug.contains("localhost:3306/greptime_meta")); + assert!(debug.contains("password=***")); + } + #[test] fn test_config_precedence_order() { let mut file = create_named_temp_file(); diff --git a/src/common/meta/Cargo.toml b/src/common/meta/Cargo.toml index 5134c40a8044..2eb2ec6a71c2 100644 --- a/src/common/meta/Cargo.toml +++ b/src/common/meta/Cargo.toml @@ -92,6 +92,7 @@ tokio-postgres-rustls = { version = "0.12", optional = true } tonic.workspace = true tracing.workspace = true typetag.workspace = true +url.workspace = true [dev-dependencies] chrono.workspace = true diff --git a/src/common/meta/src/kv_backend/util.rs b/src/common/meta/src/kv_backend/util.rs index 1021d78a60b6..29aaedc9bf15 100644 --- a/src/common/meta/src/kv_backend/util.rs +++ b/src/common/meta/src/kv_backend/util.rs @@ -12,46 +12,69 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::LazyLock; + +use regex::{Captures, Regex}; +use url::Url; + +const REDACTED: &str = "***"; + +static SENSITIVE_KV_RE: LazyLock = LazyLock::new(|| { + Regex::new(r#"(?i)(^|\s)(password|pass|pwd|token|secret)\s*=\s*('[^']*'|"[^"]*"|\S+)"#).unwrap() +}); + /// Removes sensitive information like passwords from connection strings. -/// -/// This function sanitizes connection strings by removing credentials: -/// - For URL format (mysql://user:password@host:port/db): Removes everything before '@' -/// - For parameter format (host=localhost password=secret): Removes the password parameter -/// - For URL format without credentials (mysql://host:port/db): Removes the protocol prefix -/// -/// # Arguments -/// -/// * `conn_str` - The connection string to sanitize -/// -/// # Returns -/// -/// A sanitized version of the connection string with sensitive information removed pub fn sanitize_connection_string(conn_str: &str) -> String { - // Case 1: URL format with credentials (mysql://user:password@host:port/db) - // Extract everything after the '@' symbol - if let Some(at_pos) = conn_str.find('@') { - return conn_str[at_pos + 1..].to_string(); + if let Ok(url) = Url::parse(conn_str) { + return sanitize_url(url); } - // Case 2: Parameter format with password (host=localhost password=secret dbname=mydb) - // Filter out any parameter that starts with "password=" - if conn_str.contains("password=") { - return conn_str - .split_whitespace() - .filter(|param| !param.starts_with("password=")) - .collect::>() - .join(" "); + sanitize_key_value_connection_string(conn_str) +} + +fn sanitize_url(mut url: Url) -> String { + if !url.username().is_empty() { + let _ = url.set_username(""); + } + if url.password().is_some() { + let _ = url.set_password(None); } - // Case 3: URL format without credentials (mysql://host:port/db) - // Extract everything after the protocol prefix - if let Some(host_part) = conn_str.split("://").nth(1) { - return host_part.to_string(); + if url.query().is_some() { + let pairs = url + .query_pairs() + .map(|(key, value)| { + let value = if is_sensitive_key(&key) { + REDACTED.into() + } else { + value + }; + (key.into_owned(), value.into_owned()) + }) + .collect::>(); + + url.query_pairs_mut().clear().extend_pairs(pairs); } - // Case 4: Already sanitized or unknown format - // Return as is - conn_str.to_string() + url.as_str() + .split_once("://") + .map(|(_, addr)| addr.to_string()) + .unwrap_or_else(|| url.to_string()) +} + +fn sanitize_key_value_connection_string(conn_str: &str) -> String { + SENSITIVE_KV_RE + .replace_all(conn_str, |caps: &Captures<'_>| { + format!("{}{}={}", &caps[1], &caps[2], REDACTED) + }) + .into_owned() +} + +fn is_sensitive_key(key: &str) -> bool { + matches!( + key.to_ascii_lowercase().as_str(), + "password" | "pass" | "pwd" | "token" | "secret" + ) } #[cfg(test)] @@ -60,22 +83,62 @@ mod tests { #[test] fn test_sanitize_connection_string() { - // Test URL format with username/password - let conn_str = "mysql://user:password123@localhost:3306/db"; - assert_eq!(sanitize_connection_string(conn_str), "localhost:3306/db"); + let password = "sensitive-value"; + + let conn_str = format!("mysql://user:{password}@localhost:3306/db"); + assert_eq!(sanitize_connection_string(&conn_str), "localhost:3306/db"); - // Test URL format without credentials let conn_str = "mysql://localhost:3306/db"; assert_eq!(sanitize_connection_string(conn_str), "localhost:3306/db"); - // Test parameter format with password - let conn_str = "host=localhost port=5432 user=postgres password=secret dbname=mydb"; + let conn_str = + format!("host=localhost port=5432 user=postgres password={password} dbname=mydb"); assert_eq!( - sanitize_connection_string(conn_str), - "host=localhost port=5432 user=postgres dbname=mydb" + sanitize_connection_string(&conn_str), + "host=localhost port=5432 user=postgres password=*** dbname=mydb" + ); + + let conn_str = + format!("host=localhost port=5432 user=postgres PASSWORD={password} dbname=mydb"); + assert_eq!( + sanitize_connection_string(&conn_str), + "host=localhost port=5432 user=postgres PASSWORD=*** dbname=mydb" + ); + + let conn_str = + format!("host=localhost port=5432 user=postgres password = {password} dbname=mydb"); + assert_eq!( + sanitize_connection_string(&conn_str), + "host=localhost port=5432 user=postgres password=*** dbname=mydb" + ); + + let conn_str = + format!("host=localhost port=5432 password='{password} with spaces' dbname=mydb"); + assert_eq!( + sanitize_connection_string(&conn_str), + "host=localhost port=5432 password=*** dbname=mydb" + ); + + let conn_str = format!("mysql://localhost:3306/db?password={password}&ssl-mode=VERIFY_CA"); + assert_eq!( + sanitize_connection_string(&conn_str), + "localhost:3306/db?password=***&ssl-mode=VERIFY_CA" + ); + + let conn_str = + format!("mysql://localhost:3306/db?token={password}&secret={password}&user=root"); + assert_eq!( + sanitize_connection_string(&conn_str), + "localhost:3306/db?token=***&secret=***&user=root" + ); + + let conn_str = + format!("host=localhost port=5432 token={password} secret={password} dbname=mydb"); + assert_eq!( + sanitize_connection_string(&conn_str), + "host=localhost port=5432 token=*** secret=*** dbname=mydb" ); - // Test parameter format without password let conn_str = "host=localhost port=5432 user=postgres dbname=mydb"; assert_eq!( sanitize_connection_string(conn_str), diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 654457814888..8eaa71efb594 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -922,7 +922,7 @@ impl Metasrv { #[cfg(test)] mod tests { - use crate::metasrv::MetasrvNodeInfo; + use crate::metasrv::{MetasrvNodeInfo, MetasrvOptions}; #[test] fn test_deserialize_metasrv_node_info() { @@ -933,4 +933,24 @@ mod tests { assert_eq!(node_info.git_commit, "1234567890"); assert_eq!(node_info.start_time_ms, 1715145600); } + + #[test] + fn test_metasrv_options_debug_sanitizes_store_addrs() { + let password = "sensitive-value"; + let options = MetasrvOptions { + store_addrs: vec![ + format!("mysql://user:{password}@localhost:3306/greptime_meta"), + format!( + "host=localhost port=5432 user=postgres password={password} dbname=postgres" + ), + ], + ..Default::default() + }; + + let debug = format!("{options:?}"); + + assert!(!debug.contains(password)); + assert!(debug.contains("localhost:3306/greptime_meta")); + assert!(debug.contains("password=***")); + } }