@@ -40,6 +40,7 @@ pub fn create_safe_tools(
4040 config : & Config ,
4141 memory : Option < Arc < MemoryManager > > ,
4242) -> Result < Vec < Box < dyn Tool > > > {
43+ use super :: hardcoded_filters;
4344 use super :: tool_filters:: CompiledToolFilter ;
4445
4546 let workspace = config. workspace_path ( ) ;
@@ -51,22 +52,27 @@ pub fn create_safe_tools(
5152 Box :: new ( MemorySearchTool :: new ( workspace. clone ( ) ) )
5253 } ;
5354
54- // Compile web_fetch filter from user config (SSRF protection is handled
55- // by validate_web_fetch_url() which does DNS-resolution-based IP checking)
55+ // Compile web_fetch filter from user config and merge small hardcoded
56+ // fail-fast deny rules (authoritative SSRF protection is still handled by
57+ // validate_web_fetch_url() with host parsing + DNS/IP checks).
5658 let web_fetch_filter = config
5759 . tools
5860 . filters
5961 . get ( "web_fetch" )
6062 . map ( CompiledToolFilter :: compile)
61- . unwrap_or_else ( || Ok ( CompiledToolFilter :: permissive ( ) ) ) ?;
63+ . unwrap_or_else ( || Ok ( CompiledToolFilter :: permissive ( ) ) ) ?
64+ . merge_hardcoded (
65+ hardcoded_filters:: WEB_FETCH_DENY_SUBSTRINGS ,
66+ hardcoded_filters:: WEB_FETCH_DENY_PATTERNS ,
67+ ) ?;
6268
6369 let mut tools: Vec < Box < dyn Tool > > = vec ! [
6470 memory_search_tool,
6571 Box :: new( MemoryGetTool :: new( workspace) ) ,
6672 Box :: new( WebFetchTool :: new(
6773 config. tools. web_fetch_max_bytes,
6874 web_fetch_filter,
69- ) ) ,
75+ ) ? ) ,
7076 ] ;
7177
7278 // Conditionally add web search tool
@@ -449,6 +455,29 @@ async fn validate_web_fetch_url(url: &str) -> Result<reqwest::Url> {
449455 Ok ( parsed)
450456}
451457
458+ const MAX_WEB_FETCH_REDIRECTS : usize = 10 ;
459+
460+ fn should_follow_redirect ( status : reqwest:: StatusCode ) -> bool {
461+ matches ! (
462+ status,
463+ reqwest:: StatusCode :: MOVED_PERMANENTLY
464+ | reqwest:: StatusCode :: FOUND
465+ | reqwest:: StatusCode :: SEE_OTHER
466+ | reqwest:: StatusCode :: TEMPORARY_REDIRECT
467+ | reqwest:: StatusCode :: PERMANENT_REDIRECT
468+ )
469+ }
470+
471+ async fn resolve_and_validate_redirect_target (
472+ current : & reqwest:: Url ,
473+ location : & str ,
474+ ) -> Result < reqwest:: Url > {
475+ let candidate = current
476+ . join ( location)
477+ . map_err ( |e| anyhow:: anyhow!( "Invalid redirect target '{}': {}" , location, e) ) ?;
478+ validate_web_fetch_url ( candidate. as_str ( ) ) . await
479+ }
480+
452481fn extract_fallback_text ( html : & str ) -> String {
453482 static SCRIPT_RE : Lazy < Regex > =
454483 Lazy :: new ( || Regex :: new ( r"(?is)<script[^>]*>.*?</script>" ) . expect ( "valid script regex" ) ) ;
@@ -495,12 +524,65 @@ pub struct WebFetchTool {
495524}
496525
497526impl WebFetchTool {
498- pub fn new ( max_bytes : usize , filter : super :: tool_filters:: CompiledToolFilter ) -> Self {
499- Self {
500- client : reqwest:: Client :: new ( ) ,
527+ pub fn new ( max_bytes : usize , filter : super :: tool_filters:: CompiledToolFilter ) -> Result < Self > {
528+ let client = reqwest:: Client :: builder ( )
529+ . redirect ( reqwest:: redirect:: Policy :: none ( ) )
530+ . build ( ) ?;
531+
532+ Ok ( Self {
533+ client,
501534 max_bytes,
502535 filter,
536+ } )
537+ }
538+
539+ async fn fetch_with_validated_redirects (
540+ & self ,
541+ mut current_url : reqwest:: Url ,
542+ ) -> Result < ( reqwest:: Response , reqwest:: Url ) > {
543+ for redirect_count in 0 ..=MAX_WEB_FETCH_REDIRECTS {
544+ let response = self
545+ . client
546+ . get ( current_url. clone ( ) )
547+ . header ( "User-Agent" , "LocalGPT/0.1" )
548+ . send ( )
549+ . await ?;
550+
551+ if !should_follow_redirect ( response. status ( ) ) {
552+ return Ok ( ( response, current_url) ) ;
553+ }
554+
555+ if redirect_count == MAX_WEB_FETCH_REDIRECTS {
556+ anyhow:: bail!(
557+ "Too many redirects (>{}) while fetching {}" ,
558+ MAX_WEB_FETCH_REDIRECTS ,
559+ current_url
560+ ) ;
561+ }
562+
563+ let location = response
564+ . headers ( )
565+ . get ( reqwest:: header:: LOCATION )
566+ . ok_or_else ( || {
567+ anyhow:: anyhow!(
568+ "Redirect response {} missing Location header" ,
569+ response. status( )
570+ )
571+ } ) ?
572+ . to_str ( )
573+ . map_err ( |_| anyhow:: anyhow!( "Redirect Location header is not valid UTF-8" ) ) ?;
574+
575+ let next_url = resolve_and_validate_redirect_target ( & current_url, location) . await ?;
576+ debug ! (
577+ "Following redirect {}: {} -> {}" ,
578+ redirect_count + 1 ,
579+ current_url,
580+ next_url
581+ ) ;
582+ current_url = next_url;
503583 }
584+
585+ unreachable ! ( "redirect loop should return or bail" )
504586 }
505587}
506588
@@ -539,12 +621,7 @@ impl Tool for WebFetchTool {
539621 let parsed_url = validate_web_fetch_url ( url) . await ?;
540622 debug ! ( "Fetching URL: {}" , parsed_url) ;
541623
542- let response = self
543- . client
544- . get ( parsed_url. clone ( ) )
545- . header ( "User-Agent" , "LocalGPT/0.1" )
546- . send ( )
547- . await ?;
624+ let ( response, final_url) = self . fetch_with_validated_redirects ( parsed_url) . await ?;
548625
549626 let status = response. status ( ) ;
550627 let content_type = response
@@ -556,7 +633,7 @@ impl Tool for WebFetchTool {
556633 let body = response. text ( ) . await ?;
557634 let extracted =
558635 if content_type. contains ( "text/html" ) || content_type. contains ( "application/xhtml" ) {
559- extract_readable_text ( & body, & parsed_url )
636+ extract_readable_text ( & body, & final_url )
560637 } else {
561638 body
562639 } ;
@@ -575,7 +652,7 @@ impl Tool for WebFetchTool {
575652
576653 Ok ( format ! (
577654 "Status: {}\n URL: {}\n Content-Type: {}\n \n {}" ,
578- status, parsed_url , content_type, truncated
655+ status, final_url , content_type, truncated
579656 ) )
580657 }
581658}
@@ -655,4 +732,31 @@ mod tests {
655732 assert ! ( text. contains( "Hello world" ) ) ;
656733 assert ! ( !text. contains( "alert(1)" ) ) ;
657734 }
735+
736+ #[ tokio:: test]
737+ async fn test_redirect_target_validation_blocks_private_ip ( ) {
738+ let current = reqwest:: Url :: parse ( "https://93.184.216.34/start" ) . unwrap ( ) ;
739+ let err = resolve_and_validate_redirect_target ( & current, "http://127.0.0.1/admin" ) . await ;
740+ assert ! ( err. is_err( ) ) ;
741+ let msg = err. unwrap_err ( ) . to_string ( ) ;
742+ assert ! ( msg. contains( "private IP" ) ) ;
743+ }
744+
745+ #[ tokio:: test]
746+ async fn test_redirect_target_validation_allows_relative_public_ip_target ( ) {
747+ let current = reqwest:: Url :: parse ( "https://93.184.216.34/start" ) . unwrap ( ) ;
748+ let next = resolve_and_validate_redirect_target ( & current, "/next" )
749+ . await
750+ . unwrap ( ) ;
751+ assert_eq ! ( next. as_str( ) , "https://93.184.216.34/next" ) ;
752+ }
753+
754+ #[ tokio:: test]
755+ async fn test_redirect_target_validation_blocks_non_http_scheme ( ) {
756+ let current = reqwest:: Url :: parse ( "https://93.184.216.34/start" ) . unwrap ( ) ;
757+ let err = resolve_and_validate_redirect_target ( & current, "file:///etc/passwd" ) . await ;
758+ assert ! ( err. is_err( ) ) ;
759+ let msg = err. unwrap_err ( ) . to_string ( ) ;
760+ assert ! ( msg. contains( "Only http/https" ) ) ;
761+ }
658762}
0 commit comments