@@ -40,8 +40,11 @@ def _not_implemented(drs_uri: str, desc: str) -> NotImplementedError:
4040 if "s3" in desc .lower ():
4141 rest_of_message = """For S3 access methods, this DRS resource uses AWS S3 storage.
4242
43- Most research data repositories require AWS credentials for S3 access:
44- - Public datasets: May allow anonymous access via configured S3 file source
43+ S3 URLs are now handled through Galaxy's file source system. If you're seeing this error,
44+ it means no configured S3 file source can handle the S3 URLs returned by this DRS service.
45+
46+ Most research data repositories require specific AWS credentials for S3 access:
47+ - Public datasets: May work with anonymous S3 file source (anon: true)
4548 - Controlled access: Requires specific AWS credentials/permissions
4649 - SPARC datasets: Use "Requester Pays" model (user pays ~$0.09/GB)
4750
@@ -55,8 +58,8 @@ def _not_implemented(drs_uri: str, desc: str) -> NotImplementedError:
   secret: YOUR_AWS_SECRET_KEY
   id: s3_research_data
 
-Note: Some datasets (like SPARC) require RequestPayer='requester' parameter
-which is not currently supported by Galaxy's S3 file source.
+Galaxy includes a stock S3 file source for basic anonymous access, but it may not
+work with all S3 buckets depending on their access policies.
 """
     else:
         rest_of_message = """Currently Galaxy client only works with HTTP/HTTPS targets but extensions for
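
The `anon: true` option mentioned in the message above corresponds to s3fs's anonymous (unsigned-request) mode. As a rough illustration outside of Galaxy, with a hypothetical bucket path:

```python
import s3fs

# Anonymous, unsigned S3 access -- roughly what an `anon: true`
# file source configuration amounts to.
fs = s3fs.S3FileSystem(anon=True)

# "my-public-bucket/path/to/object" is a hypothetical example path;
# anonymous reads only work on buckets that allow public access.
with fs.open("my-public-bucket/path/to/object", "rb") as remote:
    print(remote.read(64))
```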
@@ -121,14 +124,15 @@ def _download_s3_file(s3_url: str, target_path: StrPath, headers: Optional[dict]
         response = requests.get(s3_url, headers=headers or {}, timeout=DEFAULT_SOCKET_TIMEOUT, stream=True)
         response.raise_for_status()
 
-        with open(target_path, 'wb') as f:
+        with open(target_path, "wb") as f:
             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                 f.write(chunk)
         return
 
     # For raw S3 URLs, try s3fs with different access patterns
     log.debug(f"Using s3fs for S3 URL: {s3_url}")
     import s3fs
+
     s3_path = s3_url[5:]  # Remove 's3://' prefix
 
     # Try different S3 access methods in order of preference
@@ -142,8 +146,8 @@ def _download_s3_file(s3_url: str, target_path: StrPath, headers: Optional[dict]
     for method_name, fs_factory in access_methods:
         try:
             fs = fs_factory()
-            with fs.open(s3_path, 'rb') as s3_file:
-                with open(target_path, 'wb') as local_file:
+            with fs.open(s3_path, "rb") as s3_file:
+                with open(target_path, "wb") as local_file:
                     while True:
                         chunk = s3_file.read(CHUNK_SIZE)
                         if not chunk:
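
The `access_methods` list this loop iterates over sits outside the hunk, so its exact contents aren't visible here. A plausible sketch of its shape, using s3fs options that exist but are only assumed to match the PR:

```python
import s3fs

# Hypothetical reconstruction of `access_methods`: (name, factory) pairs
# tried in order of preference; the real list in the PR may differ.
access_methods = [
    ("anonymous", lambda: s3fs.S3FileSystem(anon=True)),
    ("default credentials", lambda: s3fs.S3FileSystem(anon=False)),
    # Requester-pays buckets (e.g. SPARC) bill the downloader for transfer.
    ("requester pays", lambda: s3fs.S3FileSystem(anon=False, requester_pays=True)),
]
```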
@@ -385,27 +389,20 @@ def fetch_drs_to_file(
         opts.extra_props = PartialFilesSourceProperties(**extra_props)
 
         try:
-            # Handle S3 URLs directly using s3fs instead of going through file sources
-            if access_url.startswith("s3://"):
-                log.debug(f"Handling S3 URL directly: {access_url}")
-                _download_s3_file(access_url, target_path, access_headers)
-                downloaded = True
-                break
-            else:
-                file_sources = (
-                    user_context.file_sources
-                    if user_context
-                    else ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
-                )
-                stream_url_to_file(
-                    access_url,
-                    target_path=str(target_path),
-                    file_sources=file_sources,
-                    user_context=user_context,
-                    file_source_opts=opts,
-                )
-                downloaded = True
-                break
+            file_sources = (
+                user_context.file_sources
+                if user_context
+                else ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
+            )
+            stream_url_to_file(
+                access_url,
+                target_path=str(target_path),
+                file_sources=file_sources,
+                user_context=user_context,
+                file_source_opts=opts,
+            )
+            downloaded = True
+            break
         except exceptions.RequestParameterInvalidException as e:
             log.debug(f"Failed to fetch via {access_method['type']} access method: {e}")
             continue
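
Condensed into a standalone sketch, the unified code path now looks like this; the access URL is a hypothetical placeholder and the import locations are assumed from the module's existing usage:

```python
from galaxy.files import ConfiguredFileSources
from galaxy.files.uris import stream_url_to_file

# Hypothetical access URL; real values come from the DRS access-method
# negotiation earlier in fetch_drs_to_file.
access_url = "https://example.org/data/object.bam"

# Fall back to stock plugins when no user-supplied file sources exist.
file_sources = ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
stream_url_to_file(access_url, target_path="object.bam", file_sources=file_sources)
```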