1+ import json
2+ import logging
13import time
2- from typing import Optional
4+ from typing import (
5+ Dict ,
6+ List ,
7+ Optional ,
8+ Tuple ,
9+ )
10+ from urllib .parse import quote
311
412from galaxy import exceptions
513from galaxy .files import (
1826from galaxy .util .config_parsers import IpAllowedListEntryT
1927from galaxy .util .path import StrPath
2028
29+ log = logging .getLogger (__name__ )
30+
2131
2232def _not_implemented (drs_uri : str , desc : str ) -> NotImplementedError :
2333 missing_client_func = f"Galaxy client cannot currently fetch URIs { desc } ."
@@ -72,6 +82,134 @@ def _get_access_info(obj_url: str, access_method: dict, headers=None) -> tuple[s
7282 return url , headers_as_dict
7383
7484
class CompactIdentifierResolver:
    """Resolve DRS compact-identifier prefixes to URL patterns via identifiers.org.

    The class is a process-wide singleton: every instantiation returns the same
    object, so the in-memory cache of resolved prefixes is shared by all callers.
    Only successful resolutions are cached; failed lookups are retried on the
    next call.
    """

    _instance: Optional["CompactIdentifierResolver"] = None

    def __new__(cls, *args, **kwargs):
        # Create the shared instance on first use and hand it back afterwards.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, cache_ttl: int = 86400):
        """Set up the cache the first time the singleton is constructed.

        ``__init__`` runs on every ``CompactIdentifierResolver()`` call, so the
        ``hasattr`` guard keeps later calls from wiping the cache. As a
        consequence, a ``cache_ttl`` passed after the first construction is
        ignored.

        :param cache_ttl: seconds a resolved prefix stays valid in the cache.
        """
        if not hasattr(self, "_cache"):
            self._cache: Dict[str, Dict] = {}
            self._cache_ttl = cache_ttl

    @classmethod
    def _reset_singleton(cls):
        """Reset the singleton instance - for testing only."""
        cls._instance = None

    def _is_cached(self, prefix: str) -> bool:
        """Return True if ``prefix`` has a cache entry younger than the TTL."""
        entry = self._cache.get(prefix)
        if entry is None:
            return False
        cached_time = entry.get("timestamp", 0)
        return (time.time() - cached_time) < self._cache_ttl

    def _cache_result(self, prefix: str, url_pattern: str):
        """Store ``url_pattern`` for ``prefix`` together with the current time."""
        self._cache[prefix] = {"url_pattern": url_pattern, "timestamp": time.time()}

    def _query_identifiers_org(self, prefix: str) -> Optional[str]:
        """Look up the URL pattern registered for ``prefix`` at identifiers.org.

        Two requests are made: one to find the namespace for the prefix, and one
        to list that namespace's resources. The first resource flagged
        ``official`` that carries a ``urlPattern`` wins; otherwise the first
        resource with a ``urlPattern`` is used.

        :returns: the URL pattern, or ``None`` if the lookup fails or the
            registry response does not contain a usable resource. Failures are
            logged as warnings rather than raised.
        """
        try:
            # Percent-encode the prefix: this method is reachable with arbitrary
            # strings through the public resolve_prefix(), and characters such
            # as "&" or "#" would otherwise alter the query string.
            namespace_url = (
                "https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix"
                f"?prefix={quote(prefix, safe='')}"
            )
            response = requests.get(namespace_url, timeout=DEFAULT_SOCKET_TIMEOUT)
            response.raise_for_status()

            namespace_data = response.json()
            if not namespace_data or "_links" not in namespace_data:
                return None
            if "resources" not in namespace_data["_links"]:
                return None
            resources_url = namespace_data["_links"]["resources"]["href"]

            response = requests.get(resources_url, timeout=DEFAULT_SOCKET_TIMEOUT)
            response.raise_for_status()

            resources = response.json()
            if "_embedded" in resources and "resources" in resources["_embedded"]:
                candidates = [r for r in resources["_embedded"]["resources"] if "urlPattern" in r]
                official = next((r for r in candidates if r.get("official", False)), None)
                best_resource = official or (candidates[0] if candidates else None)
                if best_resource:
                    return best_resource["urlPattern"]

        except requests.exceptions.RequestException as e:
            log.warning("Failed to query identifiers.org for prefix %s: %s", prefix, e)
        except (KeyError, TypeError, AttributeError, json.JSONDecodeError) as e:
            # TypeError/AttributeError cover well-formed JSON of an unexpected
            # shape (e.g. a list or string where a mapping is expected).
            log.warning("Invalid response from identifiers.org for prefix %s: %s", prefix, e)

        return None

    def resolve_prefix(self, prefix: str) -> Optional[str]:
        """Return the URL pattern for ``prefix``, using the cache when fresh.

        :returns: the URL pattern, or ``None`` (falsy) when identifiers.org
            could not resolve the prefix.
        """
        if self._is_cached(prefix):
            return self._cache[prefix]["url_pattern"]

        url_pattern = self._query_identifiers_org(prefix)

        if url_pattern:
            self._cache_result(prefix, url_pattern)
            log.info("Resolved DRS prefix '%s' to URL pattern: %s", prefix, url_pattern)
        else:
            log.warning("Could not resolve DRS prefix '%s' via identifiers.org", prefix)

        return url_pattern
168+
169+
def parse_compact_identifier(drs_uri: str) -> Tuple[str, str]:
    """Split a compact-identifier DRS URI into ``(prefix, accession)``.

    The expected form is ``drs://<prefix>:<accession>``. The split happens at
    the first colon, so the accession itself may contain further colons.

    :param drs_uri: the full DRS URI, including the ``drs://`` scheme.
    :returns: a ``(prefix, accession)`` tuple; the accession is returned as
        given (not URL-encoded).
    :raises ValueError: if the scheme is not ``drs://``, the colon is missing,
        the prefix contains characters other than ASCII lowercase letters,
        ASCII digits, ``.`` and ``_``, or either part is empty.
    """
    if not drs_uri.startswith("drs://"):
        raise ValueError(f"Not a valid DRS URI: {drs_uri}")

    rest_of_uri = drs_uri[len("drs://") :]

    prefix, colon, accession = rest_of_uri.partition(":")
    if not colon:
        raise ValueError(f"Invalid compact identifier format (missing colon): {drs_uri}")

    # str.islower()/str.isdigit() alone also accept non-ASCII characters such as
    # "é" or "²"; require ASCII so only [a-z0-9._] passes.
    if not all((c.isascii() and (c.islower() or c.isdigit())) or c in "._" for c in prefix):
        raise ValueError(
            f"Invalid prefix format '{prefix}': must contain only lowercase letters, numbers, dots, and underscores"
        )

    if not prefix or not accession:
        raise ValueError(f"Empty prefix or accession in compact identifier: {drs_uri}")

    return prefix, accession
192+
193+
def resolve_compact_identifier_to_url(drs_uri: str, resolver: Optional[CompactIdentifierResolver] = None) -> str:
    """Turn a compact-identifier DRS URI into a concrete HTTP(S) URL.

    The prefix is resolved to a URL pattern through ``resolver`` and the
    pattern's ``{$id}`` placeholder is replaced with the percent-encoded
    accession.

    :param drs_uri: DRS URI of the form ``drs://<prefix>:<accession>``.
    :param resolver: resolver to use; defaults to the shared
        ``CompactIdentifierResolver`` instance.
    :returns: the resolved URL.
    :raises ValueError: if the URI cannot be parsed, the prefix cannot be
        resolved, the pattern has no ``{$id}`` placeholder, or the result is
        not an ``http://`` / ``https://`` URL.
    """
    prefix, accession = parse_compact_identifier(drs_uri)

    if resolver is None:
        resolver = CompactIdentifierResolver()

    url_pattern = resolver.resolve_prefix(prefix)
    if not url_pattern:
        raise ValueError(f"Could not resolve prefix '{prefix}' via identifiers.org")

    placeholder = "{$id}"
    # Without the placeholder, str.replace would silently return a URL that
    # does not contain the accession at all; fail loudly instead.
    if placeholder not in url_pattern:
        raise ValueError(f"URL pattern for prefix '{prefix}' has no '{placeholder}' placeholder: {url_pattern}")

    # safe="" also encodes "/" so the accession stays a single URL component.
    encoded_accession = quote(accession, safe="")
    resolved_url = url_pattern.replace(placeholder, encoded_accession)

    if not resolved_url.startswith(("http://", "https://")):
        raise ValueError(f"Resolved URL is not HTTP(S): {resolved_url}")

    return resolved_url
211+
212+
75213def fetch_drs_to_file (
76214 drs_uri : str ,
77215 target_path : StrPath ,
@@ -84,16 +222,23 @@ def fetch_drs_to_file(
84222 """Fetch contents of drs:// URI to a target path."""
85223 if not drs_uri .startswith ("drs://" ):
86224 raise ValueError (f"Unknown scheme for drs_uri { drs_uri } " )
225+
87226 rest_of_drs_uri = drs_uri [len ("drs://" ) :]
88- if "/" not in rest_of_drs_uri :
89- # DRS URI uses compact identifiers, not yet implemented.
90- # https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.2.0/docs/more-background-on-compact-identifiers.html
91- raise _not_implemented (drs_uri , "that use compact identifiers" )
92- netspec , object_id = rest_of_drs_uri .split ("/" , 1 )
93- scheme = "https"
94- if force_http :
95- scheme = "http"
96- get_url = f"{ scheme } ://{ netspec } /ga4gh/drs/v1/objects/{ object_id } "
227+
228+ if "/" not in rest_of_drs_uri and ":" in rest_of_drs_uri :
229+ try :
230+ get_url = resolve_compact_identifier_to_url (drs_uri )
231+ log .info (f"Resolved compact identifier DRS URI { drs_uri } to { get_url } " )
232+ except ValueError as e :
233+ raise ValueError (f"Failed to resolve compact identifier DRS URI { drs_uri } : { str (e )} " )
234+ elif "/" in rest_of_drs_uri :
235+ netspec , object_id = rest_of_drs_uri .split ("/" , 1 )
236+ scheme = "https"
237+ if force_http :
238+ scheme = "http"
239+ get_url = f"{ scheme } ://{ netspec } /ga4gh/drs/v1/objects/{ object_id } "
240+ else :
241+ raise ValueError (f"Invalid DRS URI format: { drs_uri } " )
97242 response = retry_and_get (get_url , retry_options or RetryOptions (), headers = headers )
98243 response .raise_for_status ()
99244 response_object = response .json ()
0 commit comments