1+ import json
2+ import logging
13import time
24from typing import (
5+ Dict ,
36 List ,
47 Optional ,
58 Tuple ,
69)
10+ from urllib .parse import quote
711
812from galaxy import exceptions
913from galaxy .files import (
2024from galaxy .util .config_parsers import IpAllowedListEntryT
2125from galaxy .util .path import StrPath
2226
27+ log = logging .getLogger (__name__ )
28+
2329
2430def _not_implemented (drs_uri : str , desc : str ) -> NotImplementedError :
2531 missing_client_func = f"Galaxy client cannot currently fetch URIs { desc } ."
@@ -74,6 +80,134 @@ def _get_access_info(obj_url: str, access_method: dict, headers=None) -> Tuple[s
7480 return url , headers_as_dict
7581
7682
class CompactIdentifierResolver:
    """Resolve compact-identifier prefixes to URL patterns via identifiers.org.

    Successful lookups are kept in an in-memory cache for ``cache_ttl``
    seconds. Failed lookups are not cached, so they are retried on the next
    call.

    The class is a singleton: every construction returns the same instance.
    State is initialised by the first construction only, so a ``cache_ttl``
    passed to any later construction is ignored.
    """

    _instance: Optional["CompactIdentifierResolver"] = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, cache_ttl: int = 86400):
        # __init__ runs on every construction of the singleton; only set up
        # state the first time so an existing cache is never wiped.
        if not hasattr(self, "_cache"):
            self._cache: Dict[str, Dict] = {}
            self._cache_ttl = cache_ttl

    @classmethod
    def _reset_singleton(cls):
        """Reset the singleton instance - for testing only."""
        cls._instance = None

    def _is_cached(self, prefix: str) -> bool:
        """Return True if ``prefix`` has a cache entry younger than the TTL."""
        entry = self._cache.get(prefix)
        if entry is None:
            return False
        return (time.time() - entry.get("timestamp", 0)) < self._cache_ttl

    def _cache_result(self, prefix: str, url_pattern: str):
        """Store ``url_pattern`` for ``prefix`` stamped with the current time."""
        self._cache[prefix] = {"url_pattern": url_pattern, "timestamp": time.time()}

    def _query_identifiers_org(self, prefix: str) -> Optional[str]:
        """Look up the URL pattern registered for ``prefix`` at identifiers.org.

        Two requests are made: one to find the namespace for the prefix and
        one to list that namespace's resources. The first resource flagged
        ``official`` that carries a ``urlPattern`` wins; otherwise the first
        resource with a ``urlPattern`` is used.

        :returns: the URL pattern, or ``None`` if the prefix is unknown, the
            request fails, or the response does not have the expected shape.
        """
        try:
            # Percent-encode the prefix so arbitrary input cannot alter the
            # query string (extra parameters, fragments, ...).
            encoded_prefix = quote(prefix, safe="")
            namespace_url = (
                f"https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix={encoded_prefix}"
            )
            response = requests.get(namespace_url, timeout=DEFAULT_SOCKET_TIMEOUT)
            response.raise_for_status()

            namespace_data = response.json()
            if not namespace_data or "_links" not in namespace_data:
                return None
            if "resources" not in namespace_data["_links"]:
                return None
            resources_url = namespace_data["_links"]["resources"]["href"]

            response = requests.get(resources_url, timeout=DEFAULT_SOCKET_TIMEOUT)
            response.raise_for_status()

            resources = response.json()
            if "_embedded" in resources and "resources" in resources["_embedded"]:
                candidates = [r for r in resources["_embedded"]["resources"] if "urlPattern" in r]
                official = next((r for r in candidates if r.get("official", False)), None)
                best_resource = official or (candidates[0] if candidates else None)
                if best_resource:
                    return best_resource["urlPattern"]

        except requests.exceptions.RequestException as e:
            log.warning("Failed to query identifiers.org for prefix %s: %s", prefix, e)
        except (KeyError, TypeError, AttributeError, json.JSONDecodeError) as e:
            # TypeError/AttributeError cover JSON that parsed but is not the
            # nested-dict structure expected (e.g. a list or a scalar).
            log.warning("Invalid response from identifiers.org for prefix %s: %s", prefix, e)

        return None

    def resolve_prefix(self, prefix: str) -> Optional[str]:
        """Return the URL pattern for ``prefix``, using the cache when fresh.

        :returns: the URL pattern, or ``None`` when the prefix cannot be
            resolved.
        """
        if self._is_cached(prefix):
            return self._cache[prefix]["url_pattern"]

        url_pattern = self._query_identifiers_org(prefix)

        if url_pattern:
            self._cache_result(prefix, url_pattern)
            log.info("Resolved DRS prefix '%s' to URL pattern: %s", prefix, url_pattern)
        else:
            log.warning("Could not resolve DRS prefix '%s' via identifiers.org", prefix)

        return url_pattern
167+
def parse_compact_identifier(drs_uri: str) -> Tuple[str, str]:
    """Split a compact-identifier DRS URI into ``(prefix, accession)``.

    The expected form is ``drs://<prefix>:<accession>``. The URI is split on
    the first colon after the scheme, so the accession may itself contain
    colons.

    :param drs_uri: the DRS URI to parse.
    :returns: a ``(prefix, accession)`` tuple.
    :raises ValueError: if the URI does not start with ``drs://``, has no
        colon after the scheme, has an empty prefix or accession, or has a
        prefix containing anything other than ASCII lowercase letters,
        digits, dots and underscores.
    """
    scheme = "drs://"
    if not drs_uri.startswith(scheme):
        raise ValueError(f"Not a valid DRS URI: {drs_uri}")

    prefix, separator, accession = drs_uri[len(scheme) :].partition(":")
    if not separator:
        raise ValueError(f"Invalid compact identifier format (missing colon): {drs_uri}")

    # Use an explicit ASCII allow-list: str.islower()/str.isdigit() are
    # Unicode-aware and would accept characters such as "é" or "²".
    allowed_chars = frozenset("abcdefghijklmnopqrstuvwxyz0123456789._")
    if not allowed_chars.issuperset(prefix):
        raise ValueError(
            f"Invalid prefix format '{prefix}': must contain only lowercase letters, numbers, dots, and underscores"
        )

    if not prefix or not accession:
        raise ValueError(f"Empty prefix or accession in compact identifier: {drs_uri}")

    return prefix, accession
190+
191+
def resolve_compact_identifier_to_url(drs_uri: str, resolver: Optional[CompactIdentifierResolver] = None) -> str:
    """Turn a compact-identifier DRS URI into a concrete HTTP(S) URL.

    The prefix is resolved to a URL pattern and the percent-encoded accession
    is substituted for the pattern's ``{$id}`` placeholder.

    :param drs_uri: a URI of the form ``drs://<prefix>:<accession>``.
    :param resolver: resolver used to look up the prefix; when ``None`` a
        ``CompactIdentifierResolver`` is constructed.
    :returns: the resolved URL.
    :raises ValueError: if the URI cannot be parsed, the prefix cannot be
        resolved, the pattern has no ``{$id}`` placeholder, or the result is
        not an ``http://`` or ``https://`` URL.
    """
    prefix, accession = parse_compact_identifier(drs_uri)

    if resolver is None:
        resolver = CompactIdentifierResolver()

    url_pattern = resolver.resolve_prefix(prefix)
    if not url_pattern:
        raise ValueError(f"Could not resolve prefix '{prefix}' via identifiers.org")

    placeholder = "{$id}"
    if placeholder not in url_pattern:
        # Without the placeholder str.replace is a no-op and the accession
        # would be dropped silently, yielding a URL for the wrong resource.
        raise ValueError(f"URL pattern for prefix '{prefix}' has no {placeholder} placeholder: {url_pattern}")

    # safe="" also encodes "/" so the accession stays a single path segment.
    encoded_accession = quote(accession, safe="")
    resolved_url = url_pattern.replace(placeholder, encoded_accession)

    if not resolved_url.startswith(("http://", "https://")):
        raise ValueError(f"Resolved URL is not HTTP(S): {resolved_url}")

    return resolved_url
209+
210+
77211def fetch_drs_to_file (
78212 drs_uri : str ,
79213 target_path : StrPath ,
@@ -86,16 +220,23 @@ def fetch_drs_to_file(
86220 """Fetch contents of drs:// URI to a target path."""
87221 if not drs_uri .startswith ("drs://" ):
88222 raise ValueError (f"Unknown scheme for drs_uri { drs_uri } " )
223+
89224 rest_of_drs_uri = drs_uri [len ("drs://" ) :]
90- if "/" not in rest_of_drs_uri :
91- # DRS URI uses compact identifiers, not yet implemented.
92- # https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.2.0/docs/more-background-on-compact-identifiers.html
93- raise _not_implemented (drs_uri , "that use compact identifiers" )
94- netspec , object_id = rest_of_drs_uri .split ("/" , 1 )
95- scheme = "https"
96- if force_http :
97- scheme = "http"
98- get_url = f"{ scheme } ://{ netspec } /ga4gh/drs/v1/objects/{ object_id } "
225+
226+ if "/" not in rest_of_drs_uri and ":" in rest_of_drs_uri :
227+ try :
228+ get_url = resolve_compact_identifier_to_url (drs_uri )
229+ log .info (f"Resolved compact identifier DRS URI { drs_uri } to { get_url } " )
230+ except ValueError as e :
231+ raise ValueError (f"Failed to resolve compact identifier DRS URI { drs_uri } : { str (e )} " )
232+ elif "/" in rest_of_drs_uri :
233+ netspec , object_id = rest_of_drs_uri .split ("/" , 1 )
234+ scheme = "https"
235+ if force_http :
236+ scheme = "http"
237+ get_url = f"{ scheme } ://{ netspec } /ga4gh/drs/v1/objects/{ object_id } "
238+ else :
239+ raise ValueError (f"Invalid DRS URI format: { drs_uri } " )
99240 response = retry_and_get (get_url , retry_options or RetryOptions (), headers = headers )
100241 response .raise_for_status ()
101242 response_object = response .json ()
0 commit comments