Skip to content

Commit 337af4b

Browse files
committed
Add DRS compact identifier support
Support compact identifiers (e.g., drs://drs.anv0:object-id) by resolving prefixes via identifiers.org. Includes caching and tests.
1 parent ae55554 commit 337af4b

File tree

3 files changed

+415
-9
lines changed

3 files changed

+415
-9
lines changed

lib/galaxy/files/sources/util.py

Lines changed: 150 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
import json
2+
import logging
13
import time
24
from typing import (
5+
Dict,
36
List,
47
Optional,
58
Tuple,
69
)
10+
from urllib.parse import quote
711

812
from galaxy import exceptions
913
from galaxy.files import (
@@ -20,6 +24,8 @@
2024
from galaxy.util.config_parsers import IpAllowedListEntryT
2125
from galaxy.util.path import StrPath
2226

27+
log = logging.getLogger(__name__)
28+
2329

2430
def _not_implemented(drs_uri: str, desc: str) -> NotImplementedError:
2531
missing_client_func = f"Galaxy client cannot currently fetch URIs {desc}."
@@ -74,6 +80,134 @@ def _get_access_info(obj_url: str, access_method: dict, headers=None) -> Tuple[s
7480
return url, headers_as_dict
7581

7682

83+
class CompactIdentifierResolver:
    """Resolve DRS compact-identifier prefixes to URL patterns via identifiers.org.

    The class is a singleton: ``__new__`` always hands back the same instance,
    so the in-memory prefix cache is shared by every caller in the process.
    Cache entries expire ``cache_ttl`` seconds after they were stored.

    NOTE(review): the cache and TTL are only initialised the first time
    ``__init__`` runs, so a ``cache_ttl`` passed to a later constructor call
    has no effect until ``_reset_singleton`` is used.
    """

    _instance: Optional["CompactIdentifierResolver"] = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, cache_ttl: int = 86400):
        # __init__ runs on every construction of the singleton; guard so an
        # already populated cache is not thrown away.
        if not hasattr(self, "_cache"):
            self._cache: Dict[str, Dict] = {}
            self._cache_ttl = cache_ttl

    @classmethod
    def _reset_singleton(cls):
        """Reset the singleton instance - for testing only."""
        cls._instance = None

    def _is_cached(self, prefix: str) -> bool:
        """Return True if ``prefix`` has a cache entry younger than the TTL."""
        if prefix not in self._cache:
            return False
        cached_time = self._cache[prefix].get("timestamp", 0)
        return (time.time() - cached_time) < self._cache_ttl

    def _cache_result(self, prefix: str, url_pattern: str):
        """Store ``url_pattern`` for ``prefix`` together with the current time."""
        self._cache[prefix] = {"url_pattern": url_pattern, "timestamp": time.time()}

    def _query_identifiers_org(self, prefix: str) -> Optional[str]:
        """Look up the URL pattern registered for ``prefix`` at identifiers.org.

        Two requests are made: the namespace lookup, then the namespace's
        ``resources`` link. The first resource flagged ``official`` wins;
        otherwise the first resource that has a ``urlPattern`` is used.

        Returns the pattern, or None when the prefix is unknown, the request
        fails, or the response does not have the expected shape (a warning is
        logged in the latter two cases).
        """
        try:
            namespace_url = (
                f"https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix={prefix}"
            )
            response = requests.get(namespace_url, timeout=DEFAULT_SOCKET_TIMEOUT)
            response.raise_for_status()

            namespace_data = response.json()
            if not namespace_data or "_links" not in namespace_data:
                return None

            if "resources" not in namespace_data["_links"]:
                return None
            resources_url = namespace_data["_links"]["resources"]["href"]

            response = requests.get(resources_url, timeout=DEFAULT_SOCKET_TIMEOUT)
            response.raise_for_status()

            resources = response.json()
            if "_embedded" in resources and "resources" in resources["_embedded"]:
                official_resource = None
                fallback_resource = None

                for resource in resources["_embedded"]["resources"]:
                    if "urlPattern" in resource:
                        if resource.get("official", False):
                            official_resource = resource
                            break
                        elif fallback_resource is None:
                            fallback_resource = resource

                best_resource = official_resource or fallback_resource
                if best_resource:
                    return best_resource["urlPattern"]

        except requests.exceptions.RequestException as e:
            log.warning(f"Failed to query identifiers.org for prefix {prefix}: {e}")
        except (KeyError, TypeError, AttributeError, json.JSONDecodeError) as e:
            # TypeError/AttributeError cover payloads that parse as JSON but do
            # not have the expected nesting (e.g. a null "_links" or a
            # non-object resource entry); treat them like any other invalid
            # response instead of letting them escape to the caller.
            log.warning(f"Invalid response from identifiers.org for prefix {prefix}: {e}")

        return None

    def resolve_prefix(self, prefix: str) -> Optional[str]:
        """Return the URL pattern for ``prefix``, using the cache when fresh.

        Only successful lookups are cached; a failed lookup returns None and
        is retried on the next call.
        """
        if self._is_cached(prefix):
            return self._cache[prefix]["url_pattern"]

        url_pattern = self._query_identifiers_org(prefix)

        if url_pattern:
            self._cache_result(prefix, url_pattern)
            log.info(f"Resolved DRS prefix '{prefix}' to URL pattern: {url_pattern}")
        else:
            log.warning(f"Could not resolve DRS prefix '{prefix}' via identifiers.org")

        return url_pattern
166+
167+
168+
def parse_compact_identifier(drs_uri: str) -> Tuple[str, str]:
    """Split a compact-identifier DRS URI into ``(prefix, accession)``.

    The expected form is ``drs://<prefix>:<accession>``; everything after the
    first colon is the accession.

    :raises ValueError: if the URI does not start with ``drs://``, has no
        colon, has an empty prefix or accession, or the prefix contains
        anything other than ASCII lowercase letters, digits, ``.`` and ``_``.
    """
    if not drs_uri.startswith("drs://"):
        raise ValueError(f"Not a valid DRS URI: {drs_uri}")

    rest_of_uri = drs_uri[len("drs://") :]

    prefix, colon, accession = rest_of_uri.partition(":")
    if not colon:
        raise ValueError(f"Invalid compact identifier format (missing colon): {drs_uri}")

    # Use an explicit ASCII allow-list: str.islower()/str.isdigit() also accept
    # non-ASCII letters and digits, which are not valid in a prefix and would
    # be placed unescaped into the registry query URL.
    allowed_chars = frozenset("abcdefghijklmnopqrstuvwxyz0123456789._")
    if not allowed_chars.issuperset(prefix):
        raise ValueError(
            f"Invalid prefix format '{prefix}': must contain only lowercase letters, numbers, dots, and underscores"
        )

    if not prefix or not accession:
        raise ValueError(f"Empty prefix or accession in compact identifier: {drs_uri}")

    return prefix, accession
190+
191+
192+
def resolve_compact_identifier_to_url(drs_uri: str, resolver: Optional[CompactIdentifierResolver] = None) -> str:
    """Turn a compact-identifier DRS URI into the HTTP(S) URL of the DRS object.

    The prefix is resolved to a URL pattern and the percent-encoded accession
    is substituted for the pattern's ``{$id}`` placeholder.

    :param drs_uri: URI of the form ``drs://<prefix>:<accession>``.
    :param resolver: resolver to use; the shared ``CompactIdentifierResolver``
        is used when omitted.
    :raises ValueError: if the URI is malformed, the prefix cannot be
        resolved, the pattern has no ``{$id}`` placeholder, or the resulting
        URL is not HTTP(S).
    """
    prefix, accession = parse_compact_identifier(drs_uri)

    if resolver is None:
        resolver = CompactIdentifierResolver()

    url_pattern = resolver.resolve_prefix(prefix)
    if not url_pattern:
        raise ValueError(f"Could not resolve prefix '{prefix}' via identifiers.org")

    # Without the placeholder str.replace() is a no-op and the accession would
    # be silently dropped, yielding a URL unrelated to the requested object.
    if "{$id}" not in url_pattern:
        raise ValueError(f"URL pattern for prefix '{prefix}' has no '{{$id}}' placeholder: {url_pattern}")

    # safe="" so that "/" and other reserved characters in the accession are
    # encoded rather than altering the path of the resolved URL.
    encoded_accession = quote(accession, safe="")
    resolved_url = url_pattern.replace("{$id}", encoded_accession)

    if not resolved_url.startswith(("http://", "https://")):
        raise ValueError(f"Resolved URL is not HTTP(S): {resolved_url}")

    return resolved_url
209+
210+
77211
def fetch_drs_to_file(
78212
drs_uri: str,
79213
target_path: StrPath,
@@ -86,16 +220,23 @@ def fetch_drs_to_file(
86220
"""Fetch contents of drs:// URI to a target path."""
87221
if not drs_uri.startswith("drs://"):
88222
raise ValueError(f"Unknown scheme for drs_uri {drs_uri}")
223+
89224
rest_of_drs_uri = drs_uri[len("drs://") :]
90-
if "/" not in rest_of_drs_uri:
91-
# DRS URI uses compact identifiers, not yet implemented.
92-
# https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.2.0/docs/more-background-on-compact-identifiers.html
93-
raise _not_implemented(drs_uri, "that use compact identifiers")
94-
netspec, object_id = rest_of_drs_uri.split("/", 1)
95-
scheme = "https"
96-
if force_http:
97-
scheme = "http"
98-
get_url = f"{scheme}://{netspec}/ga4gh/drs/v1/objects/{object_id}"
225+
226+
if "/" not in rest_of_drs_uri and ":" in rest_of_drs_uri:
227+
try:
228+
get_url = resolve_compact_identifier_to_url(drs_uri)
229+
log.info(f"Resolved compact identifier DRS URI {drs_uri} to {get_url}")
230+
except ValueError as e:
231+
raise ValueError(f"Failed to resolve compact identifier DRS URI {drs_uri}: {str(e)}")
232+
elif "/" in rest_of_drs_uri:
233+
netspec, object_id = rest_of_drs_uri.split("/", 1)
234+
scheme = "https"
235+
if force_http:
236+
scheme = "http"
237+
get_url = f"{scheme}://{netspec}/ga4gh/drs/v1/objects/{object_id}"
238+
else:
239+
raise ValueError(f"Invalid DRS URI format: {drs_uri}")
99240
response = retry_and_get(get_url, retry_options or RetryOptions(), headers=headers)
100241
response.raise_for_status()
101242
response_object = response.json()
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import os
2+
import tempfile
3+
from unittest.mock import (
4+
Mock,
5+
patch,
6+
)
7+
8+
import pytest
9+
10+
from galaxy.files.sources.util import fetch_drs_to_file
11+
12+
13+
class TestDRSCompactIdentifiersIntegration:
    """Integration tests for fetching DRS compact identifiers with fetch_drs_to_file."""

    @staticmethod
    def _reset_resolver():
        # Imported locally so this class only relies on names it brings in itself.
        from galaxy.files.sources.util import CompactIdentifierResolver

        CompactIdentifierResolver._reset_singleton()

    def setup_method(self):
        # The resolver is a process-wide singleton with a cache; start each
        # test from an empty cache so request counts do not depend on which
        # tests ran earlier.
        self._reset_resolver()

    def teardown_method(self):
        # Do not leak prefixes cached here into tests that run afterwards.
        self._reset_resolver()

    @patch("galaxy.files.sources.util.requests.get")
    @patch("galaxy.files.sources.util.stream_url_to_file")
    def test_fetch_compact_identifier_drs_file(self, mock_stream, mock_get):
        """A compact identifier is resolved through the registry and then downloaded."""
        namespace_response = Mock()
        namespace_response.json.return_value = {
            "_links": {
                "self": {"href": "https://registry.api.identifiers.org/restApi/namespaces/123"},
                "resources": {"href": "https://registry.api.identifiers.org/restApi/namespaces/123/resources"},
            }
        }
        namespace_response.raise_for_status.return_value = None

        resources_response = Mock()
        resources_response.json.return_value = {
            "_embedded": {
                "resources": [{"urlPattern": "https://example-drs.com/ga4gh/drs/v1/objects/{$id}", "official": True}]
            }
        }
        resources_response.raise_for_status.return_value = None

        drs_response = Mock()
        drs_response.json.return_value = {
            "id": "test-object-id",
            "access_methods": [
                {
                    "type": "https",
                    "access_url": {
                        "url": "https://download.example.com/test-file.txt",
                        "headers": ["Authorization: Bearer test-token"],
                    },
                }
            ],
        }
        drs_response.raise_for_status.return_value = None
        drs_response.status_code = 200

        # Order matters: namespace lookup, resources lookup, then the DRS object.
        mock_get.side_effect = [namespace_response, resources_response, drs_response]

        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            try:
                fetch_drs_to_file("drs://drs.test:test-object-id", tmp.name, None)

                assert mock_get.call_count == 3
                mock_get.assert_any_call(
                    "https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix=drs.test",
                    timeout=600,
                )
                mock_get.assert_any_call(
                    "https://registry.api.identifiers.org/restApi/namespaces/123/resources",
                    timeout=600,
                )

                mock_get.assert_any_call(
                    "https://example-drs.com/ga4gh/drs/v1/objects/test-object-id", timeout=600, headers=None
                )

                mock_stream.assert_called_once()
                call_args = mock_stream.call_args
                assert call_args[0][0] == "https://download.example.com/test-file.txt"
                assert call_args[1]["target_path"] == tmp.name

            finally:
                if os.path.exists(tmp.name):
                    os.unlink(tmp.name)

    # Parametrized so that every malformed URI is reported individually
    # instead of the first failure hiding the remaining cases.
    @pytest.mark.parametrize(
        "uri",
        [
            "drs://",  # No identifier
            "drs://no-colon",  # Missing colon
            "drs://:only-accession",  # Missing prefix
            "drs://prefix:",  # Missing accession
            "drs://UPPERCASE:accession",  # Invalid prefix format
            "https://not-drs:accession",  # Wrong scheme
        ],
    )
    def test_compact_identifier_error_handling(self, uri):
        """Malformed URIs are rejected with ValueError."""
        with pytest.raises(ValueError):
            fetch_drs_to_file(uri, "/tmp/output", None)

    @patch("galaxy.files.sources.util.CompactIdentifierResolver._query_identifiers_org")
    def test_meta_resolver_failure_handling(self, mock_identifiers):
        """An unresolvable prefix surfaces as a ValueError naming the DRS URI."""
        mock_identifiers.return_value = None

        with pytest.raises(ValueError, match="Failed to resolve compact identifier DRS URI"):
            fetch_drs_to_file("drs://unknown.prefix:accession", "/tmp/output", None)

0 commit comments

Comments
 (0)