Skip to content

Commit ca79c94

Browse files
committed
require local urls in seqspec checks
1 parent 0683bdc commit ca79c94

File tree

7 files changed

+185
-16
lines changed

7 files changed

+185
-16
lines changed

seqspec/seqspec_check.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
file_exists,
2020
load_spec,
2121
local_onlist_locator,
22+
local_resource_url,
2223
map_read_id_to_regions,
2324
)
2425

@@ -269,7 +270,17 @@ def check_onlist_files_exist(spec, errors, idx):
269270

270271
for ol in olrgns:
271272
if ol.urltype == "local":
272-
locator = local_onlist_locator(ol)
273+
try:
274+
locator = local_onlist_locator(ol)
275+
except ValueError as err:
276+
errobj = {
277+
"error_type": "check_onlist_files_exist",
278+
"error_message": str(err),
279+
"error_object": "onlist",
280+
}
281+
errors.append(errobj)
282+
idx += 1
283+
continue
273284
if locator.endswith(".gz"):
274285
check = locator
275286
if spec_base and not Path(check).is_absolute():
@@ -350,7 +361,17 @@ def check_read_files_exist(spec, errors, idx):
350361
for read in spec.sequence_spec:
351362
for f in read.files:
352363
if f.urltype == "local":
353-
check = f.url
364+
try:
365+
check = local_resource_url(f.url, f.filename, "file")
366+
except ValueError as err:
367+
errobj = {
368+
"error_type": "check_read_files_exist",
369+
"error_message": str(err),
370+
"error_object": "file",
371+
}
372+
errors.append(errobj)
373+
idx += 1
374+
continue
354375
if spec_base and not Path(check).is_absolute():
355376
check = str((spec_base / check).resolve())
356377
if not path.exists(check):

seqspec/utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,14 @@ def yield_onlist_contents(stream):
503503
yield line.strip().split()[0]
504504

505505

506+
def local_resource_url(url: str, filename: str, resource: str) -> str:
507+
if not url:
508+
raise ValueError(f"local {resource} '{filename}' has empty url")
509+
return str(url)
510+
511+
506512
def local_onlist_locator(onlist: Onlist) -> str:
507-
return str(onlist.url or onlist.filename)
513+
return local_resource_url(onlist.url, onlist.filename, "onlist")
508514

509515

510516
def read_local_list(onlist: Onlist, base_path: str = "") -> List[str]:

src/seqspec_check.rs

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,19 @@ fn check_onlist_files_exist(
359359
match ol.urltype.as_str() {
360360
"local" => {
361361
let mut candidates: Vec<PathBuf> = Vec::new();
362-
let locator = utils::local_onlist_locator(&ol);
362+
let locator = match utils::local_onlist_locator(&ol) {
363+
Ok(locator) => locator,
364+
Err(err) => {
365+
push_error(
366+
&mut errors,
367+
&mut idx,
368+
"check_onlist_files_exist",
369+
err,
370+
"onlist",
371+
);
372+
continue;
373+
}
374+
};
363375
let p = PathBuf::from(locator);
364376
candidates.push(if let Some(base) = spec_base {
365377
if p.is_absolute() {
@@ -439,7 +451,20 @@ fn check_read_files_exist(
439451
for f in &read.files {
440452
match f.urltype.as_str() {
441453
"local" => {
442-
let p = PathBuf::from(&f.url);
454+
let locator = match utils::local_resource_url(&f.url, &f.filename, "file") {
455+
Ok(locator) => locator,
456+
Err(err) => {
457+
push_error(
458+
&mut errors,
459+
&mut idx,
460+
"check_read_files_exist",
461+
err,
462+
"file",
463+
);
464+
continue;
465+
}
466+
};
467+
let p = PathBuf::from(locator);
443468
let full = if let Some(base) = spec_base {
444469
if p.is_absolute() {
445470
p.clone()
@@ -993,6 +1018,40 @@ mod tests {
9931018
);
9941019
}
9951020

1021+
#[test]
1022+
fn test_check_onlist_files_exist_errors_when_local_url_is_empty() {
1023+
let spec_path = PathBuf::from("tests/fixtures/onlist_read_clip/spec.yaml");
1024+
let mut spec = load_spec(&spec_path);
1025+
let barcode_region = spec
1026+
.library_spec
1027+
.get_mut(0)
1028+
.unwrap()
1029+
.regions
1030+
.iter_mut()
1031+
.find(|region| region.region_id == "barcode_a")
1032+
.unwrap();
1033+
barcode_region.onlist.as_mut().unwrap().url.clear();
1034+
1035+
let diagnostics = seqspec_check(&spec, None, &spec_path);
1036+
assert!(diagnostics.iter().any(|diagnostic| {
1037+
diagnostic.error_type == "check_onlist_files_exist"
1038+
&& diagnostic.error_message == "local onlist 'barcode_a.txt' has empty url"
1039+
}));
1040+
}
1041+
1042+
#[test]
1043+
fn test_check_read_files_exist_errors_when_local_url_is_empty() {
1044+
let spec_path = PathBuf::from("tests/fixtures/onlist_read_clip/spec.yaml");
1045+
let mut spec = load_spec(&spec_path);
1046+
spec.sequence_spec[0].files[0].url.clear();
1047+
1048+
let diagnostics = seqspec_check(&spec, None, &spec_path);
1049+
assert!(diagnostics.iter().any(|diagnostic| {
1050+
diagnostic.error_type == "check_read_files_exist"
1051+
&& diagnostic.error_message == "local file 'rna_read.fastq.gz' has empty url"
1052+
}));
1053+
}
1054+
9961055
#[test]
9971056
fn test_error_obj_structure() {
9981057
let e = ErrorObj {

src/seqspec_onlist.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,10 @@ fn get_onlist_urls(onlists: &Vec<Onlist>, base_path: &Path) -> Vec<UrlInfo> {
181181
for ol in onlists {
182182
let url = if ol.urltype == "local" {
183183
base_path
184-
.join(utils::local_onlist_locator(ol))
184+
.join(utils::local_onlist_locator(ol).unwrap_or_else(|err| {
185+
eprintln!("{}", err);
186+
std::process::exit(1);
187+
}))
185188
.to_string_lossy()
186189
.to_string()
187190
} else {
@@ -205,7 +208,10 @@ fn download_onlists_to_path(
205208
let mut out = Vec::new();
206209
for ol in onlists {
207210
if ol.urltype == "local" {
208-
let local = base_path.join(utils::local_onlist_locator(ol));
211+
let local = base_path.join(utils::local_onlist_locator(ol).unwrap_or_else(|err| {
212+
eprintln!("{}", err);
213+
std::process::exit(1);
214+
}));
209215
out.push(PathInfo {
210216
url: local.to_string_lossy().to_string(),
211217
});
@@ -242,8 +248,11 @@ fn join_onlists_and_save(
242248
let mut contents: Vec<Vec<String>> = Vec::new();
243249
for ol in onlists {
244250
let content = if ol.urltype == "local" {
245-
utils::read_local_list(&base_path.join(utils::local_onlist_locator(ol)))
246-
.unwrap_or_default()
251+
let locator = utils::local_onlist_locator(ol).unwrap_or_else(|err| {
252+
eprintln!("{}", err);
253+
std::process::exit(1);
254+
});
255+
utils::read_local_list(&base_path.join(locator)).unwrap_or_default()
247256
} else {
248257
utils::read_remote_list(&ol.url, remote_access).unwrap_or_default()
249258
};

src/utils.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,22 @@ pub fn load_spec(spec: &std::path::PathBuf) -> Assay {
6161
spec.into_assay()
6262
}
6363

64-
pub fn local_onlist_locator(onlist: &Onlist) -> &str {
65-
if onlist.url.is_empty() {
66-
&onlist.filename
64+
pub fn local_resource_url<'a>(
65+
url: &'a str,
66+
filename: &str,
67+
resource: &str,
68+
) -> Result<&'a str, String> {
69+
if url.is_empty() {
70+
Err(format!("local {} '{}' has empty url", resource, filename))
6771
} else {
68-
&onlist.url
72+
Ok(url)
6973
}
7074
}
7175

76+
pub fn local_onlist_locator(onlist: &Onlist) -> Result<&str, String> {
77+
local_resource_url(&onlist.url, &onlist.filename, "onlist")
78+
}
79+
7280
/// Read a local text file into Vec<String>, handling optional .gz
7381
pub fn read_local_list(path: &std::path::Path) -> Result<Vec<String>, String> {
7482
let p = if path.exists() {
@@ -483,11 +491,14 @@ mod tests {
483491
String::new(),
484492
);
485493

486-
assert_eq!(local_onlist_locator(&onlist), "nested/whitelist.txt");
494+
assert_eq!(
495+
local_onlist_locator(&onlist).unwrap(),
496+
"nested/whitelist.txt"
497+
);
487498
}
488499

489500
#[test]
490-
fn test_local_onlist_locator_falls_back_to_filename() {
501+
fn test_local_onlist_locator_errors_when_url_is_empty() {
491502
let onlist = Onlist::new(
492503
"ol1".into(),
493504
"display.txt".into(),
@@ -498,6 +509,17 @@ mod tests {
498509
String::new(),
499510
);
500511

501-
assert_eq!(local_onlist_locator(&onlist), "display.txt");
512+
assert_eq!(
513+
local_onlist_locator(&onlist).unwrap_err(),
514+
"local onlist 'display.txt' has empty url"
515+
);
516+
}
517+
518+
#[test]
519+
fn test_local_resource_url_errors_when_url_is_empty() {
520+
assert_eq!(
521+
local_resource_url("", "display.fastq.gz", "file").unwrap_err(),
522+
"local file 'display.fastq.gz' has empty url"
523+
);
502524
}
503525
}

tests/test_seqspec_check.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,34 @@ def test_seqspec_check_prefers_local_onlist_url():
9191
diagnostic["error_type"] == "check_onlist_files_exist"
9292
for diagnostic in diagnostics
9393
)
94+
95+
96+
def test_seqspec_check_errors_when_local_onlist_url_is_empty():
97+
spec_path = Path("tests/fixtures/onlist_read_clip/spec.yaml")
98+
spec = load_spec(spec_path)
99+
100+
barcode_region = spec.get_libspec("rna").get_region_by_id("barcode_a")[0]
101+
barcode_region.onlist.url = ""
102+
103+
diagnostics = seqspec_check(spec=spec)
104+
105+
assert any(
106+
diagnostic["error_type"] == "check_onlist_files_exist"
107+
and diagnostic["error_message"] == "local onlist 'barcode_a.txt' has empty url"
108+
for diagnostic in diagnostics
109+
)
110+
111+
112+
def test_seqspec_check_errors_when_local_file_url_is_empty():
113+
spec_path = Path("tests/fixtures/onlist_read_clip/spec.yaml")
114+
spec = load_spec(spec_path)
115+
116+
spec.sequence_spec[0].files[0].url = ""
117+
118+
diagnostics = seqspec_check(spec=spec)
119+
120+
assert any(
121+
diagnostic["error_type"] == "check_read_files_exist"
122+
and diagnostic["error_message"] == "local file 'rna_read.fastq.gz' has empty url"
123+
for diagnostic in diagnostics
124+
)

tests/test_utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from seqspec.utils import (
1111
load_spec_stream,
12+
local_resource_url,
1213
read_local_list,
1314
read_remote_list,
1415
get_remote_auth_token,
@@ -286,6 +287,21 @@ def test_local_onlist_locator_prefers_url_when_present():
286287
assert local_onlist_locator(onlist) == "nested/whitelist.txt"
287288

288289

290+
def test_local_onlist_locator_errors_when_url_is_empty():
291+
onlist = Onlist(
292+
file_id="ol1",
293+
filename="display.txt",
294+
filetype="txt",
295+
filesize=0,
296+
url="",
297+
urltype="local",
298+
md5="",
299+
)
300+
301+
with pytest.raises(ValueError, match="local onlist 'display.txt' has empty url"):
302+
local_onlist_locator(onlist)
303+
304+
289305
def test_read_local_list_prefers_url_when_present(tmp_path):
290306
nested = tmp_path / "nested"
291307
nested.mkdir()
@@ -302,3 +318,8 @@ def test_read_local_list_prefers_url_when_present(tmp_path):
302318
)
303319

304320
assert read_local_list(onlist, str(tmp_path)) == ["AAAA", "CCCC"]
321+
322+
323+
def test_local_resource_url_errors_when_url_is_empty():
324+
with pytest.raises(ValueError, match="local file 'display.fastq.gz' has empty url"):
325+
local_resource_url("", "display.fastq.gz", "file")

0 commit comments

Comments
 (0)