Skip to content

Commit df23ad9

Browse files
authored
Merge pull request #61 from matsengrp/remove-papermill-60
Replace papermill with config-reading in DMS/SHAPE-MaP notebooks
2 parents d34faa3 + de14048 commit df23ad9

15 files changed

+1679
-1970
lines changed

Snakefile

Lines changed: 54 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -347,18 +347,17 @@ rule process_dms_data_yu_ha:
347347
ha_numbering="data/dms_data/Yu_HA/site_numbering_map.csv"
348348
output:
349349
ha_dms="{output_dir}/dms_data/Yu_HA/processed_dms_data.csv"
350-
params:
351-
data_dir=config["data_dir"]
352350
log:
353351
"{output_dir}/logs/process_dms_data_yu_ha.log"
354352
shell:
355353
"""
356-
data_dir=$(realpath {params.data_dir}) && \
357354
cd notebooks && \
358-
papermill \
359-
process_dms_data_yu_ha.ipynb \
360-
process_dms_data_yu_ha.ipynb \
361-
-p data_dir $data_dir &> ../{log}
355+
jupyter nbconvert \
356+
--to notebook \
357+
--execute \
358+
--inplace \
359+
--ExecutePreprocessor.timeout=600 \
360+
process_dms_data_yu_ha.ipynb &> ../{log}
362361
"""
363362

364363
# Process NP DMS data from Bloom et al.
@@ -368,18 +367,17 @@ rule process_dms_data_bloom_np:
368367
np_data="data/dms_data/Bloom_NP/Supplementary_file_1.xls"
369368
output:
370369
np_dms="{output_dir}/dms_data/Bloom_NP/processed_dms_data.csv"
371-
params:
372-
data_dir=config["data_dir"]
373370
log:
374371
"{output_dir}/logs/process_dms_data_bloom_np.log"
375372
shell:
376373
"""
377-
data_dir=$(realpath {params.data_dir}) && \
378374
cd notebooks && \
379-
papermill \
380-
process_dms_data_bloom_np.ipynb \
381-
process_dms_data_bloom_np.ipynb \
382-
-p data_dir $data_dir &> ../{log}
375+
jupyter nbconvert \
376+
--to notebook \
377+
--execute \
378+
--inplace \
379+
--ExecutePreprocessor.timeout=600 \
380+
process_dms_data_bloom_np.ipynb &> ../{log}
383381
"""
384382

385383
# Process PB2 DMS data from Soh et al. (alignment QC)
@@ -389,18 +387,17 @@ rule process_dms_data_soh_pb2:
389387
pb2_data="data/dms_data/Soh_PB2/elife-45079-fig2-data1-v1.csv"
390388
output:
391389
touch("{output_dir}/.process_dms_data_soh_pb2.done")
392-
params:
393-
data_dir=config["data_dir"]
394390
log:
395391
"{output_dir}/logs/process_dms_data_soh_pb2.log"
396392
shell:
397393
"""
398-
data_dir=$(realpath {params.data_dir}) && \
399394
cd notebooks && \
400-
papermill \
401-
process_dms_data_soh_pb2.ipynb \
402-
process_dms_data_soh_pb2.ipynb \
403-
-p data_dir $data_dir &> ../{log}
395+
jupyter nbconvert \
396+
--to notebook \
397+
--execute \
398+
--inplace \
399+
--ExecutePreprocessor.timeout=600 \
400+
process_dms_data_soh_pb2.ipynb &> ../{log}
404401
"""
405402

406403
# Process NA DMS data from Wang et al. (sequence comparison)
@@ -410,18 +407,17 @@ rule process_dms_data_wang_na:
410407
na_data="data/dms_data/Wang_NA/msystems.00670-23-s0006.xlsx"
411408
output:
412409
na_dms="{output_dir}/dms_data/Wang_NA/processed_dms_data.csv"
413-
params:
414-
data_dir=config["data_dir"]
415410
log:
416411
"{output_dir}/logs/process_dms_data_wang_na.log"
417412
shell:
418413
"""
419-
data_dir=$(realpath {params.data_dir}) && \
420414
cd notebooks && \
421-
papermill \
422-
process_dms_data_wang_na.ipynb \
423-
process_dms_data_wang_na.ipynb \
424-
-p data_dir $data_dir &> ../{log}
415+
jupyter nbconvert \
416+
--to notebook \
417+
--execute \
418+
--inplace \
419+
--ExecutePreprocessor.timeout=600 \
420+
process_dms_data_wang_na.ipynb &> ../{log}
425421
"""
426422

427423
# Process PB1 DMS data from Li et al.
@@ -431,18 +427,17 @@ rule process_dms_data_li_pb1:
431427
pb1_data="data/dms_data/Li_PB1/jvi.01329-23-s0008.csv"
432428
output:
433429
pb1_dms="{output_dir}/dms_data/Li_PB1/processed_dms_data.csv"
434-
params:
435-
data_dir=config["data_dir"]
436430
log:
437431
"{output_dir}/logs/process_dms_data_li_pb1.log"
438432
shell:
439433
"""
440-
data_dir=$(realpath {params.data_dir}) && \
441434
cd notebooks && \
442-
papermill \
443-
process_dms_data_li_pb1.ipynb \
444-
process_dms_data_li_pb1.ipynb \
445-
-p data_dir $data_dir &> ../{log}
435+
jupyter nbconvert \
436+
--to notebook \
437+
--execute \
438+
--inplace \
439+
--ExecutePreprocessor.timeout=600 \
440+
process_dms_data_li_pb1.ipynb &> ../{log}
446441
"""
447442

448443
# Process M1 DMS data from Hom et al.
@@ -453,18 +448,17 @@ rule process_dms_data_hom_m1:
453448
fasta="data/dms_data/Hom_M1/PR8-M1.fasta"
454449
output:
455450
m1_dms="{output_dir}/dms_data/Hom_M1/processed_dms_data.csv"
456-
params:
457-
data_dir=config["data_dir"]
458451
log:
459452
"{output_dir}/logs/process_dms_data_hom_m1.log"
460453
shell:
461454
"""
462-
data_dir=$(realpath {params.data_dir}) && \
463455
cd notebooks && \
464-
papermill \
465-
process_dms_data_hom_m1.ipynb \
466-
process_dms_data_hom_m1.ipynb \
467-
-p data_dir $data_dir &> ../{log}
456+
jupyter nbconvert \
457+
--to notebook \
458+
--execute \
459+
--inplace \
460+
--ExecutePreprocessor.timeout=600 \
461+
process_dms_data_hom_m1.ipynb &> ../{log}
468462
"""
469463

470464
# Process NEP DMS data from Teo et al.
@@ -474,18 +468,17 @@ rule process_dms_data_teo_nep:
474468
nep_data="data/dms_data/Teo_NEP/mmc2.xlsx"
475469
output:
476470
nep_dms="{output_dir}/dms_data/Teo_NEP/processed_dms_data.csv"
477-
params:
478-
data_dir=config["data_dir"]
479471
log:
480472
"{output_dir}/logs/process_dms_data_teo_nep.log"
481473
shell:
482474
"""
483-
data_dir=$(realpath {params.data_dir}) && \
484475
cd notebooks && \
485-
papermill \
486-
process_dms_data_teo_nep.ipynb \
487-
process_dms_data_teo_nep.ipynb \
488-
-p data_dir $data_dir &> ../{log}
476+
jupyter nbconvert \
477+
--to notebook \
478+
--execute \
479+
--inplace \
480+
--ExecutePreprocessor.timeout=600 \
481+
process_dms_data_teo_nep.ipynb &> ../{log}
489482
"""
490483

491484
# Process PA DMS data from Chen et al.
@@ -495,18 +488,17 @@ rule process_dms_data_chen_pa:
495488
pa_data="data/dms_data/Chen_PA/fitness calculation.xlsx"
496489
output:
497490
pa_dms="{output_dir}/dms_data/Chen_PA/processed_dms_data.csv"
498-
params:
499-
data_dir=config["data_dir"]
500491
log:
501492
"{output_dir}/logs/process_dms_data_chen_pa.log"
502493
shell:
503494
"""
504-
data_dir=$(realpath {params.data_dir}) && \
505495
cd notebooks && \
506-
papermill \
507-
process_dms_data_chen_pa.ipynb \
508-
process_dms_data_chen_pa.ipynb \
509-
-p data_dir $data_dir &> ../{log}
496+
jupyter nbconvert \
497+
--to notebook \
498+
--execute \
499+
--inplace \
500+
--ExecutePreprocessor.timeout=600 \
501+
process_dms_data_chen_pa.ipynb &> ../{log}
510502
"""
511503

512504

@@ -550,18 +542,17 @@ rule process_shapemap_data:
550542
)
551543
output:
552544
"{output_dir}/shapemap/all_data.csv"
553-
params:
554-
data_dir=config["data_dir"]
555545
log:
556546
"{output_dir}/logs/process_shapemap_data.log"
557547
shell:
558548
"""
559-
data_dir=$(realpath {params.data_dir}) && \
560549
cd notebooks && \
561-
papermill \
562-
process_shapemap_data.ipynb \
563-
process_shapemap_data.ipynb \
564-
-p data_dir $data_dir &> ../{log}
550+
jupyter nbconvert \
551+
--to notebook \
552+
--execute \
553+
--inplace \
554+
--ExecutePreprocessor.timeout=600 \
555+
process_shapemap_data.ipynb &> ../{log}
565556
"""
566557

567558
# Align protein sequences across subtypes (only for HA and NA)

notebooks/analyze_fitness_effects.ipynb

Lines changed: 465 additions & 458 deletions
Large diffs are not rendered by default.

notebooks/analyze_genome_wide_rates.ipynb

Lines changed: 29 additions & 29 deletions
Large diffs are not rendered by default.

notebooks/analyze_site_specific_rates.ipynb

Lines changed: 391 additions & 384 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)