@@ -347,18 +347,17 @@ rule process_dms_data_yu_ha:
347347 ha_numbering = "data/dms_data/Yu_HA/site_numbering_map.csv"
348348 output :
349349 ha_dms = "{output_dir}/dms_data/Yu_HA/processed_dms_data.csv"
350- params :
351- data_dir = config ["data_dir" ]
352350 log :
353351 "{output_dir}/logs/process_dms_data_yu_ha.log"
354352 shell :
355353 """
356- data_dir=$(realpath {params.data_dir}) && \
357354 cd notebooks && \
358- papermill \
359- process_dms_data_yu_ha.ipynb \
360- process_dms_data_yu_ha.ipynb \
361- -p data_dir $data_dir &> ../{log}
355+ jupyter nbconvert \
356+ --to notebook \
357+ --execute \
358+ --inplace \
359+ --ExecutePreprocessor.timeout=600 \
360+ process_dms_data_yu_ha.ipynb &> ../{log}
362361 """
363362
364363# Process NP DMS data from Bloom et al.
@@ -368,18 +367,17 @@ rule process_dms_data_bloom_np:
368367 np_data = "data/dms_data/Bloom_NP/Supplementary_file_1.xls"
369368 output :
370369 np_dms = "{output_dir}/dms_data/Bloom_NP/processed_dms_data.csv"
371- params :
372- data_dir = config ["data_dir" ]
373370 log :
374371 "{output_dir}/logs/process_dms_data_bloom_np.log"
375372 shell :
376373 """
377- data_dir=$(realpath {params.data_dir}) && \
378374 cd notebooks && \
379- papermill \
380- process_dms_data_bloom_np.ipynb \
381- process_dms_data_bloom_np.ipynb \
382- -p data_dir $data_dir &> ../{log}
375+ jupyter nbconvert \
376+ --to notebook \
377+ --execute \
378+ --inplace \
379+ --ExecutePreprocessor.timeout=600 \
380+ process_dms_data_bloom_np.ipynb &> ../{log}
383381 """
384382
385383# Process PB2 DMS data from Soh et al. (alignment QC)
@@ -389,18 +387,17 @@ rule process_dms_data_soh_pb2:
389387 pb2_data = "data/dms_data/Soh_PB2/elife-45079-fig2-data1-v1.csv"
390388 output :
391389 touch ("{output_dir}/.process_dms_data_soh_pb2.done" )
392- params :
393- data_dir = config ["data_dir" ]
394390 log :
395391 "{output_dir}/logs/process_dms_data_soh_pb2.log"
396392 shell :
397393 """
398- data_dir=$(realpath {params.data_dir}) && \
399394 cd notebooks && \
400- papermill \
401- process_dms_data_soh_pb2.ipynb \
402- process_dms_data_soh_pb2.ipynb \
403- -p data_dir $data_dir &> ../{log}
395+ jupyter nbconvert \
396+ --to notebook \
397+ --execute \
398+ --inplace \
399+ --ExecutePreprocessor.timeout=600 \
400+ process_dms_data_soh_pb2.ipynb &> ../{log}
404401 """
405402
406403# Process NA DMS data from Wang et al. (sequence comparison)
@@ -410,18 +407,17 @@ rule process_dms_data_wang_na:
410407 na_data = "data/dms_data/Wang_NA/msystems.00670-23-s0006.xlsx"
411408 output :
412409 na_dms = "{output_dir}/dms_data/Wang_NA/processed_dms_data.csv"
413- params :
414- data_dir = config ["data_dir" ]
415410 log :
416411 "{output_dir}/logs/process_dms_data_wang_na.log"
417412 shell :
418413 """
419- data_dir=$(realpath {params.data_dir}) && \
420414 cd notebooks && \
421- papermill \
422- process_dms_data_wang_na.ipynb \
423- process_dms_data_wang_na.ipynb \
424- -p data_dir $data_dir &> ../{log}
415+ jupyter nbconvert \
416+ --to notebook \
417+ --execute \
418+ --inplace \
419+ --ExecutePreprocessor.timeout=600 \
420+ process_dms_data_wang_na.ipynb &> ../{log}
425421 """
426422
427423# Process PB1 DMS data from Li et al.
@@ -431,18 +427,17 @@ rule process_dms_data_li_pb1:
431427 pb1_data = "data/dms_data/Li_PB1/jvi.01329-23-s0008.csv"
432428 output :
433429 pb1_dms = "{output_dir}/dms_data/Li_PB1/processed_dms_data.csv"
434- params :
435- data_dir = config ["data_dir" ]
436430 log :
437431 "{output_dir}/logs/process_dms_data_li_pb1.log"
438432 shell :
439433 """
440- data_dir=$(realpath {params.data_dir}) && \
441434 cd notebooks && \
442- papermill \
443- process_dms_data_li_pb1.ipynb \
444- process_dms_data_li_pb1.ipynb \
445- -p data_dir $data_dir &> ../{log}
435+ jupyter nbconvert \
436+ --to notebook \
437+ --execute \
438+ --inplace \
439+ --ExecutePreprocessor.timeout=600 \
440+ process_dms_data_li_pb1.ipynb &> ../{log}
446441 """
447442
448443# Process M1 DMS data from Hom et al.
@@ -453,18 +448,17 @@ rule process_dms_data_hom_m1:
453448 fasta = "data/dms_data/Hom_M1/PR8-M1.fasta"
454449 output :
455450 m1_dms = "{output_dir}/dms_data/Hom_M1/processed_dms_data.csv"
456- params :
457- data_dir = config ["data_dir" ]
458451 log :
459452 "{output_dir}/logs/process_dms_data_hom_m1.log"
460453 shell :
461454 """
462- data_dir=$(realpath {params.data_dir}) && \
463455 cd notebooks && \
464- papermill \
465- process_dms_data_hom_m1.ipynb \
466- process_dms_data_hom_m1.ipynb \
467- -p data_dir $data_dir &> ../{log}
456+ jupyter nbconvert \
457+ --to notebook \
458+ --execute \
459+ --inplace \
460+ --ExecutePreprocessor.timeout=600 \
461+ process_dms_data_hom_m1.ipynb &> ../{log}
468462 """
469463
470464# Process NEP DMS data from Teo et al.
@@ -474,18 +468,17 @@ rule process_dms_data_teo_nep:
474468 nep_data = "data/dms_data/Teo_NEP/mmc2.xlsx"
475469 output :
476470 nep_dms = "{output_dir}/dms_data/Teo_NEP/processed_dms_data.csv"
477- params :
478- data_dir = config ["data_dir" ]
479471 log :
480472 "{output_dir}/logs/process_dms_data_teo_nep.log"
481473 shell :
482474 """
483- data_dir=$(realpath {params.data_dir}) && \
484475 cd notebooks && \
485- papermill \
486- process_dms_data_teo_nep.ipynb \
487- process_dms_data_teo_nep.ipynb \
488- -p data_dir $data_dir &> ../{log}
476+ jupyter nbconvert \
477+ --to notebook \
478+ --execute \
479+ --inplace \
480+ --ExecutePreprocessor.timeout=600 \
481+ process_dms_data_teo_nep.ipynb &> ../{log}
489482 """
490483
491484# Process PA DMS data from Chen et al.
@@ -495,18 +488,17 @@ rule process_dms_data_chen_pa:
495488 pa_data = "data/dms_data/Chen_PA/fitness calculation.xlsx"
496489 output :
497490 pa_dms = "{output_dir}/dms_data/Chen_PA/processed_dms_data.csv"
498- params :
499- data_dir = config ["data_dir" ]
500491 log :
501492 "{output_dir}/logs/process_dms_data_chen_pa.log"
502493 shell :
503494 """
504- data_dir=$(realpath {params.data_dir}) && \
505495 cd notebooks && \
506- papermill \
507- process_dms_data_chen_pa.ipynb \
508- process_dms_data_chen_pa.ipynb \
509- -p data_dir $data_dir &> ../{log}
496+ jupyter nbconvert \
497+ --to notebook \
498+ --execute \
499+ --inplace \
500+ --ExecutePreprocessor.timeout=600 \
501+ process_dms_data_chen_pa.ipynb &> ../{log}
510502 """
511503
512504
@@ -550,18 +542,17 @@ rule process_shapemap_data:
550542 )
551543 output :
552544 "{output_dir}/shapemap/all_data.csv"
553- params :
554- data_dir = config ["data_dir" ]
555545 log :
556546 "{output_dir}/logs/process_shapemap_data.log"
557547 shell :
558548 """
559- data_dir=$(realpath {params.data_dir}) && \
560549 cd notebooks && \
561- papermill \
562- process_shapemap_data.ipynb \
563- process_shapemap_data.ipynb \
564- -p data_dir $data_dir &> ../{log}
550+ jupyter nbconvert \
551+ --to notebook \
552+ --execute \
553+ --inplace \
554+ --ExecutePreprocessor.timeout=600 \
555+ process_shapemap_data.ipynb &> ../{log}
565556 """
566557
567558# Align protein sequences across subtypes (only for HA and NA)
0 commit comments