@@ -30,7 +30,7 @@ DATA_PATH = os.path.join(os.path.dirname(workflow.snakefile), "../data")
30
30
data_config_files = glob .glob (f"{ DATA_PATH } /*.csv" )
31
31
32
32
# Uncomment for test runs
33
- # data_config_files = glob.glob(f"{DATA_PATH}/test.csv")
33
+ data_config_files = glob .glob (f"{ DATA_PATH } /test.csv" )
34
34
35
35
# Only operate on EM-seq files (ignore BS-seq)
36
36
EMSEQ_ONLY = True
@@ -366,25 +366,6 @@ rule seqtk_subsample:
366
366
"""
367
367
368
368
369
- # Trim poly-g if this is a two-color system
370
- def trim_poly_g (input ):
371
- with gzip .open (input .r1_file , "rt" ) as r1_in , gzip .open (
372
- input .r2_file , "rt"
373
- ) as r2_in :
374
- for line_r1 , line_r2 in zip (r1_in , r2_in ):
375
- line_r1_instrument = line_r1 .split (":" )[0 ]
376
- line_r2_instrument = line_r2 .split (":" )[0 ]
377
- assert (
378
- line_r1_instrument == line_r2_instrument
379
- ), "R1 and R2 come from different instruments! Not possible."
380
- break
381
- return (
382
- "--trim_poly_g"
383
- if line_r1_instrument .startswith (tuple (["@A0" , "@NS" , "@NB" , "@VH" ]))
384
- else ""
385
- )
386
-
387
-
388
369
### fastp
389
370
# We adapter trimming, poly-g filtering, and initial quality filtering
390
371
# NOTE: We also do read trimming here, 10-15 BP depending on EM- vs BS-seq
@@ -416,10 +397,6 @@ rule fastp:
416
397
resources :
417
398
mem_mb = 6000
418
399
params :
419
- # Inspired to do poly-g trimming by the NEB EM-seq pipeline:
420
- # https://github.com/nebiolabs/EM-seq/blob/master/em-seq.nf
421
- # TODO: make this selective for NextSeq/NovaSeq-only runs?
422
- trim_poly_g_flag = lambda wildcards , input : trim_poly_g (input ),
423
400
# Note: --overrepresentation_analysis was overwhelming with our huge sequencing data, disabled for now.
424
401
## Trim specific to BS vs EM-seq input type
425
402
# Note that we could probably be less conservative, and trim less from EMSeq
@@ -439,7 +416,7 @@ rule fastp:
439
416
fastp --in1 {input.r1_file} --in2 {input.r2_file} \
440
417
--trim_front1 {params.trim_r1_5prime} --trim_tail1 {params.trim_r1_3prime} \
441
418
--trim_front2 {params.trim_r2_5prime} --trim_tail2 {params.trim_r2_3prime} \
442
- --length_required {params.minimum_length} {params.trim_poly_g_flag} \
419
+ --length_required {params.minimum_length} \
443
420
--adapter_sequence "{params.adapter_r1}" --adapter_sequence_r2 "{params.adapter_r2}" \
444
421
--json "{output.fastp_json}" --html "{output.fastp_html}" \
445
422
--thread {threads} --verbose --failed_out {output.fastp_failed} --stdout \
0 commit comments