@@ -290,27 +290,14 @@ rule raxml_ng_bootstrap:
290290 "2>{log}"
291291
292292
293- rule gotree_consensus_ml_tree :
294- input :
295- ml_trees = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.mlTrees" ,
296- output :
297- consensus_tree = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.consensusTree" ,
298- log :
299- "logs/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.consensusTree.log" ,
300- conda :
301- "../envs/gotree.yaml"
302- shell :
303- "gotree compute consensus -i {input.ml_trees} --freq-min 0.5 -o {output.consensus_tree} 2>{log}"
304-
305-
306293rule gotree_collapse_bootstrap_trees :
307294 input :
308295 bootstraps = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.bootstraps" ,
309296 variant_sites = "results/raxml_ng/{individual}/input/max_{n_missing_cells}_missing/{individual}.ml_gt_and_likelihoods.filtered_sites.txt" ,
310297 output :
311- collapsed = "results/gotree/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.bootstraps" ,
298+ collapsed = "results/gotree/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.collapsed. bootstraps" ,
312299 log :
313- "logs/gotree/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.bootstraps.log" ,
300+ "logs/gotree/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.collapsed. bootstraps.log" ,
314301 conda :
315302 "../envs/gotree.yaml"
316303 params :
@@ -321,12 +308,12 @@ rule gotree_collapse_bootstrap_trees:
321308
322309rule gotree_collapse_tree :
323310 input :
324- best_tree = "results/raxml_ng /{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.{type}" ,
311+ best_tree = "results/{software} /{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.{type}" ,
325312 variant_sites = "results/raxml_ng/{individual}/input/max_{n_missing_cells}_missing/{individual}.ml_gt_and_likelihoods.filtered_sites.txt" ,
326313 output :
327- collapsed = "results/gotree /{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.{type}" ,
314+ collapsed = "results/{software} /{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.collapsed .{type}" ,
328315 log :
329- "logs/gotree /{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.{type}.log" ,
316+ "logs/{software} /{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.collapsed .{type}.log" ,
330317 conda :
331318 "../envs/gotree.yaml"
332319 params :
@@ -335,38 +322,20 @@ rule gotree_collapse_tree:
335322 "gotree collapse length --length {params.min_length} --input {input.best_tree} --output {output.collapsed} 2>{log} "
336323
337324
338- rule gotree_support_collapsed_trees :
339- input :
340- ref_tree = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.{tree_type}" ,
341- bootstraps = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.bootstraps" ,
342- output :
343- support = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.support" ,
344- log :
345- "logs/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.support.log" ,
346- conda :
347- "../envs/gotree.yaml"
348- threads : 4
349- shell :
350- "( gotree compute support tbe "
351- " --threads {threads} "
352- " --bootstrap {input.bootstraps} "
353- " --reftree {input.ref_tree} "
354- " --out {output.support} "
355- ") 2>{log}"
356-
357-
358325rule extract_ml_tree_likelihoods :
359326 input :
360- log = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search .raxml.log" ,
327+ log = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.{infix} .raxml.log" ,
361328 output :
362- tsv = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search .raxml.log_likelihoods.tsv" ,
329+ tsv = "results/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.{infix} .raxml.log_likelihoods.tsv" ,
363330 log :
364- "logs/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search .raxml.log_likelihoods.log" ,
331+ "logs/raxml_ng/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.{infix} .raxml.log_likelihoods.log" ,
365332 conda :
366333 "../envs/grep.yaml"
334+ params :
335+ search_string = lambda wc : "Bootstrap tree" if wc .infix == "bootstraps" else "ML tree search"
367336 shell :
368- '( grep "ML tree search #" {input.log} | '
369- r"sed -r -e 's/^.*ML tree search #([0-9]+), logLikelihood: (-[0-9]+.[0-9]+)$/\1\t\2/' | "
337+ '( grep "{params.search_string} #" {input.log} | '
338+ r"sed -r -e 's/^.*{params.search_string} #([0-9]+), logLikelihood: (-[0-9]+.[0-9]+)$/\1\t\2/' | "
370339 "sort -n -k 1,1 "
371340 ">{output.tsv} "
372341 ") 2>{log}"
@@ -401,6 +370,45 @@ rule cluster_info_dist_across_trees:
401370 "../scripts/cluster_info_dist.R"
402371
403372
373+ rule gotree_compute_consensus_tree :
374+ input :
375+ best_cluster_trees = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.mlTrees.best_cluster.best_cluster_trees.newick" ,
376+ output :
377+ consensus_tree = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.consensusTree" ,
378+ log :
379+ "logs/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.consensusTree.log" ,
380+ conda :
381+ "../envs/gotree.yaml"
382+ threads : 4
383+ shell :
384+ "( gotree compute consensus "
385+ " --input {input.best_cluster_trees} "
386+ " --freq-min 0.5 " # this should be the default, but without a min frequency we get an error about this
387+ " --threads {threads} "
388+ " --output {output.consensus_tree} "
389+ ") 2>{log}"
390+
391+
392+ rule gotree_support_collapsed_trees :
393+ input :
394+ ref_tree = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.search.raxml.collapsed.{tree_type}" ,
395+ bootstraps = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.bootstraps.raxml.collapsed.bootstraps" ,
396+ output :
397+ support = "results/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.collapsed.support" ,
398+ log :
399+ "logs/{software}/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.support.log" ,
400+ conda :
401+ "../envs/gotree.yaml"
402+ threads : 4
403+ shell :
404+ "( gotree compute support tbe "
405+ " --threads {threads} "
406+ " --bootstrap {input.bootstraps} "
407+ " --reftree {input.ref_tree} "
408+ " --out {output.support} "
409+ ") 2>{log}"
410+
411+
404412rule raxml_ng_rfdist_across_trees :
405413 input :
406414 trees = lambda wc : expand (
@@ -558,15 +566,15 @@ rule raxml_ng_bsconverge_to_tsv:
558566rule concatenate_raxml_ng_tsvs_per_individual_per_metric :
559567 input :
560568 rf_dists = lambda wc : expand (
561- "results/{{software}} /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.{type}.raxml.{{metric}}.tsv" ,
569+ "results/raxml_ng /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.{type}.raxml.{{metric}}.tsv" ,
562570 model = config ["raxml_ng" ]["models" ],
563571 n_missing_cells = config ["raxml_ng" ]["max_missing" ],
564572 type = ["startTree" , "mlTrees" , "bootstraps" ] if wc .metric == "rf_dist" else ["bootstraps" ],
565573 ),
566574 output :
567- tsv = "results/trees/{individual}.{metric}.{software}. max_missing_stable_topology_selection.tsv" ,
575+ tsv = "results/trees/{individual}.{metric}.max_missing_stable_topology_selection.tsv" ,
568576 log :
569- "logs/trees/{individual}.{metric}.{software}. max_missing_stable_topology_selection.log" ,
577+ "logs/trees/{individual}.{metric}.max_missing_stable_topology_selection.log" ,
570578 conda :
571579 "../envs/xsv.yaml"
572580 shell :
@@ -581,11 +589,11 @@ rule concatenate_raxml_ng_tsvs_per_individual_per_metric:
581589rule join_raxml_ng_tsvs_per_individual_across_metrics :
582590 input :
583591 bsconverge = "results/trees/{individual}.bsconverge.raxml_ng.max_missing_stable_topology_selection.tsv" ,
584- rf_dist = "results/trees/{individual}.rf_dist.{software}. max_missing_stable_topology_selection.tsv" ,
592+ rf_dist = "results/trees/{individual}.rf_dist.max_missing_stable_topology_selection.tsv" ,
585593 output :
586- tsv = "results/trees/{individual}.{software}. max_missing_stable_topology_selection.tsv" ,
594+ tsv = "results/trees/{individual}.max_missing_stable_topology_selection.tsv" ,
587595 log :
588- "logs/trees/{individual}.{software}. max_missing_stable_topology_selection.join_metrics.log" ,
596+ "logs/trees/{individual}.max_missing_stable_topology_selection.join_metrics.log" ,
589597 conda :
590598 "../envs/xsv.yaml"
591599 shell :
@@ -602,11 +610,11 @@ rule join_raxml_ng_tsvs_per_individual_across_metrics:
602610
603611rule plot_distinct_topologies_across_missingness :
604612 input :
605- tsv = "results/trees/{individual}.{software} .max_missing_stable_topology_selection.tsv" ,
613+ tsv = "results/trees/{individual}.gotree .max_missing_stable_topology_selection.tsv" ,
606614 output :
607- plot = "results/trees/{individual}.{software}. max_missing_stable_topology_selection.pdf" ,
615+ plot = "results/trees/{individual}.max_missing_stable_topology_selection.pdf" ,
608616 log :
609- "logs/trees/{individual}.{software}. max_missing_stable_topology_selection.log"
617+ "logs/trees/{individual}.max_missing_stable_topology_selection.log"
610618 conda :
611619 "../envs/tidyverse.yaml"
612620 script :
@@ -615,12 +623,12 @@ rule plot_distinct_topologies_across_missingness:
615623
616624rule plot_support_tree :
617625 input :
618- support = "results/gotree/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.support" ,
626+ support = "results/gotree/{individual}/results/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.collapsed. support" ,
619627 samples = config ["samples" ],
620628 output :
621- support = "results/trees/{individual}/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.{software}. support.{tree_type}.pdf" ,
629+ support = "results/trees/{individual}/{model}/max_{n_missing_cells}_missing/{individual}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.collapsed .pdf" ,
622630 log :
623- "logs/trees/{model}/max_{n_missing_cells}_missing/{individual}.{software}. support.{tree_type}.log" ,
631+ "logs/trees/{model}/max_{n_missing_cells}_missing/{individual}.support.{tree_type}.collapsed .log" ,
624632 conda :
625633 "../envs/ggtree.yaml"
626634 script :
@@ -630,21 +638,21 @@ rule plot_support_tree:
630638rule plot_values_across_missingness :
631639 input :
632640 support_trees = expand (
633- "results/{{software}} /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.support" ,
641+ "results/gotree /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.support.{tree_type}.raxml.collapsed .support" ,
634642 model = lookup ("raxml_ng/models" , within = config ),
635643 n_missing_cells = lookup (dpath = "raxml_ng/max_missing" , within = config ),
636644 tree_type = ["bestTree" , "consensusTree" ]
637645 ),
638646 cluster_info_dist = expand (
639- "results/{{software}} /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.{type}.all.cluster_info_dist.tsv" ,
647+ "results/gotree /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.{type}.all.cluster_info_dist.tsv.gz " ,
640648 model = lookup ("raxml_ng/models" , within = config ),
641649 n_missing_cells = lookup (dpath = "raxml_ng/max_missing" , within = config ),
642650 type = ["startTree" , "mlTrees" , "bootstraps" ],
643651 )
644652 output :
645- support_plot = "results/trees/{individual}.{software}. tree_values_across_missingness.pdf" ,
653+ support_plot = "results/trees/{individual}.tree_values_across_missingness.pdf" ,
646654 log :
647- "logs/trees/{individual}.{software}. tree_values_across_missingness_plot.log" ,
655+ "logs/trees/{individual}.tree_values_across_missingness_plot.log" ,
648656 conda :
649657 "../envs/ggtree.yaml"
650658 resources :
@@ -656,7 +664,7 @@ rule plot_values_across_missingness:
656664rule plot_support_values_and_branch_lengths :
657665 input :
658666 support_trees = expand (
659- "results/{{software}} /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.support.raxml.support" ,
667+ "results/gotree /{{individual}}/results/{model}/max_{n_missing_cells}_missing/{{individual}}.{model}.max_{n_missing_cells}_missing.support.{{tree_type}}. raxml.collapsed .support" ,
660668 model = lookup ("raxml_ng/models" , within = config ),
661669 n_missing_cells = lookup (dpath = "raxml_ng/max_missing" , within = config ),
662670 ),
@@ -666,13 +674,13 @@ rule plot_support_values_and_branch_lengths:
666674 n_missing_cells = lookup (dpath = "raxml_ng/max_missing" , within = config ),
667675 ),
668676 output :
669- support_hist = "results/trees/{individual}.{software }.support.histogram.pdf" ,
670- branch_length_hist = "results/trees/{individual}.{software }.branch_length.histogram.pdf" ,
671- branch_length_ecdf = "results/trees/{individual}.{software }.branch_length.ecdf.pdf" ,
672- data_plot = "results/trees/{individual}.{software }.support_vs_branch_length.full_data.pdf" ,
673- summary_plot = "results/trees/{individual}.{software }.support_vs_branch_length.summary.pdf" ,
677+ support_hist = "results/trees/{individual}.{tree_type }.support.histogram.pdf" ,
678+ branch_length_hist = "results/trees/{individual}.{tree_type }.branch_length.histogram.pdf" ,
679+ branch_length_ecdf = "results/trees/{individual}.{tree_type }.branch_length.ecdf.pdf" ,
680+ data_plot = "results/trees/{individual}.{tree_type }.support_vs_branch_length.full_data.pdf" ,
681+ summary_plot = "results/trees/{individual}.{tree_type }.support_vs_branch_length.summary.pdf" ,
674682 log :
675- "logs/trees/{individual}.{software }.support_values_vs_branch_lengths.log" ,
683+ "logs/trees/{individual}.{tree_type }.support_values_vs_branch_lengths.log" ,
676684 conda :
677685 "../envs/ggtree.yaml"
678686 script :
0 commit comments