Skip to content

Commit 4e05473

Browse files
committed
testing picrust functionality
1 parent fb40c41 commit 4e05473

4 files changed

Lines changed: 88 additions & 4 deletions

File tree

mmeds/snakemake/rules/file_manipulation.smk

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,55 @@ rule extract_feature_table_tsv:
1414
"{output} "
1515
"tables/tmp_unzip_{wildcards.table}"
1616

17+
rule extract_feature_table_biom:
    """
    Extract the feature table biom file from a qza archive.

    Runs extract_feature_table.sh in "biom" mode on tables/<table>.qza and
    writes tables/<table>.biom.
    """
    input:
        "tables/{table}.qza"
    output:
        "tables/{table}.biom"
    wildcard_constraints:
        # Keep the wildcard to a single path component.
        table = "[^/]+"
    conda:
        "mmeds_test"
    shell:
        # The scratch directory name carries the file type so that a biom
        # and a tsv extraction of the same table, scheduled in parallel,
        # do not unzip into (and then delete) the same directory.
        "extract_feature_table.sh "
        "{input} "
        "{output} "
        "biom "
        "tables/tmp_unzip_biom_{wildcards.table}"
33+
34+
rule extract_feature_table_fasta:
    """
    Pull the underlying fasta data out of a qza archive.

    Runs extract_feature_table.sh in "fasta" mode on tables/<table>.qza and
    writes tables/<table>.fasta.
    """
    input:
        "tables/{table}.qza"
    output:
        "tables/{table}.fasta"
    wildcard_constraints:
        # Keep the wildcard to a single path component.
        table = "[^/]+"
    conda:
        "mmeds_test"
    shell:
        # Arguments: archive, destination, file type, scratch directory.
        "extract_feature_table.sh {input} {output} "
        "fasta tables/tmp_unzip_{wildcards.table}"
50+
51+
# NOTE(review): a rule named extract_feature_table_tsv appears to be defined
# earlier in this file already. Snakemake rejects duplicate rule names, so
# confirm and keep only one definition.
rule extract_feature_table_tsv:
    """
    Extract the feature table biom file from a qza archive and convert it
    to a readable tsv for downstream analysis.

    Runs extract_feature_table.sh in "tsv" mode on tables/<table>.qza and
    writes tables/<table>.tsv.
    """
    input:
        "tables/{table}.qza"
    output:
        "tables/{table}.tsv"
    wildcard_constraints:
        # Same constraint as the biom and fasta rules: without it the
        # wildcard can match across "/" and overlap with the per-class rule
        # that reads tables/<class>/<table>.qza.
        table = "[^/]+"
    conda:
        "mmeds_test"
    shell:
        # The scratch directory name carries the file type so that a tsv
        # and a biom extraction of the same table, scheduled in parallel,
        # do not unzip into (and then delete) the same directory.
        "extract_feature_table.sh "
        "{input} "
        "{output} "
        "tsv "
        "tables/tmp_unzip_tsv_{wildcards.table}"
65+
1766
rule extract_feature_table_tsv_class:
1867
input:
1968
"tables/{class}/{table}.qza"

mmeds/snakemake/rules/picrust2.smk

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
rule picrust2_core:
    """
    Generate picrust2 tables for the input feature table.

    Inputs are the representative sequences (fasta) and the feature table
    (biom); the picrust2 results are written to the picrust2_out directory.
    """
    input:
        # Named inputs must be comma separated.
        rep_seqs = "tables/rep_seqs_table.fasta",
        biom_feature_table = "tables/asv_table.biom"
    output:
        # The pipeline writes a whole directory, so declare it as one.
        picrust2_results = directory("picrust2_out")
    conda:
        # "qiime2-2020.8.0" # or should I use "qiime2-2023.9"
        # NOTE(review): this is a filesystem path rather than an environment
        # name or yaml file; confirm the Snakemake version in use accepts it.
        "/sc/arion/projects/MMEDS/.modules/picrust2"
    shell:
        # -s: study sequences, -i: feature table, -o: output directory.
        "picrust2_pipeline.py "
        "-s {input.rep_seqs} "
        "-i {input.biom_feature_table} "
        "-o {output.picrust2_results}"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
configfile: "config_file.yaml"
report: "report.rst"

include: "{snakemake_dir}/common.smk"
include: "{snakemake_dir}/file_manipulation.smk"
include: "{snakemake_dir}/table_filtering.smk"
# The picrust2 rules must be included before the target rule can refer to them.
include: "{snakemake_dir}/picrust2.smk"

rule results:
    """ Target rule: request the output of the picrust2 pipeline rule """
    input:
        rules.picrust2_core.output

scripts/extract_feature_table.sh

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,21 @@
44

55
in_file=$1
66
out_file=$2
7-
tmp_dir_name="${3:-tmp_unzip}"
7+
file_type="${3:-tsv}" # tsv, biom, or fasta
8+
tmp_dir_name="${4:-tmp_unzip}"
89
unzip -joq $in_file -d $tmp_dir_name
9-
biom convert --to-tsv -i $tmp_dir_name/feature-table.biom -o $out_file
10-
sed -i '1d;2s/^#//' $out_file
11-
rm -rf $tmp_dir_name
10+
11+
# Copy or convert the requested artifact out of the unpacked archive.
# $file_type selects the branch (tsv, biom, or fasta). The scratch directory
# is always removed, and the exit status reflects whether extraction worked.
status=0
case "$file_type" in
    tsv)
        # Convert the biom table to TSV, then delete the first line and
        # strip a leading '#' from the line that followed it.
        biom convert --to-tsv -i "$tmp_dir_name/feature-table.biom" -o "$out_file" &&
            sed -i '1d;2s/^#//' "$out_file"
        status=$?
        ;;
    biom)
        cp "$tmp_dir_name/feature-table.biom" "$out_file"
        status=$?
        ;;
    fasta)
        cp "$tmp_dir_name/dna-sequences.fasta" "$out_file"
        status=$?
        ;;
    *)
        # Unknown type: report on stderr and fail so the caller notices.
        echo "Invalid file_type argument" >&2
        status=1
        ;;
esac
rm -rf "$tmp_dir_name"
exit "$status"
24+

0 commit comments

Comments
 (0)