Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Compiled object files and Python bytecode.
*.o
*.pyc
# Local Python virtual-environment directories.
venv
env
# Broad suffix patterns: any path whose name ends in "out", "json" or "txt".
# NOTE(review): these have no leading dot, so they also match names such as
# "layout" or job files like run_multi.json -- confirm that is intended.
*out
*json
*txt
# Any name containing "noXY", and any name starting with "input_list".
*noXY*
input_list*
# Any name starting with "run" and ending with "sh".
# NOTE(review): this also matches wrapper scripts named run_*.sh -- confirm.
run*sh
17 changes: 17 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Image providing PhyloWGS and the SMC-Het challenge report writer under /opt.
#
# The base image is pinned to Ubuntu 18.04: the `python` (2.x), `python-pip`
# and `libgsl23` packages installed below are not all available from newer
# Ubuntu releases, so an unpinned `FROM ubuntu` stops building once `latest`
# moves on.
FROM ubuntu:18.04

# Compiler toolchain, Python 2 with headers and pip, GSL and BLAS/LAPACK.
# DEBIAN_FRONTEND is set for this command only so apt-get never prompts, and
# the apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        gcc \
        gfortran \
        git \
        gsl-bin \
        libblas-dev \
        libgsl-dev \
        libgsl23 \
        liblapack-dev \
        make \
        python \
        python-dev \
        python-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Python libraries installed for the PhyloWGS scripts.
RUN pip install --no-cache-dir PyVCF

RUN pip install --no-cache-dir numpy scipy

# Both repositories are cloned side by side under /opt; the CWL tools and
# run_phylowgs.sh refer to them by these absolute paths.
WORKDIR /opt

RUN git clone --branch master https://github.com/morrislab/smchet-challenge.git
RUN git clone --branch master https://github.com/morrislab/phylowgs.git

# Build the C++ sampler in place. The working directory of the image is left
# at /opt (cd is local to this RUN) and the output name mh.o is unchanged.
RUN cd phylowgs && g++ -o mh.o -O3 mh.cpp util.cpp $(gsl-config --cflags --libs)
61 changes: 61 additions & 0 deletions multievolve.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# CWL wrapper around /opt/phylowgs/multievolve.py, executed inside the
# smcheteval/phylowgs:0.1 container. Every input below is passed to the
# script as a prefixed command-line option; the tool returns trees.zip.
cwlVersion: v1.0
class: CommandLineTool
label: Multievolve
baseCommand:
  - python
  - /opt/phylowgs/multievolve.py
requirements:
  DockerRequirement:
    dockerPull: smcheteval/phylowgs:0.1

inputs:
  # Passed as --num-chains.
  num_chains:
    type: int
    default: 16
    inputBinding:
      prefix: --num-chains

  # Passed as --random-seeds followed by each array element. The default
  # list has 16 entries, the same count as the default num_chains.
  random_seeds:
    type: int[]
    default: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    inputBinding:
      prefix: --random-seeds

  # Passed as --chain-inclusion-factor.
  chain_inclusion_factor:
    type: float
    default: 1.1
    inputBinding:
      prefix: --chain-inclusion-factor

  # Passed as --mcmc-samples.
  mcmc:
    type: int
    default: 5000
    inputBinding:
      prefix: --mcmc-samples

  # Passed as --burnin-samples.
  burnin:
    type: int
    default: 2000
    inputBinding:
      prefix: --burnin-samples

  # SSM input file, passed as --ssms.
  ssms:
    type: File
    inputBinding:
      prefix: --ssms

  # CNV input file, passed as --cnvs.
  cnvs:
    type: File
    inputBinding:
      prefix: --cnvs

  # Passed as --output-dir; defaults to the job's working directory.
  # NOTE(review): the output glob below looks for trees.zip in the working
  # directory, so overriding this presumably breaks collection -- confirm.
  output_dir:
    type: string
    default: './'
    inputBinding:
      prefix: --output-dir

outputs:
  # The file named trees.zip found in the working directory after the run.
  tree_file:
    type: File
    outputBinding:
      glob: trees.zip

50 changes: 50 additions & 0 deletions parser/create_phylowgs_inputs.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# CWL wrapper around /opt/phylowgs/parser/create_phylowgs_inputs.py, executed
# inside the smcheteval/phylowgs:0.1 container. It takes one CNV file and one
# VCF file, both labelled as sample "s1", and returns the two text files the
# script writes (named by output_cnvs and output_variants).
cwlVersion: v1.0
class: CommandLineTool
label: PhyloWGS Input Prep
baseCommand: ["python", "/opt/phylowgs/parser/create_phylowgs_inputs.py"]
requirements:
  - class: DockerRequirement
    dockerPull: smcheteval/phylowgs:0.1

inputs:
  # Emitted as two argv tokens: `--cnvs` and `s1=<path>`.
  # A prefix of "--cnvs s1=" with `separate: false` would instead produce the
  # single token "--cnvs s1=<path>" (with an embedded space), which is not the
  # `--cnvs s1=<file>` form the script is invoked with from the shell.
  cnvs:
    type: File
    inputBinding:
      prefix: --cnvs
      valueFrom: s1=$(self.path)

  # Name of the CNV file the script writes; passed as --output-cnvs.
  output_cnvs:
    type: string
    default: cnv_data.txt
    inputBinding:
      prefix: --output-cnvs

  # Name of the variants file the script writes; passed as --output-variants.
  output_variants:
    type: string
    default: ssm_data.txt
    inputBinding:
      prefix: --output-variants

  # `<sample>=<caller>` pair passed as --vcf-type; the sample label must match
  # the "s1" label used for cnvs and vcf_files.
  vcf_type:
    type: string
    default: "s1=mutect_smchet"
    inputBinding:
      prefix: --vcf-type

  # Emitted as the single token `s1=<path>`; position 5 places it after all
  # the prefixed options above, which have no explicit position.
  vcf_files:
    type: File
    inputBinding:
      prefix: "s1="
      separate: false
      position: 5

outputs:
  # The file whose name was given by output_cnvs.
  multievolve_cnvs:
    type: File
    outputBinding:
      glob: $(inputs.output_cnvs)

  # The file whose name was given by output_variants.
  multievolve_snvs:
    type: File
    outputBinding:
      glob: $(inputs.output_variants)
Empty file modified parser/create_phylowgs_inputs.py
100644 → 100755
Empty file.
36 changes: 36 additions & 0 deletions parser/parse_cnvs.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CWL wrapper around /opt/phylowgs/parser/parse_cnvs.py, executed inside the
# smcheteval/phylowgs:0.1 container. It converts one CNV caller output file
# and returns the file the script writes under the name given by cnv_output.
cwlVersion: v1.0
class: CommandLineTool
baseCommand: ["python", "/opt/phylowgs/parser/parse_cnvs.py"]
requirements:
  - class: DockerRequirement
    dockerPull: smcheteval/phylowgs:0.1

inputs:
  # Passed as --cnv-format.
  cnv_format:
    type: string
    default: battenberg-smchet
    inputBinding:
      prefix: --cnv-format

  # Passed as --cellularity when supplied, omitted from the command line
  # otherwise. The input is optional (`float?`) with no default: a default
  # of `None` is the YAML string "None", which is neither null nor a float.
  # NOTE(review): confirm parse_cnvs.py accepts a run without --cellularity.
  cellularity:
    type: float?
    inputBinding:
      prefix: --cellularity

  # Name of the file the script writes; passed as --cnv-output.
  cnv_output:
    type: string
    default: cnvs.txt
    inputBinding:
      prefix: --cnv-output

  # CNV caller output, passed as a bare positional argument. Position 4
  # places it after the prefixed options, which have no explicit position.
  cnv_file:
    type: File
    inputBinding:
      position: 4

outputs:
  # The file whose name was given by cnv_output.
  parser_output:
    type: File
    outputBinding:
      glob: $(inputs.cnv_output)
Empty file modified parser/parse_cnvs.py
100644 → 100755
Empty file.
16 changes: 16 additions & 0 deletions parser/run_create_inputs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# run_create_inputs.sh
#
# Slurm wrapper: runs create_phylowgs_inputs.cwl through cwltool, using
# run_create_inputs.json from this script's directory as the job file.
#
# NOTE(review): this file carries #SBATCH directives yet also submits cwltool
# through a second `sbatch` call below -- confirm whether it is meant to be
# started with `bash` or with `sbatch`.
#
# Log files are named after this job (create_inputs-<jobid>) rather than
# sharing the "parser-" prefix used by run_parser.sh.
#SBATCH --partition=exacloud
#SBATCH --output=create_inputs-%j.out
#SBATCH --error=create_inputs-%j.err
#SBATCH --job-name=run_smchet_create_inputs
#SBATCH --gres disk:1024
#SBATCH --mincpus=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=00:45:00

# Site-specific environment setup (presumably what puts cwltool on PATH).
source /home/groups/EllrottLab/activate_conda

# Directory containing this script, so the .cwl/.json files are found
# regardless of the caller's working directory.
ABS_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Paths are quoted so a checkout location containing spaces still works.
sbatch cwltool "${ABS_PATH}/create_phylowgs_inputs.cwl" "${ABS_PATH}/run_create_inputs.json"
16 changes: 16 additions & 0 deletions parser/run_parser.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# run_parser.sh
#
# Slurm wrapper: runs parse_cnvs.cwl through cwltool (with --debug), using
# parse_cnvs.json from this script's directory as the job file.
#
# NOTE(review): this file carries #SBATCH directives yet also submits cwltool
# through a second `sbatch` call below -- confirm whether it is meant to be
# started with `bash` or with `sbatch`.
#SBATCH --partition=exacloud
#SBATCH --output=parser-%j.out
#SBATCH --error=parser-%j.err
#SBATCH --job-name=run_smchet_parser
#SBATCH --gres disk:1024
#SBATCH --mincpus=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=00:45:00

# Site-specific environment setup (presumably what puts cwltool on PATH).
source /home/groups/EllrottLab/activate_conda

# Directory containing this script, so the .cwl/.json files are found
# regardless of the caller's working directory.
ABS_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Paths are quoted so a checkout location containing spaces still works.
sbatch cwltool --debug "${ABS_PATH}/parse_cnvs.cwl" "${ABS_PATH}/parse_cnvs.json"
79 changes: 79 additions & 0 deletions phylowgs.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# PhyloWGS workflow: chains five tools, each feeding the next.
#   parse -> prep_inputs -> multievolve -> write_results -> write_report
# The five files produced by write_report are the workflow outputs.
cwlVersion: v1.0
class: Workflow

requirements:
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}

inputs:
  # Forwarded to prep_inputs as its vcf_files input.
  ssmFile:
    type: File
  # Forwarded to parse as its cnv_file input.
  cnvFile:
    type: File
  # Forwarded to parse as its cellularity input.
  cellFxn:
    type: float

steps:
  # Step 1: convert the CNV file with the parse_cnvs tool.
  parse:
    run: ./parser/parse_cnvs.cwl
    in:
      cellularity: cellFxn
      cnv_file: cnvFile
    out: [parser_output]

  # Step 2: combine the parsed CNVs and the SSM file into multievolve inputs.
  prep_inputs:
    run: ./parser/create_phylowgs_inputs.cwl
    in:
      cnvs: parse/parser_output
      vcf_files: ssmFile
    out: [multievolve_snvs, multievolve_cnvs]

  # Step 3: run multievolve on the prepared inputs.
  multievolve:
    run: ./multievolve.cwl
    in:
      ssms: prep_inputs/multievolve_snvs
      cnvs: prep_inputs/multievolve_cnvs
    out: [tree_file]

  # Step 4: turn the tree file into the three result files.
  write_results:
    run: ./write_results.cwl
    in:
      tree_file: multievolve/tree_file
    out: [summary_results, mutlist_results, mutass_results]

  # Step 5: build the report files; the tool definition lives in a sibling
  # smchet-challenge checkout, referenced by relative path.
  write_report:
    run: ../smchet-challenge/create-smchet-report/write_report.cwl
    in:
      tree_summary: write_results/summary_results
      mutation_list: write_results/mutlist_results
      mutation_assignment: write_results/mutass_results
    out:
      - cellularity
      - population
      - proportion
      - cluster_assignment
      - cocluster_assignment

# Each workflow output exposes one write_report output unchanged.
outputs:
  cellularity_predfile:
    type: File
    outputSource: write_report/cellularity
  population_predfile:
    type: File
    outputSource: write_report/population
  proportion_predfile:
    type: File
    outputSource: write_report/proportion
  cluster_assignment_predfile:
    type: File
    outputSource: write_report/cluster_assignment
  cocluster_assignment_predfile:
    type: File
    outputSource: write_report/cocluster_assignment
16 changes: 16 additions & 0 deletions run_multi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# run_multi.sh
#
# Slurm wrapper: runs multievolve.cwl through cwltool, using run_multi.json
# from this script's directory as the job file.
#
# NOTE(review): this file carries #SBATCH directives yet also submits cwltool
# through a second `sbatch` call below -- confirm whether it is meant to be
# started with `bash` or with `sbatch`.
#SBATCH --partition=exacloud
#SBATCH --output=multievolve-%j.out
#SBATCH --error=multievolve-%j.err
#SBATCH --job-name=run_smchet_multievolve
#SBATCH --gres disk:1024
#SBATCH --mincpus=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=06:00:00

# Site-specific environment setup (presumably what puts cwltool on PATH).
source /home/groups/EllrottLab/activate_conda

# Directory containing this script, so the .cwl/.json files are found
# regardless of the caller's working directory.
ABS_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Paths are quoted so a checkout location containing spaces still works.
sbatch cwltool "${ABS_PATH}/multievolve.cwl" "${ABS_PATH}/run_multi.json"
32 changes: 32 additions & 0 deletions run_phylowgs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# End-to-end PhyloWGS run: parse CNVs, build the PhyloWGS input files, run
# multievolve, write the result files, then write the SMC-Het report.
#
# Usage: run_phylowgs.sh <battenberg_cnv_file> <vcf_file> <cellularity_file>
#
###https://github.com/Sage-Bionetworks/SMC-Het-Challenge-Examples/blob/master/PhyloWGS/command/run_phylowgs.sh

# Stop at the first failing stage instead of running later stages on stale or
# missing intermediate files; treat unset variables and pipeline failures as
# errors too.
set -euo pipefail

if [ "$#" -lt 3 ]; then
    echo "Usage: $0 <battenberg_cnv_file> <vcf_file> <cellularity_file>" >&2
    exit 1
fi

BATTEN=$1
VCF=$2
CELL=$3

# Purity is the first whitespace-separated field on line 2 of the
# cellularity file.
PUR=$(sed "2q;d" "${CELL}" | awk '{print $1}')
if [ -z "${PUR}" ]; then
    echo "Could not read a cellularity value from line 2 of ${CELL}" >&2
    exit 1
fi

SNV_CALLER="mutect_smchet"
CNV_CALLER="battenberg-smchet"
MCMC=5000
BURNIN=2000
NCHAINS=16
# One seed per chain: 1..NCHAINS, newline-separated as printed by seq.
RANDOM_SEED="$(seq 1 "${NCHAINS}")"
#TODO: Fix cellularity
#TODO: subsampling
#if $subsample.run == "yes":
#--sample-size ${subsample.count}
#end if
#TODO: normal-cn
#if $only_normal_cn
#--only-normal-cn
#end if

python /opt/phylowgs/parser/parse_cnvs.py --cnv-format "${CNV_CALLER}" --cellularity "${PUR}" --cnv-output cnvs.txt "${BATTEN}"

python /opt/phylowgs/parser/create_phylowgs_inputs.py --cnvs s1=cnvs.txt --output-cnvs cnv_data.txt --output-variants ssm_data.txt --vcf-type "s1=${SNV_CALLER}" "s1=${VCF}"

# RANDOM_SEED is deliberately left unquoted so that each seed becomes its own
# argument to -r.
# shellcheck disable=SC2086
python /opt/phylowgs/multievolve.py --num-chains "${NCHAINS}" --ssms ssm_data.txt --cnvs cnv_data.txt --burnin-samples "${BURNIN}" --mcmc-samples "${MCMC}" -r ${RANDOM_SEED}

# NOTE(review): multievolve.py is given no output directory above, while this
# step reads /opt/chains/trees.zip -- presumably this relies on the script
# being run from /opt; confirm.
python /opt/phylowgs/write_results.py tumour /opt/chains/trees.zip trees.json.gz mutations.json.gz mutation_assignments.json.gz

PYTHONPATH='/opt/phylowgs/' python /opt/smchet-challenge/create-smchet-report/write_report.py trees.json.gz mutations.json.gz mutation_assignments.json.gz /opt/outputs
16 changes: 16 additions & 0 deletions run_results.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# run_results.sh
#
# Slurm wrapper: runs write_results.cwl through cwltool (with --debug), using
# write_results.json from this script's directory as the job file.
#
# NOTE(review): this file carries #SBATCH directives yet also submits cwltool
# through a second `sbatch` call below -- confirm whether it is meant to be
# started with `bash` or with `sbatch`.
#SBATCH --partition=exacloud
#SBATCH --output=results-%j.out
#SBATCH --error=results-%j.err
#SBATCH --job-name=run_smchet_results
#SBATCH --gres disk:1024
#SBATCH --mincpus=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=00:45:00

# Site-specific environment setup (presumably what puts cwltool on PATH).
source /home/groups/EllrottLab/activate_conda

# Directory containing this script, so the .cwl/.json files are found
# regardless of the caller's working directory.
ABS_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Paths are quoted so a checkout location containing spaces still works.
sbatch cwltool --debug "${ABS_PATH}/write_results.cwl" "${ABS_PATH}/write_results.json"
Loading