Skip to content

Commit 248fd6d

Browse files
committed
add cli option for 'jump' for hmmr_em .
1 parent e8ff040 commit 248fd6d

4 files changed

Lines changed: 16 additions & 12 deletions

File tree

MACS3/Commands/hmmratac_cmd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def run(args):
187187
options.info("#2 Use EM algorithm to estimate means and stddevs of fragment lengths")
188188
options.info("# for mono-, di-, and tri-nucleosomal signals...")
189189
em_trainer = HMMR_EM(petrack, options.em_means[1:4],
190-
options.em_stddevs[1:4], seed=options.hmm_randomSeed)
190+
options.em_stddevs[1:4], seed=options.hmm_randomSeed, jump=options.em_jump)
191191
# the mean and stddev after EM training
192192
em_means = [options.em_means[0],]
193193
em_means.extend(em_trainer.fragMeans)

MACS3/Signal/HMMR_EM.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class HMMR_EM:
103103
max_fraglen: cython.int
104104
epsilon: cython.float # maximum difference to call the value is converged
105105
maxIter: cython.int # maximum iteration
106-
jump: cython.float
106+
jump: cython.float # to amplify the difference for faster convergence
107107
seed: cython.int # random seed for downsampling
108108
converged: cython.bint # wheter the EM is converged
109109
sample_percentage: cython.float
@@ -118,7 +118,7 @@ def __init__(self, petrack: PETrackI, init_means: cython.list,
118118
sample_percentage: cython.float = 10,
119119
epsilon: cython.float = 0.05,
120120
maxIter: cython.int = 20,
121-
jump: cython.float = 1.5, seed: cython.int = 12345):
121+
jump: cython.float = 0.5, seed: cython.int = 12345):
122122
"""Initialize HMMR_EM object. The first three parameters are required.
123123
124124
parameters:

bin/macs3

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,10 @@ plus an extra option for the HMM model file like `macs3 hmmratac
937937
group_em.add_argument("--min-frag-p", dest="min_frag_p", type=float,
938938
help="We will exclude the abnormal fragments that can't be assigned to any of the four signal tracks. After we use EM to find the means and stddevs of the four distributions, we will calculate the likelihood that a given fragment length fit any of the four using normal distribution. If a fragment length is not like ('not like' means the probability is lower than MIN_FRAG_P) any of short, mono, di, or tri-nuc fragment, we will exclude it while generating the four signal tracks for later HMM training and prediction. The value should be between 0 and 1. Smaller the value, more 'abnormal' fragments will be allowed. So if you want to include more fragments, make this value smaller. Default=0.001",
939939
default=0.001)
940+
# To amplify the difference for faster convergence
941+
group_em.add_argument("--jump", dest="em_jump", type=float,
942+
help="This option will control the factor to amplify the difference between the observed and expected mean and variance values of the four distributions, then decide the expected mean and variance in the next EM step. Larger jump will lead to faster convergence but may be less stable. Default=0.5",
943+
default=0.5)
940944
# group for HMM
941945
group_hmm = argparser_hmmratac.add_argument_group("Hidden Markov Model arguments")
942946
#group_hmm.add_argument("-s", "--states", dest="hmm_states", type=int,

test/cmdlinetest

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -227,28 +227,28 @@ mkdir ${OUTPUTDIR_PREFIX}_run_hmmratac
227227
# macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
228228

229229
# check both gaussian and poisson hmm models, save everything
230-
macs3 hmmratac -i $ATACSEQBAM --hmm-type gaussian -n hmmratac_yeast500k --save-digested --save-states --save-likelihoods --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
231-
macs3 hmmratac -i $ATACSEQBAM --hmm-type poisson -n hmmratac_yeast500k_poisson --save-digested --save-states --save-likelihoods --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_poisson.log
230+
macs3 hmmratac -i $ATACSEQBAM --hmm-type gaussian -n hmmratac_yeast500k --jump 1.5 --save-digested --save-states --save-likelihoods --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
231+
macs3 hmmratac -i $ATACSEQBAM --hmm-type poisson -n hmmratac_yeast500k_poisson --jump 1.5 --save-digested --save-states --save-likelihoods --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_poisson.log
232232

233233
# check bedpe for both gaussian, poisson
234-
macs3 hmmratac -i $ATACSEQBED -n hmmratac_yeast500k_bedpe -f BEDPE --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_bedpe.log
235-
macs3 hmmratac -i $ATACSEQBED --hmm-type poisson -n hmmratac_yeast500k_bedpe_poisson -f BEDPE --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_bedpe_poisson.log
234+
macs3 hmmratac -i $ATACSEQBED -n hmmratac_yeast500k_bedpe -f BEDPE --jump 1.5 --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_bedpe.log
235+
macs3 hmmratac -i $ATACSEQBED --hmm-type poisson -n hmmratac_yeast500k_bedpe_poisson -f BEDPE --jump 1.5 --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_bedpe_poisson.log
236236

237237
TRAINING_REGIONS_BED=${OUTPUTDIR_PREFIX}_run_hmmratac/hmmratac_yeast500k_training_regions.bed
238238
HMM_MODEL=${OUTPUTDIR_PREFIX}_run_hmmratac/hmmratac_yeast500k_model.json
239239
TRAINING_REGIONS_BED_POISSON=${OUTPUTDIR_PREFIX}_run_hmmratac/hmmratac_yeast500k_poisson_training_regions.bed
240240
HMM_MODEL_POISSON=${OUTPUTDIR_PREFIX}_run_hmmratac/hmmratac_yeast500k_poisson_model.json
241241

242242
echo "15.1 hmmratac load training regions from bedfile"
243-
macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k_load_training_regions -t ${TRAINING_REGIONS_BED} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_training_regions.log
244-
macs3 hmmratac -i $ATACSEQBAM --hmm-type poisson -n hmmratac_yeast500k_poisson_load_training_regions -t ${TRAINING_REGIONS_BED_POISSON} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_training_regions_poisson.log
243+
macs3 hmmratac -i $ATACSEQBAM --jump 1.5 -n hmmratac_yeast500k_load_training_regions -t ${TRAINING_REGIONS_BED} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_training_regions.log
244+
macs3 hmmratac -i $ATACSEQBAM --jump 1.5 --hmm-type poisson -n hmmratac_yeast500k_poisson_load_training_regions -t ${TRAINING_REGIONS_BED_POISSON} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_training_regions_poisson.log
245245

246246
echo "15.2 hmmratac load hmm model file"
247-
macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k_load_hmm_model --model ${HMM_MODEL} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_hmm_model.log
248-
macs3 hmmratac -i $ATACSEQBAM --hmm-type poisson -n hmmratac_yeast500k_poisson_load_hmm_model --model ${HMM_MODEL_POISSON} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_hmm_model_poisson.log
247+
macs3 hmmratac -i $ATACSEQBAM --jump 1.5 -n hmmratac_yeast500k_load_hmm_model --model ${HMM_MODEL} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_hmm_model.log
248+
macs3 hmmratac -i $ATACSEQBAM --jump 1.5 --hmm-type poisson -n hmmratac_yeast500k_poisson_load_hmm_model --model ${HMM_MODEL_POISSON} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_hmm_model_poisson.log
249249

250250
echo "15.3 hmmratac scATAC test"
251-
macs3 hmmratac -i $FRAGFILE -f FRAG --barcodes $BARCODESFILE -n hmmratac_scatac_test --hmm-type poisson -n hmmratac_scatac_test --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_scatac.log
251+
macs3 hmmratac -i $FRAGFILE -f FRAG --barcodes $BARCODESFILE -n hmmratac_scatac_test --hmm-type poisson --jump 1.5 --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_scatac.log
252252

253253
echo "16. search for errors in log files"
254254
flag=0

0 commit comments

Comments
 (0)