12
12
import time
13
13
import os
14
14
import sys
15
+ import multiprocessing as mp
15
16
import genepi
16
17
17
18
"""""" """""" """""" """""" """"""
@@ -36,7 +37,7 @@ def ArgumentsParser():
36
37
### define arguments for modeling
37
38
parser .add_argument ("-m" , required = False , default = "c" , choices = ["c" , "r" ], help = "choose model type: c for classification; r for regression" )
38
39
parser .add_argument ("-k" , required = False , default = 2 , help = "k of k-fold cross validation" )
39
- parser .add_argument ("-t" , required = False , default = 1 , help = "number of threads" )
40
+ parser .add_argument ("-t" , required = False , default = mp . cpu_count () , help = "number of threads" )
40
41
41
42
### define arguments for step1_downloadUCSCDB
42
43
parser_group_1 = parser .add_argument_group ("update UCSC database" )
@@ -91,6 +92,10 @@ def main(args=None):
91
92
str_outputFilePath = args .o
92
93
else :
93
94
str_outputFilePath = os .path .dirname (str_inputFileName_genotype )
95
+ int_thread = mp .cpu_count ()
96
+ if int (args .t ) is not None :
97
+ if int (args .t ) < mp .cpu_count ():
98
+ int_thread = int (args .t )
94
99
95
100
if str_inputFileName_genotype == "example" and str_inputFileName_phenotype == "example" :
96
101
str_command = "cp " + os .path .join (os .path .dirname (genepi .__file__ ), "example" , "sample.csv" ) + " " + str_outputFilePath
@@ -117,7 +122,7 @@ def main(args=None):
117
122
118
123
file_outputFile .writelines ("\t " + "-m (model type): " + "Classification" if args .m == "c" else "Regression" + "\n " )
119
124
file_outputFile .writelines ("\t " + "-k (k-fold cross validation): " + str (args .k ) + "\n " )
120
- file_outputFile .writelines ("\t " + "-t (number of threads): " + str (args . t ) + "\n " + "\n " )
125
+ file_outputFile .writelines ("\t " + "-t (number of threads): " + str (int_thread ) + "\n " + "\n " )
121
126
122
127
file_outputFile .writelines ("\t " + "--updatedb (enable function of update UCSC database): " + str (args .updatedb ) + "\n " )
123
128
file_outputFile .writelines ("\t " + "-b (human genome build): " + args .b + "\n " + "\n " )
@@ -150,27 +155,27 @@ def main(args=None):
150
155
151
156
if args .m == "c" :
152
157
### step4_singleGeneEpistasis_Logistic (for case/control trial)
153
- genepi .BatchSingleGeneEpistasisLogistic (os .path .join (str_outputFilePath , "snpSubsets" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (args . t ))
158
+ genepi .BatchSingleGeneEpistasisLogistic (os .path .join (str_outputFilePath , "snpSubsets" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (int_thread ))
154
159
### step5_crossGeneEpistasis_Logistic (for case/control trial)
155
- float_score_train , float_score_test = genepi .CrossGeneEpistasisLogistic (os .path .join (str_outputFilePath , "singleGeneResult" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (args . t ))
160
+ float_score_train , float_score_test = genepi .CrossGeneEpistasisLogistic (os .path .join (str_outputFilePath , "singleGeneResult" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (int_thread ))
156
161
file_outputFile .writelines ("Overall genetic feature performance (F1 score)" + "\n " )
157
162
file_outputFile .writelines ("Training: " + str (float_score_train ) + "\n " )
158
163
file_outputFile .writelines ("Testing (" + str (args .k ) + "-fold CV): " + str (float_score_test ) + "\n " + "\n " )
159
164
### step6_ensembleWithCovariates (for case/control trial)
160
- float_score_train , float_score_test = genepi .EnsembleWithCovariatesClassifier (os .path .join (str_outputFilePath , "crossGeneResult" , "Feature.csv" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (args . t ))
165
+ float_score_train , float_score_test = genepi .EnsembleWithCovariatesClassifier (os .path .join (str_outputFilePath , "crossGeneResult" , "Feature.csv" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (int_thread ))
161
166
file_outputFile .writelines ("Ensemble with co-variate performance (F1 score)" + "\n " )
162
167
file_outputFile .writelines ("Training: " + str (float_score_train ) + "\n " )
163
168
file_outputFile .writelines ("Testing (" + str (args .k ) + "-fold CV): " + str (float_score_test ) + "\n " + "\n " )
164
169
else :
165
170
### step4_singleGeneEpistasis_Lasso (for quantitative trial)
166
- genepi .BatchSingleGeneEpistasisLasso (os .path .join (str_outputFilePath , "snpSubsets" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (args . t ))
171
+ genepi .BatchSingleGeneEpistasisLasso (os .path .join (str_outputFilePath , "snpSubsets" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (int_thread ))
167
172
### step5_crossGeneEpistasis_Lasso (for quantitative trial)
168
- float_score_train , float_score_test = genepi .CrossGeneEpistasisLasso (os .path .join (str_outputFilePath , "singleGeneResult" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (args . t ))
173
+ float_score_train , float_score_test = genepi .CrossGeneEpistasisLasso (os .path .join (str_outputFilePath , "singleGeneResult" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (int_thread ))
169
174
file_outputFile .writelines ("Overall genetic feature performance (Average of the Pearson and Spearman correlation)" + "\n " )
170
175
file_outputFile .writelines ("Training: " + str (float_score_train ) + "\n " )
171
176
file_outputFile .writelines ("Testing (" + str (args .k ) + "-fold CV): " + str (float_score_test ) + "\n " + "\n " )
172
177
### step6_ensembleWithCovariates (for quantitative trial)
173
- float_score_train , float_score_test = genepi .EnsembleWithCovariatesRegressor (os .path .join (str_outputFilePath , "crossGeneResult" , "Feature.csv" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (args . t ))
178
+ float_score_train , float_score_test = genepi .EnsembleWithCovariatesRegressor (os .path .join (str_outputFilePath , "crossGeneResult" , "Feature.csv" ), str_inputFileName_phenotype , int_kOfKFold = int (args .k ), int_nJobs = int (int_thread ))
174
179
file_outputFile .writelines ("Ensemble with co-variate performance (Average of the Pearson and Spearman correlation)" + "\n " )
175
180
file_outputFile .writelines ("Training: " + str (float_score_train ) + "\n " )
176
181
file_outputFile .writelines ("Testing (" + str (args .k ) + "-fold CV): " + str (float_score_test ) + "\n " + "\n " )
0 commit comments