Skip to content

Commit e26b7e2

Browse files
committed
Parse advanced arguments
1 parent d78100a commit e26b7e2

45 files changed

Lines changed: 1387 additions & 794 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

include/valik/argument_parsing/validators.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,29 @@ struct power_of_two_validator
2222
}
2323
};
2424

25-
struct error_rate_validator
25+
struct float_in_range_validator
2626
{
27-
using option_value_type = float;
27+
using option_value_type = double; // used for all arithmetic types
28+
29+
option_value_type min;
30+
option_value_type max;
31+
32+
float_in_range_validator(option_value_type min_value, option_value_type max_value) : min{min_value}, max{max_value} {}
2833

2934
void operator()(option_value_type const & val) const
3035
{
31-
if ((val < 0.0) || (val > 0.2))
36+
if ((min > val) || (val > max))
3237
{
33-
throw sharg::validation_error{"The provided error rate is not in range [0, 0.2]."};
38+
throw sharg::validation_error{"Value must be in range [" + std::to_string(min) +
39+
", " + std::to_string(max) + "]."};
3440
}
3541
}
3642

3743
std::string get_help_page_message() const
3844
{
39-
return "Error rate must be in range [0, 0.2].";
45+
return "Value must be in range [" + std::to_string(min) +
46+
", " + std::to_string(max) + "].";
4047
}
41-
4248
};
4349

4450
class positive_integer_validator

include/valik/search/search_distributed.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,21 @@ bool search_distributed(search_arguments const & arguments, search_time_statisti
101101
process_args.insert(process_args.end(), {"-e", std::to_string(numEpsilon),
102102
"-l", std::to_string(arguments.pattern_size),
103103
"-o", std::string(cart_queries_path) + ".gff"});
104+
105+
106+
//!TODO: process arguments --disableThresh, --sortThresh, --numMatches in consolidation
107+
/*
108+
if (arguments.disableThresh != std::numeric_limits<size_t>::max())
109+
process_args.insert(process_args.end(), {"--disableThresh", std::to_string(arguments.disableThresh)});
110+
111+
process_args.insert(process_args.end(), {"--sortThresh", std::to_string(arguments.compactThresh)});
112+
process_args.insert(process_args.end(), {"--numMatches", std::to_string(arguments.numMatches)});
113+
*/
114+
process_args.insert(process_args.end(), {"--repeatPeriod", std::to_string(arguments.maxRepeatPeriod)});
115+
process_args.insert(process_args.end(), {"--repeatLength", std::to_string(arguments.minRepeatLength)});
116+
process_args.insert(process_args.end(), {"--verification", arguments.strVerificationMethod});
117+
process_args.insert(process_args.end(), {"--xDrop", std::to_string(arguments.xDrop)});
118+
process_args.insert(process_args.end(), {"--abundanceCut", std::to_string(arguments.qgramAbundanceCut)});
104119

105120
auto start = std::chrono::high_resolution_clock::now();
106121
external_process process(process_args);

include/valik/search/search_local.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,21 @@ bool search_local(search_arguments const & arguments, search_time_statistics & t
142142
threadOptions.numEpsilon = std::max(arguments.error_rate, (float) 0.00001);
143143
threadOptions.epsilon = stellar::utils::fraction::from_double(threadOptions.numEpsilon).limit_denominator();
144144
threadOptions.minLength = arguments.pattern_size;
145-
threadOptions.disableThresh = arguments.disableThresh;
146145
threadOptions.outputFile = cart_queries_path.string() + ".gff";
146+
147+
{
148+
//!TODO: process arguments --disableThresh, --sortThresh, --numMatches in consolidation
149+
/*
150+
threadOptions.disableThresh = arguments.disableThresh;
151+
threadOptions.compactThresh = arguments.compactThresh;
152+
threadOptions.numMatches = arguments.numMatches;
153+
*/
154+
threadOptions.maxRepeatPeriod = arguments.maxRepeatPeriod;
155+
threadOptions.minRepeatLength = arguments.minRepeatLength;
156+
threadOptions.strVerificationMethod = arguments.strVerificationMethod;
157+
threadOptions.xDrop = arguments.xDrop;
158+
threadOptions.qgramAbundanceCut = arguments.qgramAbundanceCut;
159+
}
147160

148161
using TDatabaseSegment = stellar::StellarDatabaseSegment<TAlphabet>;
149162
using TQuerySegment = seqan2::Segment<seqan2::String<TAlphabet> const, seqan2::InfixSegment>;

include/valik/shared.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ struct search_arguments final : public minimiser_threshold_arguments, public ste
160160
std::filesystem::path ref_meta_path{};
161161
std::filesystem::path query_meta_path{};
162162
bool distribute{false};
163-
164163
};
165164

166165
} // namespace valik

lib/stellar3

src/argument_parsing/search.cpp

Lines changed: 91 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
2222
.long_id = "query",
2323
.description = "Provide a path to the query file.",
2424
.required = true,
25-
.validator = sharg::input_file_validator{}});
25+
.validator = sharg::input_file_validator{{"fasta", "fa", "fq", "fastq"}}});
2626
parser.add_option(arguments.out_file,
2727
sharg::config{.short_id = '\0',
2828
.long_id = "output",
@@ -33,30 +33,11 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
3333
sharg::config{.short_id = 'e',
3434
.long_id = "error-rate",
3535
.description = "Choose the maximum allowed error rate of a local match.",
36-
.validator = error_rate_validator{}});
37-
parser.add_option(arguments.tau,
38-
sharg::config{.short_id = '\0',
39-
.long_id = "tau",
40-
.description = "Used in the dynamic thresholding. The higher tau, the lower the threshold.",
41-
.validator = sharg::arithmetic_range_validator{0, 1}});
42-
parser.add_option(arguments.threshold,
43-
sharg::config{.short_id = '\0',
44-
.long_id = "threshold",
45-
.description = "If set, this threshold is used instead of the probabilistic models.",
46-
.validator = sharg::arithmetic_range_validator{0, 1}});
47-
parser.add_option(arguments.p_max,
48-
sharg::config{.short_id = '\0',
49-
.long_id = "p_max",
50-
.description = "Used in the dynamic thresholding. The higher p_max, the lower the threshold.",
51-
.validator = sharg::arithmetic_range_validator{0, 1}});
36+
.validator = float_in_range_validator{0.0f, 0.2f}});
5237
parser.add_option(arguments.pattern_size,
5338
sharg::config{.short_id = '\0',
5439
.long_id = "pattern",
55-
.description = "Choose the pattern size. Default: half of first query sequence."});
56-
parser.add_option(arguments.overlap,
57-
sharg::config{.short_id = '\0',
58-
.long_id = "overlap",
59-
.description = "Choose how much sequential patterns overlap. Default: pattern size - 1."});
40+
.description = "Choose the minimum length of a local alignment. Default: half of first query sequence."});
6041
parser.add_flag(arguments.compressed,
6142
sharg::config{.short_id = '\0',
6243
.long_id = "compressed",
@@ -72,16 +53,8 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
7253
parser.add_flag(arguments.write_time,
7354
sharg::config{.short_id = '\0',
7455
.long_id = "time",
75-
.description = "Write timing file.",
56+
.description = "Write runtime log file.",
7657
.advanced = true});
77-
parser.add_option(arguments.cart_max_capacity,
78-
sharg::config{.short_id = '\0',
79-
.long_id = "cart_max_capacity",
80-
.description = "Number of elements to be stored in a single cart before it is send for processing."});
81-
parser.add_option(arguments.max_queued_carts,
82-
sharg::config{.short_id = '\0',
83-
.long_id = "max_queued_carts",
84-
.description = "Maximal number of carts that are full and are waiting to be processed."});
8558
parser.add_option(arguments.ref_meta_path,
8659
sharg::config{.short_id = '\0',
8760
.long_id = "ref-meta",
@@ -101,58 +74,97 @@ void init_search_parser(sharg::parser & parser, search_arguments & arguments)
10174
.long_id = "threads",
10275
.description = "Choose the number of threads.",
10376
.validator = positive_integer_validator{}});
77+
78+
/////////////////////////////////////////
79+
// Advanced options
80+
/////////////////////////////////////////
81+
parser.add_option(arguments.tau,
82+
sharg::config{.short_id = '\0',
83+
.long_id = "tau",
84+
.description = "Used in the dynamic thresholding. The higher tau, the lower the threshold.",
85+
.advanced = true,
86+
.validator = sharg::arithmetic_range_validator{0, 1}});
87+
parser.add_option(arguments.threshold,
88+
sharg::config{.short_id = '\0',
89+
.long_id = "threshold",
90+
.description = "If set, this threshold is used instead of the probabilistic models.",
91+
.advanced = true,
92+
.validator = sharg::arithmetic_range_validator{0, 1}});
93+
parser.add_option(arguments.p_max,
94+
sharg::config{.short_id = '\0',
95+
.long_id = "p_max",
96+
.description = "Used in the dynamic thresholding. The higher p_max, the lower the threshold.",
97+
.advanced = true,
98+
.validator = sharg::arithmetic_range_validator{0, 1}});
99+
parser.add_option(arguments.overlap,
100+
sharg::config{.short_id = '\0',
101+
.long_id = "overlap",
102+
.description = "Choose how much sequential patterns overlap. "
103+
"This determines how many potential matches are skipped in prefiltering. "
104+
"(pattern_size - 1) considers all potential matches.",
105+
.advanced = true});
106+
parser.add_option(arguments.cart_max_capacity,
107+
sharg::config{.short_id = '\0',
108+
.long_id = "cart_max_capacity",
109+
.description = "Number of elements to be stored in a single cart before it is sent for processing.",
110+
.advanced = true});
111+
parser.add_option(arguments.max_queued_carts,
112+
sharg::config{.short_id = '\0',
113+
.long_id = "max_queued_carts",
114+
.description = "Maximal number of carts that are full and are waiting to be processed.",
115+
.advanced = true});
104116
parser.add_option(arguments.disableThresh,
105117
sharg::config{.short_id = '\0',
106118
.long_id = "disableThresh",
107-
.description = "Maximal number of verified matches before disabling verification for one query sequence.",
108-
.advanced = true,
119+
.description = "STELLAR: Maximal number of verified SWIFT filter matches before disabling verification for one query sequence.",
120+
.hidden = true,
109121
.validator = sharg::arithmetic_range_validator{1, 10000}});
110-
111-
/////////////////////////////////////////
112-
// Stellar options
113-
/////////////////////////////////////////
114-
/*
115-
// Filtering options
116-
parser.add_option(options.qGram,
117-
sharg::config{.short_id = 'k',
118-
.long_id = "kmer",
119-
.description = "Length of the q-grams.",
120-
.validator = sharg::arithmetic_range_validator{1, 32}});
121-
parser.add_option(options.maxRepeatPeriod,
122-
sharg::config{.short_id = '\0',
123-
.long_id = "repeatPeriod",
124-
.description = "Maximal period of low complexity repeats to be filtered.",
125-
.validator = sharg::arithmetic_range_validator{1, 32}});
126-
parser.add_option(options.minRepeatLength,
127-
sharg::config{.short_id = '\0',
128-
.long_id = "repeatLength",
129-
.description = "Minimal length of low complexity repeats to be filtered.",
130-
.validator = sharg::arithmetic_range_validator{1u, std::numeric_limits<uint32_t>::max()}});
131-
parser.add_option(options.qgramAbundanceCut,
132-
sharg::config{.short_id = 'c',
133-
.long_id = "abundanceCut",
134-
.description = "k-mer overabundance cut ratio.",
135-
.validator = float_in_range_validator{0, 1}});
136-
137-
// Verification options
138-
parser.add_option(options.xDrop,
139-
sharg::config{.short_id = 'x',
140-
.long_id = "xDrop",
141-
.description = "Maximal x-drop for extension."});
142-
parser.add_option(options.strVerificationMethod,
143-
sharg::config{.short_id = '\0',
144-
.long_id = "verification",
145-
.description = "Verification strategy: exact or bestLocal or bandedGlobal.",
146-
.validator = sharg::value_list_validator{"exact", "bestLocal", "bandedGlobal", "bandedGlobalExtend"}});
147-
parser.add_option(options.numMatches,
148-
sharg::config{.short_id = 'n',
149-
.long_id = "numMatches",
150-
.description = "Maximal number of kept matches per query and database. If STELLAR finds more matches, only the longest ones are kept."});
151-
parser.add_option(options.compactThresh,
152-
sharg::config{.short_id = 's',
153-
.long_id = "sortThresh",
154-
.description = "Number of matches triggering removal of duplicates. Choose a smaller value for saving space."});
155-
*/
122+
parser.add_option(arguments.compactThresh,
123+
sharg::config{.short_id = 's',
124+
.long_id = "sortThresh",
125+
.description = "STELLAR: Number of matches triggering removal of duplicates. Choose a smaller value for saving space.",
126+
.hidden = true});
127+
parser.add_option(arguments.qGram,
128+
sharg::config{.short_id = 'q',
129+
.long_id = "stellar-kmer",
130+
.description = "STELLAR: Length of the q-grams in the SWIFT algorithm.",
131+
.advanced = true,
132+
.validator = sharg::arithmetic_range_validator{1, 32}});
133+
parser.add_option(arguments.qgramAbundanceCut,
134+
sharg::config{.short_id = 'c',
135+
.long_id = "abundanceCut",
136+
.description = "STELLAR: k-mer overabundance cut ratio.",
137+
.advanced = true,
138+
.validator = float_in_range_validator{0.0f, 1.0f}});
139+
parser.add_option(arguments.maxRepeatPeriod,
140+
sharg::config{.short_id = '\0',
141+
.long_id = "repeatPeriod",
142+
.description = "STELLAR: Maximal period of low complexity repeats to be filtered.",
143+
.advanced = true,
144+
.validator = sharg::arithmetic_range_validator{1, 32}});
145+
parser.add_option(arguments.minRepeatLength,
146+
sharg::config{.short_id = '\0',
147+
.long_id = "repeatLength",
148+
.description = "STELLAR: Minimal length of low complexity repeats to be filtered.",
149+
.advanced = true,
150+
.validator = sharg::arithmetic_range_validator{1u, std::numeric_limits<uint32_t>::max()}});
151+
parser.add_option(arguments.xDrop,
152+
sharg::config{.short_id = 'x',
153+
.long_id = "xDrop",
154+
.description = "STELLAR: Maximal x-drop for extension.",
155+
.advanced = true});
156+
parser.add_option(arguments.strVerificationMethod,
157+
sharg::config{.short_id = '\0',
158+
.long_id = "verification",
159+
.description = "STELLAR: Verification strategy: exact or bestLocal or bandedGlobal.",
160+
.advanced = true,
161+
.validator = sharg::value_list_validator{"exact", "bestLocal", "bandedGlobal", "bandedGlobalExtend"}});
162+
parser.add_option(arguments.numMatches,
163+
sharg::config{.short_id = 'n',
164+
.long_id = "numMatches",
165+
.description = "STELLAR: Maximal number of kept matches per query and database. "
166+
"If STELLAR finds more matches, only the longest ones are kept.",
167+
.hidden = true});
156168
}
157169

158170
void run_search(sharg::parser & parser)

test/cli/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,9 @@ target_use_datasources (valik_test FILES query_e0.06.fasta)
7575

7676
add_cli_test (dream_test.cpp)
7777
target_use_datasources (dream_test FILES 16bins13window1error.gff)
78-
target_use_datasources (dream_test FILES 16bins13window.ibf)
7978
target_use_datasources (dream_test FILES 16bins15window1error.gff)
80-
target_use_datasources (dream_test FILES 16bins15window.ibf)
8179
target_use_datasources (dream_test FILES 4bins13window1error.gff)
82-
target_use_datasources (dream_test FILES 4bins13window.ibf)
8380
target_use_datasources (dream_test FILES 4bins15window1error.gff)
84-
target_use_datasources (dream_test FILES 4bins15window.ibf)
8581
target_use_datasources (dream_test FILES dummy_reads.fastq)
8682
target_use_datasources (dream_test FILES query.fastq)
8783
target_use_datasources (dream_test FILES ref.fasta)

0 commit comments

Comments
 (0)