Skip to content

Commit 4328592

Browse files
author
David Heller
committed
[FIX] Use signed integer types for CLI parameters
- change args.min_var_length from uint32_t to int32_t - change args.max_var_length from uint32_t to int32_t - change args.max_tol_inserted_length from uint32_t to int32_t - change args.max_overlap from uint32_t to int32_t - expand CLI documentation (noting that params need non-negative value) - expand documentation of all functions accepting these arguments (noting that it expects a non-negative value) - explicitly check for non-negative value and terminate if negative - add test for this check with negative parameter - replace abs with std::abs in analyze_sa_tag_method.cpp Reason: When comparing signed with unsigned values (e.g. int32 and uint32), C++ implicitly converts the signed type into an unsigned type which leads to unexpected behavior (e.g. -1 gets converted to a large positive value). As a solution, we should not use unsigned types for quantities on which we might want to perform mathematical operations or comparisons. To make sure that the parameters do not take negative values we add an explicit check that terminates the program if the user gave a negative value.
1 parent 510273d commit 4328592

11 files changed

Lines changed: 132 additions & 48 deletions

File tree

include/iGenVar.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ struct cmd_arguments
1212
std::vector<detection_methods> methods{cigar_string, split_read, read_pairs, read_depth}; // default: all methods
1313
clustering_methods clustering_method{hierarchical_clustering}; // default: hierarchical clustering method
1414
refinement_methods refinement_method{no_refinement}; // default: no refinement
15-
uint32_t min_var_length = 30;
16-
uint32_t max_var_length = 1000000;
17-
uint32_t max_tol_inserted_length = 5;
18-
uint32_t max_overlap = 10;
15+
int32_t min_var_length = 30;
16+
int32_t max_var_length = 1000000;
17+
int32_t max_tol_inserted_length = 5;
18+
int32_t max_overlap = 10;
1919
};
2020

2121
void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments & args);
@@ -38,10 +38,10 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments
3838
* (0: no_refinement,
3939
* 1: sViper_refinement_method,
4040
* 2: sVirl_refinement_method) - *default: no refinement*\n
41-
* **args.min_var_length** - minimum length of variants to detect - *default: 30 bp*\n
42-
* **args.max_var_length** - maximum length of variants to detect - *default: 1,000,000 bp*\n
43-
* **args.max_tol_inserted_length** - longest tolerated inserted sequence at non-INS SV types - *default: 5 bp*\n
44-
* **args.max_overlap** - maximum overlap between alignment segments - *default: 10 bp*
41+
* **args.min_var_length** - minimum length of variants to detect (expected to be non-negative) - *default: 30 bp*\n
42+
* **args.max_var_length** - maximum length of variants to detect (expected to be non-negative) - *default: 1,000,000 bp*\n
43+
* **args.max_tol_inserted_length** - longest tolerated inserted sequence at non-INS SV types (expected to be non-negative) - *default: 5 bp*\n
44+
* **args.max_overlap** - maximum overlap between alignment segments (expected to be non-negative) - *default: 10 bp*
4545
*
4646
*
4747
* \details Detects novel junctions from read alignment records using different detection methods.

include/modules/sv_detection_methods/analyze_cigar_method.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* \param[in] cigar_string - CIGAR field of the SAM/BAM file
1111
* \param[in] query_sequence - SEQ field of the SAM/BAM file
1212
* \param[in, out] junctions - vector for storing junctions
13-
* \param[in] min_length - minimum length of variants to detect (default 30 bp)
13+
* \param[in] min_length - minimum length of variants to detect (default 30 bp, expected to be non-negative)
1414
*
1515
* \details This function steps through the CIGAR string and stores junctions with their position in reference and read.
1616
* We distinguish 4 cases of CIGAR operation characters:
@@ -36,4 +36,4 @@ void analyze_cigar(std::string const & read_name,
3636
std::vector<seqan3::cigar> & cigar_string,
3737
seqan3::dna5_vector const & query_sequence,
3838
std::vector<Junction> & junctions,
39-
uint32_t const min_length);
39+
int32_t const min_length);

include/modules/sv_detection_methods/analyze_sa_tag_method.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ void retrieve_aligned_segments(std::string const & sa_string, std::vector<Aligne
4242
* \param[in, out] junctions - vector for storing junctions
4343
* \param[in, out] query_sequence - SEQ field of the SAM/BAM file
4444
* \param[in] read_name - QNAME field of the SAM/BAM file
45-
* \param[in] min_length - minimum length of variants to detect
46-
* \param[in] max_overlap - maximum overlap between alignment segments
45+
* \param[in] min_length - minimum length of variants to detect (expected to be non-negative)
46+
* \param[in] max_overlap - maximum overlap between alignment segments (expected to be non-negative)
4747
*/
4848
void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segments,
4949
std::vector<Junction> & junctions,
5050
seqan3::dna5_vector const & query_sequence,
5151
std::string const & read_name,
52-
uint32_t const min_length,
53-
uint32_t const max_overlap);
52+
int32_t const min_length,
53+
int32_t const max_overlap);
5454

5555
/*! \brief Parse the SA tag from the SAM/BAM alignment of a chimeric/split-aligned read. Build
5656
* [aligned_segments](\ref AlignedSegment), one for each alignment segment of the read.
@@ -65,8 +65,8 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
6565
* \param[in] seq - SEQ field of the SAM/BAM file
6666
* \param[in] sa_tag - SA tag, one tag from the read of the SAM/BAM file
6767
* \param[in] args - command line arguments:\n
68-
* **args.min_var_length** - minimum length of variants to detect\n
69-
* **args.max_overlap** - maximum overlap between alignment segments
68+
* **args.min_var_length** - minimum length of variants to detect (expected to be non-negative)\n
69+
* **args.max_overlap** - maximum overlap between alignment segments (expected to be non-negative)
7070
* \param[in, out] junctions - vector for storing junctions
7171
*/
7272
void analyze_sa_tag(std::string const & query_name,

include/variant_detection/variant_detection.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ void detect_junctions_in_short_reads_sam_file(std::vector<Junction> & junctions,
3232
* **args.alignment_long_reads_file_path** - long reads input file, path to the sam/bam file\n
3333
* **args.methods** - list of methods for detecting junctions
3434
* (0: cigar_string, 1: split_read, 2: read_pairs, 3: read_depth) - *default: all methods*\n
35-
* **args.min_var_length** - minimum length of variants to detect - *default: 30 bp*\n
36-
* **args.max_overlap** - maximum overlap between alignment segments - *default: 10 bp*
35+
* **args.min_var_length** - minimum length of variants to detect (expected to be non-negative) - *default: 30 bp*\n
36+
* **args.max_overlap** - maximum overlap between alignment segments (expected to be non-negative) - *default: 10 bp*
3737
*
3838
*
3939
* \details Detects junctions from the CIGAR strings and supplementary alignment tags of read alignment records.

include/variant_detection/variant_output.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
*
1313
* \param[in] clusters - input junction clusters
1414
* \param[in] args - command line arguments:\n
15-
* **args.min_var_length** - minimum length of variants to detect - *default: 30 bp*\n
16-
* **args.max_var_length** - maximum length of variants to detect - *default: 1,000,000 bp*\n
17-
* **args.max_tol_inserted_length** - longest tolerated inserted sequence at non-INS SV types - *default: 5 bp*
15+
* **args.min_var_length** - minimum length of variants to detect (expected to be non-negative) - *default: 30 bp* \n
16+
* **args.max_var_length** - maximum length of variants to detect (expected to be non-negative) - *default: 1,000,000 bp*\n
17+
* **args.max_tol_inserted_length** - longest tolerated inserted sequence at non-INS SV types (expected to be non-negative) - *default: 5 bp*
1818
* \param[in, out] out_stream - output stream
1919
*
2020
* \details Extracts genomic variants from given junction clusters.
@@ -29,9 +29,9 @@ void find_and_output_variants(std::vector<Cluster> const & clusters,
2929
*
3030
* \param[in] clusters - input junction clusters
3131
* \param[in] args - command line arguments:\n
32-
* **args.min_var_length** - minimum length of variants to detect - *default: 30 bp*\n
33-
* **args.max_var_length** - maximum length of variants to detect - *default: 1,000,000 bp*\n
34-
* **args.max_tol_inserted_length** - longest tolerated inserted sequence at non-INS SV types - *default: 5 bp*
32+
* **args.min_var_length** - minimum length of variants to detect (expected to be non-negative) - *default: 30 bp*\n
33+
* **args.max_var_length** - maximum length of variants to detect (expected to be non-negative) - *default: 1,000,000 bp*\n
34+
* **args.max_tol_inserted_length** - longest tolerated inserted sequence at non-INS SV types (expected to be non-negative) - *default: 5 bp*
3535
* \param[in] output_file_path - output file path
3636
*
3737
* \details Extracts genomic variants from given junction clusters.

src/iGenVar.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,21 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments
5656

5757
// Options - SV specifications:
5858
parser.add_option(args.min_var_length, 'l', "min_var_length",
59-
"Specify what should be the minimum length of your SVs to be detected.",
59+
"Specify what should be the minimum length of your SVs to be detected. "
60+
"This value needs to be non-negative.",
6061
seqan3::option_spec::advanced);
6162
parser.add_option(args.max_var_length, 'x', "max_var_length",
6263
"Specify what should be the maximum length of your SVs to be detected. "
63-
"SVs larger than this threshold can still be output as translocations.",
64+
"SVs larger than this threshold can still be output as translocations. "
65+
"This value needs to be non-negative.",
6466
seqan3::option_spec::advanced);
6567
parser.add_option(args.max_tol_inserted_length, 't', "max_tol_inserted_length",
66-
"Specify what should be the longest tolerated inserted sequence at sites of non-INS SVs.",
68+
"Specify what should be the longest tolerated inserted sequence at sites of non-INS SVs. "
69+
"This value needs to be non-negative.",
6770
seqan3::option_spec::advanced);
6871
parser.add_option(args.max_overlap, 'p', "max_overlap",
69-
"Specify the maximum allowed overlap between two alignment segments.",
72+
"Specify the maximum allowed overlap between two alignment segments. "
73+
"This value needs to be non-negative.",
7074
seqan3::option_spec::advanced);
7175
}
7276

@@ -167,6 +171,28 @@ int main(int argc, char ** argv)
167171
return -1;
168172
}
169173

174+
// Check that the given parameters are non-negative.
175+
if (args.min_var_length < 0)
176+
{
177+
seqan3::debug_stream << "[Error] You gave a negative min_var_length parameter.\n";
178+
return -1;
179+
}
180+
if (args.max_var_length < 0)
181+
{
182+
seqan3::debug_stream << "[Error] You gave a negative max_var_length parameter.\n";
183+
return -1;
184+
}
185+
if (args.max_tol_inserted_length < 0)
186+
{
187+
seqan3::debug_stream << "[Error] You gave a negative max_tol_inserted_length parameter.\n";
188+
return -1;
189+
}
190+
if (args.max_overlap < 0)
191+
{
192+
seqan3::debug_stream << "[Error] You gave a negative max_overlap parameter.\n";
193+
return -1;
194+
}
195+
170196
detect_variants_in_alignment_file(args);
171197

172198
return 0;

src/modules/sv_detection_methods/analyze_cigar_method.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ void analyze_cigar(std::string const & read_name,
1515
std::vector<seqan3::cigar> & cigar_string,
1616
seqan3::dna5_vector const & query_sequence,
1717
std::vector<Junction> & junctions,
18-
uint32_t const min_length)
18+
int32_t const min_length)
1919
{
2020
// Step through CIGAR string and store current position in reference and read
2121
int32_t pos_ref = query_start_pos;

src/modules/sv_detection_methods/analyze_sa_tag_method.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
5757
std::vector<Junction> & junctions,
5858
seqan3::dna5_vector const & query_sequence,
5959
std::string const & read_name,
60-
uint32_t const min_length,
61-
uint32_t const max_overlap)
60+
int32_t const min_length,
61+
int32_t const max_overlap)
6262
{
6363
for (size_t i = 1; i < aligned_segments.size(); i++)
6464
{
@@ -67,7 +67,7 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
6767
int32_t distance_on_read = next.get_query_start() - current.get_query_end();
6868
// Check that the overlap between two consecutive alignment segments
6969
// of the read is lower than the given threshold
70-
if (distance_on_read >= -static_cast<int32_t>(max_overlap))
70+
if (distance_on_read >= -max_overlap)
7171
{
7272
int32_t mate1_pos;
7373
if (current.orientation == strand::forward)
@@ -99,7 +99,7 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
9999
// have a large distance on the reference (e.g. deletion, inversion, tandem duplication), or
100100
// have a large distance on the read (e.g. insertion)
101101
if (current.ref_name != next.ref_name ||
102-
abs(distance_on_ref) >= min_length ||
102+
std::abs(distance_on_ref) >= min_length ||
103103
distance_on_read >= min_length)
104104
{
105105
Breakend mate1{current.ref_name,

test/api/detection_test.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TEST(junction_detection, cigar_string_simple_del)
2727
// Deletion smaller than minimum variant size
2828
{
2929
std::vector<Junction> junctions_res{};
30-
uint64_t const min_var_length = 10;
30+
int32_t const min_var_length = 10;
3131
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
3232

3333
EXPECT_EQ(junctions_res.size(), 0);
@@ -36,7 +36,7 @@ TEST(junction_detection, cigar_string_simple_del)
3636
// Deletion larger than minimum variant size
3737
{
3838
std::vector<Junction> junctions_res{};
39-
uint64_t const min_var_length = 5;
39+
int32_t const min_var_length = 5;
4040
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
4141

4242
Breakend new_breakend_1 {chromosome, 15, strand::forward};
@@ -66,7 +66,7 @@ TEST(junction_detection, cigar_string_del_padding)
6666
seqan3::dna5_vector seq = {"GGGCTCATCGATCGATTTCGGATCGGGGGGCCCCCATTTTAAACGGCCCC"_dna5};
6767

6868
std::vector<Junction> junctions_res{};
69-
uint64_t const min_var_length = 5;
69+
int32_t const min_var_length = 5;
7070
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
7171

7272
Breakend new_breakend_1 {chromosome, 15, strand::forward};
@@ -94,7 +94,7 @@ TEST(junction_detection, cigar_string_simple_ins)
9494
seqan3::dna5_vector seq = {"GGGCTCATCGATCGATTTCGGATCGGGGGGCCCCCATTTTAAACGGCCCC"_dna5};
9595

9696
std::vector<Junction> junctions_res{};
97-
uint64_t const min_var_length = 5;
97+
int32_t const min_var_length = 5;
9898
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
9999

100100
Breakend new_breakend_1 {chromosome, 9, strand::forward};
@@ -122,7 +122,7 @@ TEST(junction_detection, cigar_string_ins_hardclip)
122122
seqan3::dna5_vector seq = {"GGGCTCATCGATCGATTTCGGATCGGGGGGCCCCCATTTTAAACGGCCCC"_dna5};
123123

124124
std::vector<Junction> junctions_res{};
125-
uint64_t const min_var_length = 5;
125+
int32_t const min_var_length = 5;
126126
analyze_cigar(read_name, chromosome, query_start_pos, cigar_string, seq, junctions_res, min_var_length);
127127

128128
Breakend new_breakend_1 {chromosome, 9, strand::forward};

test/api/input_file_test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ std::string const default_alignment_short_reads_file_path = DATADIR"paired_end_m
1212
std::string const default_alignment_long_reads_file_path = DATADIR"simulated.minimap2.hg19.coordsorted_cutoff.sam";
1313
std::filesystem::path const empty_output_path{};
1414
std::vector<detection_methods> const default_methods{cigar_string, split_read, read_pairs, read_depth};
15-
constexpr uint64_t default_min_length = 30;
16-
constexpr uint64_t default_max_overlap = 10;
15+
constexpr int32_t default_min_length = 30;
16+
constexpr int32_t default_max_overlap = 10;
1717

1818
// Explanation for the strings:
1919
// chr21\t41972615\tForward\tchr21\t41972616\tForward\t1\t1681

0 commit comments

Comments
 (0)