Skip to content

Commit 1058945

Browse files
committed
Add short sequence optimization: we can store up to k=60 inline
1 parent 23cdc72 commit 1058945

File tree

6 files changed

+513
-143
lines changed

6 files changed

+513
-143
lines changed

src/common/assembly_graph/construction/debruijn_graph_constructor.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,10 @@ class FastGraphFromSequencesConstructor {
515515
{
516516
TIME_TRACE_SCOPE("ConstructGraph::CollectLinkRecords");
517517
CollectLinkRecords(helper, graph, records, sequences);
518+
size_t short_seqs = 0;
519+
for (const auto &s : sequences)
520+
short_seqs += s.is_short();
521+
INFO("Total short graph edges: " << 2*short_seqs);
518522
}
519523
INFO("Ordering link records")
520524
// We sort by Vertex and then by EdgeID and RC/Start mask in order to combine together records associated with the same vertex with a special order in each group

src/common/assembly_graph/core/observable_graph.hpp

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class ObservableGraph: public GraphCore<DataMaster> {
6464
void PrintHandlersNames() const;
6565

6666
void FireGameOver() const;
67-
67+
6868
//todo make Fire* protected once again with helper friend class
6969
void FireAddVertex(VertexId v) const;
7070

@@ -119,7 +119,7 @@ class ObservableGraph: public GraphCore<DataMaster> {
119119
void DeleteVertex(VertexId v);
120120

121121
void ForceDeleteVertex(VertexId v);
122-
122+
123123
using base::conjugate;
124124

125125
EdgeId AddEdge(EdgeData data, EdgeId id1 = 0, EdgeId id2 = 0);
@@ -419,21 +419,22 @@ template<class DataMaster>
419419
std::vector<typename ObservableGraph<DataMaster>::EdgeId>
420420
ObservableGraph<DataMaster>::CorrectMergePath(const std::vector<EdgeId>& path) const {
421421
for (size_t i = 0; i < path.size(); i++) {
422-
if (path[i] == base::conjugate(path[i])) {
423-
std::vector<EdgeId> result;
424-
if (i < path.size() - 1 - i) {
425-
for (size_t j = 0; j < path.size(); j++)
426-
result.push_back(base::conjugate(path[path.size() - 1 - j]));
427-
i = path.size() - 1 - i;
428-
} else {
429-
result = path;
430-
}
431-
size_t size = 2 * i + 1;
432-
for (size_t j = result.size(); j < size; j++) {
433-
result.push_back(base::conjugate(result[size - 1 - j]));
434-
}
435-
return result;
422+
if (path[i] != base::conjugate(path[i]))
423+
continue;
424+
425+
std::vector<EdgeId> result;
426+
if (i < path.size() - 1 - i) {
427+
for (size_t j = 0; j < path.size(); j++)
428+
result.push_back(base::conjugate(path[path.size() - 1 - j]));
429+
i = path.size() - 1 - i;
430+
} else {
431+
result = path;
432+
}
433+
size_t size = 2 * i + 1;
434+
for (size_t j = result.size(); j < size; j++) {
435+
result.push_back(base::conjugate(result[size - 1 - j]));
436436
}
437+
return result;
437438
}
438439
return path;
439440
}
@@ -445,13 +446,12 @@ typename ObservableGraph<DataMaster>::EdgeId
445446
std::vector<uint32_t> overlaps) {
446447
VERIFY(!path.empty());
447448
for (size_t i = 0; i < path.size(); i++)
448-
for (size_t j = i + 1; j < path.size(); j++) {
449+
for (size_t j = i + 1; j < path.size(); j++)
449450
VERIFY(path[i] != path[j]);
450-
}
451+
451452
if (path.size() == 1) {
452-
TRACE(
453-
"Path of single edge " << base::str(*(path.begin())) << ". Nothing to merge.");
454-
};
453+
TRACE("Path of single edge " << base::str(*(path.begin())) << ". Nothing to merge.");
454+
}
455455
// cerr << "Merging " << PrintDetailedPath(path) << endl;
456456
// cerr << "Conjugate " << PrintConjugatePath(path) << endl;
457457
auto corrected_path = CorrectMergePath(path);

src/common/io/reads/binary_converter.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ class BinaryWriter {
4646

4747
template<class Read>
4848
struct BarcodeTagger {
49-
uint64_t operator()(const Read &r) const { return r.aux().sequence().data()[0]; }
49+
uint64_t operator()(const Read &r) const {
50+
const Sequence s = r.aux().sequence();
51+
return *reinterpret_cast<const uint64_t*>(s.data());
52+
}
5053
};
5154

5255
public:

0 commit comments

Comments
 (0)