Skip to content

Commit 2fd94c5

Browse files
committed
Latest experiments and fixes
1 parent 1009d59 commit 2fd94c5

File tree

11 files changed

+60
-25
lines changed

11 files changed

+60
-25
lines changed

.stxxl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
# file path,maximum capacity of the disk,access method
2-
disk=/tmp/stxxl,1G,syscall unlink
2+
disk=/d2/fernanda/stxxl,370GB,syscall unlink

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,11 +44,12 @@ add_library(vcf_rlz STATIC ${SOURCES} ${VCF_RLZ_INCLUDE_DIRS})
4444
add_executable(fullbuild GenerateFromVCFFiles.cpp)
4545
add_executable(prebuild GenerateFromPreloadedFile.cpp)
4646
add_executable(timeC "${PROJECT_SOURCE_DIR}/experiments/ConstructionTime.cpp")
47+
add_executable(timeF "${PROJECT_SOURCE_DIR}/experiments/FindSnippetTime.cpp")
4748
add_executable(test main.cpp)
4849

4950
#target_include_directories(process PUBLIC ${STXXL_INCLUDE_DIRS} ${VCF_RLZ_INCLUDE_DIRS})
5051
target_link_libraries(vcf_rlz stxxl sdsl divsufsort divsufsort64 RLZ)
5152
target_link_libraries(fullbuild vcf_rlz sdsl RLZ stxxl sdsl divsufsort divsufsort64)
5253
target_link_libraries(prebuild vcf_rlz sdsl RLZ sdsl divsufsort divsufsort64)
5354
target_link_libraries(timeC vcf_rlz sdsl RLZ stxxl sdsl divsufsort divsufsort64)
54-
target_link_libraries(timeC vcf_rlz sdsl RLZ sdsl divsufsort divsufsort64)
55+
target_link_libraries(timeF vcf_rlz sdsl RLZ sdsl divsufsort divsufsort64)

GenerateFromPreloadedFile.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ int main(int argc, char **argv)
2424
VCFParsingInterpreter* Interpreter = new VCFParsingInterpreter();
2525

2626
Interpreter->InitializeFromPreloadedFile(argv[1]);
27-
27+
// cout << "Interpreter of size " << Interpreter->GetSize() / (1024 * 1024) << " MB" << endl;
2828
string response;
2929
while(response != "2")
3030
{

GenerateFromVCFFiles.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,12 @@ int main(int argc, char **argv)
7373

7474
cout << "[RLZ] Sorting process finished!" << endl;
7575

76+
cout << "[RLZ] Start indexing process..." << endl;
7677
VCFParsingInterpreter* Interpreter = new VCFParsingInterpreter();
7778

7879
Interpreter->InitializeFromParsing(argv[1]);
80+
81+
cout << "[RLZ] Indexing ended!" << endl;
7982

8083
string response;
8184
while(response != "2")

RLZ

VCF_parsing

experiments/ConstructionTime.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,10 @@ int main(int argc, char **argv)
5252
}
5353
timeElapsed = timer.getMilisec();
5454

55-
results_file << destPath << "\t" << "PARSE" << "\t" << timeElapsed << "\t" << "?" << "\n";
55+
results_file << destPath << "\t" << "PARSE" << "\t" << timeElapsed << " (ms)" << "\n";
5656
results_file.flush();
5757
cout << " PARSE Ended: " << endl;
58-
cout << destPath << "\t" << "PARSE" << "\t" << timeElapsed << "\t" << "?" << "\n";
58+
cout << destPath << "\t" << "PARSE" << "\t" << timeElapsed << " (ms)" << "\n";
5959
}
6060
else
6161
{
@@ -69,10 +69,10 @@ int main(int argc, char **argv)
6969
VCFParsingSorter* Sorter = new VCFParsingSorter();
7070
Sorter->StartProcess(destPath);
7171
timeElapsed = timer.getMilisec();
72-
results_file << destPath << "\t" << "SORT" << "\t" << timeElapsed << "\t" << "?" << "\n";
72+
results_file << destPath << "\t" << "SORT" << "\t" << timeElapsed << " (ms)" << "\n";
7373
results_file.flush();
7474
cout << " SORT Ended: " << endl;
75-
cout << destPath << "\t" << "SORT" << "\t" << timeElapsed << "\t" << "?" << "\n";
75+
cout << destPath << "\t" << "SORT" << "\t" << timeElapsed << " (ms)" << "\n";
7676
delete Sorter;
7777
}
7878
else
@@ -88,10 +88,10 @@ int main(int argc, char **argv)
8888
Interpreter.InitializeFromParsing(destPath);
8989

9090
timeElapsed = timer.getMilisec();
91-
results_file << destPath << "\t" << "BUILD" << "\t" << timeElapsed << "\t" << size << "\n";
91+
results_file << destPath << "\t" << "BUILD" << "\t" << timeElapsed << " (ms)" << "\n";
9292
results_file.flush();
9393
cout << " BUILD Ended: " << endl;
94-
cout << destPath << "\t" << "BUILD" << "\t" << timeElapsed << "\t" << size << "\n";
94+
cout << destPath << "\t" << "BUILD" << "\t" << timeElapsed << " (ms)" << "\n";
9595
Interpreter.SaveInterpreter();
9696
}
9797
else

experiments/FindSnippetTime.cpp

Lines changed: 37 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -32,28 +32,59 @@ int main(int argc, char **argv)
3232
VCFParsingInterpreter* Interpreter = new VCFParsingInterpreter();
3333

3434
Interpreter->InitializeFromPreloadedFile(argv[2]);
35+
36+
cout << "Structure size: " << Interpreter->GetSize()/ (1024 * 1024) << " (MB)" << endl;
37+
38+
Interpreter->Index->querytime_p1 = 0;
39+
Interpreter->Index->querytime_p2 = 0;
40+
Interpreter->Index->querytime_p3 = 0;
41+
Interpreter->Index->querytime_p4 = 0;
42+
Interpreter->Index->occs_a = 0;
43+
Interpreter->Index->occs_b = 0;
44+
Interpreter->Index->occs_c = 0;
3545
NanoTimer timer;
46+
double q_p1 = 0, q_p2 = 0, q_p3 = 0, q_p4 = 0;
3647
string aux;
37-
ll occ = 0;
48+
ll occ = 0, occ_a = 0, occ_b = 0, occ_c = 0;
3849

3950
ifstream patterns_file(argv[1]);
4051

4152
cout << "Init pattern search of: " << argv[1] << endl;
4253

43-
timer.reset();
4454

4555
while(getline(patterns_file, aux))
4656
{
4757
occ += Interpreter->FindSnippetExperimental(aux);
58+
// Recover values
59+
q_p1 += Interpreter->Index->querytime_p1;
60+
q_p2 += Interpreter->Index->querytime_p2;
61+
q_p3 += Interpreter->Index->querytime_p3;
62+
q_p4 += Interpreter->Index->querytime_p4;
63+
occ_a += Interpreter->Index->occs_a;
64+
occ_b += Interpreter->Index->occs_b;
65+
occ_c += Interpreter->Index->occs_c;
66+
67+
Interpreter->Index->querytime_p1 = 0;
68+
Interpreter->Index->querytime_p2 = 0;
69+
Interpreter->Index->querytime_p3 = 0;
70+
Interpreter->Index->querytime_p4 = 0;
71+
Interpreter->Index->occs_a = 0;
72+
Interpreter->Index->occs_b = 0;
73+
Interpreter->Index->occs_c = 0;
74+
4875
}
4976

50-
double timeElapsed = timer.getMilisec();
5177

5278
if(occ != 0)
5379
{
54-
cout << timeElapsed << " (ms)" << endl;
55-
cout << occ << " (occurences)" << endl;
56-
cout << (timeElapsed / occ) << " (real elapsed (ms))" << endl;
80+
cout << "[TT] " << q_p4 << endl;
81+
cout << "[T1] " << q_p1 << endl;
82+
cout << "[T2] " << q_p2 << endl;
83+
cout << "[T3] " << q_p3 << endl;
84+
cout << "[OT] " << occ << endl;
85+
cout << "[O1] " << occ_a << endl;
86+
cout << "[O2] " << occ_b << endl;
87+
cout << "[O3] " << occ_c << endl;
5788
}
5889

5990
cout << " Finished pattern search of: " << argv[1] << endl;

include/VCFParsingInterpreter.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,6 @@ class VCFParsingInterpreter
5353
ll rel_pos_chrom;
5454

5555
// Reconstruction structures
56-
RelzIndexReference *Index;
5756
sd_vector<> bit_vector_S_i;
5857
sd_vector<>::rank_1_type rank_S_i;
5958
sd_vector<>::select_1_type select_S_i;
@@ -63,7 +62,7 @@ class VCFParsingInterpreter
6362
VCFParsingInterpreter();
6463

6564
// ~VCFParsingInterpreter();
66-
65+
RelzIndexReference *Index;
6766
vector<pair<sampleID, ll>> FindSnippet(string snippet, bool show = false);
6867
ll FindSnippetExperimental(string snippet, bool show = false);
6968
void InitializeFromParsing(char *destination_path);

src/VCFParsingInterpreter.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ void VCFParsingInterpreter::InitializeFromPreloadedFile(char *folder_path)
2222
Index->load(Destination_aux);
2323

2424
// Recover ID name data
25-
IDInfo_file_path = Destination_folder_name + ".idinfo";
25+
IDInfo_file_path = Destination_aux + ".idinfo";
2626

2727
// Recover numeric data
2828
fstream src(Destination_aux + ".data", INPUT_BINARY_FILE);
@@ -63,7 +63,7 @@ void VCFParsingInterpreter::InitializeFromParsing(string destination_path)
6363
cout << "[RLZ] Building factors from phrases" << endl;
6464
timer.reset();
6565
BuildFactors();
66-
cout << "F = " << factors.size() << "S = " << S_size << endl;
66+
// cout << "F = " << factors.size() << "S = " << S_size << endl;
6767
// for (pair<ll, ll> factor : factors)
6868
// {
6969
// cout << "(" << factor.first << "," << factor.second << ")" << endl;

0 commit comments

Comments
 (0)