Skip to content

Commit 9a2ed71

Browse files
committed
Merge pull request #136 from xosh/benchmark_lcp
Benchmark lcp
2 parents f18fdaf + 2af8a33 commit 9a2ed71

File tree

16 files changed

+378
-0
lines changed

16 files changed

+378
-0
lines changed

benchmark/lcp/.gitignore

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
*
2+
!.gitignore
3+
!lcp.config
4+
!README.md
5+
!bin/
6+
!src/
7+
!visualize/
8+
!compile_options.config
9+
!Makefile
10+
!results/
11+
!test_case.config

benchmark/lcp/Makefile

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Shared helper definitions. Presumably provides MY_CXX, MY_CXX_FLAGS and the
# config_ids / config_select / dim functions used below -- verify in Make.helper.
include ../../Make.helper

CFLAGS  = $(MY_CXX_FLAGS)
SRC_DIR = src
BIN_DIR = bin
LIBS    = -lsdsl -ldivsufsort -ldivsufsort64

# Values read once from the configuration files.
C_OPTIONS := $(call config_ids,compile_options.config)
TC_IDS    := $(call config_ids,test_case.config)
LCP_IDS   := $(call config_ids,lcp.config)

# Field 2 of every test case entry (used as the test case path by precalc%).
DL = $(foreach TC_ID,$(TC_IDS),$(call config_select,test_case.config,$(TC_ID),2))

# One executable per LCP_ID: bin/build_<LCP_ID>.
LCP_EXECS = $(foreach LCP_ID,$(LCP_IDS),$(BIN_DIR)/build_$(LCP_ID))

# One result file per test case: results/<TC_ID>.
RES_FILES = $(foreach TC_ID,$(TC_IDS),results/$(TC_ID))

# Concatenation of all per-test-case result files.
RESULT_FILE = results/all.txt
20+
21+
# Neither target produces a file of its own name.
.PHONY: execs timing

# Build the SA/BWT preparation tool and one LCP builder per LCP_ID.
execs: $(BIN_DIR)/prep_sa_bwt $(LCP_EXECS)

# Run every test case, concatenate the per-test-case results into
# $(RESULT_FILE) and run make in the visualize directory.
# $(MAKE) -C is used instead of "cd visualize;make" so that a missing directory
# is an error (rather than running make in the current directory) and so
# that command-line flags are passed on to the sub-make.
timing: execs $(RES_FILES)
	@cat $(RES_FILES) > $(RESULT_FILE)
	@$(MAKE) -C visualize
26+
27+
# Compile the tool that loads the text and constructs SA and BWT.
# Uses $< / $@ so the rule follows SRC_DIR and BIN_DIR (the output path was
# previously hard-coded as bin/prep_sa_bwt). The space before the line
# continuation keeps the -L argument separate from the source file argument.
$(BIN_DIR)/prep_sa_bwt: $(SRC_DIR)/create_sa_bwt.cpp
	@echo "Compiling prep_sa_bwt"
	@$(MY_CXX) $(CFLAGS) $(C_OPTIONS) -L${SDSLLITE}/lib \
	    $< -I${SDSLLITE}/include -o $@ $(LIBS)
31+
32+
# precalc<TC_ID>: start the result file results/<TC_ID> with the test case
# header (ID, LaTeX name, size in bytes) and append the output of prep_sa_bwt
# for the test case file.
# The tool is listed as a prerequisite because the recipe executes it.
# Fields 2 and 3 of test_case.config are used as path and LaTeX name.
precalc%: test_case.config $(DL) lcp.config $(BIN_DIR)/prep_sa_bwt
	$(eval TC_ID:=$(call dim,1,$*))
	$(eval TC_TEX_NAME:=$(call config_select,test_case.config,$(TC_ID),3))
	$(eval TC_PATH:=$(call config_select,test_case.config,$(TC_ID),2))
	$(eval TC_SIZE:=$(shell wc -c <$(TC_PATH)))
	@echo "Running test case: $(TC_ID)"
	@echo "# TC_ID = $(TC_ID)" > results/$(TC_ID)
	@echo "# TC_TEX_NAME = $(TC_TEX_NAME)">> results/$(TC_ID)
	@echo "# TC_SIZE = $(TC_SIZE)">> results/$(TC_ID)
	@$(BIN_DIR)/prep_sa_bwt $(TC_PATH) >> results/$(TC_ID)
43+
44+
# results/<TC_ID>: after the precalc step, run every LCP executable and append
# its output to the result file; lcp_tmp.sdsl and isa_tmp.sdsl are removed
# after each run, and all remaining *.sdsl files at the end.
# The executables are prerequisites because the recipe runs them.
# NOTE(review): the executables are started via $(shell ...), so their exit
# status is not checked by make.
results/%: precalc% $(LCP_EXECS)
	@$(foreach LCP_EXEC,$(LCP_EXECS),$(shell $(LCP_EXEC) >>$@;rm -f lcp_tmp.sdsl isa_tmp.sdsl))
	@rm -f *.sdsl
47+
48+
# bin/build_<LCP_ID>: compile create_lcp.cpp for one lcp.config entry.
# Field 2 of the entry is passed as the LCP_TYPE macro, the ID as LCPID.
$(BIN_DIR)/build_%: $(SRC_DIR)/create_lcp.cpp lcp.config
	$(eval LCP_ID:=$(call dim,1,$*))
	$(eval LCP_TYPE:=$(call config_select,lcp.config,$(LCP_ID),2))
	@echo "Compiling build_$*"
	@$(MY_CXX) $(CFLAGS) $(C_OPTIONS) \
	    -DLCP_TYPE="$(LCP_TYPE)" -DLCPID="$(LCP_ID)" \
	    -L${SDSLLITE}/lib $< -I${SDSLLITE}/include \
	    -o $@ $(LIBS)
54+
55+
56+
include ../Make.download
57+
58+
# Cleanup targets never create files of their own name; declaring them phony
# keeps them working even if such a file exists.
.PHONY: clean-build clean-result cleanall

# Remove the compiled benchmark executables.
clean-build:
	@echo "Remove executables"
	rm -f $(BIN_DIR)/build*
	rm -f $(BIN_DIR)/prep*

# Remove the result files.
clean-result:
	@echo "Remove results"
	rm -f results/*

# Remove executables and results.
cleanall: clean-build clean-result

benchmark/lcp/README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Benchmarking LCP algorithms
2+
3+
## Methodology
4+
5+
Explored dimensions:
6+
7+
* lcp algorithms
8+
* test cases
9+
10+
## Directory structure
11+
12+
* [bin](./bin): Contains the executables of the project.
13+
* [results](./results): Contains the results of the experiments.
14+
* [src](./src): Contains the source code of the benchmark.
15+
* [visualize](./visualize): Contains an `R` script which generates
16+
a report in LaTeX format.
17+
18+
## Prerequisites
19+
20+
* For the visualization you need the following software:
21+
- [R][RPJ] with package `xtable`. You can install the
22+
package by calling `install.packages("xtable")` in R.
23+
- [pdflatex][LT] to generate the pdf reports.
24+
25+
## Usage
26+
27+
* `make timing` compiles the programs, downloads
28+
the test instances, builds the LCP arrays and generates a report located at
29+
`visualize/lcp.pdf`. The raw numbers of the timings
30+
can be found in `results/all.txt`.
31+
* All created binaries and test results can be deleted
32+
by calling `make cleanall`.
33+
34+
## Customization of the benchmark
35+
36+
The project contains several configuration files:
37+
38+
* [lcp.config][LCPCONFIG]: Specify different LCP algorithms.
39+
* [test_case.config][TCCONF]: Specify test instances by ID, path, LaTeX-name
40+
for the report, and download URL.
41+
* [compile_options.config][CCONF]: Specify compile options by option string.
42+
43+
Note that the benchmark will execute every combination of lcp algorithms and test cases.
44+
45+
[RPJ]: http://www.r-project.org/ "R"
46+
[LT]: http://www.tug.org/applications/pdftex/ "pdflatex"
47+
[LCPCONFIG]: ./lcp.config "lcp.config"
48+
[TCCONF]: ./test_case.config "test_case.config"
49+
[CCONF]: ./compile_options.config "compile_options.config"

benchmark/lcp/bin/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!.gitignore
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Compile options
2+
-O3 -funroll-loops -fomit-frame-pointer -ffast-math -DNDEBUG

benchmark/lcp/lcp.config

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# This file specifies the LCP construction algorithms that are used in the benchmark.
2+
#
3+
# Each LCP algorithm is specified by a 4-tuple: LCP_ID;LCP_ALGORITHM;LCP_LATEX_NAME;BWT_NEEDED
4+
# * LCP_ID : An identifier for the LCP algorithm. Only letters and underscores are allowed in ID.
5+
# * LCP_ALGORITHM : Corresponding lcp algorithm.
6+
# * LCP_LATEX_NAME: LaTeX name for output in the benchmark report.
7+
# * BWT_NEEDED : T(rue) if lcp algorithm needs bwt as input, otherwise F(alse).
8+
kasai;construct_lcp_kasai<8>;lcp-kasai;F
9+
phi_algorithm;construct_lcp_PHI<8>;lcp-$\Phi$;F
10+
semi_extern_phi;construct_lcp_semi_extern_PHI;lcp-semi-extern-$\Phi$;F
11+
go;construct_lcp_go;lcp-go;T
12+
goPhi;construct_lcp_goPHI;lcp-go-$\Phi$;T
13+
bwtb;construct_lcp_bwt_based;lcp-bwt-based;T
14+
bwtb2;construct_lcp_bwt_based2;lcp-bwt-based2;T

benchmark/lcp/results/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!.gitignore

benchmark/lcp/src/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*
2+
!.gitignore
3+
!create_lcp.cpp
4+
!create_sa_bwt.cpp

benchmark/lcp/src/create_lcp.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <sdsl/sdsl_concepts.hpp>
2+
#include <sdsl/int_vector.hpp>
3+
#include <sdsl/construct.hpp>
4+
#include <sdsl/construct_lcp.hpp>
5+
#include <string>
6+
#include <chrono>
7+
8+
using namespace sdsl;
9+
using namespace std;
10+
using namespace std::chrono;
11+
12+
#define S(x) #x
13+
#define SX(x) S(x)
14+
15+
int main(int argc, char** argv)
16+
{
17+
memory_monitor::start();
18+
string dir = ".";
19+
string id = "tmp";
20+
cache_config config(false, dir, id);
21+
22+
register_cache_file(conf::KEY_TEXT, config);
23+
register_cache_file(conf::KEY_SA, config);
24+
register_cache_file(conf::KEY_BWT, config);
25+
26+
auto start = high_resolution_clock::now();
27+
LCP_TYPE(config);
28+
auto stop = high_resolution_clock::now();
29+
memory_monitor::stop();
30+
cout << "# " SX(LCPID) "_TIME = " << duration_cast<milliseconds>(stop-start).count()/(double)1000 << endl;
31+
cout << "# " SX(LCPID) "_MMPEAK = "<< memory_monitor::peak() << endl;
32+
33+
return 0;
34+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include <sdsl/sdsl_concepts.hpp>
2+
#include <sdsl/int_vector.hpp>
3+
#include <sdsl/construct.hpp>
4+
#include <sdsl/construct_sa.hpp>
5+
#include <sdsl/construct_bwt.hpp>
6+
#include <string>
7+
#include <chrono>
8+
#include <iostream>
9+
10+
using namespace sdsl;
11+
using namespace std;
12+
using namespace std::chrono;
13+
14+
typedef bit_vector::size_type size_type;
15+
16+
//argv[1] = test file
17+
int main(int argc, char** argv)
18+
{
19+
memory_monitor::start();
20+
string file = argv[1];
21+
uint8_t num_bytes = 1; // Byte Alphabet
22+
string dir = ".";
23+
string id = "tmp";
24+
cache_config config(false, dir, id);
25+
26+
//load text
27+
auto start = high_resolution_clock::now();
28+
{
29+
int_vector<8> text;
30+
load_vector_from_file(text, file, num_bytes);
31+
if (contains_no_zero_symbol(text, file)) {
32+
append_zero_symbol(text);
33+
store_to_cache(text, conf::KEY_TEXT, config);
34+
}
35+
register_cache_file(conf::KEY_TEXT, config);
36+
}
37+
auto stop = high_resolution_clock::now();
38+
memory_monitor::stop();
39+
cout << "# TXT_TIME = " << duration_cast<milliseconds>(stop-start).count()/(double)1000 << endl;
40+
cout << "# TXT_MMPEAK = " << memory_monitor::peak() << endl;
41+
42+
//construct sa
43+
memory_monitor::start();
44+
start = high_resolution_clock::now();
45+
{
46+
construct_sa<8>(config);
47+
register_cache_file(conf::KEY_SA, config);
48+
}
49+
stop = high_resolution_clock::now();
50+
memory_monitor::stop();
51+
cout << "# SA_TIME = " << duration_cast<milliseconds>(stop-start).count()/(double)1000 << endl;
52+
cout << "# SA_MMPEAK = " << memory_monitor::peak() << endl;
53+
54+
//construct bwt
55+
memory_monitor::start();
56+
start = high_resolution_clock::now();
57+
{
58+
construct_bwt<8>(config);
59+
register_cache_file(conf::KEY_BWT, config);
60+
}
61+
stop = high_resolution_clock::now();
62+
memory_monitor::stop();
63+
cout << "# BWT_TIME = " << duration_cast<milliseconds>(stop-start).count()/(double)1000 <<endl;
64+
cout << "# BWT_MMPEAK = "<< memory_monitor::peak() << endl;
65+
66+
return 0;
67+
}
68+

0 commit comments

Comments
 (0)