Skip to content

Commit 061f3c6

Browse files
committed
Support legacy_matrix in cfpq_eval
1 parent 0fb8a32 commit 061f3c6

7 files changed

+163
-34
lines changed

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,19 @@ For more details, refer to [docs/cli.md](docs/cli.md).
1818

1919
## Evaluation
2020

21-
The CFPQ_PyAlgo project includes a [CFPQ evaluator](cfpq_eval) tool for evaluating the performance
21+
The CFPQ_PyAlgo project includes a [CFPQ evaluator](cfpq_eval/README.md) tool for evaluating the performance
2222
of various CFPQ solvers.
2323

24-
For more details on [CFPQ evaluator](cfpq_eval) usage, refer to [docs/eval.md](docs/eval.md).
24+
For more details on [CFPQ evaluator](cfpq_eval/README.md) usage, refer to [docs/eval.md](docs/eval.md).
2525

26-
We used the [CFPQ evaluator](cfpq_eval) to compare our solver, FastMatrixCFPQ, with five
26+
We used the [CFPQ evaluator](cfpq_eval/README.md) to compare our solver, [FastMatrixCFPQ](cfpq_cli/README.md), with five
2727
state-of-the-art competitors:
2828
[PEARL](https://figshare.com/articles/dataset/ASE_2023_artifact/23702271),
2929
[POCR](https://github.com/kisslune/POCR),
3030
[KotGLL](https://github.com/vadyushkins/kotgll),
3131
[Graspan](https://github.com/Graspan/Graspan-C), and
3232
[Gigascale](https://bitbucket.org/jensdietrich/gigascale-pointsto-oopsla2015/src),
33-
as well as with the previous version of our solver, MatrixCFPQ.
33+
as well as with the previous version of our solver, [MatrixCFPQ](src/README.md).
3434
The input data was provided by the
3535
[CFPQ_Data](https://github.com/FormalLanguageConstrainedPathQuerying/CFPQ_Data),
3636
[CFPQ_JavaGraphMiner](https://github.com/FormalLanguageConstrainedPathQuerying/CFPQ_JavaGraphMiner), and

cfpq_eval/runners/all_pairs_cflr_tool_runner_facade.py

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
GraspanAllPairsCflrToolRunner)
99
from cfpq_eval.runners.kotgll_all_pairs_cflr_tool_runner import (
1010
KotgllAllPairsCflrToolRunner)
11+
from cfpq_eval.runners.legacy_matrix_all_pairs_cflr_tool_runner import (
12+
LegacyMatrixAllPairsCflrToolRunner)
1113
from cfpq_eval.runners.pearl_algo_all_pairs_cflr_tool_runner import (
1214
PearlAllPairsCflrToolRunner)
1315
from cfpq_eval.runners.pocr_algo_all_pairs_cflr_tool_runner import (
@@ -28,6 +30,7 @@ def run_appropriate_all_pairs_cflr_tool(
2830
"gigascale": GigascaleAllPairsCflrToolRunner,
2931
"graspan": GraspanAllPairsCflrToolRunner,
3032
"kotgll": KotgllAllPairsCflrToolRunner,
33+
"legacy_matrix": LegacyMatrixAllPairsCflrToolRunner,
3134
}.get(algo_settings, PyAlgoAllPairsCflrToolRunner)(
3235
algo_settings, graph_path, grammar_path, timeout_sec
3336
).run()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import os
2+
import re
3+
import subprocess
4+
from pathlib import Path
5+
from typing import Optional
6+
7+
import pandas as pd
8+
9+
from cfpq_eval.runners.all_pairs_cflr_tool_runner import (
10+
AbstractAllPairsCflrToolRunner, CflrToolRunResult
11+
)
12+
from cfpq_model.cnf_grammar_template import CnfGrammarTemplate, Symbol
13+
from cfpq_model.label_decomposed_graph import LabelDecomposedGraph
14+
from cfpq_model.model_utils import explode_indices
15+
16+
17+
class LegacyMatrixAllPairsCflrToolRunner(AbstractAllPairsCflrToolRunner):
    """Runner that evaluates the legacy Matrix solver via ``python3 -m src.legacy_cflr``.

    The graph and grammar are read in POCR format, converted to the textual
    formats written by ``_write_legacy_graph`` / ``_write_legacy_grammar``
    and stored in a ``legacy_matrix`` sub-folder next to the original files.
    """

    @property
    def base_command(self) -> Optional[str]:
        """Write the converted inputs to disk and return the command to run.

        NOTE: reading this property has side effects -- it (re)creates the
        ``legacy_matrix`` folders and rewrites both converted files.
        """
        grammar = CnfGrammarTemplate.read_from_pocr_cnf_file(self.grammar_path)
        graph = LabelDecomposedGraph.read_from_pocr_graph_file(self.graph_path)

        # Legacy Matrix doesn't support indexed symbols, we need to concat labels and indices
        graph, grammar = explode_indices(graph, grammar)

        graph_path = self.graph_path.parent / "legacy_matrix" / self.graph_path.name
        os.makedirs(graph_path.parent, exist_ok=True)
        self._write_legacy_graph(graph, graph_path)

        grammar_path = self.grammar_path.parent / "legacy_matrix" / self.grammar_path.name
        os.makedirs(grammar_path.parent, exist_ok=True)
        self._write_legacy_grammar(grammar, grammar_path)

        return f"python3 -m src.legacy_cflr {graph_path} {grammar_path}"

    def parse_results(self, process: subprocess.CompletedProcess[str]) -> CflrToolRunResult:
        """Extract the edge count, analysis time and RAM usage from the tool output.

        Expects ``process.stdout`` to contain a ``#SEdges <int>`` (or
        ``#CountEdges <int>``) entry and an ``AnalysisTime <float>`` entry.

        Raises:
            AttributeError: if either entry is missing (``re.search`` returned ``None``).
        """
        s_edges_match = re.search(r"#(SEdges|CountEdges)\s+(\d+)", process.stdout)
        # The time is printed as a Python float, so very short runs may appear in
        # scientific notation (e.g. ``8.5e-05``). The optional exponent keeps such
        # values from being truncated to their mantissa.
        time_match = re.search(
            r"AnalysisTime\s+([\d.]+(?:[eE][+-]?\d+)?)", process.stdout
        )
        return CflrToolRunResult(
            s_edges=int(s_edges_match.group(2)),
            time_sec=float(time_match.group(1)),
            ram_kb=self.parse_ram_usage_kb(process)
        )

    @staticmethod
    def _write_legacy_graph(graph: LabelDecomposedGraph, graph_path: Path) -> None:
        """Write ``graph`` to ``graph_path`` as space-separated ``source label destination`` lines."""
        with open(graph_path, 'w', encoding="utf-8") as output_file:
            for symbol, matrix in graph.matrices.items():
                # Only the coordinates of stored entries are needed; values are ignored.
                (rows, columns, _) = matrix.to_coo()
                edges_df = pd.DataFrame({
                    'source': rows,
                    'label': symbol.label,
                    'destination': columns,
                })
                output_file.write(edges_df.to_csv(sep=' ', index=False, header=False))

    @staticmethod
    def _write_legacy_grammar(grammar: CnfGrammarTemplate, grammar_path: Path) -> None:
        """Write ``grammar`` to ``grammar_path`` in the legacy Matrix text format.

        Layout: the start non-terminal label, an empty line, then one rule per
        line (``LHS ->``, ``LHS -> X`` or ``LHS -> X Y``). Auxiliary
        ``NON_TERMINAL#...`` non-terminals are added where the legacy format
        cannot express a rule directly.
        """
        non_terms = grammar.non_terminals
        non_term_prefix = "NON_TERMINAL#"
        eps = f"{non_term_prefix}EPS"

        terms_needing_non_term = set()
        eps_needed = False

        def as_non_term(symbol: Symbol) -> str:
            """Return a non-terminal name for ``symbol``, wrapping terminals in an auxiliary one."""
            if symbol in non_terms:
                return symbol.label
            terms_needing_non_term.add(symbol)
            return f"{non_term_prefix}{symbol.label}"

        with open(grammar_path, 'w', encoding="utf-8") as output_file:
            output_file.write(f"{grammar.start_nonterm.label}\n\n")

            for lhs in grammar.epsilon_rules:
                output_file.write(f"{lhs.label} ->\n")

            for lhs, rhs in grammar.simple_rules:
                # Legacy Matrix doesn't support rules with
                # single non-terminal right-hand side (see CnfGrammar).
                # Hence, we need to add auxiliary EPS non-terminal.
                if rhs in non_terms:
                    output_file.write(f"{lhs.label} -> {rhs.label} {eps}\n")
                    eps_needed = True
                else:
                    output_file.write(f"{lhs.label} -> {rhs.label}\n")

            for lhs, rhs1, rhs2 in grammar.complex_rules:
                # Legacy Matrix doesn't support terminals in complex rules (see CnfGrammar).
                # Hence, we need to add auxiliary non-terminals (see the next `for` loop).
                output_file.write(
                    f"{lhs.label} -> {as_non_term(rhs1)} {as_non_term(rhs2)}\n"
                )

            # Sorted by label so that the generated file is reproducible between runs.
            for term in sorted(terms_needing_non_term, key=lambda symbol: symbol.label):
                output_file.write(f"{non_term_prefix}{term.label} -> {term.label}\n")

            if eps_needed:
                output_file.write(f"{eps} ->\n")

docs/eval.md

+28-27
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,25 @@ python3 -m cfpq_eval.eval_all_pairs_cflr algo_config.csv data_config.csv results
3434

3535
### Premade Configurations
3636

37-
The `CFPQ_eval` Docker image includes premade configurations located in the `/py_algo/configs` folder.
37+
The `CFPQ_eval` [Docker image](https://hub.docker.com/r/cfpq/py_algo_eval) includes premade configurations located in the `/py_algo/configs` folder.
3838

3939
### Algorithm Configuration
4040

4141
The `algo_config.csv` configuration should list algorithms and their settings.
4242

4343
Supported algorithms:
4444

45-
- `IncrementalAllPairsCFLReachabilityMatrix` (this tool)
46-
- `NonIncrementalAllPairsCFLReachabilityMatrix` (this tool)
45+
- [`IncrementalAllPairsCFLReachabilityMatrix`](cli.md)
46+
- [`NonIncrementalAllPairsCFLReachabilityMatrix`](cli.md)
4747
- [`pocr`](https://github.com/kisslune/POCR)
4848
- [`pearl`](https://figshare.com/articles/dataset/ASE_2023_artifact/23702271)
4949
- [`graspan`](https://github.com/Graspan/Graspan-C)
5050
- [`gigascale`](https://bitbucket.org/jensdietrich/gigascale-pointsto-oopsla2015/src)
5151
- [`kotgll`](https://github.com/vadyushkins/kotgll)
52+
- [`legacy_matrix`](../src/README.md)
5253

53-
For Matrix-based algorithms, options described in [cli.md](cli.md)
54-
can be used to change the performance.
54+
For the first two algorithms, the options described in [cli.md](cli.md)
55+
can be used to configure optimizations.
5556

5657
Here's an algorithm configuration example:
5758
```
@@ -82,28 +83,28 @@ memory usage, and output size, will be printed to `stdout`.
8283

8384
Here's an example of a mean execution time summary table:
8485
```
85-
============================================ TIME, SEC (grammar 'c_alias') ============================================
86-
| graph | fast matrix | fast matrix | matrix cfpq | pearl | pocr | kotgll | gigascale | graspan |
87-
| | cfpq | cfpq (no | | | | | | |
88-
| | | grammar | | | | | | |
89-
| | | rewrite) | | | | | | |
90-
|:---------|:--------------|:--------------|:--------------|:--------|:----------|:---------|:------------|:----------|
91-
| init | 1.2 ± 3% | 2.9 | 7.0 ± 1% | - | 85 | 23 ± 6% | - | 16 ± 14% |
92-
| mm | 1.3 ± 2% | 3.1 | 7.5 | - | 89 ± 1% | 25 ± 3% | - | 16 ± 5% |
93-
| block | 1.7 ± 2% | 4.1 | 11 ± 1% | - | 123 | 34 ± 3% | - | 21 ± 2% |
94-
| ipc | 1.7 ± 4% | 4.0 | 10 ± 1% | - | 121 ± 1% | 34 ± 1% | - | 21 ± 3% |
95-
| lib | 1.7 ± 2% | 4.0 | 11 ± 1% | - | 123 ± 1% | 34 ± 1% | - | 21 ± 3% |
96-
| arch | 1.7 ± 3% | 4.1 | 11 ± 1% | - | 123 ± 1% | 34 ± 5% | - | 22 ± 10% |
97-
| crypto | 1.7 ± 3% | 4.2 | 11 ± 1% | - | 125 ± 1% | 34 ± 2% | - | 22 ± 8% |
98-
| security | 1.8 ± 4% | 4.4 | 11 ± 1% | - | 129 ± 1% | 35 ± 5% | - | 22 ± 5% |
99-
| sound | 2.0 ± 2% | 5.0 | 12 | - | 140 ± 1% | 38 ± 5% | - | 24 ± 11% |
100-
| fs | 2.5 ± 2% | 6.9 | 17 | - | 230 ± 1% | 53 ± 1% | - | 34 ± 3% |
101-
| net | 2.6 ± 3% | 7.4 | 20 | - | 221 ± 1% | 52 ± 1% | - | 35 ± 2% |
102-
| drivers | 3.9 ± 2% | 12 ± 1% | 28 ± 1% | - | 755 ± 1% | 92 ± 3% | - | 69 ± 3% |
103-
| kernel | 6.1 ± 2% | 13 | 43 | - | 387 ± 1% | 118 ± 2% | - | 69 ± 3% |
104-
| apache | 6.5 ± 1% | 26 ± 1% | 84 | - | OOT | OOM | - | 601 ± 2% |
105-
| postgre | 10 ± 1% | 36 ± 1% | 104 | - | 5398 ± 1% | OOM | - | 427 ± 4% |
106-
=======================================================================================================================
86+
==================================== TIME, SEC (grammar 'c_alias') ====================================
87+
| graph | fast matrix | fast matrix | pearl | pocr | kotgll | gigascale | graspan |
88+
| | cfpq | cfpq (no | | | | | |
89+
| | | grammar | | | | | |
90+
| | | rewrite) | | | | | |
91+
|:---------|:--------------|:--------------|:--------|:----------|:---------|:------------|:----------|
92+
| init | 1.2 ± 3% | 2.9 | - | 85 | 23 ± 6% | - | 16 ± 14% |
93+
| mm | 1.3 ± 2% | 3.1 | - | 89 ± 1% | 25 ± 3% | - | 16 ± 5% |
94+
| block | 1.7 ± 2% | 4.1 | - | 123 | 34 ± 3% | - | 21 ± 2% |
95+
| ipc | 1.7 ± 4% | 4.0 | - | 121 ± 1% | 34 ± 1% | - | 21 ± 3% |
96+
| lib | 1.7 ± 2% | 4.0 | - | 123 ± 1% | 34 ± 1% | - | 21 ± 3% |
97+
| arch | 1.7 ± 3% | 4.1 | - | 123 ± 1% | 34 ± 5% | - | 22 ± 10% |
98+
| crypto | 1.7 ± 3% | 4.2 | - | 125 ± 1% | 34 ± 2% | - | 22 ± 8% |
99+
| security | 1.8 ± 4% | 4.4 | - | 129 ± 1% | 35 ± 5% | - | 22 ± 5% |
100+
| sound | 2.0 ± 2% | 5.0 | - | 140 ± 1% | 38 ± 5% | - | 24 ± 11% |
101+
| fs | 2.5 ± 2% | 6.9 | - | 230 ± 1% | 53 ± 1% | - | 34 ± 3% |
102+
| net | 2.6 ± 3% | 7.4 | - | 221 ± 1% | 52 ± 1% | - | 35 ± 2% |
103+
| drivers | 3.9 ± 2% | 12 ± 1% | - | 755 ± 1% | 92 ± 3% | - | 69 ± 3% |
104+
| kernel | 6.1 ± 2% | 13 | - | 387 ± 1% | 118 ± 2% | - | 69 ± 3% |
105+
| apache | 6.5 ± 1% | 26 ± 1% | - | OOT | OOM | - | 601 ± 2% |
106+
| postgre | 10 ± 1% | 36 ± 1% | - | 5398 ± 1% | OOM | - | 427 ± 4% |
107+
=======================================================================================================
107108
```
108109

109110
## Custom Tools Integration

performance.png

-12.1 KB
Loading

src/grammar/cnf_grammar.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,19 @@ def __init__(self):
1616
self.eps_rules = []
1717

1818
def __setitem__(self, key, value):
19-
if (isinstance(value, tuple) or isinstance(value, list)) and 1 <= len(value) <= 2:
19+
if (isinstance(value, tuple) or isinstance(value, list)) and len(value) <= 2:
2020
self.nonterms.add(key)
21-
if len(value) == 1:
21+
if len(value) == 0:
22+
self.eps_rules.append(key)
23+
elif len(value) == 1:
2224
self.simple_rules.append((key, value[0]))
2325
self.terms.add(value[0])
2426
else:
2527
self.complex_rules.append((key, value[0], value[1]))
2628
for x in value:
2729
self.nonterms.add(x)
2830
else:
29-
raise Exception('value must be str, (str, str) or [str, str]')
31+
raise Exception('value must be [], str, (str, str) or [str, str]')
3032

3133
@classmethod
3234
def from_cfg(cls, cfg: CFG):

src/legacy_cflr.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import sys
2+
from pathlib import Path
3+
from time import time
4+
from typing import List
5+
6+
from src.grammar.cnf_grammar import CnfGrammar
7+
from src.graph.graph import Graph
8+
from src.problems.Base.algo.matrix_base.matrix_base import MatrixBaseAlgo
9+
10+
11+
# Minimalistic CLI needed for integration with cfpq_eval,
12+
# not intended to be used by consumers
13+
def main(raw_args: List[str]):
    """Run the legacy Matrix solver on a graph/grammar pair and print its metrics.

    Args:
        raw_args: command-line arguments; ``raw_args[0]`` is the graph file path
            and ``raw_args[1]`` is the grammar file path. Extra items are ignored.

    Prints two tab-separated lines to stdout: ``AnalysisTime`` (seconds spent in
    ``algo.solve()``) and ``#SEdges`` (``nvals`` of the resulting ``matrix_S``).

    Raises:
        IndexError: if fewer than two arguments are supplied.
    """
    # Local import: a monotonic high-resolution clock is the appropriate tool for
    # measuring a duration; the wall clock may jump while the solver is running.
    from time import perf_counter

    graph_path = raw_args[0]
    grammar_path = raw_args[1]
    algo = MatrixBaseAlgo()

    algo.graph = Graph.from_txt(Path(graph_path))
    algo.graph.load_bool_graph()
    algo.grammar = CnfGrammar.from_cnf(grammar_path)

    start = perf_counter()
    res = algo.solve()
    elapsed = perf_counter() - start

    # Fixed-point formatting: the default float formatting switches to scientific
    # notation (e.g. ``8.5e-05``) for tiny durations, which plain
    # digits-and-dots parsers would read as ``8.5``.
    print(f"AnalysisTime\t{elapsed:.9f}")
    print(f"#SEdges\t{res.matrix_S.nvals}")


if __name__ == '__main__':
    main(raw_args=sys.argv[1:])

0 commit comments

Comments
 (0)