Skip to content

Commit 83c0699

Browse files
committed
Rework lexer/parser to use Antlr4 rather than something custom.
1 parent cb2d7d1 commit 83c0699

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+3792
-3012
lines changed

src/main/java/org/mitre/pickledcanary/PickledCanary.java

Lines changed: 85 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11

22
// Copyright (C) 2023 The MITRE Corporation All Rights Reserved
33

4-
package org.mitre.pickledcanary;
4+
package ghidra.pickledcanary;
55

66
import java.io.File;
7-
import java.util.LinkedList;
87
import java.util.List;
98
import java.util.concurrent.atomic.AtomicReference;
109

11-
import org.mitre.pickledcanary.patterngenerator.frontend.PatternAssembler;
12-
import org.mitre.pickledcanary.patterngenerator.frontend.PatternAssembler.AssembleType;
13-
import org.mitre.pickledcanary.querylanguage.lexer.Lexer;
14-
import org.mitre.pickledcanary.querylanguage.lexer.ParseTree;
15-
import org.mitre.pickledcanary.querylanguage.tokenizer.Token;
16-
import org.mitre.pickledcanary.querylanguage.tokenizer.Tokenizer;
10+
import org.antlr.v4.runtime.BaseErrorListener;
11+
import org.antlr.v4.runtime.CharStreams;
12+
import org.antlr.v4.runtime.CommonTokenStream;
13+
import org.antlr.v4.runtime.RecognitionException;
14+
import org.antlr.v4.runtime.Recognizer;
15+
16+
import org.mitre.pickledcanary.patterngenerator.PCVisitor;
17+
import org.mitre.pickledcanary.patterngenerator.generated.pc_grammar;
18+
import org.mitre.pickledcanary.patterngenerator.generated.pc_lexer;
1719
import org.mitre.pickledcanary.search.Pattern;
1820
import org.mitre.pickledcanary.search.SavedDataAddresses;
1921
import org.mitre.pickledcanary.search.VmSearch;
@@ -29,178 +31,111 @@
2931
import ghidra.util.task.TaskMonitor;
3032

3133
/**
32-
* This Class holds high-level static methods that are useful for parsing and/or
33-
* searching pickled canary patterns.
34+
* This Class holds high-level static methods that are useful for parsing and/or searching pickled
35+
* canary patterns.
3436
*/
3537
public class PickledCanary {
3638

39+
static class MyErrorListener extends BaseErrorListener {
40+
@Override
41+
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
42+
int charPositionInLine,
43+
String msg, RecognitionException e) {
44+
throw new RuntimeException("Pattern lexer encountered error when processing line " + line + ":" + charPositionInLine + " " + msg);
45+
}
46+
}
47+
3748
/**
38-
* You probably want to use
39-
* {@link #parseAndAssemble(TaskMonitor, Program, Address, String, Boolean)
40-
* parseAndAssemble} or
41-
* {@link #parseAndRunAll(TaskMonitor, Program, Address, String) parseAndRunAll}
42-
* instead.
49+
* Creates and runs the PC lexer and visitor across a given string pattern. The result can be
50+
* used to generate a JSON or {@link Pattern} output.
51+
* <p>
52+
* You probably want to use {@link #compile(TaskMonitor, String, Program, Address, boolean)
53+
* compile} or {@link #compile(TaskMonitor, String, Program, Address) compile} instead, which
54+
* handle these later steps for you as well. This is probably only useful if you're looking to
55+
* capture both types of output later.
56+
*
57+
* @param monitor
58+
* @param pattern
59+
* The pattern to lex and visit.
60+
* @param currentProgram
61+
* The program to use when lexing and visiting the given pattern.
62+
* @param currentAddress
63+
* The address to use when lexing and visiting.
64+
* @return PCVisitor instance which has already visited all nodes of the given pattern.
4365
*/
44-
public static ParseTree parsePattern(TaskMonitor monitor, String query) {
66+
public static PCVisitor createAndRunVisitor(TaskMonitor monitor, String pattern,
67+
final Program currentProgram, final Address currentAddress) {
4568

4669
monitor.setIndeterminate(true);
70+
71+
MyErrorListener errorListener = new MyErrorListener();
4772

48-
monitor.setMessage("Tokenizing query.");
49-
final Tokenizer tokenizer = new Tokenizer(query);
50-
51-
final LinkedList<Token> tokens = tokenizer.tokenize(true);
73+
var chars = CharStreams.fromString(pattern);
74+
var lexer = new pc_lexer(chars);
75+
lexer.addErrorListener(errorListener);
76+
var commonTokenStream = new CommonTokenStream(lexer);
77+
var parser = new pc_grammar(commonTokenStream);
78+
parser.addErrorListener(errorListener);
5279

53-
monitor.setMessage("Lexing query.");
54-
final Lexer lexer = new Lexer(tokens);
80+
var progContext = parser.prog();
5581

56-
ParseTree out = lexer.lex();
82+
var visitor = new PCVisitor(currentProgram, currentAddress, monitor);
83+
visitor.visit(progContext);
5784

5885
monitor.setIndeterminate(false);
59-
return out;
60-
}
61-
62-
private static Object assembleInternal(AssembleType type, TaskMonitor monitor, Program program,
63-
Address currentAddress, ParseTree parseTree, Boolean removeDebugInfo) {
64-
// start a transaction so that we can undo any overwrites we do to the binary
65-
int transactionID = program.startTransaction("Pickled Canary Pattern Assemble");
66-
67-
monitor.setMessage("Creating pattern assembler");
68-
final PatternAssembler patternAssembler = new PatternAssembler();
69-
70-
Object pattern;
71-
72-
try {
73-
74-
monitor.setMessage("Assembling pattern");
75-
76-
pattern = patternAssembler.assemble(type, program, parseTree, currentAddress, program.getLanguage(),
77-
monitor, removeDebugInfo);
78-
79-
// end transaction - discard all overwrites to the binary
80-
program.endTransaction(transactionID, false);
81-
82-
} catch (Exception e) {
83-
// end transaction - discard all overwrites to the binary
84-
program.endTransaction(transactionID, false);
85-
throw e;
86-
}
87-
88-
return pattern;
86+
return visitor;
8987
}
88+
9089

9190
/**
9291
* Returns a JSON string of the compiled pattern.
93-
* <p>
94-
* You probably want to use
95-
* {@link #parseAndAssemble(TaskMonitor, Program, Address, String, Boolean)
96-
* parseAndAssemble} instead.
9792
*/
98-
public static String assemble(TaskMonitor monitor, Program program, Address currentAddress, ParseTree parseTree) {
99-
return (String) assembleInternal(AssembleType.JSON, monitor, program, currentAddress, parseTree, false);
93+
public static String compile(TaskMonitor monitor, String pattern, Program program,
94+
Address address, boolean removeDebugInfo) {
95+
return createAndRunVisitor(monitor, pattern, program, address).getJSON(removeDebugInfo);
10096
}
10197

102-
/**
103-
* Returns a JSON string of the compiled pattern without the compile_info key
104-
* information.
105-
* <p>
106-
* You probably want to use
107-
* {@link #parseAndAssemble(TaskMonitor, Program, Address, String, Boolean)
108-
* parseAndAssemble} instead.
109-
*/
110-
public static String assemble(TaskMonitor monitor, Program program, Address currentAddress, ParseTree parseTree,
111-
boolean removeDebugFlag) {
112-
return (String) assembleInternal(AssembleType.JSON, monitor, program, currentAddress, parseTree,
113-
removeDebugFlag);
98+
public static Pattern compile(TaskMonitor monitor, String pattern, Program program,
99+
Address address) {
100+
return createAndRunVisitor(monitor, pattern, program, address).getPattern();
114101
}
115102

116103
/**
117-
* Consider using
118-
* {@link #assemblePatternWrapped(TaskMonitor, Program, Address, ParseTree)
119-
* assemblePatternWrapped} which does the same thing as this function, but adds
120-
* a starting .* and match instructions
104+
* Runs the given pattern, returning all results in the given program.
121105
*/
122-
public static Pattern assemblePattern(TaskMonitor monitor, Program program, Address currentAddress,
123-
ParseTree parseTree) {
124-
return (Pattern) assembleInternal(AssembleType.PATTERN, monitor, program, currentAddress, parseTree, false);
125-
}
106+
public static List<SavedDataAddresses> parseAndRunAll(TaskMonitor monitor,
107+
Program program, Address address, String pattern) {
108+
Pattern patternCompiled = compileWrapped(monitor, pattern, program, address);
126109

127-
/**
128-
* This is the same as running {@link #parsePattern(TaskMonitor, String)
129-
* parsePattern} followed by
130-
* {@link #assemble(TaskMonitor, Program, Address, ParseTree) assemble).
131-
*
132-
* Returns a compiled JSON pattern.
133-
*/
134-
public static String parseAndAssemble(TaskMonitor monitor, Program program, Address currentAddress, String query,
135-
Boolean removeDebugInfo) {
136-
137-
final ParseTree parseTree = PickledCanary.parsePattern(monitor, query);
138-
return (String) assembleInternal(AssembleType.JSON, monitor, program, currentAddress, parseTree,
139-
removeDebugInfo);
140-
}
141-
142-
/**
143-
* Runs the given pattern (query), returning all results in the given program.
144-
*/
145-
public static List<SavedDataAddresses> parseAndRunAll(TaskMonitor monitor, Program program, Address currentAddress,
146-
String query) {
147-
final ParseTree parseTree = PickledCanary.parsePattern(monitor, query);
148-
149-
Pattern pattern = assemblePatternWrapped(monitor, program, currentAddress, parseTree);
150-
151-
return runAll(monitor, program, pattern);
152-
}
153-
154-
/**
155-
* Runs the given pattern (query), returning all results in the given program.
156-
*/
157-
public static void parseAndRunAll(TaskMonitor monitor, Program program, Address currentAddress, String query,
158-
Accumulator<SavedDataAddresses> accumulator) {
159-
final ParseTree parseTree = PickledCanary.parsePattern(monitor, query);
160-
161-
Pattern pattern = assemblePatternWrapped(monitor, program, currentAddress, parseTree);
162-
163-
runAll(monitor, program, pattern, accumulator);
110+
return runAll(monitor, program, patternCompiled);
164111
}
165112

166113
/**
167-
* Runs the given pattern (query), returning all results in the given program.
114+
* Runs the given pattern, returning all results in the given program.
168115
*/
169-
public static String parseAssembleAndRunAll(TaskMonitor monitor, Program program, Address currentAddress,
170-
String query, Accumulator<SavedDataAddresses> accumulator) {
171-
final ParseTree parseTree = PickledCanary.parsePattern(monitor, query);
172-
173-
Pattern pattern = assemblePatternWrapped(monitor, program, currentAddress, parseTree);
116+
public static void parseAndRunAll(TaskMonitor monitor, Program program,
117+
Address address,
118+
Accumulator<SavedDataAddresses> accumulator, String pattern) {
119+
Pattern patternCompiled = compileWrapped(monitor, pattern, program, address);
174120

175-
runAll(monitor, program, pattern, accumulator);
176-
177-
return assemble(monitor, program, currentAddress, parseTree);
121+
runAll(monitor, program, patternCompiled, accumulator);
178122
}
179123

180124
/**
181-
* Similar to assemblePattern, but adds a .* to the start of the pattern and
182-
* adds instructions to record the start of the match and when the pattern has
183-
* matched.
125+
* Similar to compile, but adds a .* to the start of the pattern and adds instructions to record
126+
* the start of the match and when the pattern has matched.
184127
*/
185-
public static Pattern assemblePatternWrapped(TaskMonitor monitor, Program program, Address currentAddress,
186-
ParseTree parseTree) {
187-
188-
final Pattern pattern = assemblePattern(monitor, program, currentAddress, parseTree);
189-
190-
monitor.setMessage("Preparing pattern");
191-
Pattern start = Pattern.getDotStar();
192-
start.append(Pattern.getSaveStart());
193-
pattern.prepend(start);
194-
pattern.append(Pattern.getMatch());
195-
return pattern;
196-
128+
public static Pattern compileWrapped(TaskMonitor monitor, String pattern, Program program,
129+
Address address) {
130+
return PickledCanary.createAndRunVisitor(monitor, pattern, program, address).getPatternWrapped();
197131
}
198132

199133
/**
200134
* Runs the given pattern on the given program. You may prefer to use
201-
* {@link #parseAndRunAll(TaskMonitor, Program, Address, String) parseAndRunAll}
135+
* {@link #parseAndRunAll(TaskMonitor, Program, Address, String) parseAndRunAll}
202136
*/
203-
public static List<SavedDataAddresses> runAll(TaskMonitor monitor, Program program, Pattern pattern) {
137+
public static List<SavedDataAddresses> runAll(TaskMonitor monitor, Program program,
138+
Pattern pattern) {
204139
monitor.setMessage("Searching");
205140
VmSearch vm = new VmSearch(pattern, program.getMemory());
206141

@@ -209,7 +144,7 @@ public static List<SavedDataAddresses> runAll(TaskMonitor monitor, Program progr
209144

210145
/**
211146
* Runs the given pattern on the given program. You may prefer to use
212-
* {@link #parseAndRunAll(TaskMonitor, Program, Address, String) parseAndRunAll}
147+
* {@link #parseAndRunAll(TaskMonitor, Program, Address, String) parseAndRunAll}
213148
*/
214149
public static void runAll(TaskMonitor monitor, Program program, Pattern pattern,
215150
Accumulator<SavedDataAddresses> accumulator) {
@@ -224,13 +159,13 @@ public enum AskFileType {
224159
}
225160

226161
/**
227-
* Custom version of Ghidra's {@code askFile} method. Allows user to select a
228-
* file.
162+
* Custom version of Ghidra's {@code askFile} method. Allows user to select a file.
229163
*
230-
* @param isSave true if asking user to save JSON file; false if asking user to
231-
* choose ptn file
164+
* @param isSave
165+
* true if asking user to save JSON file; false if asking user to choose ptn file
232166
* @return file object to read or write to
233-
* @throws CancelledException for if user clicks cancel button
167+
* @throws CancelledException
168+
if the user clicks the cancel button
234169
*/
235170
public static File pcAskFile(final boolean isSave, final AskFileType type, File previousFile)
236171
throws CancelledException {
@@ -246,7 +181,8 @@ public static File pcAskFile(final boolean isSave, final AskFileType type, File
246181
String jsonFileName;
247182
if ((type == AskFileType.JSON) && ptnFileName.endsWith(".ptn")) {
248183
jsonFileName = ptnFileName.replace(".ptn", ".json");
249-
} else {
184+
}
185+
else {
250186
jsonFileName = ptnFileName;
251187
}
252188
selectedFile = new File(jsonFileName);

0 commit comments

Comments
 (0)