Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,34 @@ Awk awk = new Awk();
String result = awk.run("{ print toupper($0) }", "hello world");
```

Evaluate expressions the same way:

```java
Awk awk = new Awk();
Object value = awk.eval("2 + 3");
```

For repeated evaluations, compile the expression once and reuse the tuples:

```java
AwkSettings settings = new AwkSettings();
settings.setFieldSeparator(",");

Awk awk = new Awk(settings);
AwkTuples expression = awk.compileForEval("$2");

Object first = awk.eval(expression, "alpha,beta");
Object second = awk.eval(expression, "left,right");
```

Both `Awk.eval(...)` and, for advanced use, direct calls to `AVM.eval(...)` use
the compiled tuple metadata to select the read-only eval fast path
automatically when the expression is side-effect free.

When your application already has structured rows, implement
`org.metricshub.jawk.jrt.InputSource` and feed fields directly to
`Awk.eval(...)` or `Awk.invoke(...)` without serializing them back to text.

See [AWK in Java documentation](https://metricshub.org/Jawk/java.html) for more details and advanced usage.

## Writing tests with `AwkTestSupport`
Expand Down
228 changes: 176 additions & 52 deletions src/main/java/org/metricshub/jawk/Awk.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ public class Awk {
*/
private AstNode lastAst;

private volatile EvalRuntimePrototype evalRuntimePrototype;

private volatile long evalRuntimePrototypeVersion = Long.MIN_VALUE;

/**
* Create a new instance of Awk without extensions.
*/
Expand Down Expand Up @@ -680,7 +684,7 @@ public AwkTuples compileForEval(String expression, boolean disableOptimizeParam)
* @throws IOException if anything goes wrong with the evaluation
*/
public Object eval(String expression) throws IOException {
return eval(compileForEval(expression), null, settings.getFieldSeparator());
return eval(compileForEval(expression));
}

/**
Expand All @@ -693,42 +697,27 @@ public Object eval(String expression) throws IOException {
* @throws IOException if anything goes wrong with the evaluation
*/
public Object eval(String expression, String input) throws IOException {
return eval(compileForEval(expression), input, settings.getFieldSeparator());
return eval(compileForEval(expression), input);
}

/**
* Evaluates the specified AWK tuples, i.e. the result of the execution of the
* TERNARY_EXPRESSION AST (the value that has been pushed in the stack).
* Evaluates the specified AWK expression using a structured {@link InputSource}
* to populate {@code $0}, {@code $1}, etc.
*
* @param tuples Tuples returned by {@link Awk#compileForEval(String)}
* @param input Optional text input (that will be available as $0, and tokenized as $1, $2, etc.)
* @param fieldSeparator Value of the FS global variable used for parsing the input
* @param expression Expression to evaluate (e.g. {@code $2 "-" $3})
* @param source structured input source providing the current record
* @return the value of the specified expression
* @throws IOException if anything goes wrong with the evaluation
*/
private Object eval(AwkTuples tuples, String input, String fieldSeparator) throws IOException {

AwkSettings evalSettings = new AwkSettings();
evalSettings.setDefaultRS("\n");
evalSettings.setDefaultORS("\n");
evalSettings.setFieldSeparator(fieldSeparator);
evalSettings
.setOutputStream(
new PrintStream(new ByteArrayOutputStream(), false, StandardCharsets.UTF_8.name()));

Awk evalAwk = new Awk(new ExtensionSetup(this.extensionFunctions, this.extensionInstances), evalSettings);
AVM avm = evalAwk.createAvm();
InputStream is = input != null ? toInputStream(input) : toInputStream("");
InputSource source = new StreamInputSource(is, avm, avm.getJrt());
return avm.eval(tuples, source);
public Object eval(String expression, InputSource source) throws IOException {
	// Compile the expression, then hand the tuples and the structured source
	// to the tuple-based overload.
	AwkTuples compiledExpression = compileForEval(expression);
	return eval(compiledExpression, source);
}

/**
* Evaluates the specified AWK expression using a structured {@link InputSource}
* to populate {@code $0}, {@code $1}, etc.
* <p>
* This method is named separately from the {@code eval} family to avoid
* overload ambiguity when callers pass {@code null}.
* This is a source-compatible alias for {@link #eval(String, InputSource)}.
* </p>
*
* @param expression Expression to evaluate (e.g. {@code $2 "-" $3})
Expand All @@ -737,62 +726,86 @@ private Object eval(AwkTuples tuples, String input, String fieldSeparator) throw
* @throws IOException if anything goes wrong with the evaluation
*/
public Object evalSource(String expression, InputSource source) throws IOException {
return evalSource(expression, source, null);
return eval(expression, source);
}

/**
* Evaluates the specified AWK expression using a structured {@link InputSource}
* to populate {@code $0}, {@code $1}, etc.
* Evaluates pre-compiled tuples without input.
*
* @param tuples tuples returned by {@link Awk#compileForEval(String)}
* @return the value of the specified expression
* @throws IOException if anything goes wrong with the evaluation
*/
public Object eval(AwkTuples tuples) throws IOException {
	// A String-typed null selects the (AwkTuples, String) overload rather
	// than the InputSource one.
	final String noInput = null;
	return eval(tuples, noInput);
}

/**
* Evaluates pre-compiled tuples using a text input value exposed as {@code $0}.
* <p>
* This method is named separately from the {@code eval} family to avoid
* overload ambiguity when callers pass {@code null}.
* The created runtime automatically uses the read-only eval fast path when the
* tuple metadata allows it.
* </p>
*
* @param expression Expression to evaluate (e.g. {@code $2 "-" $3})
* @param source structured input source providing the current record
* @param fieldSeparator Value of the FS global variable (may be {@code null})
* @param tuples Tuples returned by {@link Awk#compileForEval(String)}
* @param input Optional text input (that will be available as $0, and tokenized
* as $1, $2, etc.)
* @return the value of the specified expression
* @throws IOException if anything goes wrong with the evaluation
*/
public Object evalSource(String expression, InputSource source, String fieldSeparator) throws IOException {
return evalSource(compileForEval(expression), source, fieldSeparator);
public Object eval(AwkTuples tuples, String input) throws IOException {
	// Reject missing tuples before any runtime is created.
	Objects.requireNonNull(tuples, "tuples");
	// The text (possibly null) is wrapped in a single-record source and
	// evaluated on a runtime obtained from createEvalAvm().
	return createEvalAvm().eval(tuples, new SingleRecordInputSource(input));
}

/**
* Evaluates pre-compiled AWK tuples using a structured {@link InputSource}
* to populate {@code $0}, {@code $1}, etc.
* Evaluates pre-compiled tuples using a structured {@link InputSource} to
* populate {@code $0}, {@code $1}, etc.
* <p>
* This method is named separately from the {@code eval} family to avoid
* overload ambiguity when callers pass {@code null}.
* The created runtime automatically uses the read-only eval fast path when the
* tuple metadata allows it.
* </p>
*
* @param tuples Tuples returned by {@link Awk#compileForEval(String)}
* @param source structured input source providing the current record
* @param fieldSeparator Value of the FS global variable (may be {@code null})
* @return the value of the specified expression
* @throws IOException if anything goes wrong with the evaluation
*/
public Object evalSource(AwkTuples tuples, InputSource source, String fieldSeparator) throws IOException {

AwkSettings evalSettings = new AwkSettings();
evalSettings.setDefaultRS("\n");
evalSettings.setDefaultORS("\n");
evalSettings.setFieldSeparator(fieldSeparator);
evalSettings
.setOutputStream(
new PrintStream(new ByteArrayOutputStream(), false, StandardCharsets.UTF_8.name()));
public Object eval(AwkTuples tuples, InputSource source) throws IOException {
	// Validate both arguments (tuples first, then source) before any
	// runtime is created.
	Objects.requireNonNull(tuples, "tuples");
	Objects.requireNonNull(source, "source");
	AVM evalRuntime = createEvalAvm();
	return evalRuntime.eval(tuples, source);
}

Awk evalAwk = new Awk(new ExtensionSetup(this.extensionFunctions, this.extensionInstances), evalSettings);
AVM avm = evalAwk.createAvm();
return avm.eval(tuples, source);
/**
 * Evaluates pre-compiled tuples against a structured {@link InputSource}
 * that supplies {@code $0}, {@code $1}, etc.
 * <p>
 * Source-compatible alias: this method simply forwards to
 * {@link #eval(AwkTuples, InputSource)}.
 * </p>
 *
 * @param tuples Tuples returned by {@link Awk#compileForEval(String)}
 * @param source structured input source providing the current record
 * @return the value of the specified expression
 * @throws IOException if anything goes wrong with the evaluation
 */
public Object evalSource(AwkTuples tuples, InputSource source) throws IOException {
	Object value = eval(tuples, source);
	return value;
}

/**
 * Creates the tuple container used by this instance.
 * <p>
 * Declared {@code protected} so that subclasses can supply their own
 * {@link AwkTuples} instance.
 * </p>
 *
 * @return a new {@link AwkTuples}
 */
protected AwkTuples createTuples() {
return new AwkTuples();
}

protected AVM createAvm() {
return new AVM(this.settings, this.extensionInstances);
return createAvm(this.settings);
}

/**
 * Creates an {@link AVM} for the given settings, combined with this
 * instance's extension instances.
 * <p>
 * Declared {@code protected} so that subclasses can return their own
 * {@link AVM} implementation.
 * </p>
 *
 * @param settingsParam settings handed to the {@link AVM} constructor
 * @return a new {@link AVM}
 */
protected AVM createAvm(AwkSettings settingsParam) {
return new AVM(settingsParam, this.extensionInstances);
}

/**
Expand All @@ -805,6 +818,42 @@ private static InputStream toInputStream(String input) {
return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
}

/**
 * Builds the AVM used for one eval call.
 * <p>
 * The runtime is obtained from {@link #createAvm(AwkSettings)}, passing the
 * settings snapshot held by the current eval runtime prototype. Used by
 * {@link #eval(AwkTuples, String)} and {@link #eval(AwkTuples, InputSource)}.
 * </p>
 *
 * @return an AVM configured for expression evaluation
 */
private AVM createEvalAvm() {
	EvalRuntimePrototype prototype = getEvalRuntimePrototype();
	return createAvm(prototype.settingsSnapshot);
}

/**
 * Returns the cached behavioral snapshot used for eval runtimes.
 * <p>
 * The snapshot is rebuilt only when the modification count reported by the
 * owning {@link AwkSettings} differs from the count recorded when the cached
 * snapshot was built. This keeps repeated eval calls cheap while still
 * honoring runtime configuration changes such as FS or locale updates.
 * </p>
 * <p>
 * Concurrency: the writer always publishes the prototype first and its
 * version stamp second. The lock-free path therefore reads the stamp first
 * and the prototype second, so a matching stamp guarantees that the
 * prototype read afterwards is the one built for that stamp or a newer one,
 * never an older one.
 * </p>
 *
 * @return the current eval runtime prototype
 */
private EvalRuntimePrototype getEvalRuntimePrototype() {
	long currentVersion = settings.getModificationCount();
	// Stamp first, prototype second (the reverse of the publication order
	// below); reading them the other way round could pair a stale prototype
	// with a fresh stamp.
	if (evalRuntimePrototypeVersion == currentVersion) {
		EvalRuntimePrototype cachedPrototype = evalRuntimePrototype;
		if (cachedPrototype != null) {
			return cachedPrototype;
		}
	}
	synchronized (this) {
		// Re-read the count under the lock so that a thread which sampled it
		// earlier cannot stamp the cache with an outdated value. It is read
		// before the copy is taken: if the settings change while copying,
		// the stamp is the older one and the next call rebuilds.
		long lockedVersion = settings.getModificationCount();
		EvalRuntimePrototype prototype = evalRuntimePrototype;
		if (prototype == null || evalRuntimePrototypeVersion != lockedVersion) {
			prototype = EvalRuntimePrototype.fromSettings(settings);
			// Publication order matters: prototype, then stamp.
			evalRuntimePrototype = prototype;
			evalRuntimePrototypeVersion = lockedVersion;
		}
		return prototype;
	}
}

/**
* Lists metadata for the {@link JawkExtension} implementations discovered on
* the class path.
Expand All @@ -815,4 +864,79 @@ public static Map<String, JawkExtension> listAvailableExtensions() {
return ExtensionRegistry.listExtensions();
}

/**
 * Holder for the {@link AwkSettings} copy that eval runtimes are created
 * from. The copy's output stream is always the shared discarding stream.
 */
private static final class EvalRuntimePrototype {

	/** Shared sink for eval output; everything written to it is dropped. */
	private static final PrintStream DEV_NULL = createDevNull();

	/** Settings copy handed to the runtimes created for eval calls. */
	private final AwkSettings settingsSnapshot;

	private EvalRuntimePrototype(AwkSettings settingsSnapshot) {
		this.settingsSnapshot = settingsSnapshot;
	}

	/**
	 * Copies the eval-relevant values of {@code source} into a new
	 * {@link AwkSettings} and wraps it in a prototype.
	 *
	 * @param source settings to copy from
	 * @return a prototype holding the copy
	 */
	private static EvalRuntimePrototype fromSettings(AwkSettings source) {
		AwkSettings copy = new AwkSettings();
		// Variables and behavioral flags.
		copy.setVariables(source.getVariables());
		copy.setUseSortedArrayKeys(source.isUseSortedArrayKeys());
		copy.setCatchIllegalFormatExceptions(source.isCatchIllegalFormatExceptions());
		copy.setLocale(source.getLocale());
		// Record and field separators.
		copy.setDefaultRS(source.getDefaultRS());
		copy.setDefaultORS(source.getDefaultORS());
		copy.setFieldSeparator(source.getFieldSeparator());
		// The source's own output stream is not copied; output is discarded.
		copy.setOutputStream(DEV_NULL);
		return new EvalRuntimePrototype(copy);
	}

	/**
	 * Creates the UTF-8 {@link PrintStream} that discards everything.
	 *
	 * @return a print stream backed by {@link NullOutputStream}
	 */
	private static PrintStream createDevNull() {
		final String utf8 = StandardCharsets.UTF_8.name();
		try {
			return new PrintStream(new NullOutputStream(), false, utf8);
		} catch (IOException e) {
			throw new IllegalStateException("UTF-8 must always be available", e);
		}
	}
}

/**
 * {@link InputSource} that exposes at most one text record: the string given
 * at construction. A {@code null} string yields no record at all.
 */
private static final class SingleRecordInputSource implements InputSource {

	/** The only record this source can deliver; may be {@code null}. */
	private final String record;

	/** Set once the record has been handed out by {@link #nextRecord()}. */
	private boolean consumed;

	private SingleRecordInputSource(String record) {
		this.record = record;
	}

	/**
	 * Advances to the single record.
	 *
	 * @return {@code true} on the first call when a non-null record exists,
	 *         {@code false} otherwise
	 */
	@Override
	public boolean nextRecord() {
		boolean available = !consumed && record != null;
		if (available) {
			consumed = true;
		}
		return available;
	}

	/**
	 * @return the record text once {@link #nextRecord()} has delivered it,
	 *         {@code null} before that
	 */
	@Override
	public String getRecordText() {
		if (!consumed) {
			return null;
		}
		return record;
	}

	/**
	 * @return always {@code null}; no pre-split fields are provided
	 *         (NOTE(review): presumably the runtime then splits the record
	 *         text itself; confirm against the InputSource contract)
	 */
	@Override
	public List<String> getFields() {
		return null;
	}

	/** @return always {@code false} */
	@Override
	public boolean isFromFilenameList() {
		return false;
	}
}

/**
 * {@link OutputStream} that silently drops everything written to it.
 * <p>
 * Both the single-byte and the bulk write are overridden. The bulk write
 * inherited from {@link OutputStream} calls {@link #write(int)} once per
 * byte, which is wasted work for a sink that stores nothing.
 * </p>
 */
private static final class NullOutputStream extends OutputStream {

	@Override
	public void write(int b) {
		// Discard eval output.
	}

	/**
	 * Discards {@code len} bytes in one step. The argument checks of the
	 * inherited implementation are kept so that invalid calls still fail
	 * the same way.
	 *
	 * @param b   the data (ignored once validated)
	 * @param off start offset in {@code b}
	 * @param len number of bytes to discard
	 * @throws NullPointerException      if {@code b} is {@code null}
	 * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
	 *                                   negative, or {@code off + len}
	 *                                   exceeds {@code b.length}
	 */
	@Override
	public void write(byte[] b, int off, int len) {
		if (b == null) {
			throw new NullPointerException("b");
		}
		// Written as "len > b.length - off" to avoid int overflow in off + len.
		if (off < 0 || len < 0 || len > b.length - off) {
			throw new IndexOutOfBoundsException();
		}
		// Discard eval output.
	}
}

}
7 changes: 6 additions & 1 deletion src/main/java/org/metricshub/jawk/SandboxedAwk.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ protected AwkTuples createTuples() {

@Override
protected AVM createAvm() {
return new SandboxedAVM(getSettings(), getExtensionInstances());
return createAvm(getSettings());
}

/**
 * Creates the sandboxed runtime for the given settings, combined with the
 * extension instances returned by {@code getExtensionInstances()}.
 *
 * @param settingsParam settings handed to the {@link SandboxedAVM} constructor
 * @return a new {@link SandboxedAVM}
 */
@Override
protected AVM createAvm(AwkSettings settingsParam) {
return new SandboxedAVM(settingsParam, getExtensionInstances());
}
}
Loading
Loading