diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java
new file mode 100644
index 000000000..97d1cbd9f
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java
@@ -0,0 +1,48 @@
+package io.cdap.wrangler.api.parser;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Token for a byte size literal such as {@code 10KB} or {@code 1.5MB}, stored as a count of bytes.
+ *
+ * <p>Units are matched case-insensitively and are powers of 1024 (1KB = 1024 bytes).
+ * NOTE(review): assumes {@code Token} offers a constructor taking the raw text; confirm.
+ */
+public class ByteSize extends Token {
+  private static final Pattern BYTE_PATTERN = Pattern.compile("(?i)(\\d+(\\.\\d+)?)(B|KB|MB|GB|TB)");
+  private final long bytes;
+
+  /**
+   * Parses a byte size literal.
+   *
+   * @param value digits with an optional fraction, immediately followed by B, KB, MB, GB or TB
+   * @throws IllegalArgumentException if {@code value} is not in that format
+   */
+  public ByteSize(String value) {
+    super(value);
+    Matcher matcher = BYTE_PATTERN.matcher(value.trim());
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Invalid byte size format: " + value);
+    }
+
+    double number = Double.parseDouble(matcher.group(1));
+    // Locale.ROOT keeps unit normalization independent of the JVM default locale.
+    String unit = matcher.group(3).toUpperCase(Locale.ROOT);
+
+    switch (unit) {
+      case "B": this.bytes = (long) number; break;
+      case "KB": this.bytes = (long) (number * 1024); break;
+      case "MB": this.bytes = (long) (number * 1024 * 1024); break;
+      case "GB": this.bytes = (long) (number * 1024 * 1024 * 1024); break;
+      case "TB": this.bytes = (long) (number * 1024L * 1024 * 1024 * 1024); break;
+      default: throw new IllegalArgumentException("Unknown byte unit: " + unit);
+    }
+  }
+
+  /** Returns the parsed size in bytes; fractional bytes are truncated. */
+  public long getBytes() {
+    return bytes;
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
new file mode 100644
index 000000000..9c81912c5
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
@@ -0,0 +1,49 @@
+package io.cdap.wrangler.api.parser;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Token for a time duration literal such as {@code 150ms} or {@code 1.5h}, stored in milliseconds.
+ *
+ * <p>Supported units, matched case-insensitively: ms, s, m (minutes), h and d (days).
+ * NOTE(review): assumes {@code Token} offers a constructor taking the raw text; confirm.
+ */
+public class TimeDuration extends Token {
+  // "d" is included because the TIME_DURATION lexer rule in Directives.g4 accepts it.
+  private static final Pattern TIME_PATTERN = Pattern.compile("(?i)(\\d+(\\.\\d+)?)(ms|s|m|h|d)");
+  private final long milliseconds;
+
+  /**
+   * Parses a time duration literal.
+   *
+   * @param value digits with an optional fraction, immediately followed by ms, s, m, h or d
+   * @throws IllegalArgumentException if {@code value} is not in that format
+   */
+  public TimeDuration(String value) {
+    super(value);
+    Matcher matcher = TIME_PATTERN.matcher(value.trim());
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Invalid time duration format: " + value);
+    }
+
+    double number = Double.parseDouble(matcher.group(1));
+    // Locale.ROOT keeps unit normalization independent of the JVM default locale.
+    String unit = matcher.group(3).toLowerCase(Locale.ROOT);
+
+    switch (unit) {
+      case "ms": this.milliseconds = (long) number; break;
+      case "s": this.milliseconds = (long) (number * 1000); break;
+      case "m": this.milliseconds = (long) (number * 60 * 1000); break;
+      case "h": this.milliseconds = (long) (number * 60 * 60 * 1000); break;
+      case "d": this.milliseconds = (long) (number * 24 * 60 * 60 * 1000); break;
+      default: throw new IllegalArgumentException("Unknown time unit: " + unit);
+    }
+  }
+
+  /** Returns the parsed duration in milliseconds; fractional milliseconds are truncated. */
+  public long getMilliseconds() {
+    return milliseconds;
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
index 8c93b0e6a..1ba1f003f 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
@@ -152,5 +152,15 @@ public enum TokenType implements Serializable {
    * Represents the enumerated type for the object of type {@code String} with restrictions
    * on characters that can be present in a string.
    */
-  IDENTIFIER
+  IDENTIFIER,
+
+  /**
+   * Represents the enumerated type for a byte size token, e.g. {@code 10KB}.
+   */
+  BYTE_SIZE,
+
+  /**
+   * Represents the enumerated type for a time duration token, e.g. {@code 150ms}.
+   */
+  TIME_DURATION
 }
diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
index 7c517ed6a..ffbfca7c2 100644
--- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
+++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
@@ -140,7 +140,7 @@ numberRange
  ;
 
 value
- : String | Number | Column | Bool
+ : String | Number | Column | Bool | BYTE_SIZE | TIME_DURATION
  ;
 
 ecommand
@@ -257,6 +257,16 @@ Number
  : Int ('.' Digit*)?
  ;
 
+// Numeric part and units mirror the pattern parsed by the ByteSize token class.
+BYTE_SIZE
+ : [0-9]+ ('.' [0-9]+)? [kKmMgGtT]? [bB]
+ ;
+
+// Numeric part and units mirror the pattern parsed by the TimeDuration token class.
+TIME_DURATION
+ : [0-9]+ ('.' [0-9]+)? ([mM] [sS] | [sS] | [mM] | [hH] | [dD])
+ ;
+
 Identifier
  : [a-zA-Z_\-] [a-zA-Z_0-9\-]*
  ;