-
Notifications
You must be signed in to change notification settings - Fork 125
Add starting point for ByteArray-backed bytecode generator #1134
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
78568e5
b921269
c56816b
b5b0c29
d60554c
115c3f0
7de8637
6c6e377
b1ae1f4
bdda6f3
bcdf133
5e1983a
b95ea51
2cac7a2
39294f3
a6960eb
6f571dc
50045b7
ad746ca
785e1dd
0ad98d6
558a2b6
ec2a964
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,14 +2,99 @@ | |
| // SPDX-License-Identifier: Apache-2.0 | ||
| package com.amazon.ion.bytecode.bin11 | ||
|
|
||
| import com.amazon.ion.Decimal | ||
| import com.amazon.ion.IonException | ||
| import com.amazon.ion.Timestamp | ||
| import com.amazon.ion.bytecode.BytecodeEmitter | ||
| import com.amazon.ion.bytecode.BytecodeGenerator | ||
| import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTable | ||
| import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsBigInteger | ||
| import com.amazon.ion.bytecode.bin11.bytearray.ShortTimestampDecoder | ||
| import com.amazon.ion.bytecode.util.AppendableConstantPoolView | ||
| import com.amazon.ion.bytecode.util.ByteSlice | ||
| import com.amazon.ion.bytecode.util.BytecodeBuffer | ||
| import com.amazon.ion.bytecode.util.unsignedToInt | ||
| import com.amazon.ion.impl.bin.utf8.Utf8StringDecoder | ||
| import com.amazon.ion.impl.bin.utf8.Utf8StringDecoderPool | ||
| import edu.umd.cs.findbugs.annotations.SuppressFBWarnings | ||
| import java.math.BigInteger | ||
| import java.nio.ByteBuffer | ||
|
|
||
| @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "constructor does not make a defensive copy of source as a performance optimization") | ||
| internal class ByteArrayBytecodeGenerator11 | ||
| @SuppressFBWarnings("URF_UNREAD_FIELD", justification = "field will be read once this class is implemented") | ||
| constructor( | ||
| internal class ByteArrayBytecodeGenerator11( | ||
| private val source: ByteArray, | ||
| private var i: Int, | ||
| ) { | ||
| // TODO: This should implement BytecodeGenerator | ||
| private var currentPosition: Int, | ||
| ) : BytecodeGenerator { | ||
| private val utf8Decoder: Utf8StringDecoder = Utf8StringDecoderPool.getInstance().orCreate | ||
|
|
||
| /** | ||
|  * Converts opcodes from [source] into bytecode in [destination], beginning at [currentPosition]. | ||
|  * | ||
|  * Conversion stops once an opcode accepted by [isSystemValue] has been handled or the end of [source] | ||
|  * is reached. A refill instruction is emitted when unread bytes remain; otherwise an end-of-input | ||
|  * instruction is emitted. | ||
|  */ | ||
| override fun refill( | ||
|     destination: BytecodeBuffer, | ||
|     constantPool: AppendableConstantPoolView, | ||
|     macroSrc: IntArray, | ||
|     macroIndices: IntArray, | ||
|     symTab: Array<String?> | ||
| ) { | ||
|     var lastOpcode = 0 | ||
|
austnwil marked this conversation as resolved.
Outdated
|
||
|     while (currentPosition < source.size && !isSystemValue(lastOpcode)) { | ||
|         lastOpcode = source[currentPosition++].unsignedToInt() | ||
|         // The handler is given the position just past the opcode byte; its return value is added to | ||
|         // the position (presumably the number of bytes it consumed -- confirm against the handlers). | ||
|         val advance = OpcodeHandlerTable.handler(lastOpcode).convertOpcodeToBytecode( | ||
|             lastOpcode, | ||
|             source, | ||
|             currentPosition, | ||
|             destination, | ||
|             constantPool, | ||
|             macroSrc, | ||
|             macroIndices, | ||
|             symTab | ||
|         ) | ||
|         currentPosition += advance | ||
|     } | ||
|
|
||
|     val hasUnreadBytes = currentPosition < source.size | ||
|     if (hasUnreadBytes) { | ||
|         BytecodeEmitter.emitRefill(destination) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I attempted to work on writing tests for a different |
||
|     } else { | ||
|         BytecodeEmitter.emitEndOfInput(destination) | ||
|     } | ||
| } | ||
|
|
||
| /** Decodes the `length` bytes of [source] starting at `position` using [readFixedIntAsBigInteger]. */ | ||
| override fun readBigIntegerReference(position: Int, length: Int): BigInteger = | ||
|     readFixedIntAsBigInteger(source, position, length) | ||
|
|
||
| /** Placeholder: decimal references are not supported yet, so every call fails via [TODO]. */ | ||
| override fun readDecimalReference(position: Int, length: Int): Decimal = | ||
|     TODO("Not yet implemented") | ||
|
|
||
| /** Decodes the short timestamp at `position` in [source] by forwarding to [ShortTimestampDecoder.readTimestamp]. */ | ||
| override fun readShortTimestampReference(position: Int, opcode: Int): Timestamp = | ||
|     ShortTimestampDecoder.readTimestamp(source, position, opcode) | ||
|
|
||
| /** Placeholder: long-form timestamp references are not supported yet, so every call fails via [TODO]. */ | ||
| override fun readTimestampReference(position: Int, length: Int): Timestamp = | ||
|     TODO("Not yet implemented") | ||
|
|
||
| /** Wraps the `length` bytes of [source] starting at `position` and decodes them with [utf8Decoder]. */ | ||
| override fun readTextReference(position: Int, length: Int): String = | ||
|     utf8Decoder.decode(ByteBuffer.wrap(source, position, length), length) | ||
|
|
||
| /** Builds a [ByteSlice] over [source] from `position` and `position + length`. */ | ||
| override fun readBytesReference(position: Int, length: Int): ByteSlice { | ||
|     val end = position + length | ||
|     return ByteSlice(source, position, end) | ||
| } | ||
|
|
||
| /** The Ion minor version reported by this generator: always 1. */ | ||
| override fun ionMinorVersion(): Int = 1 | ||
|
austnwil marked this conversation as resolved.
Outdated
|
||
|
|
||
| /** | ||
|  * Returns a generator for [minorVersion] that continues reading [source] from [currentPosition]. | ||
|  * | ||
|  * @throws IonException for any minor version other than 1. | ||
|  */ | ||
| override fun getGeneratorForMinorVersion(minorVersion: Int): BytecodeGenerator { | ||
|     if (minorVersion != 1) { | ||
|         // TODO: update with ByteArrayBytecodeGenerator10 once it implements BytecodeGenerator | ||
|         throw IonException("Minor version $minorVersion not yet implemented for ByteArray-backed data sources.") | ||
|     } | ||
|     return ByteArrayBytecodeGenerator11(source, currentPosition) | ||
| } | ||
|
|
||
| /** Returns true when [opcode] lies in the inclusive range 0xE0..0xE8. */ | ||
| private fun isSystemValue(opcode: Int): Boolean = | ||
|     opcode >= 0xE0 && opcode <= 0xE8 | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,221 @@ | ||
| // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
| package com.amazon.ion.bytecode.bin11.bytearray | ||
|
|
||
| import com.amazon.ion.Timestamp | ||
| import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 | ||
| import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 | ||
| import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort | ||
| import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong | ||
| import java.math.BigDecimal | ||
|
|
||
| /** | ||
| * Helper class for decoding the various short timestamp encoding variants from a [ByteArray]. | ||
|
austnwil marked this conversation as resolved.
Outdated
|
||
| */ | ||
| internal object ShortTimestampDecoder { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about just |
||
| private const val MASK_4 = 0b1111 | ||
| private const val MASK_5 = 0b11111 | ||
| private const val MASK_6 = 0b111111 | ||
| private const val MASK_7 = 0b1111111 | ||
| private const val MASK_4L = 0b1111L | ||
| private const val MASK_5L = 0b11111L | ||
| private const val MASK_6L = 0b111111L | ||
| private const val MASK_7L = 0b1111111L | ||
| private const val MASK_10L = 0b1111111111L | ||
| private const val MASK_20L = 0b11111111111111111111L | ||
| private const val MASK_30L = 0b111111111111111111111111111111L | ||
| private const val MASK_UTC_OR_UNKNOWN_BIT = 0b1000_00000000_00000000_00000000 | ||
| private const val MASK_UTC_OR_UNKNOWN_BITL = 0b1000_00000000_00000000_00000000L | ||
|
|
||
| private val opcodeToDecoderFunctionTable = arrayOf( | ||
| ShortTimestampDecoder::readTimestampToYear, | ||
| ShortTimestampDecoder::readTimestampToMonth, | ||
| ShortTimestampDecoder::readTimestampToDay, | ||
| ShortTimestampDecoder::readTimestampToMinuteUTCOrUnknown, | ||
| ShortTimestampDecoder::readTimestampToSecondUTCOrUnknown, | ||
| ShortTimestampDecoder::readTimestampToMillisecondUTCOrUnknown, | ||
| ShortTimestampDecoder::readTimestampToMicrosecondUTCOrUnknown, | ||
| ShortTimestampDecoder::readTimestampToNanosecondUTCOrUnknown, | ||
| ShortTimestampDecoder::readTimestampToMinuteWithOffset, | ||
| ShortTimestampDecoder::readTimestampToSecondWithOffset, | ||
| ShortTimestampDecoder::readTimestampToMillisecondWithOffset, | ||
| ShortTimestampDecoder::readTimestampToMicrosecondWithOffset, | ||
| ShortTimestampDecoder::readTimestampToNanosecondWithOffset, | ||
| ) | ||
|
|
||
| /** | ||
|  * Decodes a year-precision short timestamp from the single byte at [position]. | ||
|  * | ||
|  * The year is taken from the low 7 bits and is biased by 1970. | ||
|  */ | ||
| fun readTimestampToYear(source: ByteArray, position: Int): Timestamp { | ||
|     // Mask to the 7 year bits, as every other decoder in this object does, so that a set high bit | ||
|     // (presumably sign-extended by readFixedInt8AsShort) cannot produce a negative year offset. | ||
|     val year = readFixedInt8AsShort(source, position).toInt().and(MASK_7) | ||
|     return Timestamp.forYear(year + 1970) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a month-precision short timestamp from the 16-bit value at [position]. | ||
|  * | ||
|  * Layout: year (bits 0-6, biased by 1970), month (bits 7-10). The remaining high bits are ignored. | ||
|  */ | ||
| fun readTimestampToMonth(source: ByteArray, position: Int): Timestamp { | ||
|     val yearAndMonth = readFixedInt16(source, position).toInt() | ||
|     val year = yearAndMonth.and(MASK_7) | ||
|     // Mask to the 4 month bits (matching the other decoders) so that the high bits of the value, | ||
|     // or sign extension from toInt(), cannot leak into the month. | ||
|     val month = yearAndMonth.shr(7).and(MASK_4) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI, |
||
|
|
||
|     return Timestamp.forMonth(year + 1970, month) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a day-precision short timestamp from the 16-bit value at [position]. | ||
|  * | ||
|  * Layout: year (bits 0-6, biased by 1970), month (bits 7-10), day (bits 11-15). | ||
|  */ | ||
| fun readTimestampToDay(source: ByteArray, position: Int): Timestamp { | ||
|     val yearMonthAndDay = readFixedInt16(source, position).toInt() | ||
|     val year = yearMonthAndDay.and(MASK_7) | ||
|     val month = yearMonthAndDay.shr(7).and(MASK_4) | ||
|     // The day occupies the top five bits, so any day >= 16 sets the sign bit of the 16-bit value. | ||
|     // Mask after shifting so that sign extension (assuming readFixedInt16 yields a signed value) | ||
|     // cannot turn the day into a negative number. | ||
|     val day = yearMonthAndDay.shr(11).and(MASK_5) | ||
|
|
||
|     return Timestamp.forDay(year + 1970, month, day) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a minute-precision short timestamp whose offset is either UTC or unknown. | ||
|  * | ||
|  * Fields are unpacked from the 32-bit value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26) and the UTC flag | ||
|  * (bit 27). A set flag gives an offset of 0; a clear flag gives a null offset. | ||
|  */ | ||
| fun readTimestampToMinuteUTCOrUnknown(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedInt32(source, position) | ||
|     val offsetMinutes: Int? = if ((bits and MASK_UTC_OR_UNKNOWN_BIT) != 0) 0 else null | ||
|     return Timestamp.forMinute( | ||
|         (bits and MASK_7) + 1970, | ||
|         (bits shr 7) and MASK_4, | ||
|         (bits shr 11) and MASK_5, | ||
|         (bits shr 16) and MASK_5, | ||
|         (bits shr 21) and MASK_6, | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a second-precision short timestamp whose offset is either UTC or unknown. | ||
|  * | ||
|  * Fields are unpacked from the 5-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), UTC flag (bit 27) | ||
|  * and second (bits 28-33). A set flag gives an offset of 0; a clear flag gives a null offset. | ||
|  */ | ||
| fun readTimestampToSecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 5) | ||
|     val offsetMinutes: Int? = if ((bits and MASK_UTC_OR_UNKNOWN_BITL) != 0L) 0 else null | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         ((bits shr 28) and MASK_6L).toInt(), | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a millisecond-precision short timestamp whose offset is either UTC or unknown. | ||
|  * | ||
|  * Fields are unpacked from the 6-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), UTC flag (bit 27), | ||
|  * second (bits 28-33) and a 10-bit fraction (bits 34-43) applied with a decimal scale of 3. | ||
|  */ | ||
| fun readTimestampToMillisecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 6) | ||
|     val wholeSeconds = BigDecimal.valueOf((bits shr 28) and MASK_6L) | ||
|     val fraction = BigDecimal.valueOf((bits shr 34) and MASK_10L, 3) | ||
|     val offsetMinutes: Int? = if ((bits and MASK_UTC_OR_UNKNOWN_BITL) != 0L) 0 else null | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         wholeSeconds + fraction, | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a microsecond-precision short timestamp whose offset is either UTC or unknown. | ||
|  * | ||
|  * Fields are unpacked from the 7-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), UTC flag (bit 27), | ||
|  * second (bits 28-33) and a 20-bit fraction (bits 34-53) applied with a decimal scale of 6. | ||
|  */ | ||
| fun readTimestampToMicrosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 7) | ||
|     val wholeSeconds = BigDecimal.valueOf((bits shr 28) and MASK_6L) | ||
|     val fraction = BigDecimal.valueOf((bits shr 34) and MASK_20L, 6) | ||
|     val offsetMinutes: Int? = if ((bits and MASK_UTC_OR_UNKNOWN_BITL) != 0L) 0 else null | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         wholeSeconds + fraction, | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a nanosecond-precision short timestamp whose offset is either UTC or unknown. | ||
|  * | ||
|  * Fields are unpacked from the 8-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), UTC flag (bit 27), | ||
|  * second (bits 28-33) and a 30-bit fraction (bits 34-63) applied with a decimal scale of 9. | ||
|  */ | ||
| fun readTimestampToNanosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 8) | ||
|     val wholeSeconds = BigDecimal.valueOf((bits shr 28) and MASK_6L) | ||
|     // The fraction reaches the top bit of the long, hence the unsigned shift. | ||
|     val fraction = BigDecimal.valueOf((bits ushr 34) and MASK_30L, 9) | ||
|     val offsetMinutes: Int? = if ((bits and MASK_UTC_OR_UNKNOWN_BITL) != 0L) 0 else null | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         wholeSeconds + fraction, | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a minute-precision short timestamp that carries an explicit offset. | ||
|  * | ||
|  * Fields are unpacked from the 5-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26) and offset | ||
|  * (bits 27-33). The offset field is biased by 56 and multiplied by 15 to obtain minutes. | ||
|  */ | ||
| fun readTimestampToMinuteWithOffset(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 5) | ||
|     val offsetField = ((bits shr 27) and MASK_7L).toInt() | ||
|     val offsetMinutes = (offsetField - 56) * 15 | ||
|     return Timestamp.forMinute( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a second-precision short timestamp that carries an explicit offset. | ||
|  * | ||
|  * Fields are unpacked from the 5-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), offset | ||
|  * (bits 27-33) and second (bits 34-39). The offset field is biased by 56 and multiplied by 15 | ||
|  * to obtain minutes. | ||
|  */ | ||
| fun readTimestampToSecondWithOffset(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 5) | ||
|     val offsetField = ((bits shr 27) and MASK_7L).toInt() | ||
|     val offsetMinutes = (offsetField - 56) * 15 | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         ((bits shr 34) and MASK_6L).toInt(), | ||
|         offsetMinutes, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a millisecond-precision short timestamp that carries an explicit offset. | ||
|  * | ||
|  * Fields are unpacked from the 7-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), offset | ||
|  * (bits 27-33), second (bits 34-39) and a 10-bit fraction (bits 40-49) applied with a decimal | ||
|  * scale of 3. The offset field is biased by 56 and multiplied by 15 to obtain minutes. | ||
|  */ | ||
| fun readTimestampToMillisecondWithOffset(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 7) | ||
|     val offsetField = ((bits shr 27) and MASK_7L).toInt() | ||
|     val wholeSeconds = BigDecimal.valueOf((bits shr 34) and MASK_6L) | ||
|     val fraction = BigDecimal.valueOf((bits shr 40) and MASK_10L, 3) | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         wholeSeconds + fraction, | ||
|         (offsetField - 56) * 15, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a microsecond-precision short timestamp that carries an explicit offset. | ||
|  * | ||
|  * Fields are unpacked from the 8-byte value at [position]: year (bits 0-6, biased by 1970), | ||
|  * month (bits 7-10), day (bits 11-15), hour (bits 16-20), minute (bits 21-26), offset | ||
|  * (bits 27-33), second (bits 34-39) and a 20-bit fraction (bits 40-59) applied with a decimal | ||
|  * scale of 6. The offset field is biased by 56 and multiplied by 15 to obtain minutes. | ||
|  */ | ||
| fun readTimestampToMicrosecondWithOffset(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 8) | ||
|     val offsetField = ((bits shr 27) and MASK_7L).toInt() | ||
|     val wholeSeconds = BigDecimal.valueOf((bits shr 34) and MASK_6L) | ||
|     val fraction = BigDecimal.valueOf((bits shr 40) and MASK_20L, 6) | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         wholeSeconds + fraction, | ||
|         (offsetField - 56) * 15, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a nanosecond-precision short timestamp that carries an explicit offset. | ||
|  * | ||
|  * The first 8 bytes at [position] hold year (bits 0-6, biased by 1970), month (bits 7-10), | ||
|  * day (bits 11-15), hour (bits 16-20), minute (bits 21-26), offset (bits 27-33), second | ||
|  * (bits 34-39) and the low 24 bits of the fraction (bits 40-63). The low 6 bits of the ninth byte | ||
|  * supply the high bits of the fraction, which is applied with a decimal scale of 9. The offset | ||
|  * field is biased by 56 and multiplied by 15 to obtain minutes. | ||
|  */ | ||
| fun readTimestampToNanosecondWithOffset(source: ByteArray, position: Int): Timestamp { | ||
|     val bits = readFixedIntAsLong(source, position, 8) | ||
|     val fractionHighBits = readFixedInt8AsShort(source, position + 8).toLong() and MASK_6L | ||
|     // Unsigned shift: the low fraction bits reach the top bit of the long and are not masked afterwards. | ||
|     val fractionBits = (bits ushr 40) or (fractionHighBits shl 24) | ||
|     val offsetField = ((bits shr 27) and MASK_7L).toInt() | ||
|     val wholeSeconds = BigDecimal.valueOf((bits shr 34) and MASK_6L) | ||
|     val fraction = BigDecimal.valueOf(fractionBits, 9) | ||
|     return Timestamp.forSecond( | ||
|         (bits and MASK_7L).toInt() + 1970, | ||
|         ((bits shr 7) and MASK_4L).toInt(), | ||
|         ((bits shr 11) and MASK_5L).toInt(), | ||
|         ((bits shr 16) and MASK_5L).toInt(), | ||
|         ((bits shr 21) and MASK_6L).toInt(), | ||
|         wholeSeconds + fraction, | ||
|         (offsetField - 56) * 15, | ||
|     ) | ||
| } | ||
|
|
||
| /** | ||
|  * Decodes a short timestamp by dispatching to the decoder selected by [precisionAndOffsetMode]. | ||
|  * | ||
|  * Only the low nibble of [precisionAndOffsetMode] is used as the table index, so callers may pass | ||
|  * either the bare precision/offset mode (0x0..0xC) or a complete opcode whose low nibble carries it. | ||
|  * | ||
|  * @throws ArrayIndexOutOfBoundsException if the low nibble does not select one of the 13 decoders. | ||
|  */ | ||
| fun readTimestamp(source: ByteArray, position: Int, precisionAndOffsetMode: Int): Timestamp { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was just the low nibble. Looks like the bytecode reference does call for the entire opcode, so I made that change |
||
| // TODO: calling function references like this might be slower than just using a conditional or other solutions. | ||
| // Might be worth looking into. | ||
|     // Index by the low nibble so that a full opcode cannot run past the end of the 13-entry table. | ||
|     val decoder = opcodeToDecoderFunctionTable[precisionAndOffsetMode and 0xF] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think it will be. A The extra indirection here also limits the JVM's ability to perform method inlining. For example, if an application only reads data that uses 0x85 opcode, the JVM could inline that particular method into this method, eliminating the overhead of the function call. However, when you're using method references like this, hinders that ability. FYI, I think I wrote a note about that here: https://github.com/popematt/ion-java/blob/6f5274ecc5e5812f08884a6d1aa1c4d7546861fe/src/main/java/com/amazon/ion/v8/TimestampHelper.kt#L34-L38 So why are use using an array of function references for the opcode lookup? The main reasons are that (1) the opcode handler table is already going to be hard for the JVM to predict anyway because there's going to be lots of different opcodes in normal Ion data, (2) the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great explanation! |
||
|     return decoder(source, position) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a blocker.
We don't need to have helper functions for everything. In this case, it's worth asking what is this actually helping with? Some of the other helper functions in the class handle things like spreading the bits for a long value or converting from float to the int bits.
If you compare these two, I think the version with
`BytecodeHelper` doesn't add value, and maybe even obscures what's going on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point