test(js) refactor decoding tests #738
Open · Turtelll wants to merge 25 commits into maplibre:main from Turtelll:refactor-tests
+2,447 −1,195
Commits (25)
Changes from 9 commits
a4ed712 refactor integerStreamDecoder tests to use encoded data to decode and…
c02f57e Merge branch 'main' of https://github.com/maplibre/maplibre-tile-spec…
df7cdea refactor propertyDecoder.spec.ts and move helper functions to decodin…
a7194d1 refactor stringDecoder.spec.ts
6571ed6 removed unused util
15421b6 add tests for decodeSharedDictionary
34e9b3c run formatting
1bd06ce run formatting
652da10 Merge branch 'main' of https://github.com/maplibre/maplibre-tile-spec…
985b487 add tests for double property columns
7f143bd add tests for unsigned int property columns and column skipping
20098b4 add tests for unsigned int property columns and column skipping
96ec2d2 add tests for string propertyColumn and skipping complex columns in p…
496d468 move stream/column creators to util file
11873e2 fix string propertyColumn test
3760065 add tests for decodeIntStream to integerStreamDecoder.spec.ts
ecea015 add tests for decodeFloat64Buffer to integerStreamDecoder.spec.ts
c68a637 add tests for decodeNullableIntStream to integerStreamDecoder.spec.ts
cdee902 add tests for decodeIntStream with morton and a delta encoder
315fb35 move encodings to new folder
3155e85 [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot])
8e50b85 clean up decodingTestUtils.ts
c3613d5 Merge branch 'refactor-tests' of https://github.com/Turtelll/maplibre…
134cd3f clean up encodingUtils.ts
1d97fa1 add stringEncoder.ts
decodingTestUtils.ts (new file, +310 lines)
```typescript
import { PhysicalStreamType } from "../metadata/tile/physicalStreamType";
import { LogicalStreamType } from "../metadata/tile/logicalStreamType";
import { LogicalLevelTechnique } from "../metadata/tile/logicalLevelTechnique";
import { PhysicalLevelTechnique } from "../metadata/tile/physicalLevelTechnique";
import { DictionaryType } from "../metadata/tile/dictionaryType";
import { LengthType } from "../metadata/tile/lengthType";
import { OffsetType } from "../metadata/tile/offsetType";
import { type RleEncodedStreamMetadata, type StreamMetadata } from "../metadata/tile/streamMetadataDecoder";
import IntWrapper from "./intWrapper";
import { type Column, type Field, ComplexType, ScalarType } from "../metadata/tileset/tilesetMetadata";

export function createStreamMetadata(
    logicalTechnique1: LogicalLevelTechnique,
    logicalTechnique2: LogicalLevelTechnique = LogicalLevelTechnique.NONE,
    numValues: number = 3,
): StreamMetadata {
    return {
        physicalStreamType: PhysicalStreamType.DATA,
        logicalStreamType: new LogicalStreamType(DictionaryType.NONE),
        logicalLevelTechnique1: logicalTechnique1,
        logicalLevelTechnique2: logicalTechnique2,
        physicalLevelTechnique: PhysicalLevelTechnique.VARINT,
        numValues,
        byteLength: 10,
        decompressedCount: numValues,
    };
}

export function createRleMetadata(
    logicalTechnique1: LogicalLevelTechnique,
    logicalTechnique2: LogicalLevelTechnique,
    runs: number,
    numRleValues: number,
): RleEncodedStreamMetadata {
    return {
        physicalStreamType: PhysicalStreamType.DATA,
        logicalStreamType: new LogicalStreamType(DictionaryType.NONE),
        logicalLevelTechnique1: logicalTechnique1,
        logicalLevelTechnique2: logicalTechnique2,
        physicalLevelTechnique: PhysicalLevelTechnique.VARINT,
        numValues: runs * 2,
        byteLength: 10,
        decompressedCount: numRleValues,
        runs,
        numRleValues,
    };
}

export function createStructFieldStreams(
    offsetIndices: number[],
    presentValues: boolean[],
    isPresent: boolean = true,
): Uint8Array {
    if (!isPresent) {
        // Field not present in tile: encode numStreams = 0
        const buffer = new Uint8Array(5);
        const offset = new IntWrapper(0);
        encodeSingleVarintInt32(0, buffer, offset);
        return buffer.slice(0, offset.get());
    }

    // Encode numStreams = 2 (PRESENT + OFFSET streams)
    const numStreamsBuffer = new Uint8Array(5);
    const numStreamsOffset = new IntWrapper(0);
    encodeSingleVarintInt32(2, numStreamsBuffer, numStreamsOffset);
    const numStreamsEncoded = numStreamsBuffer.slice(0, numStreamsOffset.get());

    // Encode PRESENT stream (Boolean RLE)
    const presentMetadata = {
        physicalStreamType: PhysicalStreamType.PRESENT,
        logicalStreamType: new LogicalStreamType(DictionaryType.NONE),
        logicalLevelTechnique1: LogicalLevelTechnique.NONE,
        logicalLevelTechnique2: LogicalLevelTechnique.NONE,
        physicalLevelTechnique: PhysicalLevelTechnique.VARINT,
        numValues: presentValues.length,
        byteLength: 0,
        decompressedCount: presentValues.length,
    };
    const encodedPresent = buildEncodedStream(presentMetadata, encodeBooleanRle(presentValues));

    // Encode OFFSET stream (dictionary indices)
    const offsetMetadata = {
        physicalStreamType: PhysicalStreamType.OFFSET,
        logicalStreamType: new LogicalStreamType(undefined, OffsetType.STRING),
        logicalLevelTechnique1: LogicalLevelTechnique.NONE,
        logicalLevelTechnique2: LogicalLevelTechnique.NONE,
        physicalLevelTechnique: PhysicalLevelTechnique.VARINT,
        numValues: offsetIndices.length,
        byteLength: 0,
        decompressedCount: offsetIndices.length,
    };
    const encodedOffsets = buildEncodedStream(offsetMetadata, encodeVarintInt32Array(new Int32Array(offsetIndices)));

    return concatenateBuffers(numStreamsEncoded, encodedPresent, encodedOffsets);
}

export function createColumnMetadataForStruct(
    columnName: string,
    childFields: Array<{ name: string; type?: number }>,
): Column {
    const children: Field[] = childFields.map((fieldConfig) => ({
        name: fieldConfig.name,
        nullable: true,
        scalarField: {
            physicalType: fieldConfig.type ?? ScalarType.STRING,
            type: "physicalType" as const,
        },
        type: "scalarField" as const,
    }));

    return {
        name: columnName,
        nullable: false,
        complexType: {
            physicalType: ComplexType.STRUCT,
            children,
            type: "physicalType" as const,
        },
        type: "complexType" as const,
    };
}

export function buildEncodedStream(
    streamMetadata: StreamMetadata | RleEncodedStreamMetadata,
    encodedData: Uint8Array,
): Uint8Array {
    // Update byteLength to match actual encoded data length
    const updatedMetadata = {
        ...streamMetadata,
        byteLength: encodedData.length,
    };

    const metadataBuffer = encodeStreamMetadata(updatedMetadata);
    const result = new Uint8Array(metadataBuffer.length + encodedData.length);
    result.set(metadataBuffer, 0);
    result.set(encodedData, metadataBuffer.length);

    return result;
}

export function encodeStreamMetadata(metadata: StreamMetadata | RleEncodedStreamMetadata): Uint8Array {
```
Collaborator: Shouldn't this be part of the encoding?

Author: It is really specific to the test data streams, so I would keep it here.
```typescript
    // (encodeStreamMetadata, continued)
    const buffer = new Uint8Array(100); // Oversized, will trim
    let writeOffset = 0;

    // Encode stream type byte (first byte):
    // physicalStreamType in upper 4 bits, type-specific value in lower 4 bits
    const physicalTypeIndex = Object.values(PhysicalStreamType).indexOf(metadata.physicalStreamType);
    let lowerNibble = 0;

    switch (metadata.physicalStreamType) {
        case PhysicalStreamType.DATA:
            lowerNibble =
                metadata.logicalStreamType.dictionaryType !== undefined
                    ? Object.values(DictionaryType).indexOf(metadata.logicalStreamType.dictionaryType)
                    : 0;
            break;
        case PhysicalStreamType.OFFSET:
            lowerNibble =
                metadata.logicalStreamType.offsetType !== undefined
                    ? Object.values(OffsetType).indexOf(metadata.logicalStreamType.offsetType)
                    : 0;
            break;
        case PhysicalStreamType.LENGTH:
            lowerNibble =
                metadata.logicalStreamType.lengthType !== undefined
                    ? Object.values(LengthType).indexOf(metadata.logicalStreamType.lengthType)
                    : 0;
            break;
    }

    const streamTypeByte = (physicalTypeIndex << 4) | lowerNibble;
    buffer[writeOffset++] = streamTypeByte;

    // Encode encodings header byte (second byte):
    // llt1 in bits 5-7, llt2 in bits 2-4, plt in bits 0-1
    const llt1Index = Object.values(LogicalLevelTechnique).indexOf(metadata.logicalLevelTechnique1);
    const llt2Index = Object.values(LogicalLevelTechnique).indexOf(metadata.logicalLevelTechnique2);
    const pltIndex = Object.values(PhysicalLevelTechnique).indexOf(metadata.physicalLevelTechnique);
    const encodingsHeader = (llt1Index << 5) | (llt2Index << 2) | pltIndex;
    buffer[writeOffset++] = encodingsHeader;

    // Encode numValues and byteLength as varints
    const offset = new IntWrapper(writeOffset);
    encodeSingleVarintInt32(metadata.numValues, buffer, offset);
    encodeSingleVarintInt32(metadata.byteLength, buffer, offset);

    // If RLE, encode runs and numRleValues
    if ("runs" in metadata && "numRleValues" in metadata) {
        encodeSingleVarintInt32(metadata.runs, buffer, offset);
        encodeSingleVarintInt32(metadata.numRleValues, buffer, offset);
    }

    return buffer.slice(0, offset.get());
}

export function encodeSingleVarintInt32(value: number, dst: Uint8Array, offset: IntWrapper): void {
    let v = value;
    while (v > 0x7f) {
        dst[offset.get()] = (v & 0x7f) | 0x80;
        offset.increment();
        v >>>= 7;
    }
    dst[offset.get()] = v & 0x7f;
    offset.increment();
}

export function encodeVarintInt32Array(values: Int32Array): Uint8Array {
    const buffer = new Uint8Array(values.length * 5);
    const offset = new IntWrapper(0);

    for (const value of values) {
        encodeSingleVarintInt32(value, buffer, offset);
    }
    return buffer.slice(0, offset.get());
}

export function encodeZigZag32(value: number): number {
    return (value << 1) ^ (value >> 31);
}

export function encodeSingleVarintInt64(value: bigint, dst: Uint8Array, offset: IntWrapper): void {
    let v = value;
    while (v > 0x7fn) {
        dst[offset.get()] = Number(v & 0x7fn) | 0x80;
        offset.increment();
        v >>= 7n;
    }
    dst[offset.get()] = Number(v & 0x7fn);
    offset.increment();
}

export function encodeVarintInt64Array(values: BigInt64Array): Uint8Array {
    const buffer = new Uint8Array(values.length * 10);
    const offset = new IntWrapper(0);

    for (const value of values) {
        encodeSingleVarintInt64(value, buffer, offset);
    }
    return buffer.slice(0, offset.get());
}

export function encodeZigZag64(value: bigint): bigint {
    return (value << 1n) ^ (value >> 63n);
}

export function encodeFloatsLE(values: Float32Array): Uint8Array {
    const buffer = new Uint8Array(values.length * 4);
    const view = new DataView(buffer.buffer);

    for (let i = 0; i < values.length; i++) {
        view.setFloat32(i * 4, values[i], true);
    }

    return buffer;
}
export function encodeBooleanRle(values: boolean[]): Uint8Array {
    // Pack booleans into bytes (8 booleans per byte)
    const numBytes = Math.ceil(values.length / 8);
    const packed = new Uint8Array(numBytes);

    for (let i = 0; i < values.length; i++) {
        if (values[i]) {
            const byteIndex = Math.floor(i / 8);
            const bitIndex = i % 8;
            packed[byteIndex] |= 1 << bitIndex;
        }
    }

    // Literal-run header: 256 - numBytes is -numBytes as an unsigned byte,
    // signalling that numBytes literal (unrepeated) bytes follow
    const result = new Uint8Array(1 + numBytes);
    result[0] = 256 - numBytes;
    result.set(packed, 1);

    return result;
}
export function concatenateBuffers(...buffers: Uint8Array[]): Uint8Array {
    const totalLength = buffers.reduce((sum, buf) => sum + buf.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;

    for (const buffer of buffers) {
        result.set(buffer, offset);
        offset += buffer.length;
    }

    return result;
}

export function encodeStrings(strings: string[]): Uint8Array {
    const encoder = new TextEncoder();
    const encoded = strings.map((s) => encoder.encode(s));
    const totalLength = encoded.reduce((sum, arr) => sum + arr.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const arr of encoded) {
        result.set(arr, offset);
        offset += arr.length;
    }
    return result;
}

export function createStringLengths(strings: string[]): Int32Array {
    const lengths = new Int32Array(strings.length);
    const encoder = new TextEncoder();
    for (let i = 0; i < strings.length; i++) {
        lengths[i] = encoder.encode(strings[i]).length;
    }
    return lengths;
}
```
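For orientation, here is a minimal sketch of how these helpers compose in a spec file. The relative import path and the test values are assumptions for illustration, and the final decode call is left as a comment because the decoder entry point varies per spec.

```typescript
// Illustrative only: the import path is an assumption based on this file's location.
import { buildEncodedStream, createStreamMetadata, encodeVarintInt32Array } from "./decodingTestUtils";
import { LogicalLevelTechnique } from "../metadata/tile/logicalLevelTechnique";

// Varint-encode three test values and prepend matching stream metadata.
const values = new Int32Array([10, 20, 30]);
const metadata = createStreamMetadata(LogicalLevelTechnique.NONE);
const encodedStream = buildEncodedStream(metadata, encodeVarintInt32Array(values));
// A spec would now hand encodedStream to the decoder under test and assert
// that the decoded stream equals [10, 20, 30].
```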
Collaborator: I might be reading this wrong, but I see some encoding methods here that are not "hard coded". If that is the case, and there might one day be a JavaScript encoder, we might as well place these methods in files named symmetrically to the decoding files. Later on, we could use them in the tests like you did. Let me know if this makes sense.
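A minimal sketch of what that symmetry could look like; the module path and file name below are assumptions that mirror the decoder-side naming, not files in this PR:

```typescript
// Hypothetical module: src/encodings/integerEncoder.ts (name mirrors the
// decoder side; the actual layout is up to the PR author). It would own the
// generic primitives currently living in the test utils, e.g.:
export function encodeZigZag32(value: number): number {
    return (value << 1) ^ (value >> 31);
}

export function encodeZigZag64(value: bigint): bigint {
    return (value << 1n) ^ (value >> 63n);
}
// ...while decodingTestUtils.ts keeps only the test-specific stream and
// metadata builders, and the specs import the encoders from here.
```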
Collaborator: It seems there's a lot of logic in this file. I'm not sure whether this is related to the previous comment, but it's worth taking into account.
Author: I can place them in an encoder file structure, but the encodings I included for the tests are really only the most basic ones, and half of the logic in this file is needed for creating streams and stream metadata. If I were to set those up manually, we would have at least 10,000 lines of extra code. The only thing that is "hard coded" is the FSST symbol table streams.

TL;DR: half of decodingTestUtils.ts is only for test setup; encodings like ZigZag can be moved to a symmetrical encoder directory, but I am not sure that makes sense right now.
Collaborator: Let's start with something: move as much code as possible to the encoding folder/files. If you can take a bit more time to create basic encoding methods that facilitate "full cycle encode-decode" tests, that would be the best approach; I don't know how complicated that is, but I would assume it is trivial for the basic cases. This will both make the tests easier to read and maintain and help us in the future if we decide to create a JS encoder.
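As one concrete reading of that suggestion, here is a minimal self-contained round-trip test, assuming a Vitest-style runner; the inline encode/decode helpers are illustrative stand-ins, not the project's encoder or decoder:

```typescript
import { describe, expect, it } from "vitest";

// Stand-in primitives for the sketch; the real ones would live in the encoding folder.
function encodeZigZag32(value: number): number {
    return (value << 1) ^ (value >> 31);
}

function decodeZigZag32(value: number): number {
    return (value >>> 1) ^ -(value & 1);
}

function encodeVarint(values: number[]): Uint8Array {
    const out: number[] = [];
    for (let v of values) {
        v >>>= 0; // treat as unsigned 32-bit
        while (v > 0x7f) {
            out.push((v & 0x7f) | 0x80);
            v >>>= 7;
        }
        out.push(v);
    }
    return Uint8Array.from(out);
}

function decodeVarint(buf: Uint8Array, count: number): number[] {
    const out: number[] = [];
    let pos = 0;
    for (let i = 0; i < count; i++) {
        let value = 0;
        let shift = 0;
        let byte: number;
        do {
            byte = buf[pos++];
            value |= (byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        out.push(value >>> 0);
    }
    return out;
}

describe("full cycle encode-decode", () => {
    it("round-trips zigzag varint integers", () => {
        const input = [0, 1, -1, 300, -300];
        const encoded = encodeVarint(input.map(encodeZigZag32));
        const decoded = decodeVarint(encoded, input.length).map(decodeZigZag32);
        expect(decoded).toEqual(input);
    });
});
```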
Author: OK, I will move them to the encoding folders and add tests where coverage is still lacking.
"full cycle encode-decode" tests are already happening