-
Notifications
You must be signed in to change notification settings - Fork 34
fix: ipip-499 profile name, perf. and tests #458
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+465
−13
Merged
Changes from 4 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
37229b3
test: fix IPIP-499 profile name and add spec-matching CID tests
lidel 1a82209
perf: O(1) incremental directory size estimation in DirFlat
lidel 0d47153
Merge remote-tracking branch 'origin/main' into fix/ipip-499-profile-…
achingbrain 46b7af5
Merge branch 'main' into fix/ipip-499-profile-name-and-tests
achingbrain 83197b6
chore: update noble dep
achingbrain 6fdb739
Merge branch 'main' into fix/ipip-499-profile-name-and-tests
achingbrain 4a3f46e
chore: reduce duplication
achingbrain File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| /** | ||
| * Protobuf size calculation utilities for DAG-PB nodes. | ||
| * | ||
| * Computes exact serialized sizes matching @ipld/dag-pb's encoding | ||
| * without allocating byte arrays. Used by DirFlat to avoid O(N) | ||
| * re-serialization on every file insert. | ||
| * | ||
| * Ported from @ipld/dag-pb/src/pb-encode.js (sov, len64, sizeLink, sizeNode) | ||
| * and boxo's directory.go estimatedSize logic. | ||
| */ | ||
|
|
||
| import type { Mtime } from 'ipfs-unixfs' | ||
|
|
||
| // --- varint helpers (from @ipld/dag-pb/src/pb-encode.js:166-214) --- | ||
|
|
||
// 2^32: values at or above this are too wide for JS 32-bit shift operators.
const maxInt32 = 2 ** 32

// len8tab[b] is the number of bits needed to represent the byte value b:
// 0 for 0, 1 for 1, 2 for 2..3, ... 8 for 128..255.
const len8tab: readonly number[] = Array.from({ length: 256 }, (_, byte) => 32 - Math.clz32(byte))

/**
 * Minimum number of bits needed to represent the non-negative integer `x`
 * (0 for x = 0).
 *
 * The value is narrowed down to a single byte in up to three steps
 * (32, 16 and 8 bits), then the final byte is looked up in len8tab.
 */
function len64 (x: number): number {
  let bits = 0
  let rest = x

  if (rest >= maxInt32) {
    // bitwise operators truncate to 32 bits, so drop the low word arithmetically
    rest = Math.floor(rest / maxInt32)
    bits = 32
  }

  if (rest >= 0x10000) {
    rest >>>= 16
    bits += 16
  }

  if (rest >= 0x100) {
    rest >>>= 8
    bits += 8
  }

  return bits + len8tab[rest]
}
|
|
||
/**
 * Number of bytes protobuf uses to varint-encode `x`; mirrors sov() from
 * @ipld/dag-pb.
 */
export function varintLen (x: number): number {
  // An even value is bumped to the next odd one before measuring. This
  // keeps the bit length the same, except that 0 becomes 1 and therefore
  // still costs one byte.
  const odd = x % 2 === 0 ? x + 1 : x
  // each varint byte carries 7 payload bits; round up
  return Math.floor((len64(odd) + 6) / 7)
}
|
|
||
/**
 * Compute the UTF-8 byte length of a JS string without allocating.
 *
 * @ipld/dag-pb encodes PBLink.Name with TextEncoder (UTF-8), so the value
 * needed here is `new TextEncoder().encode(str).length`. This function
 * returns that number without creating the intermediate Uint8Array on
 * every put() call.
 *
 * Ill-formed input is handled the way TextEncoder handles it: every
 * unpaired surrogate (a high surrogate not followed by a low one, or a
 * low surrogate on its own) is counted as U+FFFD, which is 3 UTF-8 bytes.
 *
 * @param str - the string to measure
 * @returns the number of bytes `str` occupies when encoded as UTF-8
 */
export function utf8ByteLength (str: string): number {
  let len = 0
  for (let i = 0; i < str.length; i++) {
    const c = str.charCodeAt(i)
    if (c < 0x80) {
      // ASCII: 1 UTF-8 byte
      len++
    } else if (c < 0x800) {
      // U+0080 - U+07FF: 2 UTF-8 bytes
      len += 2
    } else if (c >= 0xD800 && c <= 0xDBFF) {
      // High surrogate: only forms a 4-byte code point when the next
      // code unit is a low surrogate. charCodeAt() returns NaN past the
      // end of the string, which fails both comparisons below.
      const next = str.charCodeAt(i + 1)
      if (next >= 0xDC00 && next <= 0xDFFF) {
        // Valid pair: one code point above U+FFFF, 4 UTF-8 bytes
        i++
        len += 4
      } else {
        // Unpaired high surrogate: encoded as U+FFFD (3 bytes). The next
        // code unit is NOT consumed so it is measured on its own.
        len += 3
      }
    } else {
      // U+0800 - U+FFFF: 3 UTF-8 bytes (includes unpaired low
      // surrogates, which are encoded as U+FFFD)
      len += 3
    }
  }
  return len
}
|
|
||
/**
 * Exact number of bytes one PBLink contributes to an encoded PBNode.
 *
 * Follows sizeLink() and the per-link wrapper in sizeNode() from
 * pb-encode.js:
 *   link  = Hash(1 + sov(cidLen) + cidLen)
 *         + Name(1 + sov(nameLen) + nameLen)
 *         + Tsize(1 + sov(tsize))
 *   total = 1 + sov(link) + link
 *
 * @param nameByteLen - UTF-8 byte length of the link name
 * @param cidByteLength - byte length of the link's CID
 * @param tsize - the link's Tsize value
 */
export function linkSerializedSize (nameByteLen: number, cidByteLength: number, tsize: number): number {
  // each length-delimited field costs: tag(1) + varint(length) + payload
  const hashField = 1 + varintLen(cidByteLength) + cidByteLength
  const nameField = 1 + varintLen(nameByteLen) + nameByteLen
  // Tsize is a plain varint field: tag(1) + varint(value)
  const tsizeField = 1 + varintLen(tsize)

  const linkLen = hashField + nameField + tsizeField

  // the link itself is embedded in PBNode.Links as a length-delimited field
  return 1 + varintLen(linkLen) + linkLen
}
|
|
||
// Mode a directory has when none is given explicitly (0o755 = 493)
const DIR_DEFAULT_MODE = 0o755

/**
 * Bytes taken by the UnixFS mtime field (field 8, a nested UnixTime
 * message), including its own tag and length prefix.
 */
function mtimeFieldSize (mtime: Mtime): number {
  const secs = Number(mtime.secs)

  // Seconds (field 1, int64 varint): a negative int64 is always written
  // as 10 bytes of two's complement
  let body = 1 + (secs < 0 ? 10 : varintLen(secs))

  // FractionalNanoseconds (field 2, fixed32) is optional
  if (mtime.nsecs != null) {
    body += 1 + 4
  }

  return 1 + varintLen(body) + body
}

/**
 * Exact number of bytes the PBNode Data field occupies for a UnixFS
 * directory.
 *
 * Only valid for directories: the type field is fixed to the 2-byte
 * directory encoding and the default mode is taken to be 0o755. File
 * nodes use a different type byte and a different default mode (0o644),
 * so do not use this for them.
 *
 * With neither mode nor mtime the result is always 4 bytes: a 2-byte
 * inner message [0x08, 0x01] plus tag(1) + length(1).
 *
 * @param mode - directory mode; adds nothing when undefined or equal to the default
 * @param mtime - modification time; adds nothing when null or undefined
 */
export function dataFieldSerializedSize (mode?: number, mtime?: Mtime): number {
  // UnixFS inner message always starts with the type field [0x08, 0x01]
  const typeSize = 2

  // mode (field 7, varint) is only written when set and not the default
  const modeSize = (mode === undefined || mode === DIR_DEFAULT_MODE)
    ? 0
    : 1 + varintLen(mode)

  const mtimeSize = mtime == null ? 0 : mtimeFieldSize(mtime)

  const innerSize = typeSize + modeSize + mtimeSize

  // PBNode Data wrapper: tag(1) + varint(innerSize) + innerBytes
  return 1 + varintLen(innerSize) + innerSize
}
86 changes: 86 additions & 0 deletions
86
packages/ipfs-unixfs-importer/test/helpers/deterministic.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| /** | ||
| * Deterministic pseudo-random byte generation matching kubo's Go test helpers. | ||
| * | ||
| * Algorithm: | ||
| * 1. key = sha256(utf8(seed)) -- 32 bytes | ||
| * 2. nonce = 12 zero bytes (ChaCha20-IETF) | ||
| * 3. return chacha20(key, nonce, zeros(size)) -- keystream XOR zeros = keystream | ||
| * | ||
| * @see https://github.com/ipfs/kubo/blob/master/test/cli/testutils/random_deterministic.go | ||
| */ | ||
|
|
||
| import { chacha20 } from '@noble/ciphers/chacha' | ||
|
achingbrain marked this conversation as resolved.
Outdated
|
||
| import { sha256 } from 'multiformats/hashes/sha2' | ||
|
|
||
/**
 * Characters that generated names are drawn from. Mirrors Go's
 * testutils.AlphabetEasy exactly: 39 characters, in which '0' appears
 * twice.
 */
const ALPHABET_EASY = 'abcdefghijklmnopqrstuvwxyz01234567890-_'

/**
 * Number of keystream bytes covered by one ChaCha20 block, i.e. by one
 * step of the block counter.
 */
const CHACHA20_BLOCK_LEN = 64
|
|
||
/**
 * Return `size` pseudo-random bytes that depend only on `seed`.
 * Counterpart of Go's DeterministicRandomReaderBytes.
 *
 * The bytes are the ChaCha20 keystream for key = sha256(utf8(seed)) and
 * an all-zero 12-byte nonce, obtained by encrypting `size` zero bytes.
 */
export async function deterministicRandomBytes (size: number, seed: string): Promise<Uint8Array> {
  const seedBytes = new TextEncoder().encode(seed)
  // the 32-byte sha256 digest is used directly as the cipher key
  const { digest: key } = await sha256.digest(seedBytes)
  const zeroNonce = new Uint8Array(12)
  const zeros = new Uint8Array(size)

  return chacha20(key, zeroNonce, zeros)
}
|
|
||
/**
 * Async-iterable variant of the deterministic byte generator: yields
 * `size` bytes in total, at most 1 MiB per chunk, so that large outputs
 * never need a single huge buffer.
 *
 * The ChaCha20 block counter is carried from one chunk to the next so
 * that the chunks continue one keystream, matching Go's streaming reader.
 */
export async function * deterministicRandomStream (size: number, seed: string): AsyncGenerator<Uint8Array> {
  const chunkSize = 1_048_576 // 1 MiB
  const { digest: key } = await sha256.digest(new TextEncoder().encode(seed))
  const nonce = new Uint8Array(12)

  // blockCounter is the ChaCha20 block index at which the next chunk starts
  for (let sent = 0, blockCounter = 0; sent < size;) {
    const n = Math.min(size - sent, chunkSize)
    yield chacha20(key, nonce, new Uint8Array(n), undefined, blockCounter)
    blockCounter += Math.ceil(n / CHACHA20_BLOCK_LEN)
    sent += n
  }
}
|
|
||
/**
 * Generate deterministic filenames matching kubo's createDeterministicFiles.
 *
 * Files 0..count-2 get `nameLen` chars, the last file gets `lastNameLen`
 * chars. Each byte of the ChaCha20 stream is mapped to a character of
 * ALPHABET_EASY by taking it modulo the alphabet length.
 *
 * The stream is always 1 MiB (matching Go's
 * DeterministicRandomReader("1MiB", seed)), so the total number of bytes
 * consumed, (count-1)*nameLen + lastNameLen, must not exceed 1,048,576.
 *
 * @param count - number of filenames to generate
 * @param nameLen - length of every filename except the last
 * @param lastNameLen - length of the last filename
 * @param seed - seed for the deterministic byte stream
 * @returns `count` filenames, in generation order
 * @throws {RangeError} (as a rejection) when the requested names need more
 * bytes than the 1 MiB stream provides
 */
export async function deterministicFilenames (
  count: number,
  nameLen: number,
  lastNameLen: number,
  seed: string
): Promise<string[]> {
  const streamSize = 1_048_576

  // Reading past the end of the stream would yield `undefined` bytes and
  // silently put the text "undefined" into names, so fail loudly instead.
  const bytesNeeded = count > 0 ? (count - 1) * nameLen + lastNameLen : 0
  if (bytesNeeded > streamSize) {
    throw new RangeError(`deterministicFilenames needs ${bytesNeeded} bytes but the stream only provides ${streamSize}`)
  }

  // Match Go: DeterministicRandomReader("1MiB", seed) - always 1 MiB stream
  const stream = await deterministicRandomBytes(streamSize, seed)

  const names: string[] = []
  let offset = 0

  for (let i = 0; i < count; i++) {
    const currentLen = (i === count - 1) ? lastNameLen : nameLen
    let name = ''
    for (let j = 0; j < currentLen; j++) {
      name += ALPHABET_EASY[stream[offset + j] % ALPHABET_EASY.length]
    }
    names.push(name)
    offset += currentLen
  }

  return names
}
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.