-
Notifications
You must be signed in to change notification settings - Fork 34
Expand file tree
/
Copy pathpb-size.ts
More file actions
158 lines (145 loc) · 5.02 KB
/
pb-size.ts
File metadata and controls
158 lines (145 loc) · 5.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/**
* Protobuf size calculation utilities for DAG-PB nodes.
*
* Computes exact serialized sizes matching @ipld/dag-pb's encoding
* without allocating byte arrays. Used by DirFlat to avoid O(N)
* re-serialization on every file insert.
*
* Ported from @ipld/dag-pb/src/pb-encode.js (sov, len64, sizeLink, sizeNode)
* and boxo's directory.go estimatedSize logic.
*/
import type { Mtime } from 'ipfs-unixfs'
// --- varint helpers (from @ipld/dag-pb/src/pb-encode.js:166-214) ---
// 2^32. len64() uses this as the threshold above which the high bits are
// split off with arithmetic, because JS bitwise operators act on 32 bits only.
const maxInt32 = 2 ** 32
// len8tab[i] is the number of bits needed to represent the byte value i
// (0 for i = 0, 1 for i = 1, ... 8 for i in 128..255). It is generated
// rather than spelled out: the bit length of i is 32 minus its count of
// leading zero bits.
const len8tab: number[] = Array.from({ length: 256 }, (_, i) => 32 - Math.clz32(i))
/**
 * Bit length of a non-negative integer: the number of bits needed to
 * represent `x`, with 0 for `x === 0`.
 *
 * The value is narrowed in steps (32, then 16, then 8 bits) until it fits
 * in a single byte, whose bit length is read from `len8tab`.
 */
function len64 (x: number): number {
  let remaining = x
  let bits = 0
  if (remaining >= maxInt32) {
    // Above 32 bits: drop the low word arithmetically, since `>>>` would
    // truncate the value to 32 bits.
    remaining = Math.floor(remaining / maxInt32)
    bits = 32
  }
  if (remaining >= 0x10000) {
    remaining >>>= 16
    bits += 16
  }
  if (remaining >= 0x100) {
    remaining >>>= 8
    bits += 8
  }
  return bits + len8tab[remaining]
}
/**
 * Number of bytes a protobuf varint needs to encode `x`, matching
 * @ipld/dag-pb's sov().
 *
 * A varint carries 7 payload bits per byte, so the result is the bit
 * length of `x` divided by 7, rounded up.
 *
 * @param x - non-negative integer to measure
 * @returns the encoded length in bytes (at least 1)
 */
export function varintLen (x: number): number {
  // Force the lowest bit on: this leaves the bit length of every non-zero
  // value unchanged but makes 0 count as one bit, so it still takes a byte.
  const withLowBit = x % 2 === 0 ? x + 1 : x
  return Math.floor((len64(withLowBit) + 6) / 7)
}
/**
 * Compute the UTF-8 byte length of a JS string without allocation.
 *
 * Safe to assume UTF-8 because @ipld/dag-pb always encodes PBLink.Name
 * via TextEncoder (UTF-8) and decodes via TextDecoder (UTF-8).
 * This produces the same result as textEncoder.encode(str).length
 * without the Uint8Array allocation on every put() call.
 *
 * Unpaired surrogates are counted as 3 bytes each, because TextEncoder
 * replaces them with U+FFFD (EF BF BD) before encoding.
 *
 * @param str - the string to measure
 * @returns the number of bytes TextEncoder would produce for `str`
 */
export function utf8ByteLength (str: string): number {
  let len = 0
  for (let i = 0; i < str.length; i++) {
    const c = str.charCodeAt(i)
    if (c < 0x80) {
      // ASCII: 1 UTF-8 byte
      len++
    } else if (c < 0x800) {
      // U+0080 - U+07FF: 2 UTF-8 bytes
      len += 2
    } else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < str.length) {
      const next = str.charCodeAt(i + 1)
      if (next >= 0xDC00 && next <= 0xDFFF) {
        // Valid surrogate pair: one code point above U+FFFF, 4 UTF-8
        // bytes. Consume the low surrogate as well.
        i++
        len += 4
      } else {
        // High surrogate not followed by a low surrogate: encoded as
        // U+FFFD (3 bytes). The following unit is NOT consumed; it is
        // measured on its own in the next iteration.
        len += 3
      }
    } else {
      // U+0800 - U+FFFF: 3 UTF-8 bytes. This also covers a lone low
      // surrogate and a high surrogate at the very end of the string,
      // both of which are encoded as U+FFFD.
      len += 3
    }
  }
  return len
}
/**
 * Exact number of bytes one PBLink contributes to the encoded PBNode.
 *
 * Mirrors sizeLink() plus the per-link wrapper applied in sizeNode() from
 * pb-encode.js:
 *   linkLen = Hash(1+sov(cidLen)+cidLen) + Name(1+sov(nameLen)+nameLen) + Tsize(1+sov(tsize))
 *   total   = 1 + sov(linkLen) + linkLen
 *
 * All three link fields are counted unconditionally.
 *
 * @param nameByteLen - UTF-8 byte length of the link name
 * @param cidByteLength - byte length of the link's CID
 * @param tsize - the link's Tsize value
 * @returns bytes added to the PBNode encoding by this link
 */
export function linkSerializedSize (nameByteLen: number, cidByteLength: number, tsize: number): number {
  // Each length-delimited field is: tag (1 byte) + varint length + payload.
  const hashField = 1 + varintLen(cidByteLength) + cidByteLength
  const nameField = 1 + varintLen(nameByteLen) + nameByteLen
  // Tsize is a plain varint field: tag (1 byte) + varint value.
  const tsizeField = 1 + varintLen(tsize)
  const linkLen = hashField + nameField + tsizeField
  // The link itself is embedded in PBNode.Links as a length-delimited field.
  return 1 + varintLen(linkLen) + linkLen
}
// Directory default mode (0o755 = 493); a mode equal to this is not encoded.
const DIR_DEFAULT_MODE = 0o755
/**
 * Exact number of bytes the PBNode Data field occupies for a UnixFS
 * directory.
 *
 * Directory-only: the type field is fixed at the 2-byte directory
 * encoding and the default mode is taken to be 0o755. Do not use this for
 * file nodes (different type byte, different default mode 0o644).
 *
 * In the common case (no mode, no mtime) the result is always 4 bytes:
 * a 2-byte inner message [0x08, 0x01] wrapped as 1 + 1 + 2.
 *
 * @param mode - optional mode; counted only when set and not the default
 * @param mtime - optional modification time; counted when not null/undefined
 * @returns bytes the Data field adds to the PBNode encoding
 */
export function dataFieldSerializedSize (mode?: number, mtime?: Mtime): number {
  // Type field [0x08, 0x01]: always present, 2 bytes for a directory.
  const typeBytes = 2

  // mode (field 7, varint): tag + value, skipped when unset or default.
  const modeBytes = (mode === undefined || mode === DIR_DEFAULT_MODE)
    ? 0
    : 1 + varintLen(mode)

  // mtime (field 8): a nested UnixTime message, length-delimited.
  let mtimeBytes = 0
  if (mtime != null) {
    // Seconds (field 1, int64 varint). A negative int64 is encoded in
    // two's complement and always takes 10 varint bytes.
    const seconds = Number(mtime.secs)
    const secondsBytes = 1 + (seconds < 0 ? 10 : varintLen(seconds))
    // FractionalNanoseconds (field 2, fixed32) -- optional: tag + 4 bytes.
    const nanosBytes = mtime.nsecs != null ? 1 + 4 : 0
    const unixTimeLen = secondsBytes + nanosBytes
    mtimeBytes = 1 + varintLen(unixTimeLen) + unixTimeLen
  }

  const innerLen = typeBytes + modeBytes + mtimeBytes
  // PBNode Data wrapper: tag(1) + varint(innerLen) + inner bytes.
  return 1 + varintLen(innerLen) + innerLen
}