66 * libp2p-gossipsub without protocol-level cooperation. The exact
77 * encoding (after Codex review feedback on PR #501):
88 *
9- * msgId(topic, payload, fromIdentityId, seqno ) :=
9+ * msgId(topic, payload, publisherId, sequenceNumber) :=
1010 * sha256(
1111 * u32_be(len(topic)) ‖ topic
1212 * ‖ u32_be(len(payload)) ‖ payload
13- * ‖ u32_be(len(fromIdentityId )) ‖ fromIdentityId
14- * ‖ u64_be(seqno )
13+ * ‖ u32_be(len(publisherId)) ‖ publisherId
14+ * ‖ u64_be(sequenceNumber)
1515 * )
1616 *
1717 * Why length framing
2323 * the encoding injective — distinct tuples always serialise to
2424 * distinct hash inputs.
2525 *
26- * Why include seqno
27- * -----------------
26+ * Why include sequenceNumber
27+ * --------------------------
2828 * The whole point of gossipsub's msgId is dedup-with-retries: a peer
2929 * publishing the same payload twice (e.g. resending after a network
3030 * blip) must produce TWO distinct msgIds, otherwise the second
3333 * sequence number in the hash preserves that semantic without
3434 * forfeiting cross-backend determinism: every backend with a notion
3535 * of per-publisher monotonic ordering (gossipsub seqno, iroh-gossip
36- * sequence, etc.) maps the same (topic, payload, from , seq) tuple
37- * to the same hash.
36+ * sequence, etc.) maps the same (topic, payload, publisher, seq)
37+ * tuple to the same hash.
3838 *
3939 * Why throw on unsigned
4040 * ---------------------
41- * Codex review feedback on PR #501 round 4: with `type: 'unsigned'`
42- * the message has no publisher identity (no `from`) and no seqno.
43- * The earlier draft fell back to `fromBytes = []` and `seqno = 0n`,
44- * which means two different publishers sending the same payload on
45- * the same topic produce the SAME msgId — one publish gets falsely
46- * deduplicated. (The upstream default for unsigned —
47- * `sha256(data)` — has the same property, but a public function
48- * shouldn't replicate that pitfall in a freshly-shipped contract.)
41+ * Codex review feedback on PR #501 round 4: with no `from` and no
42+ * seqno, two different publishers sending the same payload would
43+ * produce the SAME msgId — false dedup. The upstream default for
44+ * unsigned (`sha256(data)`) has the same property, but a freshly-
45+ * shipped public function shouldn't replicate that pitfall. V10
46+ * configures gossipsub StrictSign by default so unsigned messages
47+ * don't appear in the wild today; throwing makes the unsupported
48+ * case loud and catches accidental misuse via the public re-export.
4949 *
50- * V10 configures gossipsub with the StrictSign default, so unsigned
51- * messages don't appear in the wild today. Throwing here makes the
52- * "unsigned not supported in this msgId scheme" stance explicit:
50+ * Why split into raw + adapter
51+ * ----------------------------
52+ * Codex review feedback on PR #501 round 5: the round-4 signature
53+ * accepted a libp2p `Message` directly, which made the "cross-
54+ * backend dedup" framing aspirational rather than concrete. A
55+ * future iroh-gossip backend would have its own message type with
56+ * its own ways of representing publisher and sequence — at which
57+ * point either we'd duplicate the framing logic (drift risk) or
58+ * have to refactor every consumer to convert through the libp2p
59+ * shape.
5360 *
54- * - any code path that tries to publish an unsigned message
55- * fails loudly (easy to debug),
56- * - external consumers of the exported function can't accidentally
57- * hit the false-dedup case,
58- * - a future PR that wants to support unsigned has to deliberately
59- * extend the scheme with a per-message identity (nonce / hash
60- * prefix / etc.), pinned by tests.
61+ * Round-5 split:
62+ * - `dkgGossipMsgIdRaw({ topic, data, publisherIdBytes,
63+ * sequenceNumber })` — backend-agnostic primitive over canonical
64+ * value types. Every backend's adapter normalises into this and
65+ * the framing/hash lives here once.
66+ * - `dkgGossipMsgId(msg: libp2p.Message)` — thin libp2p adapter:
67+ * unwraps `from.toMultihash().bytes` and `sequenceNumber`,
68+ * enforces signed-only (because libp2p's unsigned variant has
69+ * no publisher identity to feed in).
70+ * A future `dkgGossipMsgIdIroh(msg: iroh.GossipMessage)` adapter
71+ * goes alongside; the framing lives once in `dkgGossipMsgIdRaw`.
6172 *
62- * v1 ships only `LibP2PGossipBackend`, so this function only changes
63- * which sha256 inputs gossipsub feeds itself; nothing observable on
64- * the wire. Locking the constant in NOW (rather than after a second
65- * backend ships) avoids a future synchronised mid-flight upgrade.
73+ * Wiring
74+ * ------
75+ * v1 ships only the function and tests. The actual `msgIdFn` wiring
76+ * in `node.ts` is intentionally deferred — see RFC 07 §5.4 + Phase 5
77+ * for the rolling-upgrade rationale and the coordinated-cutover plan.
78+ *
79+ * @experimental Public API but intentionally unwired. The encoding
80+ * is pinned by `gossip-msg-id.test.ts`; downstream consumers may
81+ * import for inspection / future-backend adapters but should not
82+ * rely on the in-process libp2p mesh routing through it yet.
6683 */
6784import { sha256 } from '@noble/hashes/sha2.js' ;
6885import type { Message } from '@libp2p/gossipsub' ;
@@ -92,15 +109,38 @@ export class DkgGossipUnsignedMessageError extends Error {
92109 }
93110}
94111
95- export function dkgGossipMsgId ( msg : Message ) : Uint8Array {
96- if ( msg . type !== 'signed' ) {
97- throw new DkgGossipUnsignedMessageError ( ) ;
98- }
/**
 * Canonical, backend-agnostic inputs for the msgId primitive
 * (`dkgGossipMsgIdRaw`).
 *
 * Every field is `readonly`: the value is a pure input tuple and is
 * only ever read when the msgId is derived. Note that `readonly` guards
 * the property bindings only — it does not freeze the bytes inside the
 * `Uint8Array`s, so callers should not mutate those buffers while a
 * msgId computation is in flight.
 *
 * @experimental
 */
export interface DkgGossipMsgIdInput {
  /** Gossip topic string (UTF-8 encoded inside `dkgGossipMsgIdRaw`). */
  readonly topic: string;

  /** Raw payload bytes. */
  readonly data: Uint8Array;

  /**
   * Canonical bytes identifying the publisher. For libp2p this is
   * `peerId.toMultihash().bytes`; other backends supply their
   * equivalent canonical identity bytes.
   */
  readonly publisherIdBytes: Uint8Array;

  /**
   * Per-publisher monotonic sequence (gossipsub seqno, iroh sequence,
   * etc.).
   */
  readonly sequenceNumber: bigint;
}
99131
100- const topicBytes = new TextEncoder ( ) . encode ( msg . topic ) ;
101- const data = msg . data ;
102- const fromBytes = msg . from . toMultihash ( ) . bytes ;
103- const seqno = msg . sequenceNumber ;
132+ /**
133+ * Backend-agnostic msgId primitive. Every gossip backend adapter
134+ * normalises into `DkgGossipMsgIdInput` and the framing + hash
135+ * lives here once.
136+ *
137+ * @experimental
138+ */
139+ export function dkgGossipMsgIdRaw ( input : DkgGossipMsgIdInput ) : Uint8Array {
140+ const topicBytes = new TextEncoder ( ) . encode ( input . topic ) ;
141+ const data = input . data ;
142+ const fromBytes = input . publisherIdBytes ;
143+ const seqno = input . sequenceNumber ;
104144
105145 const total =
106146 4 + topicBytes . length +
@@ -123,3 +163,24 @@ export function dkgGossipMsgId(msg: Message): Uint8Array {
123163
124164 return sha256 ( buf ) ;
125165}
166+
/**
 * Adapter from a libp2p-gossipsub `Message` to the backend-agnostic
 * `dkgGossipMsgIdRaw` primitive. Its shape makes it usable as the
 * `msgIdFn` option of `gossipsub({ ... })` once that wiring lands in
 * `node.ts`.
 *
 * Only signed messages are accepted: the publisher bytes and the
 * sequence number are both read off the signed variant.
 *
 * @param msg - gossipsub message to derive the id for.
 * @returns the digest produced by `dkgGossipMsgIdRaw` for the
 *   message's topic, data, publisher multihash bytes and sequence
 *   number.
 * @throws DkgGossipUnsignedMessageError if `msg.type !== 'signed'`.
 *
 * @experimental Public but intentionally unwired in v1; see file
 * doc-comment + RFC 07 §5.4 for the rollout plan.
 */
export function dkgGossipMsgId(msg: Message): Uint8Array {
  if (msg.type === 'signed') {
    // Normalise the libp2p shape into the canonical input tuple; the
    // framing and hashing live in the raw primitive, not here.
    const topic = msg.topic;
    const data = msg.data;
    const publisherIdBytes = msg.from.toMultihash().bytes;
    const sequenceNumber = msg.sequenceNumber;
    return dkgGossipMsgIdRaw({ topic, data, publisherIdBytes, sequenceNumber });
  }

  // Anything that is not a signed message has no usable publisher
  // identity for this scheme, so fail loudly.
  throw new DkgGossipUnsignedMessageError();
}
0 commit comments