Skip to content

Commit 135aac7

Browse files
authored
Merge pull request #559 from OriginTrail/feat/publisher-storage-ack-typed-declines
feat(publisher): structured StorageACK declines instead of stream resets
2 parents 2aa7f9b + cf95297 commit 135aac7

8 files changed

Lines changed: 1029 additions & 49 deletions

File tree

packages/core/src/proto/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,13 @@ export {
8787

8888
export {
8989
type StorageACKMsg,
90+
type StorageACKDeclineCode,
91+
STORAGE_ACK_DECLINE_CODES,
92+
TRANSIENT_STORAGE_ACK_DECLINE_CODES,
9093
encodeStorageACK,
9194
decodeStorageACK,
95+
isStorageACKDecline,
96+
isTransientStorageACKDeclineCode,
9297
} from './storage-ack.js';
9398

9499
export {

packages/core/src/proto/storage-ack.ts

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,88 @@ const { Type, Field } = protobuf;
2626
* byte-for-byte.
2727
*/
2828

29+
/**
30+
* Declinable reasons a core node can return on `/dkg/10.0.0/storage-ack`
31+
* instead of a signed ACK. These are situations where the core
32+
* legitimately cannot produce an ACK for THIS PEER right now — a
33+
* well-formed publish request that this specific core just can't
34+
* satisfy.
35+
*
36+
* Codes split into two classes that the publisher treats differently:
37+
*
38+
* - **Transient** ({@link TRANSIENT_STORAGE_ACK_DECLINE_CODES}):
39+
* the local condition is expected to clear on its own (e.g. SWM is
40+
* catching up via gossip). The publisher retries the same peer with
41+
* the existing transport backoff before giving up — keeps quorum
42+
* reachable when replication is just slightly behind the publish.
43+
*
44+
* - **Permanent** (every other code):
45+
* the condition will not change on a fast retry (e.g. the operational
46+
* signer was rotated off-chain). The publisher deselects this peer
47+
* for THIS request and moves on to others; the reason is surfaced
48+
* in the final error if quorum still fails.
49+
*
50+
* Malformed-request errors are NOT declines and do NOT belong here.
51+
* The handler keeps `throw`ing on those so the publisher sees them as
52+
* stream errors with the original message and surfaces them to the
53+
* caller immediately rather than fanning out to every core looking
54+
* for a different answer.
55+
*
56+
* String values are part of the wire format: they are populated into
57+
* `StorageACK.declineCode` and surfaced in publisher logs / error
58+
* messages. Keep them stable across releases. Adding a new code is a
59+
* non-breaking change (older publishers see it as the catch-all
60+
* "unknown decline" path); renaming or removing one IS breaking.
61+
*/
62+
export const STORAGE_ACK_DECLINE_CODES = {
63+
/** SWM CONSTRUCT returned no quads for the request. */
64+
NO_DATA_IN_SWM: 'NO_DATA_IN_SWM',
65+
/** SWM has data but its merkle root does not match the publisher's. */
66+
MERKLE_MISMATCH_IN_SWM: 'MERKLE_MISMATCH_IN_SWM',
67+
/** Operational signer was just removed / rotated off-chain. */
68+
SIGNER_NOT_REGISTERED: 'SIGNER_NOT_REGISTERED',
69+
} as const;
70+
71+
export type StorageACKDeclineCode =
72+
(typeof STORAGE_ACK_DECLINE_CODES)[keyof typeof STORAGE_ACK_DECLINE_CODES];
73+
74+
/**
75+
* Decline codes whose root cause is expected to clear on its own
76+
* (typically SWM replication catching up via gossip). The publisher
77+
* retries these against the same peer through the normal transport
78+
* backoff before giving up, so a peer that would have ACKed seconds
79+
* later still contributes to quorum.
80+
*
81+
* Membership of this set is part of the protocol contract between
82+
* publisher and core — promoting / demoting a code is a behavior
83+
* change, not a wire change.
84+
*/
85+
export const TRANSIENT_STORAGE_ACK_DECLINE_CODES: ReadonlySet<string> = new Set<string>([
86+
STORAGE_ACK_DECLINE_CODES.NO_DATA_IN_SWM,
87+
STORAGE_ACK_DECLINE_CODES.MERKLE_MISMATCH_IN_SWM,
88+
]);
89+
90+
/** True iff `code` names a decline the publisher should retry rather than treat as permanent. */
91+
export function isTransientStorageACKDeclineCode(code: string | undefined): boolean {
92+
return typeof code === 'string' && TRANSIENT_STORAGE_ACK_DECLINE_CODES.has(code);
93+
}
94+
95+
/**
96+
* Wire schema. Fields 1–5 are the original ACK shape; fields 6–7 carry
97+
* a decline payload. Two optional strings (rather than a `oneof`) keep
98+
* the change strictly additive: old encoders never set the new fields,
99+
* old decoders silently ignore them, so cross-version traffic is
100+
* unchanged. New decoders inspect `declineCode` first — when it is
101+
* non-empty the message is a decline and the ACK fields are unset.
102+
*/
29103
export const StorageACKSchema = new Type('StorageACK')
30104
.add(new Field('merkleRoot', 1, 'bytes'))
31105
.add(new Field('coreNodeSignatureR', 2, 'bytes'))
32106
.add(new Field('coreNodeSignatureVS', 3, 'bytes'))
33107
.add(new Field('contextGraphId', 4, 'string'))
34-
.add(new Field('nodeIdentityId', 5, 'uint64'));
108+
.add(new Field('nodeIdentityId', 5, 'uint64'))
109+
.add(new Field('declineCode', 6, 'string'))
110+
.add(new Field('declineMessage', 7, 'string'));
35111

36112
type Long = { low: number; high: number; unsigned: boolean };
37113

@@ -41,6 +117,31 @@ export interface StorageACKMsg {
41117
coreNodeSignatureVS: Uint8Array;
42118
contextGraphId: string;
43119
nodeIdentityId: number | Long;
120+
/**
121+
* When non-empty, this message is a decline rather than a signed ACK
122+
* — see {@link STORAGE_ACK_DECLINE_CODES}. Old senders never set this
123+
* field; old receivers ignore it. New receivers MUST check this
124+
* before treating the message as an ACK (signature/merkleRoot are
125+
* unset on declines).
126+
*/
127+
declineCode?: string;
128+
/**
129+
* Human-readable reason that accompanies `declineCode`. Surfaced into
130+
* publisher logs and the final `storage_ack_insufficient` error so
131+
* operators can diagnose hosting / replication issues without having
132+
* to ssh into individual cores.
133+
*/
134+
declineMessage?: string;
135+
}
136+
137+
/**
138+
* Convenience: returns true iff the message is a decline (i.e.
139+
* `declineCode` is set to a non-empty string). Keeps callers from
140+
* having to remember the empty-string-as-undefined idiom that
141+
* protobufjs uses for unset string fields.
142+
*/
143+
export function isStorageACKDecline(msg: StorageACKMsg): boolean {
144+
return typeof msg.declineCode === 'string' && msg.declineCode.length > 0;
44145
}
45146

46147
export function encodeStorageACK(msg: StorageACKMsg): Uint8Array {

packages/core/test/v10-proto.test.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,61 @@ describe('StorageACKMsg', () => {
130130
it('deterministic encoding', () => {
131131
expect(encodeStorageACK(ack)).toEqual(encodeStorageACK(ack));
132132
});
133+
134+
it('decodes an old ACK (no decline fields) without populating declineCode', async () => {
135+
const decoded = decodeStorageACK(encodeStorageACK(ack));
136+
expect(decoded.declineCode == null || decoded.declineCode === '').toBe(true);
137+
expect(decoded.declineMessage == null || decoded.declineMessage === '').toBe(true);
138+
const { isStorageACKDecline } = await import('../src/proto/storage-ack.js');
139+
expect(isStorageACKDecline(decoded)).toBe(false);
140+
});
141+
142+
it('decline-only message: empty ACK fields + populated decline code/message round-trip', async () => {
143+
const { STORAGE_ACK_DECLINE_CODES, isStorageACKDecline } = await import('../src/proto/storage-ack.js');
144+
const decline: StorageACKMsg = {
145+
merkleRoot: new Uint8Array(0),
146+
coreNodeSignatureR: new Uint8Array(0),
147+
coreNodeSignatureVS: new Uint8Array(0),
148+
contextGraphId: '15',
149+
nodeIdentityId: 0,
150+
declineCode: STORAGE_ACK_DECLINE_CODES.NO_DATA_IN_SWM,
151+
declineMessage:
152+
'No data found in SWM graph did:dkg:context-graph:15/_shared_memory for entities: urn:a, urn:b',
153+
};
154+
const decoded = decodeStorageACK(encodeStorageACK(decline));
155+
expect(decoded.declineCode).toBe('NO_DATA_IN_SWM');
156+
expect(decoded.declineMessage).toContain('No data found in SWM graph');
157+
expect(decoded.contextGraphId).toBe('15');
158+
expect(isStorageACKDecline(decoded)).toBe(true);
159+
expect(new Uint8Array(decoded.merkleRoot).length).toBe(0);
160+
expect(new Uint8Array(decoded.coreNodeSignatureR).length).toBe(0);
161+
});
162+
163+
it('a new decoder reading bytes from an old encoder still yields a valid ACK (forward compat)', () => {
164+
// Literal pre-decline wire shape from the old 5-field schema:
165+
// 1=merkleRoot, 2=signatureR, 3=signatureVS, 4=contextGraphId,
166+
// 5=nodeIdentityId. Keeping this as bytes catches regressions where
167+
// the new schema stops decoding historical ACK payloads even though
168+
// the current encoder still omits unset decline fields.
169+
const wire = Uint8Array.from([
170+
0x0a, 0x20,
171+
...new Array(32).fill(0xa5),
172+
0x12, 0x20,
173+
...new Array(32).fill(0x11),
174+
0x1a, 0x20,
175+
...new Array(32).fill(0x22),
176+
0x22, 0x06,
177+
0x63, 0x67, 0x2d, 0x31, 0x30, 0x30,
178+
0x28, 0x07,
179+
]);
180+
const decoded = decodeStorageACK(wire);
181+
expect(decoded.contextGraphId).toBe('cg-100');
182+
expect(Number(decoded.nodeIdentityId)).toBe(7);
183+
expect(new Uint8Array(decoded.merkleRoot)).toEqual(new Uint8Array(32).fill(0xa5));
184+
expect(new Uint8Array(decoded.coreNodeSignatureR)).toEqual(new Uint8Array(32).fill(0x11));
185+
expect(new Uint8Array(decoded.coreNodeSignatureVS)).toEqual(new Uint8Array(32).fill(0x22));
186+
expect(decoded.declineCode == null || decoded.declineCode === '').toBe(true);
187+
});
133188
});
134189

135190
// ── SwmShareAck (rc.9 PR-D) ───────────────────────────────────────────

0 commit comments

Comments
 (0)