From 9df13e1dfed860c2f03de1ca39c5af31922f8c74 Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 27 Feb 2026 18:06:08 +0000 Subject: [PATCH 01/11] For Voice packets, parse RTP Header fields for Sequence, Timestamp, and SSRC. Refactored so that instead of pure Buffer, we now send AudioPacket (interface extending Buffer) which has readonly fields sequence, timestamp, and ssrc. --- .../voice/src/receive/AudioReceiveStream.ts | 22 +++++++++++++++++++ packages/voice/src/receive/VoiceReceiver.ts | 14 +++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/packages/voice/src/receive/AudioReceiveStream.ts b/packages/voice/src/receive/AudioReceiveStream.ts index 394b5f9707af..c06044563683 100644 --- a/packages/voice/src/receive/AudioReceiveStream.ts +++ b/packages/voice/src/receive/AudioReceiveStream.ts @@ -36,6 +36,28 @@ export interface AudioReceiveStreamOptions extends ReadableOptions { end: EndBehavior; } +/** + * A Buffer containing a decoded Opus packet with RTP header metadata. + */ +export interface AudioPacket extends Buffer { + /** + * The RTP sequence number of this packet (16-bit, wraps at 65535). + */ + readonly sequence: number; + + /** + * The RTP timestamp of this packet (32-bit, wraps at 2^32 - 1). + */ + readonly timestamp: number; + + /** + * The synchronization source identifier for this packet (32-bit). + * A change in SSRC indicates a new RTP stream, and any stateful + * codec (e.g. Opus) decoder should be reset. + */ + readonly ssrc: number; +} + export function createDefaultAudioReceiveStreamOptions(): AudioReceiveStreamOptions { return { end: { diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index 90e4015cb9b1..fb66719172ee 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -10,6 +10,7 @@ import { methods } from '../util/Secretbox'; import { AudioReceiveStream, createDefaultAudioReceiveStreamOptions, + type AudioPacket, type AudioReceiveStreamOptions, } from './AudioReceiveStream'; import { SSRCMap } from './SSRCMap'; @@ -19,6 +20,15 @@ const HEADER_EXTENSION_BYTE = Buffer.from([0xbe, 0xde]); const UNPADDED_NONCE_LENGTH = 4; const AUTH_TAG_LENGTH = 16; +function createAudioPacket(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { + Object.defineProperties(buffer, { + sequence: { value: sequence, writable: false, enumerable: false, configurable: false }, + timestamp: { value: timestamp, writable: false, enumerable: false, configurable: false }, + ssrc: { value: ssrc, writable: false, enumerable: false, configurable: false }, + }); + return buffer as AudioPacket; +} + /** * Attaches to a VoiceConnection, allowing you to receive audio packets from other * users that are speaking. @@ -165,6 +175,8 @@ export class VoiceReceiver { */ public onUdpMessage(msg: Buffer) { if (msg.length <= 8) return; + const sequence = msg.readUInt16BE(2); + const timestamp = msg.readUInt32BE(4); const ssrc = msg.readUInt32BE(8); const userData = this.ssrcMap.get(ssrc); @@ -184,7 +196,7 @@ export class VoiceReceiver { this.connectionData.secretKey, userData.userId, ); - if (packet) stream.push(packet); + if (packet) stream.push(createAudioPacket(packet, sequence, timestamp, ssrc)); } catch (error) { stream.destroy(error as Error); } From f04d08bca5ce4d312799f19fe922ec37219830a9 Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 27 Feb 2026 22:38:45 +0000 Subject: [PATCH 02/11] Add VoiceReceiver tests for parsing RTP Packet header values (sequence, timestamp, ssrc) --- .../voice/__tests__/VoiceReceiver.test.ts | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/packages/voice/__tests__/VoiceReceiver.test.ts b/packages/voice/__tests__/VoiceReceiver.test.ts index 65702cd13e7d..c1b9d16de045 100644 --- a/packages/voice/__tests__/VoiceReceiver.test.ts +++ b/packages/voice/__tests__/VoiceReceiver.test.ts @@ -68,6 +68,47 @@ describe('VoiceReceiver', () => { expect(stream.read()).toEqual(RTP_PACKET.opusFrame); }); + test.each([ + ['Desktop', RTP_PACKET_DESKTOP, 10_217, 4_157_324_497], + ['Chrome', RTP_PACKET_CHROME, 18_143, 660_155_095], + ['Android', RTP_PACKET_ANDROID, 14_800, 3_763_991_879], + ])('onUdpMessage: RTP header metadata from %s', async (_testName, RTP_PACKET, expectedSeq, expectedTs) => { + receiver['decrypt'] = vitest.fn().mockImplementationOnce(() => RTP_PACKET.decrypted); + + const spy = vitest.spyOn(receiver.ssrcMap, 'get'); + spy.mockImplementation(() => ({ + audioSSRC: RTP_PACKET.ssrc, + userId: '123', + })); + + const stream = receiver.subscribe('123'); + + receiver['onUdpMessage'](RTP_PACKET.packet); + await nextTick(); + const packet = stream.read(); + expect(packet.sequence).toEqual(expectedSeq); + expect(packet.timestamp).toEqual(expectedTs); + expect(packet.ssrc).toEqual(RTP_PACKET.ssrc); + }); + + test('onUdpMessage: AudioPacket is backwards compatible', async () => { + receiver['decrypt'] = vitest.fn().mockImplementationOnce(() => RTP_PACKET_DESKTOP.decrypted); + + const spy = vitest.spyOn(receiver.ssrcMap, 'get'); + spy.mockImplementation(() => ({ + audioSSRC: RTP_PACKET_DESKTOP.ssrc, + userId: '123', + })); + + const stream = receiver.subscribe('123'); + + receiver['onUdpMessage'](RTP_PACKET_DESKTOP.packet); + await nextTick(); + const packet = stream.read(); + expect(Buffer.isBuffer(packet)).toBe(true); + expect(packet).toEqual(RTP_PACKET_DESKTOP.opusFrame); + }); + test('onUdpMessage: <8 bytes packet', () => { expect(() => receiver['onUdpMessage'](Buffer.alloc(4))).not.toThrow(); }); From 5046f133dd238523263fc17df293fffa3dc6b2a1 Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 27 Feb 2026 23:34:20 +0000 Subject: [PATCH 03/11] Changes from automated PR feedback: - Improve docstring use (also moved method to be private static to be more in-line with rest of code, and improved clarity of naming) - Fix pre-existing issue (min packet length was 8 bytes, but was expecting reading a uint32 at offset 8, so actual min length is 12) - Fix AudioPacket description --- .../voice/src/receive/AudioReceiveStream.ts | 14 +++--- packages/voice/src/receive/VoiceReceiver.ts | 46 ++++++++++++------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/packages/voice/src/receive/AudioReceiveStream.ts b/packages/voice/src/receive/AudioReceiveStream.ts index c06044563683..72c5390b835a 100644 --- a/packages/voice/src/receive/AudioReceiveStream.ts +++ b/packages/voice/src/receive/AudioReceiveStream.ts @@ -37,7 +37,7 @@ export interface AudioReceiveStreamOptions extends ReadableOptions { } /** - * A Buffer containing a decoded Opus packet with RTP header metadata. + * A Buffer containing encoded Opus packet data and key RTP Header metadata. */ export interface AudioPacket extends Buffer { /** @@ -46,16 +46,16 @@ export interface AudioPacket extends Buffer { readonly sequence: number; /** - * The RTP timestamp of this packet (32-bit, wraps at 2^32 - 1). + * The synchronization source identifier for this packet (32-bit). + * A change in SSRC indicates a new RTP stream, so any associated + * decoder should be reset. */ - readonly timestamp: number; + readonly ssrc: number; /** - * The synchronization source identifier for this packet (32-bit). - * A change in SSRC indicates a new RTP stream, and any stateful - * codec (e.g. Opus) decoder should be reset. + * The RTP timestamp of this packet (32-bit, wraps at 2^32 - 1). */ - readonly ssrc: number; + readonly timestamp: number; } export function createDefaultAudioReceiveStreamOptions(): AudioReceiveStreamOptions { diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index fb66719172ee..3aff3f0abfc3 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -20,15 +20,6 @@ const HEADER_EXTENSION_BYTE = Buffer.from([0xbe, 0xde]); const UNPADDED_NONCE_LENGTH = 4; const AUTH_TAG_LENGTH = 16; -function createAudioPacket(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { - Object.defineProperties(buffer, { - sequence: { value: sequence, writable: false, enumerable: false, configurable: false }, - timestamp: { value: timestamp, writable: false, enumerable: false, configurable: false }, - ssrc: { value: ssrc, writable: false, enumerable: false, configurable: false }, - }); - return buffer as AudioPacket; -} - /** * Attaches to a VoiceConnection, allowing you to receive audio packets from other * users that are speaking. @@ -141,9 +132,14 @@ export class VoiceReceiver { * @param nonce - The nonce buffer used by the connection for encryption * @param secretKey - The secret key used by the connection for encryption * @param userId - The user id that sent the packet + * @param ssrc - already-parsed SSRC (Synchronization Source Identifier) from the RTP Header * @returns The parsed Opus packet */ - private parsePacket(buffer: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array, userId: string) { + private parsePacket(buffer: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array, userId: string, ssrc: number) { + // Parse key RTP Header fields + const sequence = buffer.readUInt16BE(2); + const timestamp = buffer.readUInt32BE(4); + let packet: Buffer = this.decrypt(buffer, mode, nonce, secretKey); if (!packet) throw new Error('Failed to parse packet'); @@ -164,7 +160,26 @@ export class VoiceReceiver { if (daveSession) packet = daveSession.decrypt(packet, userId)!; } - return packet; + // Extend packet with RTP header information + return VoiceReceiver.addPacketHeaders(packet, sequence, timestamp, ssrc); + } + + /** + * Extends the Buffer for Opus audio data with RTP Header information + * + * @param buffer the opus packet data to extend + * @param sequence the sequence number of the packet + * @param timestamp see definition in + * @param ssrc x + * @returns the input buffer, with RTP header information added + */ + private static addPacketHeaders(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { + Object.defineProperties(buffer, { + sequence: { value: sequence, writable: false, enumerable: false, configurable: false }, + timestamp: { value: timestamp, writable: false, enumerable: false, configurable: false }, + ssrc: { value: ssrc, writable: false, enumerable: false, configurable: false }, + }); + return buffer as AudioPacket; } /** @@ -174,9 +189,7 @@ export class VoiceReceiver { * @internal */ public onUdpMessage(msg: Buffer) { - if (msg.length <= 8) return; - const sequence = msg.readUInt16BE(2); - const timestamp = msg.readUInt32BE(4); + if (msg.length <= 12) return; const ssrc = msg.readUInt32BE(8); const userData = this.ssrcMap.get(ssrc); @@ -195,8 +208,9 @@ export class VoiceReceiver { this.connectionData.nonceBuffer, this.connectionData.secretKey, userData.userId, - ); - if (packet) stream.push(createAudioPacket(packet, sequence, timestamp, ssrc)); + ssrc + ); + if (packet) stream.push(packet); } catch (error) { stream.destroy(error as Error); } From df349ca4d1777580aaed85aa2873d47815d121da Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 27 Feb 2026 23:46:05 +0000 Subject: [PATCH 04/11] formatting: prettier wants to chop down new longer parsePacket signature --- packages/voice/src/receive/VoiceReceiver.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index 3aff3f0abfc3..e3a9d030e34f 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -135,7 +135,14 @@ export class VoiceReceiver { * @param ssrc - already-parsed SSRC (Synchronization Source Identifier) from the RTP Header * @returns The parsed Opus packet */ - private parsePacket(buffer: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array, userId: string, ssrc: number) { + private parsePacket( + buffer: Buffer, + mode: string, + nonce: Buffer, + secretKey: Uint8Array, + userId: string, + ssrc: number, + ) { // Parse key RTP Header fields const sequence = buffer.readUInt16BE(2); const timestamp = buffer.readUInt32BE(4); @@ -208,8 +215,8 @@ export class VoiceReceiver { this.connectionData.nonceBuffer, this.connectionData.secretKey, userData.userId, - ssrc - ); + ssrc, + ); if (packet) stream.push(packet); } catch (error) { stream.destroy(error as Error); From b9399b599c561bf8286ef56cb85183bb8fa8efb8 Mon Sep 17 00:00:00 2001 From: Peter Date: Sat, 28 Feb 2026 01:06:40 +0000 Subject: [PATCH 05/11] parsePacket: account for case where daveSession.decrypt could return null --- packages/voice/src/receive/VoiceReceiver.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index e3a9d030e34f..d4e39dfd98a3 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -168,7 +168,11 @@ export class VoiceReceiver { } // Extend packet with RTP header information - return VoiceReceiver.addPacketHeaders(packet, sequence, timestamp, ssrc); + if (packet) { + return VoiceReceiver.addPacketHeaders(packet, sequence, timestamp, ssrc); + } else { + return null; + } } /** From 023b654ff7b8a39f0ea9e336ef5b67d0fd1d4cb0 Mon Sep 17 00:00:00 2001 From: Peter Date: Sun, 1 Mar 2026 12:11:42 +0000 Subject: [PATCH 06/11] docstring suggested change on packages/voice/src/receive/VoiceReceiver.ts Co-authored-by: Qjuh <76154676+Qjuh@users.noreply.github.com> --- packages/voice/src/receive/VoiceReceiver.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index d4e39dfd98a3..02fcafa2dc43 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -178,8 +178,8 @@ export class VoiceReceiver { /** * Extends the Buffer for Opus audio data with RTP Header information * - * @param buffer the opus packet data to extend - * @param sequence the sequence number of the packet + * @param buffer - the opus packet data to extend + * @param sequence - the sequence number of the packet * @param timestamp see definition in * @param ssrc x * @returns the input buffer, with RTP header information added From 57034c1bdb4c075b4dd3b450c838d3d615f342ac Mon Sep 17 00:00:00 2001 From: Peter Date: Sun, 1 Mar 2026 12:17:53 +0000 Subject: [PATCH 07/11] Improve addPacketHeaders docstring per review, adding missing docs and fixing style to use hyphens. Also move addPacketHeaders to a bare function below the class per review comment --- packages/voice/src/receive/VoiceReceiver.ts | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index 02fcafa2dc43..d69a5dad5a47 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -169,30 +169,12 @@ export class VoiceReceiver { // Extend packet with RTP header information if (packet) { - return VoiceReceiver.addPacketHeaders(packet, sequence, timestamp, ssrc); + return addPacketHeaders(packet, sequence, timestamp, ssrc); } else { return null; } } - /** - * Extends the Buffer for Opus audio data with RTP Header information - * - * @param buffer - the opus packet data to extend - * @param sequence - the sequence number of the packet - * @param timestamp see definition in - * @param ssrc x - * @returns the input buffer, with RTP header information added - */ - private static addPacketHeaders(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { - Object.defineProperties(buffer, { - sequence: { value: sequence, writable: false, enumerable: false, configurable: false }, - timestamp: { value: timestamp, writable: false, enumerable: false, configurable: false }, - ssrc: { value: ssrc, writable: false, enumerable: false, configurable: false }, - }); - return buffer as AudioPacket; - } - /** * Called when the UDP socket of the attached connection receives a message. * @@ -248,3 +230,21 @@ export class VoiceReceiver { return stream; } } + +/** + * Extends the Buffer for Opus audio data with RTP Header information + * + * @param buffer - the opus packet data to extend + * @param sequence - NTP Header sequence value for the packet + * @param timestamp - NTP Header timestamp value for the packet + * @param ssrc - NTP Header synchronization source identifier (SSRC) for the packet + * @returns the input buffer, with RTP header information added + */ +function addPacketHeaders(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { + Object.defineProperties(buffer, { + sequence: { value: sequence, writable: false, enumerable: false, configurable: false }, + timestamp: { value: timestamp, writable: false, enumerable: false, configurable: false }, + ssrc: { value: ssrc, writable: false, enumerable: false, configurable: false }, + }); + return buffer as AudioPacket; +} From 7575a2554991ede3cd1860d7e9000d2a1ab139bd Mon Sep 17 00:00:00 2001 From: Peter Date: Sun, 1 Mar 2026 12:29:21 +0000 Subject: [PATCH 08/11] Amend RTP packet handling to parse instead of looking for magic bytes. Minor naming changes as part of this because we are working with two Buffers (raw RTP packet, decrypted RTP/DAVE payload). Change tests in VoiceReceiver.test.ts that were directly testing `decrypt` to instead test `parsePacket` --- packages/voice/__mocks__/rtp.ts | 5 +- .../voice/__tests__/VoiceReceiver.test.ts | 27 +++++-- packages/voice/src/receive/VoiceReceiver.ts | 79 ++++++++++--------- 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/packages/voice/__mocks__/rtp.ts b/packages/voice/__mocks__/rtp.ts index 805477694aa9..8bd34a1a25c7 100644 --- a/packages/voice/__mocks__/rtp.ts +++ b/packages/voice/__mocks__/rtp.ts @@ -1,8 +1,8 @@ import { Buffer } from 'node:buffer'; // The following constants are silence packets collected from various platforms because Discord did not previously send header extensions -// The header extension (extra data in decrypted vs opusFrame) can be detected in the position of {encrypted.subarray(12,14)} if it is equal to 0xbe,0xde -// The header extension length will then follow as an integer and can be removed from the decrypted data (see ../src/receive/VoiceReceiver.ts:parsePacket) +// The header extension (extra data in decrypted vs opusFrame) is indicated by the X bit (4th bit of byte 0) in the RTP header +// The header extension length follows the CSRC identifiers and can be removed from the decrypted data (see ../src/receive/VoiceReceiver.ts:parsePacket) export const RTP_PACKET_DESKTOP = { ssrc: 341_124, @@ -48,6 +48,7 @@ export const XCHACHA20_SAMPLE = { 133, 174, 108, 144, 251, 110, ]), + // First 8 bytes are Header Extension payload decrypted: Buffer.from([ 0x32, 0x64, 0xe6, 0x62, 0x10, 0xe3, 0x90, 0x02, 0x78, 0x07, 0xd6, 0x2f, 0x52, 0x23, 0x20, 0x9a, 0xab, 0x2c, 0xcc, 0x1c, 0x88, 0x8e, 0xcb, 0xd9, 0x4d, 0xe5, 0x33, 0x7a, 0x4b, 0x2b, 0xed, 0xa7, 0xaf, 0x5f, 0x8d, 0xb2, 0x59, 0x99, diff --git a/packages/voice/__tests__/VoiceReceiver.test.ts b/packages/voice/__tests__/VoiceReceiver.test.ts index c1b9d16de045..a4b54e6e1866 100644 --- a/packages/voice/__tests__/VoiceReceiver.test.ts +++ b/packages/voice/__tests__/VoiceReceiver.test.ts @@ -172,15 +172,17 @@ describe('VoiceReceiver', () => { }); }); - describe('decrypt', () => { - test('decrypt: aead_xchacha20_poly1305_rtpsize', () => { + describe('parsePacket', () => { + test('parsePacket: aead_xchacha20_poly1305_rtpsize', () => { const nonceSpace = Buffer.alloc(24); - const decrypted = receiver['decrypt']( + const packet = receiver['parsePacket']( XCHACHA20_SAMPLE.encrypted, 'aead_xchacha20_poly1305_rtpsize', nonceSpace, XCHACHA20_SAMPLE.key, + '123', + 48_921, ); const expectedNonce = Buffer.concat([ @@ -189,17 +191,24 @@ describe('VoiceReceiver', () => { ]); expect(nonceSpace.equals(expectedNonce)).toEqual(true); - expect(decrypted.equals(XCHACHA20_SAMPLE.decrypted)).toEqual(true); + // Extension data (8 bytes) should be stripped from the 61-byte decrypted payload + expect(packet).toHaveLength(53); + expect(packet!.equals(XCHACHA20_SAMPLE.decrypted.subarray(8))).toEqual(true); + expect(packet!.sequence).toEqual(22_118); + expect(packet!.timestamp).toEqual(3_220_386_864); + expect(packet!.ssrc).toEqual(48_921); }); - test('decrypt: aead_aes256gcm_rtpsize', () => { + test('parsePacket: aead_aes256gcm_rtpsize', () => { const nonceSpace = Buffer.alloc(12); - const decrypted = receiver['decrypt']( + const packet = receiver['parsePacket']( AES256GCM_SAMPLE.encrypted, 'aead_aes256_gcm_rtpsize', nonceSpace, AES256GCM_SAMPLE.key, + '123', + 50_615, ); const expectedNonce = Buffer.concat([ @@ -208,7 +217,11 @@ describe('VoiceReceiver', () => { ]); expect(nonceSpace.equals(expectedNonce)).toEqual(true); - expect(decrypted.equals(AES256GCM_SAMPLE.decrypted)).toEqual(true); + // No extension (X=0), so decrypted payload is the opus frame directly + expect(packet!.equals(AES256GCM_SAMPLE.decrypted)).toEqual(true); + expect(packet!.sequence).toEqual(41_884); + expect(packet!.timestamp).toEqual(2_668_332_016); + expect(packet!.ssrc).toEqual(50_615); }); }); }); diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index d69a5dad5a47..061f589317e1 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -16,7 +16,6 @@ import { import { SSRCMap } from './SSRCMap'; import { SpeakingMap } from './SpeakingMap'; -const HEADER_EXTENSION_BYTE = Buffer.from([0xbe, 0xde]); const UNPADDED_NONCE_LENGTH = 4; const AUTH_TAG_LENGTH = 16; @@ -79,18 +78,24 @@ export class VoiceReceiver { } } - private decrypt(buffer: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array) { + /** + * Decrypt RTP packet payload + * + * @param buffer - RTP packet buffer + * @param mode - cipher mode + * @param nonce - encryption nonce + * @param secretKey - encryption key + * @param headerSize - size of the unencrypted RTP header (fixed header + CSRC + extension header) + * @returns decrypted packet payload + */ + private decrypt(buffer: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array, headerSize: number) { // Copy the last 4 bytes of unpadded nonce to the padding of (12 - 4) or (24 - 4) bytes buffer.copy(nonce, 0, buffer.length - UNPADDED_NONCE_LENGTH); - let headerSize = 12; - const first = buffer.readUint8(); - if ((first >> 4) & 0x01) headerSize += 4; - - // The unencrypted RTP header contains 12 bytes, HEADER_EXTENSION and the extension size + // The unencrypted RTP header is used as AAD (authenticated but not encrypted) const header = buffer.subarray(0, headerSize); - // Encrypted contains the extension, if any, the opus packet, and the auth tag + // Encrypted contains the extension data, if any, the opus packet, and the auth tag const encrypted = buffer.subarray(headerSize, buffer.length - AUTH_TAG_LENGTH - UNPADDED_NONCE_LENGTH); const authTag = buffer.subarray( buffer.length - AUTH_TAG_LENGTH - UNPADDED_NONCE_LENGTH, @@ -127,7 +132,7 @@ export class VoiceReceiver { /** * Parses an audio packet, decrypting it to yield an Opus packet. * - * @param buffer - The buffer to parse + * @param rtp - The incoming RTP packet buffer to be parsed * @param mode - The encryption mode * @param nonce - The nonce buffer used by the connection for encryption * @param secretKey - The secret key used by the connection for encryption @@ -135,41 +140,43 @@ export class VoiceReceiver { * @param ssrc - already-parsed SSRC (Synchronization Source Identifier) from the RTP Header * @returns The parsed Opus packet */ - private parsePacket( - buffer: Buffer, - mode: string, - nonce: Buffer, - secretKey: Uint8Array, - userId: string, - ssrc: number, - ) { + private parsePacket(rtp: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array, userId: string, ssrc: number) { // Parse key RTP Header fields - const sequence = buffer.readUInt16BE(2); - const timestamp = buffer.readUInt32BE(4); - - let packet: Buffer = this.decrypt(buffer, mode, nonce, secretKey); - if (!packet) throw new Error('Failed to parse packet'); - - // Strip decrypted RTP Header Extension if present - // The header is only indicated in the original data, so compare with buffer first - if (buffer.subarray(12, 14).compare(HEADER_EXTENSION_BYTE) === 0) { - const headerExtensionLength = buffer.subarray(14).readUInt16BE(); - packet = packet.subarray(4 * headerExtensionLength); + const first = rtp.readUint8(); + const hasHeaderExtension = Boolean((first >> 4) & 0x01); // X field + const cc = first & 0x0f; // CSRC Count field + const sequence = rtp.readUInt16BE(2); + const timestamp = rtp.readUInt32BE(4); + + // Compute unencrypted header size: fixed header + CSRC Identifiers + extension header if present + let headerSize = 12 + 4 * cc; + const extensionHeaderOffset = headerSize; // where the extension header starts, if present + if (hasHeaderExtension) headerSize += 4; // extension header (profile ID + length) + + // Decrypt the RTP Payload + let payload: Buffer = this.decrypt(rtp, mode, nonce, secretKey, headerSize); + if (!payload) throw new Error('Failed to parse packet'); + + // Skip the decrypted RTP Header Extension data if present + if (hasHeaderExtension) { + // Extension Header Length field + const headerExtensionLength = rtp.readUInt16BE(extensionHeaderOffset + 2); + payload = payload.subarray(4 * headerExtensionLength); } - // Decrypt packet if in a DAVE session. + // Decrypt payload if in a DAVE session. if ( this.voiceConnection.state.status === VoiceConnectionStatus.Ready && (this.voiceConnection.state.networking.state.code === NetworkingStatusCode.Ready || this.voiceConnection.state.networking.state.code === NetworkingStatusCode.Resuming) ) { const daveSession = this.voiceConnection.state.networking.state.dave; - if (daveSession) packet = daveSession.decrypt(packet, userId)!; + if (daveSession) payload = daveSession.decrypt(payload, userId)!; } - // Extend packet with RTP header information - if (packet) { - return addPacketHeaders(packet, sequence, timestamp, ssrc); + // Return Opus packet data Buffer, enriched with RTP header information + if (payload) { + return addPacketHeaders(payload, sequence, timestamp, ssrc); } else { return null; } @@ -235,9 +242,9 @@ export class VoiceReceiver { * Extends the Buffer for Opus audio data with RTP Header information * * @param buffer - the opus packet data to extend - * @param sequence - NTP Header sequence value for the packet - * @param timestamp - NTP Header timestamp value for the packet - * @param ssrc - NTP Header synchronization source identifier (SSRC) for the packet + * @param sequence - RTP Header sequence value for the packet + * @param timestamp - RTP Header timestamp value for the packet + * @param ssrc - RTP Header synchronization source identifier (SSRC) for the packet * @returns the input buffer, with RTP header information added */ function addPacketHeaders(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { From 5d344b6c8651eb302bb7372f9082b6c15850a261 Mon Sep 17 00:00:00 2001 From: Peter Date: Sun, 1 Mar 2026 14:23:32 +0000 Subject: [PATCH 09/11] Per review feedback, change AudioPacket to a dedicated interface rather than an extension of Buffer. This breaks backwards-compatibility for existing AudioReceiveStream users, but is cleaner and allows for future extensibility. --- .../__tests__/AudioReceiveStream.test.ts | 15 ++++--- .../voice/__tests__/VoiceReceiver.test.ts | 18 +++++--- .../voice/src/receive/AudioReceiveStream.ts | 21 +++++---- packages/voice/src/receive/VoiceReceiver.ts | 43 +++++++------------ 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/packages/voice/__tests__/AudioReceiveStream.test.ts b/packages/voice/__tests__/AudioReceiveStream.test.ts index 6e650f2b437a..ec412ae0da2d 100644 --- a/packages/voice/__tests__/AudioReceiveStream.test.ts +++ b/packages/voice/__tests__/AudioReceiveStream.test.ts @@ -4,14 +4,15 @@ import { describe, test, expect } from 'vitest'; import { SILENCE_FRAME } from '../src/audio/AudioPlayer'; import { AudioReceiveStream, EndBehaviorType } from '../src/receive/AudioReceiveStream'; -const DUMMY_BUFFER = Buffer.allocUnsafe(16); +const DUMMY_PACKET = { payload: Buffer.allocUnsafe(16), sequence: 0, timestamp: 0, ssrc: 0 }; +const SILENCE_PACKET = { payload: SILENCE_FRAME, sequence: 0, timestamp: 0, ssrc: 0 }; async function wait(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)); } async function stepSilence(stream: AudioReceiveStream, increment: number) { - stream.push(SILENCE_FRAME); + stream.push(SILENCE_PACKET); await wait(increment); expect(stream.readable).toEqual(true); } @@ -19,10 +20,10 @@ async function stepSilence(stream: AudioReceiveStream, increment: number) { describe('AudioReceiveStream', () => { test('Manual end behavior', async () => { const stream = new AudioReceiveStream({ end: { behavior: EndBehaviorType.Manual } }); - stream.push(DUMMY_BUFFER); + stream.push(DUMMY_PACKET); expect(stream.readable).toEqual(true); await wait(200); - stream.push(DUMMY_BUFFER); + stream.push(DUMMY_PACKET); expect(stream.readable).toEqual(true); stream.push(null); await wait(200); @@ -40,7 +41,7 @@ describe('AudioReceiveStream', () => { await stepSilence(stream, increment); } - stream.push(DUMMY_BUFFER); + stream.push(DUMMY_PACKET); await wait(duration); expect(stream.readableEnded).toEqual(true); @@ -57,7 +58,7 @@ describe('AudioReceiveStream', () => { await stepSilence(stream, increment); } - stream.push(DUMMY_BUFFER); + stream.push(DUMMY_PACKET); for (let index = increment; index < duration; index += increment) { await stepSilence(stream, increment); @@ -75,7 +76,7 @@ describe('AudioReceiveStream', () => { const stream = new AudioReceiveStream({ end: { behavior: EndBehaviorType.AfterInactivity, duration: 100 } }); stream.resume(); - stream.push(DUMMY_BUFFER); + stream.push(DUMMY_PACKET); expect(stream.readable).toEqual(true); expect(stream.readableEnded).toEqual(false); diff --git a/packages/voice/__tests__/VoiceReceiver.test.ts b/packages/voice/__tests__/VoiceReceiver.test.ts index a4b54e6e1866..69ce9ef9a506 100644 --- a/packages/voice/__tests__/VoiceReceiver.test.ts +++ b/packages/voice/__tests__/VoiceReceiver.test.ts @@ -65,7 +65,8 @@ describe('VoiceReceiver', () => { receiver['onUdpMessage'](RTP_PACKET.packet); await nextTick(); - expect(stream.read()).toEqual(RTP_PACKET.opusFrame); + const packet = stream.read(); + expect(packet.payload).toEqual(RTP_PACKET.opusFrame); }); test.each([ @@ -91,7 +92,7 @@ describe('VoiceReceiver', () => { expect(packet.ssrc).toEqual(RTP_PACKET.ssrc); }); - test('onUdpMessage: AudioPacket is backwards compatible', async () => { + test('onUdpMessage: AudioPacket has payload and header fields', async () => { receiver['decrypt'] = vitest.fn().mockImplementationOnce(() => RTP_PACKET_DESKTOP.decrypted); const spy = vitest.spyOn(receiver.ssrcMap, 'get'); @@ -105,8 +106,11 @@ describe('VoiceReceiver', () => { receiver['onUdpMessage'](RTP_PACKET_DESKTOP.packet); await nextTick(); const packet = stream.read(); - expect(Buffer.isBuffer(packet)).toBe(true); - expect(packet).toEqual(RTP_PACKET_DESKTOP.opusFrame); + expect(Buffer.isBuffer(packet.payload)).toBe(true); + expect(packet.payload).toEqual(RTP_PACKET_DESKTOP.opusFrame); + expect(typeof packet.sequence).toBe('number'); + expect(typeof packet.timestamp).toBe('number'); + expect(typeof packet.ssrc).toBe('number'); }); test('onUdpMessage: <8 bytes packet', () => { @@ -192,8 +196,8 @@ describe('VoiceReceiver', () => { expect(nonceSpace.equals(expectedNonce)).toEqual(true); // Extension data (8 bytes) should be stripped from the 61-byte decrypted payload - expect(packet).toHaveLength(53); - expect(packet!.equals(XCHACHA20_SAMPLE.decrypted.subarray(8))).toEqual(true); + expect(packet!.payload).toHaveLength(53); + expect(packet!.payload.equals(XCHACHA20_SAMPLE.decrypted.subarray(8))).toEqual(true); expect(packet!.sequence).toEqual(22_118); expect(packet!.timestamp).toEqual(3_220_386_864); expect(packet!.ssrc).toEqual(48_921); @@ -218,7 +222,7 @@ describe('VoiceReceiver', () => { expect(nonceSpace.equals(expectedNonce)).toEqual(true); // No extension (X=0), so decrypted payload is the opus frame directly - expect(packet!.equals(AES256GCM_SAMPLE.decrypted)).toEqual(true); + expect(packet!.payload.equals(AES256GCM_SAMPLE.decrypted)).toEqual(true); expect(packet!.sequence).toEqual(41_884); expect(packet!.timestamp).toEqual(2_668_332_016); expect(packet!.ssrc).toEqual(50_615); diff --git a/packages/voice/src/receive/AudioReceiveStream.ts b/packages/voice/src/receive/AudioReceiveStream.ts index 72c5390b835a..5e7dc11bfd06 100644 --- a/packages/voice/src/receive/AudioReceiveStream.ts +++ b/packages/voice/src/receive/AudioReceiveStream.ts @@ -37,16 +37,21 @@ export interface AudioReceiveStreamOptions extends ReadableOptions { } /** - * A Buffer containing encoded Opus packet data and key RTP Header metadata. + * An audio packet containing encoded Opus payload data and key RTP Header metadata. */ -export interface AudioPacket extends Buffer { +export interface AudioPacket { + /** + * The encoded Opus payload data. + */ + readonly payload: Buffer; + /** * The RTP sequence number of this packet (16-bit, wraps at 65535). */ readonly sequence: number; /** - * The synchronization source identifier for this packet (32-bit). + * The RTP synchronization source identifier for this packet (32-bit). * A change in SSRC indicates a new RTP stream, so any associated * decoder should be reset. */ @@ -89,22 +94,22 @@ export class AudioReceiveStream extends Readable { this.end = end; } - public override push(buffer: Buffer | null) { + public override push(packet: AudioPacket | null) { if ( - buffer && + packet && (this.end.behavior === EndBehaviorType.AfterInactivity || (this.end.behavior === EndBehaviorType.AfterSilence && - (buffer.compare(SILENCE_FRAME) !== 0 || this.endTimeout === undefined))) + (packet.payload.compare(SILENCE_FRAME) !== 0 || this.endTimeout === undefined))) ) { this.renewEndTimeout(this.end); } - if (buffer === null) { + if (packet === null) { // null marks EOF for stream process.nextTick(() => this.destroy()); } - return super.push(buffer); + return super.push(packet); } private renewEndTimeout(end: EndBehavior & { duration: number }) { diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index 061f589317e1..b6c8fb0d53ae 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -138,9 +138,16 @@ export class VoiceReceiver { * @param secretKey - The secret key used by the connection for encryption * @param userId - The user id that sent the packet * @param ssrc - already-parsed SSRC (Synchronization Source Identifier) from the RTP Header - * @returns The parsed Opus packet + * @returns Decrypted Opus payload and RTP header information, or null if DAVE decrypt failed in a way that should be ignored */ - private parsePacket(rtp: Buffer, mode: string, nonce: Buffer, secretKey: Uint8Array, userId: string, ssrc: number) { + private parsePacket( + rtp: Buffer, + mode: string, + nonce: Buffer, + secretKey: Uint8Array, + userId: string, + ssrc: number, + ): AudioPacket | null { // Parse key RTP Header fields const first = rtp.readUint8(); const hasHeaderExtension = Boolean((first >> 4) & 0x01); // X field @@ -171,15 +178,15 @@ export class VoiceReceiver { this.voiceConnection.state.networking.state.code === NetworkingStatusCode.Resuming) ) { const daveSession = this.voiceConnection.state.networking.state.dave; - if (daveSession) payload = daveSession.decrypt(payload, userId)!; - } + if (daveSession) { + payload = daveSession.decrypt(payload, userId)!; - // Return Opus packet data Buffer, enriched with RTP header information - if (payload) { - return addPacketHeaders(payload, sequence, timestamp, ssrc); - } else { - return null; + if (!payload) return null; // decryption failed but should be ignored + } } + + // Construct AudioPacket with Opus payload and RTP header information + return { payload, sequence, timestamp, ssrc }; } /** @@ -237,21 +244,3 @@ export class VoiceReceiver { return stream; } } - -/** - * Extends the Buffer for Opus audio data with RTP Header information - * - * @param buffer - the opus packet data to extend - * @param sequence - RTP Header sequence value for the packet - * @param timestamp - RTP Header timestamp value for the packet - * @param ssrc - RTP Header synchronization source identifier (SSRC) for the packet - * @returns the input buffer, with RTP header information added - */ -function addPacketHeaders(buffer: Buffer, sequence: number, timestamp: number, ssrc: number): AudioPacket { - Object.defineProperties(buffer, { - sequence: { value: sequence, writable: false, enumerable: false, configurable: false }, - timestamp: { value: timestamp, writable: false, enumerable: false, configurable: false }, - ssrc: { value: ssrc, writable: false, enumerable: false, configurable: false }, - }); - return buffer as AudioPacket; -} From 8dd7fc50bf578d9a7168c1edc18d8181513a1701 Mon Sep 17 00:00:00 2001 From: Peter Date: Tue, 3 Mar 2026 00:22:33 +0000 Subject: [PATCH 10/11] Per review suggestion, replace AudioPacket interface with full class. Documented constructor as not a public interface to discourage end-users from using it and breaking if we extend AudioPacket in the future --- .../voice/src/receive/AudioReceiveStream.ts | 34 ++++++++++++++----- packages/voice/src/receive/VoiceReceiver.ts | 4 +-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/packages/voice/src/receive/AudioReceiveStream.ts b/packages/voice/src/receive/AudioReceiveStream.ts index 5e7dc11bfd06..2dc0aa07f10f 100644 --- a/packages/voice/src/receive/AudioReceiveStream.ts +++ b/packages/voice/src/receive/AudioReceiveStream.ts @@ -39,28 +39,44 @@ export interface AudioReceiveStreamOptions extends ReadableOptions { /** * An audio packet containing encoded Opus payload data and key RTP Header metadata. */ -export interface AudioPacket { +export class AudioPacket { /** - * The encoded Opus payload data. + * Encoded Opus payload data. */ - readonly payload: Buffer; + public readonly payload: Buffer; /** - * The RTP sequence number of this packet (16-bit, wraps at 65535). + * RTP sequence number of this packet (16-bit, wraps at 65535). */ - readonly sequence: number; + public readonly sequence: number; /** - * The RTP synchronization source identifier for this packet (32-bit). + * RTP synchronization source identifier for this packet (32-bit). * A change in SSRC indicates a new RTP stream, so any associated * decoder should be reset. */ - readonly ssrc: number; + public readonly ssrc: number; /** - * The RTP timestamp of this packet (32-bit, wraps at 2^32 - 1). + * RTP timestamp of this packet (32-bit, wraps at 2^32 - 1, 48kHz clock). */ - readonly timestamp: number; + public readonly timestamp: number; + + /** + * Construct a new AudioPacket. + * **This is not a stable public API.** + * + * @param payload - Opus payload + * @param sequence - RTP Sequence Number + * @param timestamp - RTP Timestamp + * @param ssrc - RTP Synchronization Source Identifier + */ + public constructor(payload: Buffer, sequence: number, timestamp: number, ssrc: number) { + this.payload = payload; + this.sequence = sequence; + this.timestamp = timestamp; + this.ssrc = ssrc; + } } export function createDefaultAudioReceiveStreamOptions(): AudioReceiveStreamOptions { diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index b6c8fb0d53ae..aedb43e54103 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -10,7 +10,7 @@ import { methods } from '../util/Secretbox'; import { AudioReceiveStream, createDefaultAudioReceiveStreamOptions, - type AudioPacket, + AudioPacket, type AudioReceiveStreamOptions, } from './AudioReceiveStream'; import { SSRCMap } from './SSRCMap'; @@ -186,7 +186,7 @@ export class VoiceReceiver { } // Construct AudioPacket with Opus payload and RTP header information - return { payload, sequence, timestamp, ssrc }; + return new AudioPacket(payload, sequence, timestamp, ssrc); } /** From fef397debd1faa6a052aae2412d9f33e08784dd9 Mon Sep 17 00:00:00 2001 From: Peter Date: Wed, 4 Mar 2026 12:39:59 +0000 Subject: [PATCH 11/11] Remove note on AudioPacket constructor about it not being public per review feedback --- packages/voice/src/receive/AudioReceiveStream.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/voice/src/receive/AudioReceiveStream.ts b/packages/voice/src/receive/AudioReceiveStream.ts index 2dc0aa07f10f..aa3ccc1b0100 100644 --- a/packages/voice/src/receive/AudioReceiveStream.ts +++ b/packages/voice/src/receive/AudioReceiveStream.ts @@ -64,7 +64,6 @@ export class AudioPacket { /** * Construct a new AudioPacket. - * **This is not a stable public API.** * * @param payload - Opus payload * @param sequence - RTP Sequence Number