diff --git a/packages/voice/src/networking/Networking.ts b/packages/voice/src/networking/Networking.ts index ded6c85a03ee..5004baed38d1 100644 --- a/packages/voice/src/networking/Networking.ts +++ b/packages/voice/src/networking/Networking.ts @@ -8,6 +8,7 @@ import type { VoiceReceivePayload, VoiceSpeakingFlags } from 'discord-api-types/ import { VoiceEncryptionMode, VoiceOpcodes } from 'discord-api-types/voice/v8'; import type { CloseEvent } from 'ws'; import * as secretbox from '../util/Secretbox'; +import { RTP_OPUS_PAYLOAD_TYPE } from '../util/constants'; import { noop } from '../util/util'; import { DAVESession, getMaxProtocolVersion } from './DAVESession'; import { VoiceUDPSocket } from './VoiceUDPSocket'; @@ -745,7 +746,7 @@ export class Networking extends EventEmitter { private createAudioPacket(opusPacket: Buffer, connectionData: ConnectionData, daveSession?: DAVESession) { const rtpHeader = Buffer.alloc(12); rtpHeader[0] = 0x80; - rtpHeader[1] = 0x78; + rtpHeader[1] = RTP_OPUS_PAYLOAD_TYPE; const { sequence, timestamp, ssrc } = connectionData; diff --git a/packages/voice/src/receive/VoiceReceiver.ts b/packages/voice/src/receive/VoiceReceiver.ts index 90e4015cb9b1..94fd95f683c0 100644 --- a/packages/voice/src/receive/VoiceReceiver.ts +++ b/packages/voice/src/receive/VoiceReceiver.ts @@ -7,6 +7,7 @@ import { VoiceOpcodes } from 'discord-api-types/voice/v8'; import { VoiceConnectionStatus, type VoiceConnection } from '../VoiceConnection'; import { NetworkingStatusCode, type ConnectionData } from '../networking/Networking'; import { methods } from '../util/Secretbox'; +import { RTP_OPUS_PAYLOAD_TYPE } from '../util/constants'; import { AudioReceiveStream, createDefaultAudioReceiveStreamOptions, @@ -137,6 +138,15 @@ export class VoiceReceiver { let packet: Buffer = this.decrypt(buffer, mode, nonce, secretKey); if (!packet) throw new Error('Failed to parse packet'); + // Strip padding (RFC3550 5.1) + const hasPadding = buffer[0] && Boolean(buffer[0] & 0b100000); + if (hasPadding) { + const paddingAmount = packet[packet.length - 1]!; + if (paddingAmount < packet.length) { + packet = packet.subarray(0, packet.length - paddingAmount); + } + } + // Strip decrypted RTP Header Extension if present // The header is only indicated in the original data, so compare with buffer first if (buffer.subarray(12, 14).compare(HEADER_EXTENSION_BYTE) === 0) { @@ -176,6 +186,13 @@ export class VoiceReceiver { if (!stream) return; if (this.connectionData.encryptionMode && this.connectionData.nonceBuffer && this.connectionData.secretKey) { + // As a guard, we shouldn't parse packets that (1) aren't voice packets and (2) are not in the right RTP version + if ((msg[1]! & 0x7f) !== RTP_OPUS_PAYLOAD_TYPE) return; + + // Ignore packets not in RTP version 2 + const rtpVersion = msg[0]! >> 6; + if (rtpVersion !== 2) return; + try { const packet = this.parsePacket( msg, diff --git a/packages/voice/src/util/constants.ts b/packages/voice/src/util/constants.ts new file mode 100644 index 000000000000..4b3d2225c6fa --- /dev/null +++ b/packages/voice/src/util/constants.ts @@ -0,0 +1 @@ +export const RTP_OPUS_PAYLOAD_TYPE = 0x78; diff --git a/packages/voice/src/util/index.ts b/packages/voice/src/util/index.ts index 265b33ec40cd..5856d2efcaed 100644 --- a/packages/voice/src/util/index.ts +++ b/packages/voice/src/util/index.ts @@ -1,3 +1,4 @@ +export * from './constants'; export * from './generateDependencyReport'; export * from './entersState'; export type * from './adapter';