@@ -7,6 +7,7 @@ import { VoiceOpcodes } from 'discord-api-types/voice/v8';
77import { VoiceConnectionStatus , type VoiceConnection } from '../VoiceConnection' ;
88import { NetworkingStatusCode , type ConnectionData } from '../networking/Networking' ;
99import { methods } from '../util/Secretbox' ;
10+ import { RTP_OPUS_PAYLOAD_TYPE } from '../util/constants' ;
1011import {
1112 AudioReceiveStream ,
1213 createDefaultAudioReceiveStreamOptions ,
@@ -137,6 +138,15 @@ export class VoiceReceiver {
137138 let packet : Buffer = this . decrypt ( buffer , mode , nonce , secretKey ) ;
138139 if ( ! packet ) throw new Error ( 'Failed to parse packet' ) ;
139140
141+ // Strip padding (RFC3550 5.1)
142+ const hasPadding = buffer [ 0 ] && Boolean ( buffer [ 0 ] & 0b100000 ) ;
143+ if ( hasPadding ) {
144+ const paddingAmount = packet [ packet . length - 1 ] ! ;
145+ if ( paddingAmount < packet . length ) {
146+ packet = packet . subarray ( 0 , packet . length - paddingAmount ) ;
147+ }
148+ }
149+
140150 // Strip decrypted RTP Header Extension if present
141151 // The header is only indicated in the original data, so compare with buffer first
142152 if ( buffer . subarray ( 12 , 14 ) . compare ( HEADER_EXTENSION_BYTE ) === 0 ) {
@@ -176,6 +186,13 @@ export class VoiceReceiver {
176186 if ( ! stream ) return ;
177187
178188 if ( this . connectionData . encryptionMode && this . connectionData . nonceBuffer && this . connectionData . secretKey ) {
189+ // As a guard, ignore packets whose RTP payload type (low 7 bits of the second byte) is not the Opus voice payload type
190+ if ( ( msg [ 1 ] ! & 0x7f ) !== RTP_OPUS_PAYLOAD_TYPE ) return ;
191+
192+ // Ignore packets not in RTP version 2
193+ const rtpVersion = msg [ 0 ] ! >> 6 ;
194+ if ( rtpVersion !== 2 ) return ;
195+
179196 try {
180197 const packet = this . parsePacket (
181198 msg ,
0 commit comments