Skip to content

RTC: audio packet jitter buffer. #4295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 86 additions & 7 deletions trunk/src/app/srs_app_rtc_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -908,18 +908,18 @@ srs_error_t SrsRtcRtpBuilder::on_audio(SrsSharedPtrMessage* msg)
return srs_error_wrap(err, "format consume audio");
}

// Try to init codec when startup or codec changed.
if (format->acodec && (err = init_codec(format->acodec->id)) != srs_success) {
return srs_error_wrap(err, "init codec");
}

// Ignore if no format->acodec, it means the codec is not parsed, or unknown codec.
// @issue https://github.com/ossrs/srs/issues/1506#issuecomment-562079474
if (!format->acodec) {
return err;
}

// ts support audio codec: aac/mp3
// Try to init codec when startup or codec changed.
if ((err = init_codec(format->acodec->id)) != srs_success) {
return srs_error_wrap(err, "init codec");
}

// support audio codec: aac/mp3
SrsAudioCodecId acodec = format->acodec->id;
if (acodec != SrsAudioCodecIdAAC && acodec != SrsAudioCodecIdMP3) {
return err;
Expand Down Expand Up @@ -1476,14 +1476,93 @@ srs_error_t SrsRtcFrameBuilder::on_rtp(SrsRtpPacket *pkt)
}

if (pkt->is_audio()) {
err = transcode_audio(pkt);
err = packet_audio(pkt);
} else {
err = packet_video(pkt);
}

return err;
}

srs_error_t SrsRtcFrameBuilder::packet_audio(SrsRtpPacket* src)
{
srs_error_t err = srs_success;

uint16_t seq = src->header.get_sequence();
int64_t now = srs_get_system_time() / 1000;

// Initialize if this is the first packet
if (audio_buffer_.empty()) {
last_audio_seq_num_ = seq - 1;
last_audio_process_time_ms_ = now;
}

// Check if packet is too old (already processed)
if (srs_rtp_seq_distance(last_audio_seq_num_, seq) < 0) {
// Packet is older than what we've already processed, discard it
srs_warn("Discard late audio packet, seq=%u, last_seq=%u", seq, last_audio_seq_num_);
return err;
}

// Store packet in jitter buffer
audio_buffer_[seq] = src->copy();

// Try to process packets in the sliding window
bool force_process = audio_buffer_.size() >= AUDIO_JITTER_BUFFER_SIZE ||
(now - last_audio_process_time_ms_) > MAX_AUDIO_WAIT_MS;
uint16_t window_end = last_audio_seq_num_ + SLIDING_WINDOW_SIZE;

while (!audio_buffer_.empty()) {
auto it = audio_buffer_.begin();
uint16_t next_seq = it->first;

// Check if the packet is within our sliding window
if (!force_process) {
// If packet is before window start (shouldn't happen normally)
if (srs_rtp_seq_distance(last_audio_seq_num_, next_seq) < 0) {
// Process it anyway as it's already late
srs_warn("Late audio packet, seq=%u, expected>=%u", next_seq, last_audio_seq_num_);
} else if (srs_rtp_seq_distance(next_seq, window_end) < 0) {
// If packet is beyond window end, stop processing
srs_warn("Audio packet beyond window end, seq=%u, window_end=%u", next_seq, window_end);
break;
} else if (srs_rtp_seq_distance(last_audio_seq_num_, next_seq) > 1) {// If there's a gap and we haven't exceeded wait time, wait for missing packets
Copy link
Member

@duiniuluantanqin duiniuluantanqin Feb 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// If there's a gap and we haven't exceeded wait time, wait for missing packets
This comment has been duplicated.

TRANS_BY_GPT4

// If there's a gap and we haven't exceeded wait time, wait for missing packets
if ((now - last_audio_process_time_ms_) <= MAX_AUDIO_WAIT_MS) {
break;
}
srs_warn("Audio packet loss, expected=%u, got=%u", last_audio_seq_num_ + 1, next_seq);
}
}

// Process the packet
SrsRtpPacket* pkt = it->second;
audio_buffer_.erase(it);

// Update last sequence number
last_audio_seq_num_ = next_seq;

// Process the packet
if ((err = transcode_audio(pkt)) != srs_success) {
srs_freep(pkt);
return srs_error_wrap(err, "transcode audio");
}

srs_freep(pkt);
last_audio_process_time_ms_ = now;

// Update window end for next iteration
window_end = last_audio_seq_num_ + SLIDING_WINDOW_SIZE;
}

// If buffer is getting too full, force process oldest packets
if (audio_buffer_.size() >= AUDIO_JITTER_BUFFER_SIZE * 0.8) {
srs_warn("Audio jitter buffer nearly full, size=%zu", audio_buffer_.size());
}

return err;
}

srs_error_t SrsRtcFrameBuilder::transcode_audio(SrsRtpPacket *pkt)
{
srs_error_t err = srs_success;
Expand Down
15 changes: 15 additions & 0 deletions trunk/src/app/srs_app_rtc_source.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ const int kAudioPayloadType = 111;
// Firefox defaults as 126, Chrome is 102.
const int kVideoPayloadType = 102;

// Audio jitter buffer size (in packets)
const int AUDIO_JITTER_BUFFER_SIZE = 100;
// Sliding window size for continuous processing
const int SLIDING_WINDOW_SIZE = 10;
// Maximum waiting time for out-of-order packets (in ms)
const int MAX_AUDIO_WAIT_MS = 100;

class SrsNtp
{
public:
Expand Down Expand Up @@ -335,6 +342,13 @@ class SrsRtcFrameBuilder
uint16_t header_sn_;
uint16_t lost_sn_;
int64_t rtp_key_frame_ts_;

// Audio jitter buffer, map sequence number to packet
std::map<uint16_t, SrsRtpPacket*> audio_buffer_;
// Last processed sequence number
uint16_t last_audio_seq_num_;
// Last time we processed the jitter buffer
int64_t last_audio_process_time_ms_;
private:
// The state for timestamp sync state. -1 for init. 0 not sync. 1 sync.
int sync_state_;
Expand All @@ -351,6 +365,7 @@ class SrsRtcFrameBuilder
virtual void on_unpublish();
virtual srs_error_t on_rtp(SrsRtpPacket *pkt);
private:
srs_error_t packet_audio(SrsRtpPacket* pkt);
srs_error_t transcode_audio(SrsRtpPacket *pkt);
void packet_aac(SrsCommonMessage* audio, char* data, int len, uint32_t pts, bool is_header);
private:
Expand Down
Loading