Open
Description
Based on the Project Aria code for receiving RGB streams, I wrote a script to push RGB images and audio simultaneously to an RTMP server. However, the stream produced by this code exhibits issues: the video flickers and the audio does not work properly.
If I only push the video without the audio, the video displays normally. This suggests that the issue likely lies in the audio processing part, but I haven't been able to pinpoint the exact problem.
Any insights or suggestions would be greatly appreciated!
import subprocess
import cv2
import numpy as np
from common import quit_keypress
import aria.sdk as aria
from projectaria_tools.core.sensor_data import ImageDataRecord
RTMP_URL = "rtmp://172.17.0.1:1935/xxxxx/livestream" # RTMP streaming URL
class StreamingClientObserver:
def __init__(self):
self.images = {}
self.audio_data = None
def on_image_received(self, image: np.array, record: ImageDataRecord):
self.images[record.camera_id] = image
def on_audio_received(self, audio: 'AudioData', record: 'AudioMetadata', **kwargs):
self.audio_data = np.clip(np.array(audio.data), -32768, 32767).astype(np.int16)
def start_ffmpeg_process():
"""
Start the FFmpeg process for pushing video and audio streams to RTMP.
"""
command = [
"ffmpeg",
"-y", # Overwrite output file
"-f", "rawvideo", # Input as raw video stream
"-vcodec", "rawvideo",
"-pix_fmt", "rgb24", # Pixel format as RGB24
"-s", "1408x1408", # Input resolution (adjust based on the device)
"-r", "10", # Frame rate
"-i", "-", # Video read from standard input
"-f", "s32le", # Audio input format as 16-bit little-endian
"-ar", "48000", # Audio sampling rate
"-ac", "2", # Mono channel
"-i", "-", # Audio read from standard input
"-c:v", "libx264", # Video encoding as H.264
"-preset", "veryfast", # Encoding speed
"-c:a", "aac", # Audio encoding as AAC
"-b:a", "128k", # Audio bitrate
"-f", "flv", # Output format as FLV
RTMP_URL, # Streaming URL
]
return subprocess.Popen(command, stdin=subprocess.PIPE)
def main():
# Set SDK log level
aria.set_log_level(aria.Level.Info)
# Create a StreamingClient instance
streaming_client = aria.StreamingClient()
# Configure subscription
config = streaming_client.subscription_config
config.subscriber_data_type = aria.StreamingDataType.Rgb | aria.StreamingDataType.Audio # Subscribe to RGB and audio data streams
config.message_queue_size[aria.StreamingDataType.Rgb] = 1024 # Set queue size to 1
config.message_queue_size[aria.StreamingDataType.Audio] = 1024 # Set queue size to 1
options = aria.StreamingSecurityOptions()
options.use_ephemeral_certs = True # Use ephemeral certificates
config.security_options = options
streaming_client.subscription_config = config
# Create and set observer
observer = StreamingClientObserver()
streaming_client.set_streaming_client_observer(observer)
# Start subscription
print("Start listening to image and audio data")
streaming_client.subscribe()
# Start FFmpeg streaming
ffmpeg_process = start_ffmpeg_process()
try:
while not quit_keypress():
# Process RGB images
if aria.CameraId.Rgb in observer.images:
rgb_image = np.rot90(observer.images[aria.CameraId.Rgb], -1) # Rotate image
try:
# Write image to FFmpeg's standard input
ffmpeg_process.stdin.write(rgb_image.tobytes())
except BrokenPipeError:
print("FFmpeg process closed.")
break
del observer.images[aria.CameraId.Rgb]
# Process audio data
if observer.audio_data is not None:
try:
# Write audio data to FFmpeg's standard input
ffmpeg_process.stdin.write(observer.audio_data.tobytes())
except BrokenPipeError:
print("FFmpeg process closed.")
break
observer.audio_data = None
finally:
# Stop subscription
print("Stop listening to image and audio data")
streaming_client.unsubscribe()
# Close FFmpeg process
ffmpeg_process.stdin.close()
ffmpeg_process.wait()
if __name__ == "__main__":
main()
The screen when pushing audio and video.
Only push the video screen.
Metadata
Metadata
Assignees
Labels
No labels