Skip to content

Audio and RGB video stream acquisition problem #185

Open
@SunHaoranskywalkers

Description

@SunHaoranskywalkers

Based on the Project Aria code for receiving RGB streams, I wrote a script to push RGB images and audio simultaneously to an RTMP server. However, the stream produced by this code exhibits issues: the video flickers and the audio does not work properly.

If I only push the video without the audio, the video displays normally. This suggests that the issue likely lies in the audio processing part, but I haven't been able to pinpoint the exact problem.

Any insights or suggestions would be greatly appreciated!

import subprocess
import cv2
import numpy as np
from common import quit_keypress
import aria.sdk as aria
from projectaria_tools.core.sensor_data import ImageDataRecord

RTMP_URL = "rtmp://172.17.0.1:1935/xxxxx/livestream" # RTMP streaming URL

class StreamingClientObserver:
    def __init__(self):
        self.images = {}
        self.audio_data = None

    def on_image_received(self, image: np.array, record: ImageDataRecord):
        self.images[record.camera_id] = image

    def on_audio_received(self, audio: 'AudioData', record: 'AudioMetadata', **kwargs):
    	self.audio_data = np.clip(np.array(audio.data), -32768, 32767).astype(np.int16)


def start_ffmpeg_process():
    """
    Start the FFmpeg process for pushing video and audio streams to RTMP.
    """
    command = [
        "ffmpeg",
        "-y",  # Overwrite output file
        "-f", "rawvideo",  # Input as raw video stream
        "-vcodec", "rawvideo",
        "-pix_fmt", "rgb24",  # Pixel format as RGB24
        "-s", "1408x1408",  # Input resolution (adjust based on the device)
        "-r", "10",  # Frame rate
        "-i", "-",  # Video read from standard input
        "-f", "s32le",  # Audio input format as 16-bit little-endian
        "-ar", "48000",  # Audio sampling rate
        "-ac", "2",  # Mono channel
        "-i", "-",  # Audio read from standard input
        "-c:v", "libx264",  # Video encoding as H.264
        "-preset", "veryfast",  # Encoding speed
        "-c:a", "aac",  # Audio encoding as AAC
        "-b:a", "128k",  # Audio bitrate
        "-f", "flv",  # Output format as FLV
        RTMP_URL,  # Streaming URL
    ]
    return subprocess.Popen(command, stdin=subprocess.PIPE)


def main():
    # Set SDK log level
    aria.set_log_level(aria.Level.Info)

    # Create a StreamingClient instance
    streaming_client = aria.StreamingClient()

    # Configure subscription
    config = streaming_client.subscription_config
    config.subscriber_data_type = aria.StreamingDataType.Rgb | aria.StreamingDataType.Audio  # Subscribe to RGB and audio data streams
    config.message_queue_size[aria.StreamingDataType.Rgb] = 1024  # Set queue size to 1
    config.message_queue_size[aria.StreamingDataType.Audio] = 1024  # Set queue size to 1
    options = aria.StreamingSecurityOptions()
    options.use_ephemeral_certs = True  # Use ephemeral certificates
    config.security_options = options
    streaming_client.subscription_config = config

    # Create and set observer
    observer = StreamingClientObserver()
    streaming_client.set_streaming_client_observer(observer)

    # Start subscription
    print("Start listening to image and audio data")
    streaming_client.subscribe()

    # Start FFmpeg streaming
    ffmpeg_process = start_ffmpeg_process()

    try:
        while not quit_keypress():
            # Process RGB images
            if aria.CameraId.Rgb in observer.images:
                rgb_image = np.rot90(observer.images[aria.CameraId.Rgb], -1)  # Rotate image
                try:
                    # Write image to FFmpeg's standard input
                    ffmpeg_process.stdin.write(rgb_image.tobytes())
                except BrokenPipeError:
                    print("FFmpeg process closed.")
                    break
                del observer.images[aria.CameraId.Rgb]

            # Process audio data
            if observer.audio_data is not None:
                try:
                    # Write audio data to FFmpeg's standard input
                    ffmpeg_process.stdin.write(observer.audio_data.tobytes())
                except BrokenPipeError:
                    print("FFmpeg process closed.")
                    break
                observer.audio_data = None
    finally:
        # Stop subscription
        print("Stop listening to image and audio data")
        streaming_client.unsubscribe()

        # Close FFmpeg process
        ffmpeg_process.stdin.close()
        ffmpeg_process.wait()


if __name__ == "__main__":
    main()

The screen when pushing audio and video.

Image

Only push the video screen.

Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions