Audio and RGB video stream acquisition problem

Based on the Project Aria code for receiving RGB streams, I wrote a script to push RGB images and audio simultaneously to an RTMP server. However, the stream produced by this code exhibits issues: the video flickers and the audio does not work properly.  

If I only push the video without the audio, the video displays normally. This suggests that the issue likely lies in the audio processing part, but I haven't been able to pinpoint the exact problem.  

Any insights or suggestions would be greatly appreciated!

```
import subprocess
import cv2
import numpy as np
from common import quit_keypress
import aria.sdk as aria
from projectaria_tools.core.sensor_data import ImageDataRecord

RTMP_URL = "rtmp://172.17.0.1:1935/xxxxx/livestream" # RTMP streaming URL

class StreamingClientObserver:
    def __init__(self):
        self.images = {}
        self.audio_data = None

    def on_image_received(self, image: np.array, record: ImageDataRecord):
        self.images[record.camera_id] = image

    def on_audio_received(self, audio: 'AudioData', record: 'AudioMetadata', **kwargs):
    	self.audio_data = np.clip(np.array(audio.data), -32768, 32767).astype(np.int16)


def start_ffmpeg_process():
    """
    Start the FFmpeg process for pushing video and audio streams to RTMP.
    """
    command = [
        "ffmpeg",
        "-y",  # Overwrite output file
        "-f", "rawvideo",  # Input as raw video stream
        "-vcodec", "rawvideo",
        "-pix_fmt", "rgb24",  # Pixel format as RGB24
        "-s", "1408x1408",  # Input resolution (adjust based on the device)
        "-r", "10",  # Frame rate
        "-i", "-",  # Video read from standard input
        "-f", "s32le",  # Audio input format as 16-bit little-endian
        "-ar", "48000",  # Audio sampling rate
        "-ac", "2",  # Mono channel
        "-i", "-",  # Audio read from standard input
        "-c:v", "libx264",  # Video encoding as H.264
        "-preset", "veryfast",  # Encoding speed
        "-c:a", "aac",  # Audio encoding as AAC
        "-b:a", "128k",  # Audio bitrate
        "-f", "flv",  # Output format as FLV
        RTMP_URL,  # Streaming URL
    ]
    return subprocess.Popen(command, stdin=subprocess.PIPE)


def main():
    # Set SDK log level
    aria.set_log_level(aria.Level.Info)

    # Create a StreamingClient instance
    streaming_client = aria.StreamingClient()

    # Configure subscription
    config = streaming_client.subscription_config
    config.subscriber_data_type = aria.StreamingDataType.Rgb | aria.StreamingDataType.Audio  # Subscribe to RGB and audio data streams
    config.message_queue_size[aria.StreamingDataType.Rgb] = 1024  # Set queue size to 1
    config.message_queue_size[aria.StreamingDataType.Audio] = 1024  # Set queue size to 1
    options = aria.StreamingSecurityOptions()
    options.use_ephemeral_certs = True  # Use ephemeral certificates
    config.security_options = options
    streaming_client.subscription_config = config

    # Create and set observer
    observer = StreamingClientObserver()
    streaming_client.set_streaming_client_observer(observer)

    # Start subscription
    print("Start listening to image and audio data")
    streaming_client.subscribe()

    # Start FFmpeg streaming
    ffmpeg_process = start_ffmpeg_process()

    try:
        while not quit_keypress():
            # Process RGB images
            if aria.CameraId.Rgb in observer.images:
                rgb_image = np.rot90(observer.images[aria.CameraId.Rgb], -1)  # Rotate image
                try:
                    # Write image to FFmpeg's standard input
                    ffmpeg_process.stdin.write(rgb_image.tobytes())
                except BrokenPipeError:
                    print("FFmpeg process closed.")
                    break
                del observer.images[aria.CameraId.Rgb]

            # Process audio data
            if observer.audio_data is not None:
                try:
                    # Write audio data to FFmpeg's standard input
                    ffmpeg_process.stdin.write(observer.audio_data.tobytes())
                except BrokenPipeError:
                    print("FFmpeg process closed.")
                    break
                observer.audio_data = None
    finally:
        # Stop subscription
        print("Stop listening to image and audio data")
        streaming_client.unsubscribe()

        # Close FFmpeg process
        ffmpeg_process.stdin.close()
        ffmpeg_process.wait()


if __name__ == "__main__":
    main()
```

The screen when pushing audio and video.

![Image](https://github.com/user-attachments/assets/2526e8ca-88d1-49a0-a9e8-1447a4a40096)

Only push the video screen.

![Image](https://github.com/user-attachments/assets/bfe415da-7390-42f4-bdac-3c5a38d674c1)


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Audio and RGB video stream acquisition problem #185

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Audio and RGB video stream acquisition problem #185

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions