fix(cloud_asr): remove usage of old microphone API

robgee86 · robgee86 · commit 281fb83bcd87 · 2026-04-27T19:18:43.000+02:00
diff --git a/src/arduino/app_bricks/cloud_asr/cloud_asr.py b/src/arduino/app_bricks/cloud_asr/cloud_asr.py
@@ -9,11 +9,12 @@
 import threading
 import time
 from contextlib import contextmanager
-from typing import Generator, Optional, Union, Iterator, Generator, cast
+from typing import Generator, Union, Iterator, Generator, cast
 
 import numpy as np
 
 from arduino.app_peripherals.microphone import Microphone
+from arduino.app_peripherals.microphone.base_microphone import BaseMicrophone
 from arduino.app_utils import Logger, brick
 
 from .providers import ASRProvider, CloudProvider, DEFAULT_PROVIDER, provider_factory
@@ -45,25 +46,36 @@ def __init__(
         self,
         api_key: str = os.getenv("API_KEY", ""),
         provider: CloudProvider = DEFAULT_PROVIDER,
-        mic: Optional[Microphone] = None,
+        mic: BaseMicrophone | None = None,
         language: str = os.getenv("LANGUAGE", ""),
         silence_timeout: float = 10.0,
     ):
-        if mic:
+        if mic is not None:
             logger.info(f"[{self.__class__.__name__}] Using provided microphone: {mic}")
             self._mic = mic
+            self._owns_mic = False
         else:
             self._mic = Microphone()
+            self._owns_mic = True
 
         self._language = language
         self.silence_timeout = silence_timeout
-        self._mic_lock = threading.Lock()
         self._provider: ASRProvider = provider_factory(
             api_key=api_key,
             name=provider,
             language=self._language,
             sample_rate=self._mic.sample_rate,
         )
+    
+    def start(self):
+        """Start the ASR service; starts the microphone only if this instance created it."""
+        if self._owns_mic:
+            self._mic.start()
+
+    def stop(self):
+        """Stop the ASR service; stops the microphone only if this instance created it."""
+        if self._owns_mic:
+            self._mic.stop()
 
     def _transcribe_stream(self, duration: float = 60.0) -> Generator[ASREvent, None, None]:
         """Perform continuous speech-to-text recognition with detailed events.
@@ -84,12 +96,6 @@ def _transcribe_stream(self, duration: float = 60.0) -> Generator[ASREvent, None
         overall_deadline = time.monotonic() + duration
         silence_deadline = time.monotonic() + self.silence_timeout
 
-        with self._mic_lock:
-            if self._mic.is_recording.is_set():
-                raise RuntimeError("Microphone is busy.")
-            self._mic.start()
-            logger.info(f"[{self.__class__.__name__}] Microphone started.")
-
         def _send():
             try:
                 for chunk in self._mic.stream():
@@ -178,10 +184,6 @@ def _recv():
         finally:
             logger.info("Releasing ASR resources...")
             stop_event.set()
-            with self._mic_lock:
-                if self._mic.is_recording.is_set():
-                    self._mic.stop()
-                    logger.info(f"[{self.__class__.__name__}] Microphone stopped.")
             send_thread.join(timeout=1)
             recv_thread.join(timeout=1)
             provider.stop()
diff --git a/tests/arduino/app_bricks/cloud_asr/test_cloud_asr.py b/tests/arduino/app_bricks/cloud_asr/test_cloud_asr.py
@@ -12,33 +12,35 @@
 
 from arduino.app_bricks.cloud_asr import CloudASR, CloudProvider
 from arduino.app_bricks.cloud_asr.providers import ASRProviderEvent, ASRProviderError
+from arduino.app_peripherals.microphone.base_microphone import BaseMicrophone
 from arduino.app_utils.app import App
 
 
-class MockMicrophone:
+class MockMicrophone(BaseMicrophone):
     """Lightweight microphone stub that yields pre-loaded chunks."""
 
-    def __init__(self, chunks: Iterable, sample_rate: int = 16000, delay_between_chunks: float = 0.0):
-        self.sample_rate = sample_rate
-        self.is_recording = threading.Event()
+    def __init__(
+        self,
+        chunks: Iterable,
+        sample_rate: int = 16000,
+        channels: int = 1,
+        format: type | np.dtype | str = np.int16,
+        buffer_size: int = 1024,
+        auto_reconnect: bool = True
+    ):
+        super().__init__(sample_rate=sample_rate, channels=channels, format=format, buffer_size=buffer_size, auto_reconnect=auto_reconnect)
         self._chunks: List = list(chunks)
-        self._delay = delay_between_chunks
-        self.start_calls = 0
-        self.stop_calls = 0
 
-    def start(self):
-        self.start_calls += 1
-        self.is_recording.set()
+    def _open_microphone(self):
+        pass
 
-    def stop(self):
-        self.stop_calls += 1
-        self.is_recording.clear()
+    def _close_microphone(self):
+        pass
 
-    def stream(self):
-        while self.is_recording.is_set() and self._chunks:
-            if self._delay:
-                time.sleep(self._delay)
-            yield self._chunks.pop(0)
+    def _read_audio(self):
+        if not self._chunks:
+            return None
+        return self._chunks.pop(0)
 
 
 class DummyProvider:
@@ -88,21 +90,19 @@ def _factory(
 
 def test_transcribe_stream_use_microphone_state(make_provider):
     mic = MockMicrophone(chunks=[])
+    mic.start()
     provider = make_provider(events=[ASRProviderEvent(type="text", data="mock")])
     asr = CloudASR(api_key="dummy", mic=mic, provider=CloudProvider.OPENAI_TRANSCRIBE)
 
     try:
         with asr.transcribe_stream() as stream:
             next(stream)
-            assert mic.start_calls == 1
-            assert mic.is_recording.is_set()
             assert provider.start_called is True
 
-        assert mic.stop_calls == 1
-        assert not mic.is_recording.is_set()
         assert provider.stop_called is True
     finally:
-        App.unregister(asr)
+        asr.stop()
+        mic.stop()
 
 
 def test_transcribe_stream_aggregates_partial_text_in_append_mode(make_provider):
@@ -112,10 +112,8 @@ def test_transcribe_stream_aggregates_partial_text_in_append_mode(make_provider)
         ASRProviderEvent(type="text", data=None),
     ]
     audio_chunks = [np.array([1, 2, 3], dtype=np.int16), None, np.array([4, 5, 6], dtype=np.int16)]
-    mic = MockMicrophone(
-        chunks=audio_chunks,
-        delay_between_chunks=0.002,
-    )
+    mic = MockMicrophone(audio_chunks)
+    mic.start()
     provider = make_provider(events=events, partial_mode="append", audio_chunks_len=sum(ch is not None for ch in audio_chunks))
     asr = CloudASR(api_key="dummy", mic=mic, provider=CloudProvider.OPENAI_TRANSCRIBE)
 
@@ -127,8 +125,9 @@ def test_transcribe_stream_aggregates_partial_text_in_append_mode(make_provider)
                 if ev.type == "text":
                     break
     finally:
-        App.unregister(asr)
-
+        asr.stop()
+        mic.stop()
+    
     assert provider.start_called is True
     assert [msg.type for msg in results] == ["partial_text", "partial_text", "text"]
     assert [msg.data for msg in results[:2]] == ["Hel", "lo"]
@@ -149,10 +148,8 @@ def test_transcribe_stream_resets_partial_buffer_in_replace_mode(make_provider):
         ASRProviderEvent(type="text", data=None),
     ]
     audio_chunks = [np.ones(4, dtype=np.int16) for _ in range(5)]
-    mic = MockMicrophone(
-        chunks=audio_chunks,
-        delay_between_chunks=0.002,
-    )
+    mic = MockMicrophone(audio_chunks)
+    mic.start()
     provider = make_provider(events=events, partial_mode="replace", audio_chunks_len=sum(ch is not None for ch in audio_chunks))
     asr = CloudASR(api_key="dummy", mic=mic, provider=CloudProvider.GOOGLE_SPEECH)
 
@@ -167,7 +164,8 @@ def test_transcribe_stream_resets_partial_buffer_in_replace_mode(make_provider):
                 if text_count == 2:
                     break
     finally:
-        App.unregister(asr)
+        asr.stop()
+        mic.stop()
 
     assert provider.start_called is True
     assert [msg.type for msg in results] == ["partial_text", "partial_text", "text", "partial_text", "text"]
@@ -186,8 +184,8 @@ def recv(self):
 
     mic = MockMicrophone(
         chunks=[np.array([7, 8], dtype=np.int16), np.array([9, 10], dtype=np.int16)],
-        delay_between_chunks=0.001,
     )
+    mic.start()
     asr = CloudASR(api_key="dummy", mic=mic, provider=CloudProvider.OPENAI_TRANSCRIBE)
 
     try:
@@ -197,7 +195,8 @@ def recv(self):
         assert isinstance(exc, ASRProviderError)
         assert str(exc) == "boom"
     finally:
-        App.unregister(asr)
+        asr.stop()
+        mic.stop()
 
     assert provider.start_called is True
     assert provider.stop_called is True