Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
811b58c
SpeechToText AutostopSilenceTimeout
VladislavAntonyuk Feb 23, 2026
7f98647
2017
VladislavAntonyuk Feb 23, 2026
36edccb
Update src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechT…
VladislavAntonyuk Feb 23, 2026
fe6d968
Fix too long value
VladislavAntonyuk Feb 23, 2026
96fdd51
fix current state notification
VladislavAntonyuk Feb 23, 2026
31eb967
Fix NRE
VladislavAntonyuk Feb 24, 2026
23646f6
remove using
VladislavAntonyuk Feb 24, 2026
ca9d16c
Merge branch 'main' into speech-to-text-autostop-silence-timeout
VladislavAntonyuk Apr 2, 2026
a12df44
Fix PR comments
VladislavAntonyuk Apr 2, 2026
889f7c8
Merge branch 'main' into speech-to-text-autostop-silence-timeout
TheCodeTraveler Apr 5, 2026
88875da
Update Formatting
TheCodeTraveler Apr 5, 2026
efa7d92
Add `const nuint audioEngineBusTap = 0;`
TheCodeTraveler Apr 5, 2026
0916a5e
Call `StopListenAsync` when `InternalStartListeningAsync` fails
TheCodeTraveler Apr 5, 2026
3493e48
Update src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechT…
TheCodeTraveler Apr 5, 2026
a62f2b7
Merge branch 'speech-to-text-autostop-silence-timeout' of https://git…
TheCodeTraveler Apr 5, 2026
a449bb1
Add bounds check before casting `double` to `long`
TheCodeTraveler Apr 5, 2026
0a32c83
Update samples/CommunityToolkit.Maui.Sample/ViewModels/Essentials/Spe…
TheCodeTraveler Apr 5, 2026
f9fa744
Merge branch 'speech-to-text-autostop-silence-timeout' of https://git…
TheCodeTraveler Apr 5, 2026
9ff6f51
Add `GC.SuppressFinalize(this);`
TheCodeTraveler Apr 5, 2026
a39f7b8
Subscribe `RecognitionResultUpdated` before `StartListenAsync`, Add `…
TheCodeTraveler Apr 5, 2026
a78461c
Call `OnSpeechToTextStateChanged` after disposing all fields
TheCodeTraveler Apr 5, 2026
4183fa5
Update samples/CommunityToolkit.Maui.Sample/CommunityToolkit.Maui.Sam…
TheCodeTraveler Apr 5, 2026
afafd09
Retrieve IDispatchTimer from MainThread
TheCodeTraveler Apr 5, 2026
f11a32a
Call `OnSpeechToTextStateChanged` After Disposing all IDisposables
TheCodeTraveler Apr 5, 2026
0c3afdd
Call `StopListenAsync` if `InternalStartListening` fails
TheCodeTraveler Apr 5, 2026
50b5217
Use `NSErrorException`
TheCodeTraveler Apr 5, 2026
3575a55
Retrieve `IDispatchTimer` from MainThread
TheCodeTraveler Apr 5, 2026
78941a5
Add `SpeechToTextOptionsDefaults.AutoStopSilenceTimeout`
TheCodeTraveler Apr 5, 2026
a9ee101
Merge branch 'speech-to-text-autostop-silence-timeout' of https://git…
TheCodeTraveler Apr 5, 2026
779b467
Fix SpeechToTextPage Culture Picker
TheCodeTraveler Apr 5, 2026
57675be
use `is not`
TheCodeTraveler Apr 5, 2026
e068e03
Remove invalid `MemberNotNull`
TheCodeTraveler Apr 5, 2026
199d515
Use `const audioEngineBusTap`
TheCodeTraveler Apr 5, 2026
b4bea62
Add `token.ThrowIfCancellationRequested`
TheCodeTraveler Apr 5, 2026
1b2c98b
Add `CanExecute` Properties
TheCodeTraveler Apr 5, 2026
a2b572b
Use `TwoLetterISOLanguageName` for Android compatibility
TheCodeTraveler Apr 5, 2026
2e22eb7
Call `ResetTimer()` in audio bus callback
TheCodeTraveler Apr 5, 2026
72e7ab5
Fix Type Name
TheCodeTraveler Apr 5, 2026
08f53cc
Re-enable `CanStartListenExecute` and disable `CanStopListenExecute` …
TheCodeTraveler Apr 5, 2026
a02d11a
Remove `speechToText.RecognitionResultUpdated -= HandleRecognitionRes…
TheCodeTraveler Apr 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
using AVFoundation;
using Microsoft.Maui.Dispatching;
using Speech;

namespace CommunityToolkit.Maui.Media;

public sealed partial class OfflineSpeechToTextImplementation
{
AVAudioEngine? audioEngine;
readonly IDispatcherTimer? silenceTimer = Dispatcher.GetForCurrentThread()?.CreateTimer();
readonly AVAudioEngine audioEngine = new();
SFSpeechRecognizer? speechRecognizer;
SFSpeechRecognitionTask? recognitionTask;
SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest;
Expand All @@ -19,12 +21,11 @@ public sealed partial class OfflineSpeechToTextImplementation
/// <inheritdoc />
public ValueTask DisposeAsync()
{
audioEngine?.Dispose();
audioEngine.Dispose();
speechRecognizer?.Dispose();
liveSpeechRequest?.Dispose();
recognitionTask?.Dispose();

audioEngine = null;
speechRecognizer = null;
liveSpeechRequest = null;
recognitionTask = null;
Expand All @@ -41,12 +42,6 @@ public Task<bool> RequestPermissions(CancellationToken cancellationToken = defau
return taskResult.Task.WaitAsync(cancellationToken);
}

/// <summary>
/// Reports whether speech recognition is currently authorized.
/// </summary>
/// <param name="cancellationToken">Checked once up front; a cancelled token makes the call throw.</param>
/// <returns>A completed task holding <see langword="true"/> when the authorization status is <c>Authorized</c>.</returns>
static Task<bool> IsSpeechPermissionAuthorized(CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var isAuthorized = SFSpeechRecognizer.AuthorizationStatus is SFSpeechRecognizerAuthorizationStatus.Authorized;
    return Task.FromResult(isAuthorized);
}

static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)
{
sharedAvAudioSession = AVAudioSession.SharedInstance();
Expand All @@ -62,10 +57,72 @@ static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)

/// <summary>
/// Tears down the current listening session: detaches and stops the silence timer, ends the audio
/// request, cancels and finishes the recognition task, stops the audio engine and removes the tap on
/// bus 0, calls <c>OnSpeechToTextStateChanged</c> with <c>CurrentState</c>, then disposes and clears
/// the recognizer, request and task fields.
/// </summary>
void InternalStopListening()
{
// NOTE(review): the engine is stopped and the bus-0 tap removed twice in this listing (once via `?.`
// here, once directly further down). This looks like the pre-change and post-change lines of a diff
// shown together; confirm which pair belongs to the current file.
audioEngine?.InputNode.RemoveTapOnBus(0);
audioEngine?.Stop();
// Detach the tick handler before stopping so the handler is not left subscribed between sessions.
silenceTimer?.Tick -= OnSilenceTimerTick;
silenceTimer?.Stop();
liveSpeechRequest?.EndAudio();
recognitionTask?.Cancel();
recognitionTask?.Finish();
audioEngine.Stop();
audioEngine.InputNode.RemoveTapOnBus(0);

// NOTE(review): CurrentState is read before the fields below are disposed and nulled; if CurrentState
// is derived from recognitionTask, confirm the state reported here is the intended one.
OnSpeechToTextStateChanged(CurrentState);

recognitionTask?.Dispose();
speechRecognizer?.Dispose();
liveSpeechRequest?.Dispose();

// Clear the references so the null-conditional calls above become no-ops on a repeated stop.
speechRecognizer = null;
liveSpeechRequest = null;
recognitionTask = null;
}

/// <summary>
/// Tick handler for the silence timer: ends the current listening session.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
void OnSilenceTimerTick(object? sender, EventArgs e) => InternalStopListening();

/// <summary>
/// Starts a recognition task for <paramref name="sfSpeechAudioBufferRecognitionRequest"/> and wires its
/// callback to this instance's notifications.
/// </summary>
/// <remarks>
/// Callback behaviour:
/// an error stops listening and completes with a failed result;
/// a final result stops listening and completes with the formatted transcription;
/// a partial result restarts the silence timer and publishes the formatted transcription so far.
/// The state-changed notification is raised only for the first partial result of the task.
/// </remarks>
/// <param name="sfSpeechRecognizer">Recognizer used to create the task.</param>
/// <param name="sfSpeechAudioBufferRecognitionRequest">Audio buffer request the task consumes.</param>
/// <returns>The recognition task returned by <c>GetRecognitionTask</c>.</returns>
SFSpeechRecognitionTask CreateSpeechRecognizerTask(SFSpeechRecognizer sfSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest sfSpeechAudioBufferRecognitionRequest)
{
    // Tracks whether the state-changed notification has already been raised for this task.
    // The previous integer counter was never advanced, so the notification fired on every partial result.
    bool hasReportedStateChange = false;

    return sfSpeechRecognizer.GetRecognitionTask(sfSpeechAudioBufferRecognitionRequest, (result, err) =>
    {
        if (err is not null)
        {
            hasReportedStateChange = false;
            InternalStopListening();
            OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
            return;
        }

        if (result.Final)
        {
            hasReportedStateChange = false;
            InternalStopListening();
            OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
            return;
        }

        // A partial result arrived: push the auto-stop deadline out again.
        RestartTimer();

        if (!hasReportedStateChange)
        {
            hasReportedStateChange = true;
            OnSpeechToTextStateChanged(CurrentState);
        }

        OnRecognitionResultUpdated(result.BestTranscription.FormattedString);
    });
}

/// <summary>
/// Subscribes the silence-timer tick handler, sets the timer interval from
/// <c>options.AutoStopSilenceTimeout</c> and starts the timer. Does nothing when no timer exists.
/// </summary>
/// <param name="options">Listening options supplying the auto-stop silence timeout.</param>
void InitSilenceTimer(SpeechToTextOptions options)
{
    if (silenceTimer is null)
    {
        return;
    }

    silenceTimer.Tick += OnSilenceTimerTick;
    silenceTimer.Interval = options.AutoStopSilenceTimeout;
    silenceTimer.Start();
}

/// <summary>
/// Stops and immediately starts the silence timer again; a no-op when no timer exists.
/// </summary>
void RestartTimer()
{
    if (silenceTimer is { } timer)
    {
        timer.Stop();
        timer.Start();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ static Intent CreateSpeechIntent(SpeechToTextOptions options)
intent.PutExtra(RecognizerIntent.ExtraLanguage, javaLocale);
intent.PutExtra(RecognizerIntent.ExtraLanguagePreference, javaLocale);
intent.PutExtra(RecognizerIntent.ExtraOnlyReturnLanguagePreference, javaLocale);

intent.PutExtra(RecognizerIntent.ExtraSpeechInputCompleteSilenceLengthMillis, options.AutoStopSilenceTimeout.TotalMilliseconds);
intent.PutExtra(RecognizerIntent.ExtraSpeechInputPossiblyCompleteSilenceLengthMillis, options.AutoStopSilenceTimeout.TotalMilliseconds);
return intent;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace CommunityToolkit.Maui.Media;
/// <inheritdoc />
public sealed partial class OfflineSpeechToTextImplementation
{
[MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))]
[MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))]
[SupportedOSPlatform("ios13.0")]
[SupportedOSPlatform("maccatalyst")]
Task InternalStartListening(SpeechToTextOptions options, CancellationToken token = default)
Expand All @@ -27,7 +27,6 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
throw new ArgumentException("Speech recognizer is not available");
}

audioEngine = new AVAudioEngine();
liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest()
{
ShouldReportPartialResults = options.ShouldReportPartialResults,
Expand All @@ -48,39 +47,9 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
throw new ArgumentException("Error starting audio engine - " + error.LocalizedDescription);
}

var currentIndex = 0;
recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) =>
{
if (err is not null)
{
InternalStopListening();
OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
}
else
{
if (result.Final)
{
currentIndex = 0;
InternalStopListening();
OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
}
else
{
if (currentIndex <= 0)
{
OnSpeechToTextStateChanged(CurrentState);
}

for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++)
{
var s = result.BestTranscription.Segments[i].Substring;
currentIndex++;
OnRecognitionResultUpdated(s);
}
}
}
});

InitSilenceTimer(options);
recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest);

return Task.CompletedTask;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using AVFoundation;
using Speech;

Expand All @@ -8,7 +7,7 @@ namespace CommunityToolkit.Maui.Media;
/// <inheritdoc />
public sealed partial class OfflineSpeechToTextImplementation
{
[MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))]
[MemberNotNull(nameof(recognitionTask), nameof(liveSpeechRequest))]
Task InternalStartListening(SpeechToTextOptions options, CancellationToken token = default)
{
speechRecognizer = new SFSpeechRecognizer(NSLocale.FromLocaleIdentifier(options.Culture.Name));
Expand All @@ -19,10 +18,6 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
throw new ArgumentException("Speech recognizer is not available");
}

audioEngine = new AVAudioEngine
{
AutoShutdownEnabled = false
};
liveSpeechRequest = new SFSpeechAudioBufferRecognitionRequest()
{
ShouldReportPartialResults = options.ShouldReportPartialResults,
Expand Down Expand Up @@ -59,38 +54,8 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token
throw new Exception(error.LocalizedDescription);
}

var currentIndex = 0;
recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) =>
{
if (err is not null)
{
InternalStopListening();
OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
}
else
{
if (result.Final)
{
currentIndex = 0;
InternalStopListening();
OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
}
else
{
if (currentIndex <= 0)
{
OnSpeechToTextStateChanged(CurrentState);
}

for (var i = currentIndex; i < result.BestTranscription.Segments.Length; i++)
{
var s = result.BestTranscription.Segments[i].Substring;
currentIndex++;
OnRecognitionResultUpdated(s);
}
}
}
});
InitSilenceTimer(options);
recognitionTask = CreateSpeechRecognizerTask(speechRecognizer, liveSpeechRequest);

return Task.CompletedTask;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ public event EventHandler<SpeechToTextStateChangedEventArgs> StateChanged
public async Task StartListenAsync(SpeechToTextOptions options, CancellationToken cancellationToken = default)
{
    // Honour cancellation before doing anything else.
    cancellationToken.ThrowIfCancellationRequested();

    // Only begin a session from the Stopped state; any other state makes this call a no-op.
    if (CurrentState is SpeechToTextState.Stopped)
    {
        await InternalStartListening(options, cancellationToken);
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ Task InternalStartListening(SpeechToTextOptions options, CancellationToken token

offlineSpeechRecognizer.AudioStateChanged += OfflineSpeechRecognizer_StateChanged;

offlineSpeechRecognizer.InitialSilenceTimeout = TimeSpan.MaxValue;
offlineSpeechRecognizer.BabbleTimeout = TimeSpan.MaxValue;
offlineSpeechRecognizer.InitialSilenceTimeout = options.AutoStopSilenceTimeout;
offlineSpeechRecognizer.BabbleTimeout = options.AutoStopSilenceTimeout;

offlineSpeechRecognizer.SetInputToDefaultAudioDevice();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
using AVFoundation;
using CoreFoundation;
using Microsoft.Maui.Dispatching;
using Speech;

namespace CommunityToolkit.Maui.Media;

public sealed partial class SpeechToTextImplementation
{
AVAudioEngine? audioEngine;
readonly IDispatcherTimer? silenceTimer = Dispatcher.GetForCurrentThread()?.CreateTimer();
readonly AVAudioEngine audioEngine = new();
SFSpeechRecognizer? speechRecognizer;
SFSpeechRecognitionTask? recognitionTask;
SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest;
Expand All @@ -19,12 +22,11 @@ public sealed partial class SpeechToTextImplementation
/// <inheritdoc />
public ValueTask DisposeAsync()
{
audioEngine?.Dispose();
audioEngine.Dispose();
speechRecognizer?.Dispose();
liveSpeechRequest?.Dispose();
recognitionTask?.Dispose();

audioEngine = null;
speechRecognizer = null;
liveSpeechRequest = null;
recognitionTask = null;
Expand Down Expand Up @@ -56,11 +58,22 @@ static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)

/// <summary>
/// Tears down the current recording session: detaches and stops the silence timer, ends the audio
/// request, cancels and finishes the recognition task, stops the audio engine and removes the tap on
/// bus 0, calls <c>OnSpeechToTextStateChanged</c> with <c>CurrentState</c>, then disposes and clears
/// the recognizer, request and task fields.
/// </summary>
void StopRecording()
{
// NOTE(review): the engine is stopped and the bus-0 tap removed twice in this listing (once via `?.`
// here, once directly further down). This looks like the pre-change and post-change lines of a diff
// shown together; confirm which pair belongs to the current file.
audioEngine?.InputNode.RemoveTapOnBus(0);
audioEngine?.Stop();
// Detach the tick handler before stopping so the handler is not left subscribed between sessions.
silenceTimer?.Tick -= OnSilenceTimerTick;
silenceTimer?.Stop();
liveSpeechRequest?.EndAudio();
recognitionTask?.Cancel();
recognitionTask?.Finish();
audioEngine.Stop();
audioEngine.InputNode.RemoveTapOnBus(0);

// NOTE(review): CurrentState is read before the fields below are disposed and nulled; if CurrentState
// is derived from recognitionTask, confirm the state reported here is the intended one.
OnSpeechToTextStateChanged(CurrentState);

recognitionTask?.Dispose();
speechRecognizer?.Dispose();
liveSpeechRequest?.Dispose();

// Clear the references so the null-conditional calls above become no-ops on a repeated stop.
speechRecognizer = null;
liveSpeechRequest = null;
recognitionTask = null;
}

Task InternalStopListeningAsync(CancellationToken cancellationToken)
Expand All @@ -69,4 +82,55 @@ Task InternalStopListeningAsync(CancellationToken cancellationToken)
StopRecording();
return Task.CompletedTask;
}

/// <summary>
/// Tick handler for the silence timer: stops the current recording.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
void OnSilenceTimerTick(object? sender, EventArgs e) => StopRecording();

/// <summary>
/// Starts a recognition task for <paramref name="sfSpeechAudioBufferRecognitionRequest"/> and wires its
/// callback to this instance's notifications.
/// </summary>
/// <remarks>
/// Callback behaviour:
/// an error stops recording and completes with a failed result;
/// a final result stops recording and completes with the formatted transcription;
/// a partial result restarts the silence timer and publishes the formatted transcription so far.
/// The state-changed notification is raised only for the first partial result of the task.
/// </remarks>
/// <param name="sfSpeechRecognizer">Recognizer used to create the task.</param>
/// <param name="sfSpeechAudioBufferRecognitionRequest">Audio buffer request the task consumes.</param>
/// <returns>The recognition task returned by <c>GetRecognitionTask</c>.</returns>
SFSpeechRecognitionTask CreateSpeechRecognizerTask(SFSpeechRecognizer sfSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest sfSpeechAudioBufferRecognitionRequest)
{
    // Tracks whether the state-changed notification has already been raised for this task.
    // The previous integer counter was never advanced, so the notification fired on every partial result.
    bool hasReportedStateChange = false;

    return sfSpeechRecognizer.GetRecognitionTask(sfSpeechAudioBufferRecognitionRequest, (result, err) =>
    {
        if (err is not null)
        {
            hasReportedStateChange = false;
            StopRecording();
            OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
            return;
        }

        if (result.Final)
        {
            hasReportedStateChange = false;
            StopRecording();
            OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
            return;
        }

        // A partial result arrived: push the auto-stop deadline out again.
        RestartTimer();

        if (!hasReportedStateChange)
        {
            hasReportedStateChange = true;
            OnSpeechToTextStateChanged(CurrentState);
        }

        OnRecognitionResultUpdated(result.BestTranscription.FormattedString);
    });
}

/// <summary>
/// Prepares and starts the silence timer: attaches the tick handler, applies
/// <c>options.AutoStopSilenceTimeout</c> as the interval, then starts it. A no-op when no timer exists.
/// </summary>
/// <param name="options">Listening options supplying the auto-stop silence timeout.</param>
void InitSilenceTimer(SpeechToTextOptions options)
{
    if (silenceTimer is not { } timer)
    {
        return;
    }

    timer.Tick += OnSilenceTimerTick;
    timer.Interval = options.AutoStopSilenceTimeout;
    timer.Start();
}

/// <summary>
/// Restarts the silence timer by stopping it and starting it again; does nothing when no timer exists.
/// </summary>
void RestartTimer()
{
    if (silenceTimer is null)
    {
        return;
    }

    silenceTimer.Stop();
    silenceTimer.Start();
}
}
Loading
Loading