Skip to content

[feat]: add useSpeechRecognition hook #282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"@siberiacancode/vitest": "^2.1.0",
"@testing-library/dom": "^10.4.0",
"@testing-library/react": "^16.2.0",
"@types/dom-speech-recognition": "^0.0.4",
"@types/react": "^18.3.18",
"@types/react-dom": "^18.3.5",
"@types/web-bluetooth": "^0.0.21",
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/bundle/hooks/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ export * from './useScrollTo/useScrollTo';
export * from './useSessionStorage/useSessionStorage';
export * from './useSet/useSet';
export * from './useShare/useShare';
export * from './useSpeechRecognition/useSpeechRecognition';
export * from './useStateHistory/useStateHistory';
export * from './useStep/useStep';
export * from './useSticky/useSticky';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import { useCallback, useEffect, useRef, useState } from 'react';
/**
 * Resolves the (possibly vendor-prefixed) Web Speech API constructors.
 *
 * @returns {{ SpeechRecognition: Function | undefined, SpeechGrammarList: Function | undefined, supported: boolean }}
 *   The resolved constructors and whether the full API is available.
 */
const getSpeechRecognitionAPI = () => {
  // Guard for non-browser environments (SSR, node tests) where `window` does
  // not exist — without this check the bare `window` access throws a
  // ReferenceError instead of reporting `supported: false`.
  if (typeof window === 'undefined') {
    return { SpeechRecognition: undefined, SpeechGrammarList: undefined, supported: false };
  }
  // Safari/Chrome still expose the API behind the `webkit` prefix.
  const SpeechRecognition = window.SpeechRecognition ?? window.webkitSpeechRecognition;
  const SpeechGrammarList = window.SpeechGrammarList ?? window.webkitSpeechGrammarList;
  const supported = Boolean(SpeechRecognition && SpeechGrammarList);
  return { SpeechRecognition, SpeechGrammarList, supported };
};
// Initial/empty transcript state, used both as the `useState` initial value and
// by `reset`. It is only ever replaced or spread into new objects — never
// mutated — so sharing a single instance is safe.
const DEFAULT_TRANSCRIPT = {
transcript: '',
interimTranscript: ''
};
/**
 * @name useSpeechRecognition
 * @description - Hook that provides a streamlined interface for incorporating speech-to-text functionality.
 * @category Sensor
 *
 * @param {UseSpeechRecognitionOptions} [options] Configuration options for speech recognition.
 * @param {boolean} [options.continuous=false] Whether recognition should continue after pauses.
 * @param {SpeechGrammarList} [options.grammars] A list of grammar rules. Defaults to an empty list created when recognition starts.
 * @param {boolean} [options.interimResults=false] Whether interim results should be provided.
 * @param {string} [options.language="en-US"] The language for recognition, as a valid BCP 47 tag.
 * @param {number} [options.maxAlternatives=1] The maximum number of alternative transcripts to return.
 * @param {() => void} [options.onEnd] Callback invoked when recognition stops.
 * @param {(error: SpeechRecognitionErrorEvent) => void} [options.onError] Callback invoked on a recognition error.
 * @param {(transcript: string, isFinal: boolean, alternatives?: string[]) => void} [options.onResult] Callback invoked when recognition produces a result. When interim results are enabled, the callback is invoked with the interim transcript if the result is not final.
 * @param {() => void} [options.onStart] Callback invoked when recognition starts.
 * @returns {UseSpeechRecognitionReturn} State, utility methods, and callbacks for interacting with the speech recognition API.
 *
 * @example
 * const { start, stop, reset } = useSpeechRecognition({
 *   language: 'en-US',
 *   interimResults: true,
 *   onResult: (transcript, isFinal) => console.log(transcript, isFinal)
 * });
 */
export const useSpeechRecognition = (options = {}) => {
  const { supported, SpeechRecognition, SpeechGrammarList } = getSpeechRecognitionAPI();
  // Holds the active SpeechRecognition instance between start() and stop()/abort().
  const speechRecognitionRef = useRef(null);
  const [listening, setListening] = useState(false);
  const [error, setError] = useState(null);
  const [transcript, setTranscript] = useState(DEFAULT_TRANSCRIPT);
  const {
    onEnd: onEndCallback,
    onError: onErrorCallback,
    onResult: onResultCallback,
    onStart: onStartCallback
  } = options;
  // NOTE: `grammars` deliberately has no eager default here. The previous
  // `grammars = supported ? new SpeechGrammarList() : undefined` allocated a
  // fresh object on every render, which also invalidated the `start`
  // useCallback memoization on every render. The empty-list fallback now
  // happens lazily inside `start`.
  const {
    continuous = false,
    grammars,
    interimResults = false,
    language = 'en-US',
    maxAlternatives = 1
  } = options;
  // Clears any previous error and transcript state.
  const reset = useCallback(() => {
    setError(null);
    setTranscript(DEFAULT_TRANSCRIPT);
  }, []);
  const onStart = useCallback(() => {
    reset();
    setListening(true);
    onStartCallback?.();
  }, [onStartCallback, reset]);
  const onEnd = useCallback(() => {
    // Keep the final transcript but drop any dangling interim text.
    setTranscript((prev) => ({ ...prev, interimTranscript: '' }));
    setListening(false);
    onEndCallback?.();
    speechRecognitionRef.current = null;
  }, [onEndCallback]);
  const onResult = useCallback(
    (event) => {
      let finalTranscript = '';
      let interimTranscript = '';
      let isFinal = false;
      let alternativesForLastResult = null;
      const results = Array.from(event.results);
      results.forEach((recognitionResult, index) => {
        const { transcript: currentTranscript } = recognitionResult[0];
        if (recognitionResult.isFinal) {
          finalTranscript += currentTranscript;
        } else if (interimResults) {
          interimTranscript += currentTranscript;
        }
        // Alternatives and finality are reported for the most recent result only.
        if (index === results.length - 1) {
          isFinal = recognitionResult.isFinal;
          alternativesForLastResult = Array.from(recognitionResult).map(
            (result) => result.transcript
          );
        }
      });
      setTranscript({ transcript: finalTranscript, interimTranscript });
      const transcriptForCallback = isFinal ? finalTranscript : interimTranscript;
      // Only surface alternatives when the caller asked for more than one.
      if (maxAlternatives > 1) {
        onResultCallback?.(transcriptForCallback, isFinal, alternativesForLastResult ?? []);
        return;
      }
      onResultCallback?.(transcriptForCallback, isFinal);
    },
    [onResultCallback, interimResults, maxAlternatives]
  );
  const onError = useCallback(
    (event) => {
      setError(event);
      setListening(false);
      onErrorCallback?.(event);
      speechRecognitionRef.current = null;
    },
    [onErrorCallback]
  );
  // Creates a fresh recognition instance, wires the handlers and starts it.
  // No-ops when the API is unsupported or recognition is already running.
  const start = useCallback(() => {
    if (!supported || listening) return;
    const speechRecognitionInstance = new SpeechRecognition();
    speechRecognitionInstance.continuous = continuous;
    // Lazily fall back to an empty grammar list so renders stay allocation-free.
    speechRecognitionInstance.grammars = grammars ?? new SpeechGrammarList();
    speechRecognitionInstance.interimResults = interimResults;
    speechRecognitionInstance.lang = language;
    speechRecognitionInstance.maxAlternatives = maxAlternatives;
    speechRecognitionInstance.onstart = onStart;
    speechRecognitionInstance.onend = onEnd;
    speechRecognitionInstance.onerror = onError;
    speechRecognitionInstance.onresult = onResult;
    speechRecognitionRef.current = speechRecognitionInstance;
    speechRecognitionRef.current.start();
  }, [
    supported,
    listening,
    onResult,
    onEnd,
    onStart,
    onError,
    continuous,
    grammars,
    interimResults,
    language,
    maxAlternatives
  ]);
  // Gracefully stops recognition; the instance's `end` event performs cleanup.
  const stop = useCallback(() => {
    if (!speechRecognitionRef.current) return;
    speechRecognitionRef.current.stop();
  }, []);
  const abort = useCallback(() => {
    const speechRecognitionInstance = speechRecognitionRef.current;
    if (!speechRecognitionInstance) return;
    // Detach the handlers before aborting: we invoke `onEndCallback` directly
    // below, and leaving `onend` attached would invoke it a second time when
    // the instance's `end` event fires.
    speechRecognitionInstance.onstart = null;
    speechRecognitionInstance.onend = null;
    speechRecognitionInstance.onerror = null;
    speechRecognitionInstance.onresult = null;
    reset();
    setListening(false);
    onEndCallback?.();
    speechRecognitionInstance.abort();
    speechRecognitionRef.current = null;
  }, [reset, onEndCallback]);
  // Track the latest `abort` so the unmount-only effect below does not call a
  // stale first-render closure (whose `onEndCallback` may be outdated).
  const abortRef = useRef(abort);
  abortRef.current = abort;
  // NOTE(@rupeq): do this on unmount only w/o defining the deps array
  useEffect(() => () => abortRef.current(), []);
  return { supported, listening, error, start, stop, abort, reset, ...transcript };
};
1 change: 1 addition & 0 deletions packages/core/src/hooks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ export * from './useScrollTo/useScrollTo';
export * from './useSessionStorage/useSessionStorage';
export * from './useSet/useSet';
export * from './useShare/useShare';
export * from './useSpeechRecognition/useSpeechRecognition';
export * from './useStateHistory/useStateHistory';
export * from './useStep/useStep';
export * from './useSticky/useSticky';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import { useState } from 'react';

import { useSpeechRecognition } from './useSpeechRecognition';

// Inline style shared by the demo's <label> rows and the language <section>;
// consumers spread it and override individual properties (e.g. marginRight).
const SHARED_STYLES = {
  marginRight: '1rem',
  display: 'flex',
  alignItems: 'center',
  gap: '0.5rem'
};

// Interactive demo for `useSpeechRecognition`: lets the user pick recognition
// options (language, continuous, interim results, max alternatives), control
// the session (start/stop/abort/reset), and observe status and transcripts.
const Demo = () => {
  // Recognition options, fed straight into the hook below; changing them only
  // affects the next started session (the hook configures the instance on start).
  const [language, setLanguage] = useState<string>('en-US');
  const [continuous, setContinuous] = useState<boolean>(false);
  const [interimResults, setInterimResults] = useState<boolean>(false);
  const [maxAlternatives, setMaxAlternatives] = useState<number>(1);

  const { transcript, interimTranscript, error, listening, supported, start, stop, abort, reset } =
    useSpeechRecognition({
      language,
      continuous,
      interimResults,
      maxAlternatives
    });

  // Shared handler for the three language radio buttons.
  const handleLanguageChange = (event: React.ChangeEvent<HTMLInputElement>) =>
    setLanguage(event.target.value);

  if (!supported) {
    return <p>Your browser does not support the Speech Recognition API.</p>;
  }

  return (
    <>
      {/* Language selection */}
      <section style={{ marginBottom: '1rem', ...SHARED_STYLES, marginRight: 'unset' }}>
        <label style={SHARED_STYLES}>
          <input
            checked={language === 'en-US'}
            name='language'
            type='radio'
            value='en-US'
            onChange={handleLanguageChange}
          />
          English (US)
        </label>
        <label style={SHARED_STYLES}>
          <input
            checked={language === 'ru-RU'}
            name='language'
            type='radio'
            value='ru-RU'
            onChange={handleLanguageChange}
          />
          Russian
        </label>
        <label style={SHARED_STYLES}>
          <input
            checked={language === 'es-ES'}
            name='language'
            type='radio'
            value='es-ES'
            onChange={handleLanguageChange}
          />
          Spanish
        </label>
      </section>
      {/* Boolean recognition options */}
      <section style={{ marginBottom: '1rem' }}>
        <label style={SHARED_STYLES}>
          <input
            checked={continuous}
            type='checkbox'
            onChange={() => setContinuous((prev) => !prev)}
          />
          Continuous
        </label>
        <label style={SHARED_STYLES}>
          <input
            checked={interimResults}
            type='checkbox'
            onChange={() => setInterimResults((prev) => !prev)}
          />
          Interim Results
        </label>
      </section>
      {/* Number of alternative transcripts to request */}
      <section style={{ marginBottom: '1rem' }}>
        <label>
          Max Alternatives:&nbsp;
          <select
            value={maxAlternatives}
            onChange={(e) => setMaxAlternatives(Number(e.target.value))}
          >
            <option value={1}>1</option>
            <option value={2}>2</option>
            <option value={3}>3</option>
            <option value={4}>4</option>
          </select>
        </label>
      </section>

      {/* Session controls — enabled/disabled based on `listening` state */}
      <section style={{ marginBottom: '1rem' }}>
        <button
          disabled={listening}
          style={{ marginRight: '0.5rem' }}
          type='button'
          onClick={start}
        >
          Start
        </button>
        <button
          disabled={!listening}
          style={{ marginRight: '0.5rem' }}
          type='button'
          onClick={stop}
        >
          Stop
        </button>
        <button
          disabled={!listening}
          style={{ marginRight: '0.5rem' }}
          type='button'
          onClick={abort}
        >
          Abort
        </button>
        <button disabled={continuous && listening} type='button' onClick={reset}>
          Reset
        </button>
      </section>

      {/* Listening status and any recognition error */}
      <section>
        <h3>Status</h3>
        <p>
          <strong>Listening:</strong> {listening ? 'Yes' : 'No'}
        </p>
        {error && (
          <p style={{ color: 'red' }}>
            <strong>Error:</strong> {error.error ?? 'Unknown error'}
          </p>
        )}
      </section>

      {/* Final transcript, plus the interim transcript when enabled */}
      <section>
        <h3>Transcript</h3>
        <p>
          <strong>Final:</strong> {transcript}
        </p>
        {interimResults && (
          <p>
            <strong>Interim:</strong> {interimTranscript}
          </p>
        )}
      </section>
    </>
  );
};
Loading