Skip to content

Commit a2392b0

Browse files
authored
Jetpack AI: Add machinery to generate audio transcriptions (#35691)
* Create function to process transcriptions * Expose the audio transcription function to the world * Add first version of the use-audio-transcription hook * Expose the hook to the world * Expose use-audio-transcription hook types * Add changelog file * Add missing line * Add audio transcription demo component * changelog * Update testing component labels * Bump ai-client version * Fix imports after module settings change * Import apiFetch the right way * Move apiFetch special handling to a dedicated file * Introduce onReady and onError callbacks for the transcription hook * Add demo for the transcription hook
1 parent dd4b85c commit a2392b0

File tree

10 files changed

+213
-15
lines changed

10 files changed

+213
-15
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Significance: minor
2+
Type: added
3+
4+
AI Client: add support for audio transcriptions.

projects/js-packages/ai-client/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"private": false,
33
"name": "@automattic/jetpack-ai-client",
4-
"version": "0.6.2-alpha",
4+
"version": "0.7.0-alpha",
55
"description": "A JS client for consuming Jetpack AI services",
66
"homepage": "https://github.com/Automattic/jetpack/tree/HEAD/projects/js-packages/ai-client/#readme",
77
"bugs": {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
2+
* External dependencies
3+
*/
4+
import apiFetchMod from '@wordpress/api-fetch';
5+
6+
// @wordpress/api-fetch (as of 6.47.0) declares itself in such a way that tsc and node see the function at apiFetchMod.default
7+
// while some other environments (including code running inside WordPress itself) see it at apiFetch.
8+
// See https://arethetypeswrong.github.io/?p=@wordpress/api-fetch@6.47.0
9+
// This is a helper to simplify the usage of the api-fetch module on the ai-client package.
10+
// Shape of the callable as tsc sees it (under the module's `default` property).
type ApiFetchType = typeof apiFetchMod.default;

// Use the `default` property when it is present; otherwise fall back to the
// module object itself.
const resolvedApiFetch = apiFetchMod.default ?? apiFetchMod;
const apiFetch = resolvedApiFetch as ApiFetchType;

export default apiFetch;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* External dependencies
3+
*/
4+
import debugFactory from 'debug';
5+
/**
6+
* Internal dependencies
7+
*/
8+
import apiFetch from '../api-fetch/index.js';
9+
import requestJwt from '../jwt/index.js';
10+
11+
const debug = debugFactory( 'jetpack-ai-client:audio-transcription' );
12+
13+
/**
 * Shape of the payload read from the audio transcription request.
 */
interface AudioTranscriptionResponse {
	/**
	 * The transcribed text.
	 */
	text: string;
}
22+
23+
/**
 * A function that takes an audio blob and transcribes it.
 *
 * A JWT is requested first and sent as a Bearer token in the `Authorization`
 * header. The audio is then POSTed to the transcription endpoint as the
 * `audio_file` field of a FormData payload.
 *
 * @param {Blob} audio - The audio to be transcribed, from a recording or from a file.
 * @param {string} feature - Optional feature name that is calling the transcription. When provided it is sent, URL-encoded, as the `feature` query argument.
 * @returns {Promise<string>} - The promise of a string containing the transcribed audio. It rejects with the original error when either the token request or the transcription request fails.
 */
export default async function transcribeAudio( audio: Blob, feature?: string ): Promise< string > {
	debug( 'Transcribing audio: %o. Feature: %o', audio, feature );

	// Get a token to use the transcription service
	let token = '';
	try {
		token = ( await requestJwt() ).token;
	} catch ( error ) {
		debug( 'Error getting token: %o', error );
		throw error;
	}

	// Build a FormData object to hold the audio file
	const formData = new FormData();
	formData.append( 'audio_file', audio );

	// Encode the feature name so reserved characters (&, #, =, spaces...) cannot
	// break the query string or inject extra arguments.
	const query = feature ? `?feature=${ encodeURIComponent( feature ) }` : '';

	try {
		const headers = {
			Authorization: `Bearer ${ token }`,
		};

		const response: AudioTranscriptionResponse = await apiFetch( {
			url: `https://public-api.wordpress.com/wpcom/v2/jetpack-ai-transcription${ query }`,
			method: 'POST',
			body: formData,
			headers,
		} );

		debug( 'Transcription response: %o', response );

		return response.text;
	} catch ( error ) {
		debug( 'Transcription error response: %o', error );
		throw error;
	}
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/**
2+
* External dependencies
3+
*/
4+
import { useCallback, useState } from '@wordpress/element';
5+
import debugFactory from 'debug';
6+
/**
7+
* Internal dependencies
8+
*/
9+
import transcribeAudio from '../../audio-transcription/index.js';
10+
11+
const debug = debugFactory( 'jetpack-ai-client:use-audio-transcription' );
12+
13+
/**
 * The value returned by the audio transcription hook.
 */
export type UseAudioTranscriptionReturn = {
	/**
	 * Starts the transcription of the given audio.
	 */
	transcribeAudio: ( audio: Blob ) => void;

	/**
	 * Whether a transcription request is currently in progress.
	 */
	isTranscribingAudio: boolean;

	/**
	 * The transcribed text; an empty string until a transcription succeeds.
	 */
	transcriptionResult: string;

	/**
	 * The error message of the last failed transcription; an empty string otherwise.
	 */
	transcriptionError: string;
};
22+
23+
/**
 * The options accepted by the audio transcription hook.
 */
export type UseAudioTranscriptionProps = {
	/**
	 * The feature name that is calling the transcription.
	 */
	feature: string;

	/**
	 * Optional callback invoked with the transcribed text once it is ready.
	 */
	onReady?: ( transcription: string ) => void;

	/**
	 * Optional callback invoked with the error message when the transcription fails.
	 */
	onError?: ( error: string ) => void;
};
31+
32+
/**
 * A hook to handle audio transcription.
 *
 * @param {UseAudioTranscriptionProps} props - The hook options.
 * @param {string} props.feature - The feature name that is calling the transcription.
 * @param {Function} props.onReady - Optional callback invoked with the transcribed text when the transcription succeeds.
 * @param {Function} props.onError - Optional callback invoked with the error message when the transcription fails.
 * @returns {UseAudioTranscriptionReturn} - Object with properties to get the transcription data.
 */
export default function useAudioTranscription( {
	feature,
	onReady,
	onError,
}: UseAudioTranscriptionProps ): UseAudioTranscriptionReturn {
	const [ transcriptionResult, setTranscriptionResult ] = useState< string >( '' );
	const [ transcriptionError, setTranscriptionError ] = useState< string >( '' );
	const [ isTranscribingAudio, setIsTranscribingAudio ] = useState( false );

	const handleAudioTranscription = useCallback(
		( audio: Blob ) => {
			debug( 'Transcribing audio' );

			/**
			 * Reset the transcription result and error.
			 */
			setTranscriptionResult( '' );
			setTranscriptionError( '' );
			setIsTranscribingAudio( true );

			/**
			 * Call the audio transcription library.
			 */
			transcribeAudio( audio, feature )
				.then( transcriptionText => {
					setTranscriptionResult( transcriptionText );
					onReady?.( transcriptionText );
				} )
				.catch( error => {
					// The rejection value is not guaranteed to be an Error instance;
					// always hand a string to the state and to the callback.
					const message =
						typeof error?.message === 'string' ? error.message : String( error );

					setTranscriptionError( message );
					onError?.( message );
				} )
				.finally( () => setIsTranscribingAudio( false ) );
		},
		// Everything the callback reads from the hook options must be listed here;
		// otherwise it keeps using the values captured on the first render.
		[ feature, onReady, onError ]
	);

	return {
		transcriptionResult,
		isTranscribingAudio,
		transcriptionError,
		transcribeAudio: handleAudioTranscription,
	};
}

projects/js-packages/ai-client/src/index.ts

+2
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
export { default as requestJwt } from './jwt/index.js';
55
export { default as SuggestionsEventSource } from './suggestions-event-source/index.js';
66
export { default as askQuestion } from './ask-question/index.js';
7+
export { default as transcribeAudio } from './audio-transcription/index.js';
78

89
/*
910
* Hooks
1011
*/
1112
export { default as useAiSuggestions } from './hooks/use-ai-suggestions/index.js';
1213
export { default as useMediaRecording } from './hooks/use-media-recording/index.js';
14+
export { default as useAudioTranscription } from './hooks/use-audio-transcription/index.js';
1315

1416
/*
1517
* Components: Icons

projects/js-packages/ai-client/src/jwt/index.ts

+4-7
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
* External dependencies
33
*/
44
import { isSimpleSite } from '@automattic/jetpack-shared-extension-utils';
5-
import apiFetchMod from '@wordpress/api-fetch';
65
import debugFactory from 'debug';
6+
/**
7+
* Internal dependencies
8+
*/
9+
import apiFetch from '../api-fetch/index.js';
710
/*
811
* Types & constants
912
*/
@@ -27,12 +30,6 @@ type TokenDataEndpointResponseProps = {
2730

2831
const debug = debugFactory( 'jetpack-ai-client:jwt' );
2932

30-
// @wordpress/api-fetch (as of 6.47.0) declares itself in such a way that tsc and node see the function at apiFetchMod.default
31-
// while some other environments (including code running inside WordPress itself) see it at apiFetch.
32-
// See https://arethetypeswrong.github.io/?p=@wordpress/api-fetch@6.47.0
33-
type ApiFetchType = typeof apiFetchMod.default;
34-
const apiFetch: ApiFetchType = ( apiFetchMod.default ?? apiFetchMod ) as ApiFetchType;
35-
3633
const JWT_TOKEN_ID = 'jetpack-ai-jwt';
3734
const JWT_TOKEN_EXPIRATION_TIME = 2 * 60 * 1000; // 2 minutes
3835

projects/js-packages/ai-client/src/types.ts

+8
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,15 @@ export type PromptProp = PromptMessagesProp | string;
3232
* Data Flow types
3333
*/
3434
export type { UseAiContextOptions } from './data-flow/use-ai-context.js';
35+
36+
/*
37+
* Hook types
38+
*/
3539
export type { RequestingErrorProps } from './hooks/use-ai-suggestions/index.js';
40+
export type {
41+
UseAudioTranscriptionProps,
42+
UseAudioTranscriptionReturn,
43+
} from './hooks/use-audio-transcription/index.js';
3644

3745
/*
3846
* Requests types
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Significance: minor
2+
Type: other
3+
4+
Jetpack AI: include audio transcription usage example to Voice-to-Content block.

projects/plugins/jetpack/extensions/blocks/voice-to-content/edit.tsx

+29-7
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ import {
66
micIcon,
77
playerPauseIcon,
88
useMediaRecording,
9+
useAudioTranscription,
10+
UseAudioTranscriptionReturn,
911
} from '@automattic/jetpack-ai-client';
1012
import { ThemeProvider } from '@automattic/jetpack-components';
11-
import { Button, Modal, Icon } from '@wordpress/components';
13+
import { Button, Modal, Icon, FormFileUpload } from '@wordpress/components';
1214
import { useDispatch } from '@wordpress/data';
1315
import { useCallback } from '@wordpress/element';
1416
import { __ } from '@wordpress/i18n';
@@ -80,6 +82,22 @@ function ContextualRow( { state, error = null, audioURL = null } ) {
8082
function ActionButtons( { state, mediaControls } ) {
8183
const { start, pause, resume, stop } = mediaControls ?? {};
8284

85+
const onTranscriptionReady = ( transcription: string ) => {
86+
// eslint-disable-next-line no-console
87+
console.log( 'Transcription ready: ', transcription );
88+
};
89+
90+
const onTranscriptionError = ( error: string ) => {
91+
// eslint-disable-next-line no-console
92+
console.log( 'Transcription error: ', error );
93+
};
94+
95+
const { transcribeAudio }: UseAudioTranscriptionReturn = useAudioTranscription( {
96+
feature: 'voice-to-content',
97+
onReady: onTranscriptionReady,
98+
onError: onTranscriptionError,
99+
} );
100+
83101
const recordingHandler = useCallback( () => {
84102
if ( state === 'inactive' ) {
85103
start?.( 1000 ); // Stream audio on 1 second intervals
@@ -90,8 +108,11 @@ function ActionButtons( { state, mediaControls } ) {
90108
}
91109
}, [ state, start, pause, resume ] );
92110

93-
const uploadHandler = () => {
94-
throw new Error( 'Not implemented' );
111+
const uploadHandler = event => {
112+
if ( event.currentTarget.files.length > 0 ) {
113+
const file = event.currentTarget.files[ 0 ];
114+
transcribeAudio( file );
115+
}
95116
};
96117

97118
const doneHandler = useCallback( () => {
@@ -122,13 +143,14 @@ function ActionButtons( { state, mediaControls } ) {
122143
</Button>
123144
) }
124145
{ [ 'inactive', 'error' ].includes( state ) && (
125-
<Button
126-
className="jetpack-ai-voice-to-content__button"
146+
<FormFileUpload
147+
accept="audio/*"
148+
onChange={ uploadHandler }
127149
variant="secondary"
128-
onClick={ uploadHandler }
150+
className="jetpack-ai-voice-to-content__button"
129151
>
130152
{ __( 'Upload audio', 'jetpack' ) }
131-
</Button>
153+
</FormFileUpload>
132154
) }
133155
{ [ 'recording', 'paused' ].includes( state ) && (
134156
<Button

0 commit comments

Comments
 (0)