Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/en/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,9 @@ pages:
fireworks:
description: fireworks.ai
title: Fireworks.ai
google-gemini-audio-speech:
description: aistudio.google.com
title: Google Gemini
microsoft-speech:
description: speech.microsoft.com
fields:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function handleDeleteProvider(providerId: string) {
<fieldset
v-if="persistedChatProvidersMetadata.length > 0"
flex="~ row gap-4"
min-w-0 of-x-auto scroll-smooth
min-w-0 overflow-x-auto scroll-smooth
role="radiogroup"
>
<RadioCardSimple
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ onUnmounted(() => {
<fieldset
v-if="configuredTranscriptionProvidersMetadata.length > 0"
flex="~ row gap-4"
min-w-0 of-x-auto scroll-smooth
min-w-0 overflow-x-auto scroll-smooth
role="radiogroup"
>
<RadioCardSimple
Expand Down
2 changes: 1 addition & 1 deletion packages/stage-pages/src/pages/settings/modules/speech.vue
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ function handleDeleteProvider(providerId: string) {
<div max-w-full>
<fieldset
v-if="configuredSpeechProvidersMetadata.length > 0" flex="~ row gap-4"
min-w-0 of-x-auto scroll-smooth role="radiogroup"
min-w-0 overflow-x-auto scroll-smooth role="radiogroup"
>
<RadioCardSimple
v-for="metadata in configuredSpeechProvidersMetadata"
Expand Down
2 changes: 1 addition & 1 deletion packages/stage-pages/src/pages/settings/modules/vision.vue
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ function formatRelativeTime(timestamp: number | null) {
<div :class="['max-w-full']">
<fieldset
v-if="persistedChatProvidersMetadata.length > 0"
:class="['flex', 'min-w-0', 'flex-row', 'gap-4', 'of-x-auto', 'scroll-smooth']"
:class="['flex', 'min-w-0', 'flex-row', 'gap-4', 'overflow-x-auto', 'scroll-smooth']"
role="radiogroup"
>
<RadioCardSimple
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
<script setup lang="ts">
import type { SpeechProvider } from '@xsai-ext/providers/utils'

import {
Alert,
SpeechPlayground,
SpeechProviderSettings,
} from '@proj-airi/stage-ui/components'
import { useProviderValidation } from '@proj-airi/stage-ui/composables/use-provider-validation'
import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech'
import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers'
import { FieldCombobox, FieldRange } from '@proj-airi/ui'
import { storeToRefs } from 'pinia'
import { computed, onMounted } from 'vue'
import { useI18n } from 'vue-i18n'

// Stores backing this page: the speech store (voices, synthesis) and the providers
// store (per-provider config, models, provider instances).
const speechStore = useSpeechStore()
const providersStore = useProvidersStore()
// `providers` is extracted via storeToRefs, so it is read and written as `providers.value`.
const { providers } = storeToRefs(providersStore)
const { t } = useI18n()

/**
 * Shape this page assumes for the `google-gemini-audio-speech` entry in the
 * providers store. Every field is optional because the entry may have just been
 * created as an empty object.
 */
interface GoogleGeminiSpeechProviderConfig {
// Its truthiness drives the playground's "API key configured" flag.
apiKey?: string
// Not read or written in this component — presumably consumed by the provider builder; TODO confirm.
baseUrl?: string
// Selected TTS model; the page falls back to `defaultModel` when this is unset or empty.
model?: string
// Not read or written in this component — NOTE(review): confirm where the voice is persisted.
voice?: string
// Bound to the temperature slider; the page shows 1.0 when this is unset.
temperature?: number
}

// Key of this provider in the providers store; passed to every store call on this page.
const providerId = 'google-gemini-audio-speech'
// Model used whenever the persisted config has no model selected.
const defaultModel = 'gemini-2.5-flash-preview-tts'

// Reactive view of this provider's persisted config; `undefined` while no entry exists.
const config = computed(() => providers.value[providerId] as GoogleGeminiSpeechProviderConfig | undefined)

/**
 * Returns the mutable config entry for this provider, inserting an empty
 * object into the providers store first when no entry exists yet.
 */
function ensureProviderConfig(): GoogleGeminiSpeechProviderConfig {
  const entries = providers.value
  const existing = entries[providerId]
  if (existing)
    return existing as GoogleGeminiSpeechProviderConfig

  entries[providerId] = {}
  // Re-read through the store so the caller gets the stored (reactive) object.
  return entries[providerId] as GoogleGeminiSpeechProviderConfig
}

// Models the providers store currently reports for this provider.
const providerModels = computed(() => providersStore.getModelsForProvider(providerId))

// Combobox options: each model's `id` becomes the value and its `name` the label.
// Mapping an empty list already yields an empty list, so no length check is needed.
const modelOptions = computed(() =>
  providerModels.value.map(({ id, name }) => ({ value: id, label: name })),
)

// Voices the speech store holds for this provider; an empty list until some are loaded.
const availableVoices = computed(() => {
  const voices = speechStore.availableVoices[providerId]
  return voices || []
})

// Two-way binding for the selected model. Reads fall back to `defaultModel` when the
// stored value is missing or empty; writes go to the (lazily created) config entry.
const model = computed({
  get() {
    const stored = config.value?.model
    return stored || defaultModel
  },
  set(next) {
    const entry = ensureProviderConfig()
    entry.model = next
  },
})

// Two-way binding for the temperature slider. Reads default to 1.0 only when nothing
// is stored (a stored 0 is kept); writes go to the (lazily created) config entry.
const temperature = computed({
  get() {
    const stored = config.value?.temperature
    return stored ?? 1.0
  },
  set(next) {
    const entry = ensureProviderConfig()
    entry.temperature = next
  },
})

// True once the stored config for this provider carries a non-empty API key.
const apiKeyConfigured = computed(() => {
  const entry = providers.value[providerId]
  return Boolean(entry?.apiKey)
})

// On mount: make sure a config entry with a model exists, then refresh the model
// list and the voice list, one request after the other.
onMounted(async () => {
  const entry = ensureProviderConfig()
  if (!entry.model)
    entry.model = defaultModel

  await providersStore.loadModelsForConfiguredProviders()
  await providersStore.fetchModelsForProvider(providerId)
  await speechStore.loadVoicesForProvider(providerId)
})

/**
 * Synthesizes `input` with the Google Gemini speech provider for the playground.
 *
 * @param input - Text to synthesize.
 * @param voiceId - Voice to use; an empty string is forwarded when none is given.
 * @param _useSSML - Unused here; kept so the positional signature handed to the
 *   playground's `generate-speech` prop stays the same.
 * @param modelId - Optional model override; the currently selected model is used otherwise.
 * @returns Whatever `speechStore.speech` resolves to.
 * @throws Error when the providers store cannot supply a provider instance.
 */
async function handleGenerateSpeech(input: string, voiceId: string, _useSSML: boolean, modelId?: string) {
  const provider = await providersStore.getProviderInstance<SpeechProvider<string>>(providerId)
  if (!provider)
    throw new Error('Failed to initialize speech provider')

  const providerConfig = providersStore.getProviderConfig(providerId)
  // `model.value` already falls back to `defaultModel`, so no further default is needed.
  const modelToUse = modelId || model.value
  // `voiceId` is typed as string; `|| ''` only normalises a missing value at runtime.
  const voiceToUse = voiceId || ''

  return await speechStore.speech(
    provider,
    modelToUse,
    input,
    voiceToUse,
    providerConfig,
  )
}

// Validation state for this provider, consumed by the alerts in the advanced-settings slot;
// `forceValid` is wired to the "continue anyway" button.
// NOTE(review): the template compares `isValidating` with 0, so it looks like a numeric
// counter rather than a boolean — confirm in the composable.
const {
isValidating,
isValid,
validationMessage,
forceValid,
} = useProviderValidation(providerId)
</script>

<template>
<SpeechProviderSettings
:provider-id="providerId"
:default-model="defaultModel"
>
<template #voice-settings>
<FieldCombobox
v-model="model"
label="Model"
description="Select the Gemini TTS model to use for speech generation"
:options="modelOptions"
placeholder="Select a Gemini model..."
/>
<FieldRange
v-model="temperature"
label="Temperature"
description="Controls randomness in speech generation. Lower values make speech more predictable, higher values make it more creative."
:min="0"
:max="2"
:step="0.1"
:format-value="(value) => value.toFixed(1)"
/>
</template>

<template #playground>
<SpeechPlayground
:available-voices="availableVoices"
:generate-speech="handleGenerateSpeech"
:api-key-configured="apiKeyConfigured"
:voices-loading="speechStore.isLoadingSpeechProviderVoices"
default-text="Hello! This is a test of the Google Gemini Speech."
/>
</template>

<template #advanced-settings>
<Alert v-if="!isValid && isValidating === 0 && validationMessage" type="error">
<template #title>
<div class="w-full flex items-center justify-between">
<span>{{ t('settings.dialogs.onboarding.validationFailed') }}</span>
<button
type="button"
class="ml-2 rounded bg-red-100 px-2 py-0.5 text-xs text-red-600 font-medium transition-colors dark:bg-red-800/30 hover:bg-red-200 dark:text-red-300 dark:hover:bg-red-700/40"
@click="forceValid"
>
{{ t('settings.pages.providers.common.continueAnyway') }}
</button>
</div>
</template>
<template v-if="validationMessage" #content>
<div class="whitespace-pre-wrap break-all">
{{ validationMessage }}
</div>
</template>
</Alert>
<Alert v-if="isValid && isValidating === 0" type="success">
<template #title>
{{ t('settings.dialogs.onboarding.validationSuccess') }}
</template>
</Alert>
</template>
</SpeechProviderSettings>
</template>

<route lang="yaml">
meta:
layout: settings
stageTransition:
name: slide
</route>
2 changes: 2 additions & 0 deletions packages/stage-ui/src/stores/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ import { useAuthStore } from './auth'
import { createAliyunNLSProvider as createAliyunNlsStreamProvider } from './providers/aliyun/stream-transcription'
import { convertProviderDefinitionsToMetadata } from './providers/converters'
import { models as elevenLabsModels } from './providers/elevenlabs/list-models'
import { buildGoogleGeminiSpeechProvider } from './providers/google-gemini-speech'
import { buildOpenAICompatibleProvider } from './providers/openai-compatible-builder'
import { buildOpenRouterAudioSpeechProvider } from './providers/openrouter/audio-speech'
import { createWebSpeechAPIProvider } from './providers/web-speech-api'
Expand Down Expand Up @@ -2236,6 +2237,7 @@ export const useProvidersStore = defineStore('providers', () => {
},
},
},
'google-gemini-audio-speech': buildGoogleGeminiSpeechProvider(v => baseUrlValidator.value(v)),
}

// Progressive migration bridge:
Expand Down
Loading
Loading