Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/en/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,9 @@ pages:
fireworks:
description: fireworks.ai
title: Fireworks.ai
google-gemini-audio-speech:
description: aistudio.google.com
title: Google Gemini
microsoft-speech:
description: speech.microsoft.com
fields:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function handleDeleteProvider(providerId: string) {
<fieldset
v-if="persistedChatProvidersMetadata.length > 0"
flex="~ row gap-4"
min-w-0 of-x-auto scroll-smooth
min-w-0 overflow-x-auto scroll-smooth
role="radiogroup"
>
<RadioCardSimple
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ onUnmounted(() => {
<fieldset
v-if="configuredTranscriptionProvidersMetadata.length > 0"
flex="~ row gap-4"
min-w-0 of-x-auto scroll-smooth
min-w-0 overflow-x-auto scroll-smooth
role="radiogroup"
>
<RadioCardSimple
Expand Down
2 changes: 1 addition & 1 deletion packages/stage-pages/src/pages/settings/modules/speech.vue
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ function handleDeleteProvider(providerId: string) {
<div max-w-full>
<fieldset
v-if="configuredSpeechProvidersMetadata.length > 0" flex="~ row gap-4"
min-w-0 of-x-auto scroll-smooth role="radiogroup"
min-w-0 overflow-x-auto scroll-smooth role="radiogroup"
>
<RadioCardSimple
v-for="metadata in configuredSpeechProvidersMetadata"
Expand Down
2 changes: 1 addition & 1 deletion packages/stage-pages/src/pages/settings/modules/vision.vue
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ function formatRelativeTime(timestamp: number | null) {
<div :class="['max-w-full']">
<fieldset
v-if="persistedChatProvidersMetadata.length > 0"
:class="['flex', 'min-w-0', 'flex-row', 'gap-4', 'of-x-auto', 'scroll-smooth']"
:class="['flex', 'min-w-0', 'flex-row', 'gap-4', 'overflow-x-auto', 'scroll-smooth']"
role="radiogroup"
>
<RadioCardSimple
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
<script setup lang="ts">
import type { SpeechProvider } from '@xsai-ext/providers/utils'

import {
Alert,
SpeechPlayground,
SpeechProviderSettings,
} from '@proj-airi/stage-ui/components'
import { useProviderValidation } from '@proj-airi/stage-ui/composables/use-provider-validation'
import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech'
import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers'
import { FieldCombobox, FieldRange } from '@proj-airi/ui'
import { storeToRefs } from 'pinia'
import { computed, onMounted } from 'vue'
import { useI18n } from 'vue-i18n'

const speechStore = useSpeechStore()
const providersStore = useProvidersStore()
const { providers } = storeToRefs(providersStore)
const { t } = useI18n()

interface GoogleGeminiSpeechProviderConfig {
apiKey?: string
baseUrl?: string
model?: string
voice?: string
temperature?: number
}

const providerId = 'google-gemini-audio-speech'
const defaultModel = 'gemini-2.5-flash-preview-tts'

const config = computed(() => providers.value[providerId] as GoogleGeminiSpeechProviderConfig | undefined)

function ensureProviderConfig(): GoogleGeminiSpeechProviderConfig {
if (!providers.value[providerId])
providers.value[providerId] = {}

return providers.value[providerId] as GoogleGeminiSpeechProviderConfig
}

const providerModels = computed(() => providersStore.getModelsForProvider(providerId))
const modelOptions = computed(() => {
return (providerModels.value.length > 0 ? providerModels.value : []).map(model => ({
value: model.id,
label: model.name,
}))
})

const availableVoices = computed(() => speechStore.availableVoices[providerId] || [])

const model = computed({
get: () => config.value?.model || defaultModel,
set: (value) => {
ensureProviderConfig().model = value
},
})

const temperature = computed({
get: () => config.value?.temperature ?? 1.0,
set: (value) => {
ensureProviderConfig().temperature = value
},
})

const apiKeyConfigured = computed(() => !!providers.value[providerId]?.apiKey)

onMounted(async () => {
ensureProviderConfig()

if (!config.value?.model)
model.value = defaultModel

await providersStore.loadModelsForConfiguredProviders()
await providersStore.fetchModelsForProvider(providerId)
await speechStore.loadVoicesForProvider(providerId)
})

async function handleGenerateSpeech(input: string, voiceId: string, _useSSML: boolean, modelId?: string) {
const provider = await providersStore.getProviderInstance<SpeechProvider<string>>(providerId)
if (!provider)
throw new Error('Failed to initialize speech provider')

const providerConfig = providersStore.getProviderConfig(providerId)
const modelToUse = modelId || model.value || defaultModel
const voiceToUse = voiceId || '' as string

return await speechStore.speech(
provider,
modelToUse,
input,
voiceToUse,
providerConfig,
)
}

const {
isValidating,
isValid,
validationMessage,
forceValid,
} = useProviderValidation(providerId)
</script>

<template>
<SpeechProviderSettings
:provider-id="providerId"
:default-model="defaultModel"
>
<template #voice-settings>
<FieldCombobox
v-model="model"
label="Model"
description="Select the Gemini TTS model to use for speech generation"
:options="modelOptions"
placeholder="Select a Gemini model..."
/>
<FieldRange
v-model="temperature"
label="Temperature"
description="Controls randomness in speech generation. Lower values make speech more predictable, higher values make it more creative."
:min="0"
:max="2"
:step="0.1"
:format-value="(value) => value.toFixed(1)"
/>
</template>

<template #playground>
<SpeechPlayground
:available-voices="availableVoices"
:generate-speech="handleGenerateSpeech"
:api-key-configured="apiKeyConfigured"
:voices-loading="speechStore.isLoadingSpeechProviderVoices"
default-text="Hello! This is a test of the Google Gemini Speech."
/>
</template>

<template #advanced-settings>
<Alert v-if="!isValid && isValidating === 0 && validationMessage" type="error">
<template #title>
<div class="w-full flex items-center justify-between">
<span>{{ t('settings.dialogs.onboarding.validationFailed') }}</span>
<button
type="button"
class="ml-2 rounded bg-red-100 px-2 py-0.5 text-xs text-red-600 font-medium transition-colors dark:bg-red-800/30 hover:bg-red-200 dark:text-red-300 dark:hover:bg-red-700/40"
@click="forceValid"
>
{{ t('settings.pages.providers.common.continueAnyway') }}
</button>
</div>
</template>
<template v-if="validationMessage" #content>
<div class="whitespace-pre-wrap break-all">
{{ validationMessage }}
</div>
</template>
</Alert>
<Alert v-if="isValid && isValidating === 0" type="success">
<template #title>
{{ t('settings.dialogs.onboarding.validationSuccess') }}
</template>
</Alert>
</template>
</SpeechProviderSettings>
</template>

<route lang="yaml">
meta:
layout: settings
stageTransition:
name: slide
</route>
2 changes: 2 additions & 0 deletions packages/stage-ui/src/stores/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ import { useAuthStore } from './auth'
import { createAliyunNLSProvider as createAliyunNlsStreamProvider } from './providers/aliyun/stream-transcription'
import { convertProviderDefinitionsToMetadata } from './providers/converters'
import { models as elevenLabsModels } from './providers/elevenlabs/list-models'
import { buildGoogleGeminiSpeechProvider } from './providers/google-gemini-speech'
import { buildOpenAICompatibleProvider } from './providers/openai-compatible-builder'
import { buildOpenRouterAudioSpeechProvider } from './providers/openrouter/audio-speech'
import { createWebSpeechAPIProvider } from './providers/web-speech-api'
Expand Down Expand Up @@ -2236,6 +2237,7 @@ export const useProvidersStore = defineStore('providers', () => {
},
},
},
'google-gemini-audio-speech': buildGoogleGeminiSpeechProvider(v => baseUrlValidator.value(v)),
}

// Progressive migration bridge:
Expand Down
Loading
Loading