higress-group · fasuizu-br · Feb 21, 2026 · Feb 21, 2026
@@ -0,0 +1,97 @@
+# Speech AI MCP Server
+
+Speech processing suite for AI agents. Pronunciation assessment, speech-to-text, and text-to-speech — all accessible as MCP tools through Higress.
+
+## Features
+
+- **Pronunciation Assessment**: Score English pronunciation at phoneme, word, and sentence levels (0-100). Exceeds human expert inter-annotator agreement.
+- **Speech-to-Text**: Transcribe audio with word-level timestamps and confidence scores. Sub-300ms latency.
+- **Text-to-Speech**: Generate natural speech with 12 English voices (American and British). Speed control 0.5x-2.0x.
+
+## Getting Started
+
+### 1. Get an API Key
+
+Visit [brainiall.com](https://brainiall.com) or subscribe via [Azure Marketplace](https://azuremarketplace.microsoft.com).
+
+### 2. Configure in Higress
+
+Register your API key on the [mcp.higress.ai](https://mcp.higress.ai) interface to receive a generated SSE endpoint URL.
+
+### 3. Connect Your MCP Client
+
+Add the generated URL to your MCP client configuration:
+
+```json
+{
+  "mcpServers": {
+    "speech-ai": {
+      "url": "https://mcp.higress.ai/speech-ai/your-token"
+    }
+  }
+}
+```
+
+## Tool Reference
+
+### assess_pronunciation
+
+Score pronunciation of spoken audio against reference text.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `audio` | string | Yes | Base64-encoded audio (WAV, MP3, FLAC, OGG) |
+| `text` | string | Yes | Reference text to score against |
+| `format` | string | No | Audio format (default: "wav") |
+
+**Returns:** Overall score (0-100), sentence score, confidence (0-1), word-level scores with phoneme breakdown.
+
+### transcribe_audio
+
+Transcribe spoken audio to text with word-level timestamps.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `audio` | string | Yes | Base64-encoded audio (WAV, MP3, FLAC, OGG) |
+
+**Returns:** Transcribed text, audio duration, word-level timestamps.
+
+### synthesize_speech
+
+Generate speech from text with selectable voices.
+
+**Parameters:**
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `text` | string | Yes | Text to synthesize (max 5000 chars) |
+| `voice` | string | No | Voice ID (default: "af_heart") |
+| `speed` | number | No | Speed multiplier 0.5-2.0 (default: 1.0) |
+
+**Returns:** WAV audio data.
+
+### list_tts_voices
+
+List all 12 available English voices with metadata.
+
+### check_pronunciation_service / check_stt_service / check_tts_service
+
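+Health checks for the pronunciation, speech-to-text, and text-to-speech services. Each tool takes no arguments and reports the service's status and version.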
+
+## Available Voices
+
+| ID | Name | Gender | Accent |
+|----|------|--------|--------|
+| af_heart | Heart | Female | American |
+| af_bella | Bella | Female | American |
+| am_adam | Adam | Male | American |
+| am_michael | Michael | Male | American |
+| bf_emma | Emma | Female | British |
+| bm_lewis | Lewis | Male | British |
+| *6 more voices — call `list_tts_voices` for the full list* | | | |
+
+## Support
+
+- Email: fasuizu@brainiall.com
+- Website: [brainiall.com](https://brainiall.com)
@@ -0,0 +1,147 @@
+# Server identity plus the shared settings that tool templates read via `.config`.
+server:
+  name: 'speech-ai'
+  config:
+    # Empty placeholder; each tool forwards this value in the
+    # Ocp-Apim-Subscription-Key request header.
+    apiKey: ''
+
+# Tool definitions, one sequence entry per tool.
+tools:
+  # Pronunciation scoring: POSTs the recording and its reference text to the
+  # upstream /pronunciation/assess/base64 endpoint and renders the scores as text.
+  - name: assess_pronunciation
+    description: "Score English pronunciation at phoneme, word, and sentence levels (0-100). Compares spoken audio against reference text. Returns calibrated scores with IPA phoneme breakdown."
+    args:
+      - name: audio
+        description: "Base64-encoded audio data (WAV, MP3, FLAC, or OGG)"
+        type: string
+        required: true
+      - name: text
+        description: "Reference text that the speaker was supposed to say"
+        type: string
+        required: true
+      - name: format
+        description: "Audio format of the encoded data"
+        type: string
+        required: false
+        default: "wav"
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/pronunciation/assess/base64"
+      method: POST
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+        - key: "Content-Type"
+          value: "application/json"
+      # Send the args (audio, text, format) as the JSON body instead of splicing
+      # them into a hand-written JSON string: reference text containing quotes,
+      # backslashes, or newlines would otherwise produce invalid JSON.
+      argsToJsonBody: true
+    responseTemplate:
+      # Response fields are addressed from the root of the upstream JSON body.
+      body: |
+        Overall Score: {{.overallScore}}/100
+        Sentence Score: {{.sentenceScore}}/100
+        Confidence: {{.confidence}}
+        Words: {{.words}}
+
+  # Health probe for the pronunciation service: argument-less GET on /pronunciation/health.
+  - name: check_pronunciation_service
+    description: "Check the health status of the pronunciation assessment service"
+    args: []
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/pronunciation/health"
+      method: GET
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+    responseTemplate:
+      # Response fields are addressed from the root of the upstream JSON body.
+      body: |
+        Status: {{.status}}
+        Version: {{.version}}
+
+  # Speech-to-text: POSTs base64 audio to /stt/transcribe/base64 and renders the
+  # transcript, duration, and per-word data as text.
+  - name: transcribe_audio
+    description: "Transcribe spoken audio to text with word-level timestamps and confidence scores. Supports WAV, MP3, FLAC, and OGG."
+    args:
+      - name: audio
+        description: "Base64-encoded audio data"
+        type: string
+        required: true
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/stt/transcribe/base64"
+      method: POST
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+        - key: "Content-Type"
+          value: "application/json"
+      # The single `audio` arg is serialized as the JSON body, so the payload is
+      # always valid JSON regardless of what the caller passes.
+      argsToJsonBody: true
+    responseTemplate:
+      # Response fields are addressed from the root of the upstream JSON body.
+      body: |
+        Text: {{.text}}
+        Duration: {{.audioDurationMs}}ms
+        Words: {{.words}}
+
+  # Health probe for the speech-to-text service: argument-less GET on /stt/health.
+  - name: check_stt_service
+    description: "Check the health status of the speech-to-text service"
+    args: []
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/stt/health"
+      method: GET
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+    responseTemplate:
+      # Response fields are addressed from the root of the upstream JSON body.
+      body: |
+        Status: {{.status}}
+        Version: {{.version}}
+
+  # Text-to-speech: POSTs text plus optional voice and speed to /tts/synthesize.
+  - name: synthesize_speech
+    description: "Generate natural speech from text. 12 English voices (American + British, male + female). Speed adjustable 0.5x-2.0x. Returns WAV audio."
+    args:
+      - name: text
+        description: "Text to synthesize into speech (max 5000 characters)"
+        type: string
+        required: true
+      - name: voice
+        description: "Voice ID (af_heart, af_bella, am_adam, am_michael, bf_emma, bm_lewis, etc.)"
+        type: string
+        required: false
+        default: "af_heart"
+      - name: speed
+        description: "Speech speed multiplier (0.5 = slow, 1.0 = normal, 2.0 = fast)"
+        type: number
+        required: false
+        default: 1.0
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/tts/synthesize"
+      method: POST
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+        - key: "Content-Type"
+          value: "application/json"
+      # Send the args (text, voice, speed) as the JSON body instead of splicing
+      # them into a hand-written JSON string: text containing quotes or newlines,
+      # or an omitted `speed`, would otherwise produce invalid JSON.
+      argsToJsonBody: true
+    responseTemplate:
+      # NOTE(review): this static message replaces whatever the upstream returns,
+      # so the synthesized audio is not passed back to the caller. Confirm the
+      # upstream response format and surface the audio (or a link to it) here.
+      body: |
+        Audio synthesized successfully. Duration and audio data returned.
+
+  # Voice catalogue: argument-less GET on /tts/voices; returns the `voices` field.
+  - name: list_tts_voices
+    description: "List all available text-to-speech voices with their metadata"
+    args: []
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/tts/voices"
+      method: GET
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+    responseTemplate:
+      # Response fields are addressed from the root of the upstream JSON body.
+      body: |
+        {{.voices}}
+
+  # Health probe for the text-to-speech service: argument-less GET on /tts/health.
+  - name: check_tts_service
+    description: "Check the health status of the text-to-speech service"
+    args: []
+    requestTemplate:
+      url: "https://apim-ai-apis.azure-api.net/tts/health"
+      method: GET
+      headers:
+        # Subscription key is taken from server.config.apiKey.
+        - key: "Ocp-Apim-Subscription-Key"
+          value: "{{.config.apiKey}}"
+    responseTemplate:
+      # Response fields are addressed from the root of the upstream JSON body.
+      body: |
+        Status: {{.status}}
+        Version: {{.version}}