
Commit 1397d13

(feat): export JsonMessage (#58)
Parent: 0e09fbf


47 files changed (+1020, -214 lines)

.fernignore

+1
@@ -12,6 +12,7 @@ src/wrapper
 src/index.ts
 
 # EVI WebSocket
+src/api/resources/empathicVoice/client/Client.ts
 src/api/resources/empathicVoice/resources/chat/index.ts
 src/api/resources/empathicVoice/resources/chat/client
 src/core/websocket

.mock/definition/empathic-voice/__package__.yml

+498 -71
Large diffs are not rendered by default.

.mock/definition/empathic-voice/configs.yml

+40
@@ -49,6 +49,11 @@ service:
             provider: provider
           ellm_model:
             allow_short_responses: true
+          timeouts:
+            inactivity:
+              enabled: true
+            max_duration:
+              enabled: true
     create-config:
       path: /v0/evi/configs
       method: POST
@@ -75,6 +80,7 @@ service:
             type: optional<list<optional<root.PostedBuiltinTool>>>
             docs: Built-in tool specification for a Config.
           event_messages: optional<root.PostedEventMessageSpecs>
+          timeouts: optional<root.PostedTimeoutSpecs>
       response:
         docs: Created
         type: root.ReturnConfig
@@ -127,6 +133,13 @@ service:
           on_new_chat:
             enabled: true
             text: text
+          timeouts:
+            inactivity:
+              enabled: true
+              duration_secs: 1
+            max_duration:
+              enabled: true
+              duration_secs: 1
     list-config-versions:
       path: /v0/evi/configs/{id}
       method: GET
@@ -178,6 +191,11 @@ service:
             provider: provider
           ellm_model:
             allow_short_responses: true
+          timeouts:
+            inactivity:
+              enabled: true
+            max_duration:
+              enabled: true
     create-config-version:
       path: /v0/evi/configs/{id}
       method: POST
@@ -205,6 +223,7 @@ service:
             type: optional<list<optional<root.PostedBuiltinTool>>>
             docs: Built-in tool specification for a Config.
           event_messages: optional<root.PostedEventMessageSpecs>
+          timeouts: optional<root.PostedTimeoutSpecs>
       response:
         docs: Created
         type: root.ReturnConfig
@@ -258,6 +277,13 @@ service:
           on_new_chat:
             enabled: true
             text: text
+          timeouts:
+            inactivity:
+              enabled: true
+              duration_secs: 1
+            max_duration:
+              enabled: true
+              duration_secs: 1
     delete-config:
       path: /v0/evi/configs/{id}
       method: DELETE
@@ -354,6 +380,13 @@ service:
           on_new_chat:
             enabled: true
             text: text
+          timeouts:
+            inactivity:
+              enabled: true
+              duration_secs: 1
+            max_duration:
+              enabled: true
+              duration_secs: 1
     delete-config-version:
       path: /v0/evi/configs/{id}/version/{version}
       method: DELETE
@@ -443,3 +476,10 @@ service:
           on_new_chat:
             enabled: true
             text: text
+          timeouts:
+            inactivity:
+              enabled: true
+              duration_secs: 1
+            max_duration:
+              enabled: true
+              duration_secs: 1
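Usage note: the new `timeouts` spec surfaces in the generated TypeScript client as an optional field on config creation. A minimal sketch, assuming the SDK's usual `HumeClient` entry point and Fern's camelCase field naming (`durationSecs` and `maxDuration` are assumptions, not confirmed by this diff):

import { HumeClient } from "hume";

const client = new HumeClient({ apiKey: "<your-api-key>" });

// Hypothetical values: end the chat after 2 minutes of user inactivity
// or 30 minutes of total duration.
const config = await client.empathicVoice.configs.createConfig({
    name: "config-with-timeouts",
    timeouts: {
        inactivity: { enabled: true, durationSecs: 120 },
        maxDuration: { enabled: true, durationSecs: 1800 },
    },
});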

.mock/definition/expression-measurement/__package__.yml

+13 -11

@@ -219,6 +219,7 @@ types:
          unique identifiers will be assigned to face bounding boxes to
          differentiate different faces. If `false`, all faces will be tagged
          with an `unknown` ID.
+      default: false
     min_face_size:
       type: optional<integer>
       docs: >-
@@ -235,6 +236,7 @@ types:
       docs: >-
         Whether to extract and save the detected faces in the artifacts zip
         created by each job.
+      default: false
   FacePrediction:
     properties:
       frame:
@@ -391,13 +393,12 @@ types:
       docs: >-
         If provided, a `POST` request will be made to the URL with the
         generated predictions on completion or the error message on failure.
-      validation:
-        format: url
     notify:
       type: optional<boolean>
       docs: >-
         Whether to send an email notification to the user upon job
         completion/failure.
+      default: false
   InferencePrediction:
     properties:
       file:
@@ -429,13 +430,12 @@ types:
       docs: >-
         If provided, a `POST` request will be made to the URL with the
         generated predictions on completion or the error message on failure.
-      validation:
-        format: url
     notify:
       type: optional<boolean>
       docs: >-
         Whether to send an email notification to the user upon job
         completion/failure.
+      default: false
     files: list<File>
   InferenceResults:
     properties:
@@ -519,6 +519,7 @@ types:
         unique identifiers will be assigned to spoken words to differentiate
         different speakers. If `false`, all speakers will be tagged with an
         `unknown` ID.
+      default: false
   LanguagePrediction:
     properties:
       text:
@@ -585,6 +586,7 @@ types:
         unique identifiers will be assigned to spoken words to differentiate
         different speakers. If `false`, all speakers will be tagged with an
         `unknown` ID.
+      default: false
   NerPrediction:
     properties:
       entity:
@@ -669,6 +671,7 @@ types:
         unique identifiers will be assigned to spoken words to differentiate
         different speakers. If `false`, all speakers will be tagged with an
         `unknown` ID.
+      default: false
   ProsodyPrediction:
     properties:
       text:
@@ -908,13 +911,12 @@ types:
       docs: >-
         If provided, a `POST` request will be made to the URL with the
         generated predictions on completion or the error message on failure.
-      validation:
-        format: url
     notify:
       type: optional<boolean>
       docs: >-
         Whether to send an email notification to the user upon job
         completion/failure.
+      default: false
   CustomModel:
     discriminated: false
     union:
@@ -944,11 +946,10 @@ types:
       task: optional<Task>
       evaluation: optional<EvaluationArgs>
       alternatives: optional<list<Alternative>>
-      callback_url:
-        type: optional<string>
-        validation:
-          format: url
-      notify: optional<boolean>
+      callback_url: optional<string>
+      notify:
+        type: optional<boolean>
+        default: false
   TrainingCustomModel:
     properties:
       id: string
@@ -963,6 +964,7 @@ types:
         unique identifiers will be assigned to spoken words to differentiate
         different speakers. If `false`, all speakers will be tagged with an
         `unknown` ID.
+      default: false
     confidence_threshold:
       type: optional<double>
       docs: >-
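Usage note: with `validation: format: url` removed, `callback_url` is accepted as a plain string, and `notify` now documents its `false` default. A sketch of passing both from the TypeScript client; the `startInferenceJob` method name and camelCase fields follow the SDK's generated batch API and should be treated as assumptions:

import { HumeClient } from "hume";

const client = new HumeClient({ apiKey: "<your-api-key>" });

// Predictions are POSTed to callbackUrl when the job finishes;
// notify defaults to false, so opt in to the email explicitly.
const job = await client.expressionMeasurement.batch.startInferenceJob({
    urls: ["https://example.com/sample-audio.mp3"],
    callbackUrl: "https://example.com/hume-callback",
    notify: true,
});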

.mock/definition/expression-measurement/stream.yml

+4
@@ -198,6 +198,7 @@ types:
          identifiers will be assigned to face bounding boxes to differentiate
          different faces. If false, all faces will be tagged with an "unknown"
          ID.
+      default: false
     fps_pred:
       type: optional<double>
       docs: >
@@ -327,6 +328,7 @@ types:
 
         Use reset_stream when one audio file is done being processed and you
         do not want context to leak across files.
+      default: false
     raw_text:
       type: optional<boolean>
       docs: >
@@ -336,6 +338,7 @@ types:
         This parameter is useful if you want to send text to be processed by
         the language model, but it cannot be used with other file types like
         audio, image, or video.
+      default: false
     job_details:
       type: optional<boolean>
       docs: >
@@ -348,6 +351,7 @@ types:
 
 
         This parameter is useful to get the unique job ID.
+      default: false
     payload_id:
       type: optional<string>
       docs: >

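Usage note: the streaming flags documented above (`reset_stream`, `raw_text`, `job_details`) all default to `false`. A sketch of a raw WebSocket payload that opts into them; the endpoint URL and exact payload shape are assumptions based on the streaming API definition:

// Hypothetical streaming request over the expression measurement socket.
const socket = new WebSocket("wss://api.hume.ai/v0/stream/models?apiKey=<key>");

socket.addEventListener("open", () => {
    socket.send(
        JSON.stringify({
            models: { language: {} },
            data: "Hello from the streaming API",
            raw_text: true,      // treat `data` as plain text, not media
            job_details: true,   // ask the socket to echo back the job ID
            reset_stream: false, // set true between unrelated files to clear context
        }),
    );
});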
.mock/fern.config.json

+1 -1

@@ -1,4 +1,4 @@
 {
     "organization" : "hume",
-    "version" : "0.31.9"
+    "version" : "0.31.24"
 }

package.json

+1 -1

@@ -1,6 +1,6 @@
 {
     "name": "hume",
-    "version": "0.8.1-beta6",
+    "version": "0.8.1-beta7",
     "private": false,
     "repository": "https://github.com/HumeAI/hume-typescript-sdk",
     "main": "./index.js",

src/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.ts

+1
@@ -24,4 +24,5 @@ export interface PostedConfig {
     /** Built-in tool specification for a Config. */
     builtinTools?: (Hume.empathicVoice.PostedBuiltinTool | undefined)[];
     eventMessages?: Hume.empathicVoice.PostedEventMessageSpecs;
+    timeouts?: Hume.empathicVoice.PostedTimeoutSpecs;
 }

src/api/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.ts

+1
@@ -20,4 +20,5 @@ export interface PostedConfigVersion {
     /** Built-in tool specification for a Config. */
     builtinTools?: (Hume.empathicVoice.PostedBuiltinTool | undefined)[];
     eventMessages?: Hume.empathicVoice.PostedEventMessageSpecs;
+    timeouts?: Hume.empathicVoice.PostedTimeoutSpecs;
 }

src/api/resources/empathicVoice/types/AssistantEnd.ts

+5 -1

@@ -6,7 +6,11 @@
  * When provided, the output is an assistant end message.
  */
 export interface AssistantEnd {
-    /** The type of message sent through the socket; for an Assistant End message, this must be `assistant_end`. */
+    /**
+     * The type of message sent through the socket; for an Assistant End message, this must be `assistant_end`.
+     *
+     * This message indicates the conclusion of the assistant’s response, signaling that the assistant has finished speaking for the current conversational turn.
+     */
     type: "assistant_end";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
     customSessionId?: string;
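Usage note: since `assistant_end` marks the end of the assistant's turn, clients typically key UI state off it. A minimal sketch over a raw chat WebSocket (the endpoint URL is an assumption):

const socket = new WebSocket("wss://api.hume.ai/v0/evi/chat?api_key=<key>");

socket.addEventListener("message", (event) => {
    const message = JSON.parse(event.data);
    if (message.type === "assistant_end") {
        // EVI has finished speaking for this turn; e.g. re-enable the mic.
        console.log("Assistant turn complete");
    }
});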

src/api/resources/empathicVoice/types/AssistantInput.ts

+6 -2

@@ -6,10 +6,14 @@
  * When provided, the input is spoken by EVI.
  */
 export interface AssistantInput {
-    /** The type of message sent through the socket; for an Assistant Input message, this must be `assistant_input`. */
+    /** The type of message sent through the socket; must be `assistant_input` for our server to correctly identify and process it as an Assistant Input message. */
     type: "assistant_input";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
     customSessionId?: string;
-    /** Text to be synthesized. */
+    /**
+     * Assistant text to synthesize into spoken audio and insert into the conversation.
+     *
+     * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user’s expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.Assistant%20Message.type).
+     */
     text: string;
 }
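Usage note: a client makes EVI speak scripted text by sending an Assistant Input message. A sketch, reusing the `socket` from the earlier AssistantEnd example:

// Have EVI synthesize and speak this text in the current conversation.
socket.send(
    JSON.stringify({
        type: "assistant_input",
        text: "Let me walk you through the results.",
    }),
);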

src/api/resources/empathicVoice/types/AssistantMessage.ts

+7 -3

@@ -8,16 +8,20 @@ import * as Hume from "../../../index";
  * When provided, the output is an assistant message.
  */
 export interface AssistantMessage {
-    /** The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`. */
+    /**
+     * The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`.
+     *
+     * This message contains both a transcript of the assistant’s response and the expression measurement predictions of the assistant’s audio output.
+     */
     type: "assistant_message";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
     customSessionId?: string;
-    /** ID of the assistant message. */
+    /** ID of the assistant message. Allows the Assistant Message to be tracked and referenced. */
     id?: string;
     /** Transcript of the message. */
     message: Hume.empathicVoice.ChatMessage;
     /** Inference model results. */
     models: Hume.empathicVoice.Inference;
-    /** Indicates if this message was constructed from a text input message. */
+    /** Indicates if this message was inserted into the conversation as text from an [Assistant Input message](/reference/empathic-voice-interface-evi/chat/chat#send.Assistant%20Input.text). */
     fromText: boolean;
 }
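Usage note: an Assistant Message carries both the transcript and expression measures for EVI's speech. A sketch of reading both; the `message.content` and `models.prosody.scores` paths follow the ChatMessage and Inference types but are assumptions as far as this diff shows:

socket.addEventListener("message", (event) => {
    const msg = JSON.parse(event.data);
    if (msg.type === "assistant_message") {
        console.log("EVI said:", msg.message.content);        // transcript
        console.log("Prosody:", msg.models.prosody?.scores);  // expression measures
    }
});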

src/api/resources/empathicVoice/types/AudioConfiguration.ts

+3 -3

@@ -5,10 +5,10 @@
 import * as Hume from "../../../index";
 
 export interface AudioConfiguration {
-    /** Audio encoding. */
+    /** Encoding format of the audio input, such as `linear16`. */
     encoding: Hume.empathicVoice.Encoding;
-    /** Number of channels. */
+    /** Number of audio channels. */
     channels: number;
-    /** Audio sample rate. */
+    /** Audio sample rate. Number of samples per second in the audio input, measured in Hertz. */
     sampleRate: number;
 }
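Usage note: an AudioConfiguration is sent inside a Session Settings message so EVI can decode the incoming stream. A sketch assuming linear16 PCM, mono, 44.1 kHz, and wire-level snake_case field names:

socket.send(
    JSON.stringify({
        type: "session_settings",
        audio: {
            encoding: "linear16",
            channels: 1,
            sample_rate: 44100,
        },
    }),
);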

src/api/resources/empathicVoice/types/AudioInput.ts

+12 -2

@@ -6,10 +6,20 @@
  * When provided, the input is audio.
  */
 export interface AudioInput {
-    /** The type of message sent through the socket; for an Audio Input message, this must be `audio_input`. */
+    /**
+     * The type of message sent through the socket; must be `audio_input` for our server to correctly identify and process it as an Audio Input message.
+     *
+     * This message is used for sending audio input data to EVI for processing and expression measurement. Audio data should be sent as a continuous stream, encoded in Base64.
+     */
     type: "audio_input";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
     customSessionId?: string;
-    /** Base64 encoded audio input. */
+    /**
+     * Base64 encoded audio input to insert into the conversation.
+     *
+     * The audio input must be captured and transmitted to EVI as a continuous stream, with the audio data sent in small chunks for better transcription quality. When capturing audio through the browser, we recommend recording the audio in 100ms intervals and adjusting from there to determine if smaller or larger chunks are needed. These chunks should be continuously sent to EVI as Audio Input messages.
+     *
+     * The content of an Audio Input message is treated as the user’s speech to EVI. EVI processes the audio, conducts expression measurement using the prosody model, and responds accordingly.
+     */
     data: string;
 }
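Usage note: the doc comment above recommends ~100ms chunks. A browser sketch using MediaRecorder to capture, Base64-encode, and stream each chunk as an Audio Input message:

const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const recorder = new MediaRecorder(stream);

recorder.addEventListener("dataavailable", async (event) => {
    const buffer = await event.data.arrayBuffer();
    // Fine for ~100ms chunks; use a streaming encoder for larger buffers.
    const base64 = btoa(String.fromCharCode(...new Uint8Array(buffer)));
    socket.send(JSON.stringify({ type: "audio_input", data: base64 }));
});

recorder.start(100); // emit a chunk every 100ms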

src/api/resources/empathicVoice/types/AudioOutput.ts

+2 -2

@@ -10,8 +10,8 @@ export interface AudioOutput {
     type: "audio_output";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
     customSessionId?: string;
-    /** ID of the audio output. */
+    /** ID of the audio output. Allows the Audio Output message to be tracked and referenced. */
     id: string;
-    /** Base64 encoded audio output. */
+    /** Base64 encoded audio output. This encoded audio is transmitted to the client, where it can be decoded and played back as part of the user interaction. */
     data: string;
 }
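Usage note: clients decode `data` and play it back. A minimal browser sketch that plays each chunk as it arrives; the audio MIME type is an assumption, and production clients queue chunks for gapless playback:

socket.addEventListener("message", (event) => {
    const msg = JSON.parse(event.data);
    if (msg.type === "audio_output") {
        const bytes = Uint8Array.from(atob(msg.data), (c) => c.charCodeAt(0));
        const blob = new Blob([bytes], { type: "audio/wav" }); // format assumed
        new Audio(URL.createObjectURL(blob)).play();
    }
});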

src/api/resources/empathicVoice/types/BuiltInTool.ts

+3
@@ -2,4 +2,7 @@
  * This file was auto-generated by Fern from our API Definition.
  */
 
+/**
+ * Name of the built-in tool. Set to `web_search` to equip EVI with the built-in Web Search tool.
+ */
 export type BuiltInTool = "web_search";

src/api/resources/empathicVoice/types/BuiltinToolConfig.ts

+1
@@ -6,5 +6,6 @@ import * as Hume from "../../../index";
 
 export interface BuiltinToolConfig {
     name: Hume.empathicVoice.BuiltInTool;
+    /** Optional text passed to the supplemental LLM if the tool call fails. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation. */
     fallbackContent?: string;
 }
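Usage note: `fallbackContent` gives the supplemental LLM something to say when a built-in tool call fails. A sketch of attaching the Web Search tool to a config via the SDK; the method shape mirrors the earlier createConfig example and remains an assumption:

import { HumeClient } from "hume";

const client = new HumeClient({ apiKey: "<your-api-key>" });

const config = await client.empathicVoice.configs.createConfig({
    name: "config-with-web-search",
    builtinTools: [
        {
            name: "web_search",
            fallbackContent:
                "I couldn't search the web just now, so I'll answer from what I know.",
        },
    ],
});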
