-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathframe.clj
311 lines (259 loc) · 9.05 KB
/
frame.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
(ns voice-fn.frame
"Defines the core frame concept and frame creation functions for the voice-fn pipeline.
A frame represents a discrete unit of data or control flow in the pipeline."
(:require
[malli.clj-kondo :as mc]
[malli.core :as m]
[malli.error :as me]
[voice-fn.schema :as schema]))
;; Record type backing every frame: a type tag, a payload and a timestamp.
(defrecord Frame [type data ts])

(defn frame?
  "Returns true when `frame` is an instance of the Frame record, false for
  anything else (including plain maps and nil)."
  [frame]
  (instance? Frame frame))
(defn create-frame
  "Builds a Frame of the given `type` carrying `data`, stamped with
  System/currentTimeMillis at creation time.

  Every value is stored twice: under the record field (:type, :data, :ts) and
  under the matching namespaced key (:frame/type, :frame/data, :frame/ts)."
  [type data]
  (let [now (System/currentTimeMillis)]
    (assoc (->Frame type data now)
           :frame/type type
           :frame/data data
           :frame/ts now)))
(defn system-frame?
  "Tells whether `frame` is a system frame that should be processed
  immediately, judged by its :frame/type. Returns true or false."
  [frame]
  (contains? #{:frame.system/start
               :frame.system/stop
               :frame.control/bot-interrupt
               :frame.user/speech-start
               :frame.user/speech-stop
               :frame.control/interrupt-start
               :frame.control/interrupt-stop}
             (:frame/type frame)))
(defmacro defframe
  "Define a frame creator function and its predicate with schema validation.

  For a given `name` this defines three vars in the calling namespace:
    <name>-schema  malli schema of the whole frame: a map with :frame/type
                   equal to `type`, :frame/data matching `schema`, and any
                   :frame/ts
    <name>         one-argument creator; builds the frame with create-frame
                   and throws ex-info \"Invalid frame data\" (ex-data keys
                   :error and :frame) when the frame fails the schema
    <name>?        predicate; true only for Frame instances that satisfy
                   <name>-schema

  Options map:
    :type    keyword stored as the frame type
    :schema  malli schema for the frame data; defaults to :any

  Only :type and :schema are read from the options map; any other key
  (including a misspelled :schema) is ignored and the default applies.

  Usage: (defframe audio-input
           \"Doc string\"
           {:type :frame.audio/input-raw
            :schema [:map [:data AudioData]]})"
  [name docstring {:keys [type schema] :or {schema :any}}]
  (let [frame-schema [:map
                      [:frame/type [:= type]]
                      [:frame/data schema]
                      [:frame/ts :any]]
        frame-schema-name (symbol (str name "-schema"))
        pred-name (symbol (str name "?"))]
    `(do
       ;; Define the frame schema
       (def ~frame-schema-name ~frame-schema)

       ;; Define the frame creator function with schema validation.
       ;; Validation goes through the schema var defined above (the same one
       ;; the predicate uses) instead of rebuilding the schema form per call.
       (def ~name
         ~docstring
         (fn
           [data#]
           (let [frame# (create-frame ~type data#)]
             (when-let [err# (me/humanize (m/explain ~frame-schema-name frame#))]
               (throw (ex-info "Invalid frame data"
                               {:error err#
                                :frame frame#})))
             frame#)))

       ;; Define the predicate function
       (def ~pred-name
         (fn [frame#]
           (and (frame? frame#)
                (nil? (m/explain ~frame-schema-name frame#)))))

       ;; Register malli function schemas for the creator and the predicate
       ;; (used for the clj-kondo type hints)
       (m/=> ~name [:=> [:cat ~schema] ~frame-schema-name])
       (m/=> ~pred-name [:=> [:cat any?] :boolean]))))
;;
;; System Frames
;; These frames control core pipeline functionality
;;
;; Expands (via defframe) into system-start, system-start? and
;; system-start-schema; the frame data must be a boolean.
(defframe system-start
"Frame sent when the pipeline begins"
{:type :frame.system/start
:schema :boolean})
;; Malli schema for frame predicates. A value passes when it is a function
;; that, called with a throwaway :test/frame frame, returns a boolean; a
;; function that throws on that probe is rejected.
(def FramePredicate
  [:fn {:error/message "Must be a function that takes a frame and returns boolean"
        :gen/fmap (fn [_] system-start?)} ; Example generator
   (fn [candidate]
     (if (fn? candidate)
       (try
         (boolean? (candidate (create-frame :test/frame {})))
         (catch Exception _
           false))
       false))])
;; Malli schema for frame creator functions. A value passes when it is a
;; function that, called with the single probe argument {:test "data"},
;; returns a Frame; a function that throws on that probe is rejected.
;;
;; NOTE(review): creators generated by defframe with a strict data schema
;; (e.g. system-start, whose data must be :boolean) throw on the probe map and
;; are therefore rejected here, including the example generator value below.
;; Confirm whether that is intended.
(def FrameCreator
  [:fn
   ;; The message describes the one-argument shape the validator actually calls.
   {:error/message "Must be a function that takes frame data and returns a frame"
    :gen/fmap (fn [_] system-start)} ; Example generator
   (fn [f]
     (and (fn? f)
          (try
            (let [result (f {:test "data"})]
              (frame? result))
            (catch Exception _
              false))))])
;; Frame data must be a boolean.
(defframe system-stop
"Frame sent when the pipeline stops"
{:type :frame.system/stop
:schema :boolean})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe system-error
"General error frame"
{:type :frame.system/error})
;; Frame data is validated against schema/PartialConfigSchema
;; (defined in voice-fn.schema).
(defframe system-config-change
"Frame with configuration changes for the running pipeline"
{:type :frame.system/config-change
:schema schema/PartialConfigSchema})
;;
;; Audio Frames
;; Frames for handling raw audio data
;;
;; Frame data is validated against schema/ByteArray (defined in voice-fn.schema).
(defframe audio-input-raw
"Raw audio input frame from input transport"
{:type :frame.audio/input-raw
:schema schema/ByteArray})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe audio-output-raw
"Raw audio output frame for playback through output transport"
{:type :frame.audio/output-raw})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe audio-tts-raw
"Raw audio frame generated by TTS service"
{:type :frame.audio.tts/output-raw})
;;
;; Transcription Frames
;; Frames for speech-to-text processing
;;
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe transcription
"Transcription result. NOTE: This doesn't mean it is a full transcription, but
a transcription chunk that the transcriptor has full confidence in."
{:type :frame.transcription/result})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe transcription-interim
"Interim transcription result"
{:type :frame.transcription/interim})
;;
;; Context Frames
;; Frames for managing conversation context
;;
;; Frame data is validated against schema/LLMContext (defined in voice-fn.schema).
(defframe llm-context
"Frame containing LLM context"
{:type :frame.llm/context
:schema schema/LLMContext})
;; Frame data is a map with required :messages and an optional, closed
;; :properties map whose keys (:run-llm?, :tool-call?, :on-update) are all
;; optional. :on-update may be nil or a zero-argument function.
(defframe llm-context-messages-append
"Frame containing messages that should be appended to the current
context."
{:type :frame.llm/context-messages-append
:schema [:map
[:messages schema/LLMContextMessages]
[:properties {:optional true}
[:map {:closed true}
[:run-llm? {:optional true
:description "Whether to send the new context further (for LLM query)"} :boolean]
[:tool-call? {:optional true
:description "Is the last message a tool call request?"} :boolean]
[:on-update {:optional true
:description "Callback called after tool result is added to context"} [:maybe [:=> [:cat] :any]]]]]]})
;; Frame data is validated against schema/LLMFunctionToolDefinition
;; (defined in voice-fn.schema).
(defframe llm-tools-replace
"Frame containing new tools that should replace existing ones. Used by
scenario manager when transitioning to a new node"
{:type :frame.llm/tools-replace
:schema schema/LLMFunctionToolDefinition})
;;
;; Scenario frames
;; Frames used by predefined scenarios
;;
;; Frame data is validated against schema/ScenarioUpdateContext
;; (defined in voice-fn.schema).
(defframe scenario-context-update
"Frame containing messages to append to the llm context and the new tools to
replace the old ones in order to create future transitions from the current node."
{:type :frame.scenario/context-update
:schema schema/ScenarioUpdateContext})
;;
;; LLM Output Frames
;; Frames for language model outputs
;;
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe llm-text-chunk
"Chunk of text from streaming LLM output"
{:type :frame.llm/text-chunk})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe llm-tool-call-chunk
"Chunk of tool call request. Needs to be assembled before use."
{:type :frame.llm/tool-call-chunk})
;; Frame data is validated against schema/LLMAssistantMessage
;; (defined in voice-fn.schema).
(defframe llm-tool-call-request
"Frame containing a tool call request"
{:type :frame.llm/tool-call-request
:schema schema/LLMAssistantMessage})
;; Frame data is a map with required :request and :result and an optional,
;; closed :properties map whose keys (:run-llm?, :on-update) are both optional.
;; :on-update may be nil or a zero-argument function.
(defframe llm-tool-call-result
"Frame containing the result of invoking a tool for the LLM."
{:type :frame.llm/tool-call-result
:schema [:map
[:request schema/LLMAssistantMessage]
[:result schema/LLMToolMessage]
[:properties {:optional true}
[:map {:closed true}
[:run-llm? {:optional true
:description "Wether to send the new context further (for LLM query)"} :boolean]
[:on-update {:optional true
:description "Callback called after tool result is added to context"} [:maybe [:=> [:cat] :any]]]]]]})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe llm-text-sentence
"Complete sentence from LLM output"
{:type :frame.llm/text-sentence})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe llm-full-response-start
"Indicates the start of an LLM response"
{:type :frame.llm/response-start})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe llm-full-response-end
"Indicates the end of an LLM response"
{:type :frame.llm/response-end})
;;
;; Vendor specific frames
;; Frames specific to certain vendors
;;
;; Malli schema for an alignment entry: three required vectors, two of ints
;; (:charStartTimesMs, :charDurationsMs) and one of strings (:chars).
(def XiAlignment [:map
[:charStartTimesMs [:vector :int]]
[:chars [:vector :string]]
[:charDurationsMs [:vector :int]]])
;; Frame data is a map where :alignment, :audio and :isFinal are required keys
;; (:alignment and :isFinal may be nil) and :normalizedAlignment is optional.
(defframe xi-audio-out
"Frame containing the full output from elevenlabs including chars & timings for chars"
{:type :frame.xi/audio-out
:schema [:map
[:alignment [:maybe XiAlignment]]
[:normalizedAlignment {:optional true} [:maybe XiAlignment]]
[:audio :string] ;; base64 audio
[:isFinal [:maybe :boolean]]]})
;;
;; User Interaction Frames
;; Frames for handling user speech events
;;
;; Frame data must be a boolean.
(defframe user-speech-start
"User started speaking"
{:type :frame.user/speech-start
:schema :boolean})
;; Frame data must be a boolean, mirroring user-speech-start. The option key
;; has to be spelled :schema; defframe ignores any other key and would fall
;; back to :any, accepting arbitrary data.
(defframe user-speech-stop
  "User stopped speaking"
  {:type :frame.user/speech-stop
   :schema :boolean})
;;
;; Control Frames
;; Frames for pipeline flow control
;;
;; Frame data must be a boolean.
(defframe control-bot-interrupt
"Bot should be interrupted"
{:type :frame.control/bot-interrupt
:schema :boolean})
;; Frame data must be a boolean.
(defframe control-interrupt-start
"Start pipeline interruption"
{:type :frame.control/interrupt-start
:schema :boolean})
;; Frame data must be a boolean.
(defframe control-interrupt-stop
"Stop pipeline interruption"
{:type :frame.control/interrupt-stop
:schema :boolean})
;;
;; Input/Output Text Frames
;; Frames for text processing
;;
;; Frame data must be a string. Note the generated names are speak-frame,
;; speak-frame? and speak-frame-schema.
(defframe speak-frame
"Text frame meant for TTS processors to generate speech from the input"
{:type :frame.tts/speak
:schema :string})
;; No :schema given, so defframe's default (:any) applies to the frame data.
(defframe text-input
"Text input frame for LLM processing"
{:type :frame.text/input})
;; REPL scratch form: the body of `comment` is not evaluated when the
;; namespace loads.
;; NOTE(review): mc/emit! presumably writes clj-kondo config from the m/=>
;; registrations made by defframe; confirm against the malli.clj-kondo docs.
(comment
(mc/emit!)
,)