@@ -107,8 +107,6 @@ class AIBackendManager: ObservableObject {
107107
108108 // OpenWebUI-specific
109109 @Published var openWebUIServerURL : String = " http://localhost:8080 "
110- @Published var availableOllamaModels : [ String ] = [ ]
111- @Published var selectedOllamaModel : String = " "
112110 @Published var availableMLXModels : [ String ] = [ ]
113111 @Published var selectedMLXModel : String = " "
114112
@@ -371,6 +369,97 @@ class AIBackendManager: ObservableObject {
371369 }
372370 }
373371
372+ // MARK: - Streaming AI Interface
373+
/// Generates text with the active backend and delivers the output through `onToken`.
///
/// Only the Ollama backend is streamed chunk by chunk. For every other backend the
/// complete result of `generate` is passed to `onToken` in a single call.
///
/// - Parameters:
///   - prompt: The prompt text forwarded to the backend.
///   - systemPrompt: Optional system prompt forwarded to the backend; defaults to `nil`.
///   - temperature: Sampling temperature forwarded to the backend; defaults to `0.7`.
///   - maxTokens: Token limit forwarded to the backend; defaults to `2048`.
///   - onToken: Invoked with each piece of generated text.
/// - Throws: `AIBackendError.noBackendAvailable` when `activeBackend` is `nil`,
///   otherwise any error thrown by the backend call.
func generateStream(
    prompt: String,
    systemPrompt: String? = nil,
    temperature: Float = 0.7,
    maxTokens: Int = 2048,
    onToken: @escaping (String) -> Void
) async throws {
    guard let currentBackend = activeBackend else {
        throw AIBackendError.noBackendAvailable
    }

    // Mark the manager busy for the whole call; the defer resets the flag on every exit path.
    isProcessing = true
    defer { isProcessing = false }

    // Streaming path: Ollama only.
    if case .ollama = currentBackend {
        try await streamWithOllama(
            prompt: prompt,
            systemPrompt: systemPrompt,
            temperature: temperature,
            maxTokens: maxTokens,
            onToken: onToken
        )
        return
    }

    // Every other backend: run the non-streaming call and emit its result as one chunk.
    let fullText = try await generate(
        prompt: prompt,
        systemPrompt: systemPrompt,
        temperature: temperature,
        maxTokens: maxTokens
    )
    onToken(fullText)
}
410+
/// Streams a completion from Ollama's `/api/generate` endpoint as line-delimited JSON.
///
/// The request is sent with `"stream": true`. The response body is read line by line;
/// each non-empty line is decoded as a JSON object and its `response` text, when
/// non-empty, is passed to `onToken`. Reading stops at the first chunk whose `done`
/// flag is `true`.
///
/// - Parameters:
///   - prompt: The prompt sent as the `prompt` field.
///   - systemPrompt: When non-nil, sent as the `system` field.
///   - temperature: Sent as `options.temperature`.
///   - maxTokens: Sent as `options.num_predict`.
///   - onToken: Invoked with each non-empty `response` fragment, in arrival order.
/// - Throws: `AIBackendError.invalidConfiguration` when the endpoint URL cannot be built;
///   `URLError(.badServerResponse)` when the server answers with a non-2xx status or
///   reports an `error` in the stream; any error from JSON serialization or the transport.
private func streamWithOllama(
    prompt: String,
    systemPrompt: String?,
    temperature: Float,
    maxTokens: Int,
    onToken: @escaping (String) -> Void
) async throws {
    guard let url = URL(string: "\(ollamaBaseURL)/api/generate") else {
        throw AIBackendError.invalidConfiguration
    }

    var requestBody: [String: Any] = [
        "model": selectedOllamaModel,
        "prompt": prompt,
        "stream": true,
        "options": [
            "temperature": temperature,
            "num_predict": maxTokens
        ]
    ]

    if let systemPrompt = systemPrompt {
        requestBody["system"] = systemPrompt
    }

    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = try JSONSerialization.data(withJSONObject: requestBody)

    /// One line of the stream. All fields are optional so that an error object
    /// (which carries only `error`) still decodes instead of being dropped.
    struct OllamaStreamChunk: Decodable {
        let response: String?
        let done: Bool?
        let error: String?
    }

    let (bytes, response) = try await URLSession.shared.bytes(for: request)

    // A non-2xx reply (e.g. unknown model) carries no tokens; fail instead of
    // returning as if an empty generation had succeeded.
    if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
        throw URLError(
            .badServerResponse,
            userInfo: [NSLocalizedDescriptionKey: "Ollama returned HTTP \(http.statusCode)"]
        )
    }

    // Created once; the decoder is reused for every line.
    let decoder = JSONDecoder()

    for try await line in bytes.lines {
        guard !line.isEmpty else { continue }

        // Lines that are not valid JSON objects are skipped, as before (best effort).
        guard let chunk = try? decoder.decode(OllamaStreamChunk.self, from: Data(line.utf8)) else {
            continue
        }

        // Surface server-reported failures instead of silently ending the stream.
        if let message = chunk.error {
            throw URLError(.badServerResponse, userInfo: [NSLocalizedDescriptionKey: message])
        }

        if let text = chunk.response, !text.isEmpty {
            onToken(text)
        }
        if chunk.done == true { break }
    }
}
462+
374463 // MARK: - Ollama Implementation
375464
376465 private func generateWithOllama(
@@ -1101,52 +1190,5 @@ struct AIBackendSettingsView_Previews: PreviewProvider {
/// Preview content: a default-constructed `AIBackendSettingsView`.
static var previews: some View { AIBackendSettingsView() }
1104-
1105- // MARK: - Dynamic Model Discovery
1106-
/// Loads the model list from the local Ollama instance (`/api/tags` on 127.0.0.1:11434).
///
/// On success, `availableOllamaModels` is replaced with the returned model names and,
/// if `selectedOllamaModel` is empty, the first name becomes the selection. Both
/// assignments run inside `MainActor.run`. Any failure is logged with `NSLog` and
/// otherwise ignored.
func fetchAvailableModels() async {
    /// Shape of the `/api/tags` payload as decoded here.
    struct OllamaModelsResponse: Codable {
        struct Model: Codable {
            let name: String
            let size: Int64?
        }
        let models: [Model]
    }

    guard let tagsURL = URL(string: "http://127.0.0.1:11434/api/tags") else { return }

    do {
        let (payload, _) = try await URLSession.shared.data(from: tagsURL)
        let decoded = try JSONDecoder().decode(OllamaModelsResponse.self, from: payload)
        await MainActor.run {
            self.availableOllamaModels = decoded.models.map(\.name)
            // Keep an existing selection; only fill in a default when none is set.
            if self.selectedOllamaModel.isEmpty, let firstName = self.availableOllamaModels.first {
                self.selectedOllamaModel = firstName
            }
        }
    } catch {
        NSLog("[AIBackendManager] Failed to fetch Ollama models: \(error)")
    }
}
1130-
/// Loads the model list from the local MLX server (`/v1/models` on 127.0.0.1:5050).
///
/// On success, `availableMLXModels` is replaced with the returned model ids and,
/// if `selectedMLXModel` is empty, the first id becomes the selection. Both
/// assignments run inside `MainActor.run`. Any failure is logged with `NSLog` and
/// otherwise ignored.
func fetchMLXModels() async {
    /// Shape of the `/v1/models` payload as decoded here.
    struct MLXModelsResponse: Codable {
        struct Model: Codable { let id: String }
        let data: [Model]
    }

    guard let modelsURL = URL(string: "http://127.0.0.1:5050/v1/models") else { return }

    do {
        let (payload, _) = try await URLSession.shared.data(from: modelsURL)
        let decoded = try JSONDecoder().decode(MLXModelsResponse.self, from: payload)
        await MainActor.run {
            self.availableMLXModels = decoded.data.map(\.id)
            // Keep an existing selection; only fill in a default when none is set.
            if self.selectedMLXModel.isEmpty, let firstID = self.availableMLXModels.first {
                self.selectedMLXModel = firstID
            }
        }
    } catch {
        NSLog("[AIBackendManager] Failed to fetch MLX models: \(error)")
    }
}
11511193}
11521194#endif
0 commit comments