669 changes: 658 additions & 11 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "sllama"
version = "0.1.10"
version = "0.1.11"
edition = "2024"

[dependencies]
@@ -9,3 +9,5 @@ serde = { version = "1.0.219", features = ["derive"] }
toml = "0.8.23"
tempfile = "3.20.0"
rustyline = "16.0.0"
serde_json = "1.0.140"
ureq = { version = "3.0.11", features = ["json"] }
23 changes: 23 additions & 0 deletions LICENSES/serde_json-MIT
@@ -0,0 +1,23 @@
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
21 changes: 21 additions & 0 deletions LICENSES/ureq-MIT
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Martin Algesten

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
7 changes: 7 additions & 0 deletions changelog.md
@@ -1,5 +1,12 @@
# Changelog

## 0.1.11 - 2025-06-14

_Switch to the Ollama API instead of `ollama run` commands_

### Prompting
Update the message format to match the `/chat` endpoint requirements.

## 0.1.10 - 2025-06-14

_Implement basic command completion and hinting with rustyline_
32 changes: 25 additions & 7 deletions readme.md
@@ -1,17 +1,20 @@
# silent-llama

A command-line interface for interacting with Ollama AI models.
A command-line interface for interacting with the Ollama API.

## Features

- Store conversations as files
- Add context to a session with `-f/--file` flag
- Use commands to modify and customize the current session
- Prompts are built in the following way:
1. System prompt
2. Context file
3. History file
4. Current user prompt
- Prompts are built in the following way and sent using the
  `/chat` [endpoint (without streaming)](https://github.com/ollama/ollama/blob/main/docs/api.md#chat-request-no-streaming); a sketch of the resulting request follows the table

| Role | Content |
|--------|--------------------------------------------------------------|
| system | sllama system prompt |
| system | context file |
| user | conversation history + current prompt (TODO: format history) |
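
A minimal sketch of the request this produces, assuming the layout used in `src/ollama_client.rs` (the model name, placeholder strings, and prompt wording below are illustrative, not the shipped values):

```rust
// Sketch of a non-streaming /api/chat request matching the table above.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let body = serde_json::json!({
        "model": "gemma3:4b",
        "messages": [
            { "role": "system", "content": "sllama system prompt" },
            { "role": "system", "content": "Additional context that the user has provided: <context file>" },
            { "role": "user", "content": "Here's the conversation so far: <history>\n\nHere's the user's latest prompt: <prompt>" }
        ],
        "stream": false
    });

    // Send the request and read the assistant's reply from the "message" field.
    let mut response = ureq::post("http://localhost:11434/api/chat").send_json(&body)?;
    let parsed: serde_json::Value = response.body_mut().read_json()?;
    println!("{}", parsed["message"]["content"].as_str().unwrap_or(""));
    Ok(())
}
```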

## Installation

@@ -125,16 +128,29 @@ You can tell where you have previously responded by --- AI Response --- (added a
mode = "emacs"
```

### Configuring Ollama

Ollama unloads models after a set time. This can be controlled either through an environment variable or through the
[message itself](#todo).

sllama sends an empty message to preload the model before calling it, and retries requests that get an empty
response from the model.

[Ollama Docs - Keeping a model loaded in memory](https://ollama.readthedocs.io/en/faq/?h=keep#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately)
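
The environment variable in question is `OLLAMA_KEEP_ALIVE`; the API also accepts a `keep_alive` field on individual requests. A hypothetical sketch of sending it with the preload message (not implemented in sllama yet, see the TODO list; the value and model name are placeholders):

```rust
// Hypothetical sketch: sllama does not send keep_alive yet (see the TODO list below).
fn preload_model() -> Result<(), ureq::Error> {
    let body = serde_json::json!({
        "model": "gemma3:4b",  // placeholder model name
        "messages": [],        // an empty message list only loads the model into memory
        "keep_alive": "30m",   // how long Ollama keeps the model loaded after this request
        "stream": false
    });
    ureq::post("http://localhost:11434/api/chat").send_json(&body)?;
    Ok(())
}
```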

## TODO

- [x] Clarify how the prompt is formed
- [x] Add a configuration file
- [x] Integrate rustyline
- [x] Use the Ollama API instead of `ollama run` commands
- [ ] Parse the chat history to a correctly formatted JSON
- [ ] Delimiter customization
- [ ] Add `keep_alive` configuration that is sent with the API requests
- [ ] Implement completions with rustyline
- [x] Commands
- [ ] Files
- [ ] Support multiline input with shift + enter (using rustyline)
- [ ] Use `ollama server` and API calls instead
- [ ] Allow changing the context file during a chat
- [ ] Add support for knowledge directory
- [ ] Re-implement AI response interruption
@@ -146,6 +162,8 @@ mode = "emacs"

- [Ollama](https://github.com/ollama/ollama) - [MIT](LICENSES/ollama-MIT)
- [serde](https://github.com/serde-rs/serde) - [MIT](LICENSES/serde-MIT)
- [serde_json](https://github.com/serde-rs/json) - [MIT](LICENSES/serde_json-MIT)
- [ureq](https://github.com/algesten/ureq) - [MIT](LICENSES/ureq-MIT)
- [toml](https://github.com/toml-rs/toml) - [MIT](LICENSES/toml-MIT)
- [clap](https://github.com/clap-rs/clap) - [MIT](LICENSES/clap-MIT)
- [tempfile](https://github.com/Stebalien/tempfile) - [MIT](LICENSES/tempfile-MIT)
15 changes: 15 additions & 0 deletions src/main.rs
@@ -78,6 +78,19 @@ fn main() -> io::Result<()> {
println!("You're conversing with {} model", &config.model);
let mut ollama_client = OllamaClient::new(config.model.clone(), config.system_prompt.clone());

match ollama_client.verify() {
Ok(s) => println!("{}", s),
Err(e) => {
println!("\n\nModel is not available: {}", e);
println!(
"Check that Ollama is installed or run `ollama pull {}` to pull the model.",
config.model
);

std::process::exit(1);
}
}

// Main conversation loop
loop {
// Prompt the user for input
@@ -139,6 +152,8 @@ fn main() -> io::Result<()> {
input_file_content.as_deref(),
)?;

println!("{}", ollama_response);

history.append_user_input(&user_prompt)?;

history.append_ai_response(&ollama_response)?;
162 changes: 94 additions & 68 deletions src/ollama_client.rs
@@ -14,17 +14,42 @@
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
use serde::{Deserialize, Serialize};
use std::io;

use std::io::{BufReader, Read, Write};
use std::process::{Command, Stdio};
use std::time::Duration;
use std::{io, thread};
static LLM_PROTOCOL: &str = "http";
static LLM_HOST: &str = "localhost";
static LLM_PORT: &str = "11434";
static LLM_ENDPOINT: &str = "/api/chat";

pub(crate) struct OllamaClient {
model: String,
pub(crate) system_prompt: String,
}

#[derive(Serialize)]
pub(crate) struct OllamaRequest {
pub(crate) message_history: String,
pub(crate) current_prompt: String,
pub(crate) context: Option<String>,
pub(crate) system_prompt: String,
}

#[derive(Deserialize, Debug)]
pub(crate) struct OllamaResponse {
pub(crate) model: String,
pub(crate) created_at: String,
pub(crate) message: OllamaMessage,
pub(crate) done: bool,
pub(crate) done_reason: String,
}

#[derive(Deserialize, Debug)]
pub(crate) struct OllamaMessage {
pub(crate) role: String,
pub(crate) content: String,
}

impl OllamaClient {
pub(crate) fn new(model: String, system_prompt: String) -> Self {
Self {
@@ -33,84 +58,73 @@ impl OllamaClient {
}
}

/// Send an empty message to ollama to preload the model.
pub(crate) fn verify(&self) -> io::Result<String> {
let send_body = serde_json::json!({
"model": self.model,
});

let mut response = ureq::post(Self::api_url())
.send_json(&send_body)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

let ollama_response = response
.body_mut()
.read_to_string()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

Ok(ollama_response)
}

pub(crate) fn generate_response(
&self,
history_content: &str,
user_prompt: &str,
context_content: Option<&str>,
) -> io::Result<String> {
// Create the ollama command with stdout piped
let mut cmd = Command::new("ollama")
.args(&["run", &self.model])
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()?;

// Create the input for the ollama process
if let Some(mut stdin) = cmd.stdin.take() {
// First, add the system prompt
stdin.write_all(b"Here is the system prompt: ")?;
stdin.write_all(self.system_prompt.as_bytes())?;

// Then add the context file content if available
if let Some(ref content) = context_content {
stdin.write_all(b"\n\nAdditional context from file: ")?;
stdin.write_all(content.as_bytes())?;
}

// Then include the full history file for context
stdin.write_all(b"\n\nPrevious conversation: ")?;
stdin.write_all(history_content.as_bytes())?;

// Finally, add the user prompt
stdin.write_all(b"\n\nCurrent user prompt: ")?;
stdin.write_all(user_prompt.as_bytes())?;
let send_body = serde_json::json!({
"model": self.model,
"messages": [
{ "role": "system", "content": self.system_prompt },
{ "role": "system", "content": format!("Additional context that the user has provided: {}", context_content.unwrap_or("")) },
{ "role": "user", "content": format!("Here's the conversation so far: {}\n\n Here's the user's latest prompt: {}", history_content, user_prompt) },
],
"stream": false,
});

let mut response = ureq::post(Self::api_url())
.send_json(&send_body)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

let ollama_response: OllamaResponse = response
.body_mut()
.read_json()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

if ollama_response.done
&& ollama_response.done_reason == "load"
&& ollama_response.message.content.is_empty()
{
println!("Model responded with an empty message. Retrying request...");

std::thread::sleep(std::time::Duration::from_secs(1));

return self.generate_response(history_content, user_prompt, context_content);
}

let stdout = cmd.stdout.take().expect("Failed to open stdout");
let mut reader = BufReader::new(stdout);
let full_response =
read_process_output_with_interrupt(&mut reader).expect("error reading process output");
let ollama_response = String::from_utf8_lossy(&full_response).to_string();

Ok(ollama_response)
Ok(ollama_response.message.content)
}

pub(crate) fn update_system_prompt(&mut self, new_system_prompt: String) {
self.system_prompt = new_system_prompt;
}
}

fn read_process_output_with_interrupt(reader: &mut BufReader<impl Read>) -> io::Result<Vec<u8>> {
let mut buffer = [0; 1024];
let mut full_response = Vec::new();

loop {
// Set up non-blocking read with timeout
match reader.read(&mut buffer) {
Ok(0) => break, // End of stream
Ok(bytes_read) => {
// Write the chunk to console
io::stdout().write_all(&buffer[..bytes_read])?;
io::stdout().flush()?;

// Store the chunk for later file writing
full_response.extend_from_slice(&buffer[..bytes_read]);
}
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {
// Would block, just wait a bit and try again
thread::sleep(Duration::from_millis(10));
continue;
}
Err(e) => return Err(e),
}

// Small delay to reduce CPU usage and allow interrupt checking
thread::sleep(Duration::from_millis(10));
fn api_url() -> String {
format!(
"{}://{}:{}{}",
LLM_PROTOCOL, LLM_HOST, LLM_PORT, LLM_ENDPOINT
)
}

Ok(full_response)
}

#[cfg(test)]
Expand All @@ -119,12 +133,24 @@ mod tests {

#[test]
fn test_ollama_client_creation() {
let model = "llama2".to_string();
let model = "gemma3:4b".to_string();
let system_prompt = "You are a helpful assistant.".to_string();

let client = OllamaClient::new(model.clone(), system_prompt.clone());

assert_eq!(client.model, model);
assert_eq!(client.system_prompt, system_prompt);
}

#[test]
fn test_update_system_prompt() {
let model = "gemma3:4b".to_string();
let initial_prompt = "Initial prompt".to_string();
let new_prompt = "New system prompt".to_string();

let mut client = OllamaClient::new(model, initial_prompt);
client.update_system_prompt(new_prompt.clone());

assert_eq!(client.system_prompt, new_prompt);
}
}