Moved model downloading ownership to CLI (#38)

madclaws · web-flow · commit fe14a70e2f3f · 2026-01-07T23:34:05.000+05:30
* feat: Moved model downloading ownership to cli from py server

* refactor: passing CI

* fix: using String's ends_with instead of contains

* fix: changed the default relay_count to 10

* fix: changed retry_count to relay_count
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/server/backend/mlx.py b/server/backend/mlx.py
@@ -12,7 +12,7 @@
 
 logger = logging.getLogger("app")
 
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, Iterator, List, Optional, Union
 
 _model_cache: Dict[str, MLXRunner] = {}
 _default_max_tokens: Optional[int] = None  # Use dynamic model-aware limits by default
@@ -181,3 +181,4 @@ def format_chat_messages_for_runner(
 def count_tokens(text: str) -> int:
     """Rough token count estimation."""
     return int(len(text.split()) * 1.3)  # Approximation, convert to int
+
diff --git a/tiles/Cargo.toml b/tiles/Cargo.toml
@@ -13,4 +13,4 @@ anyhow = "1.0"
 tokio = { version = "1" , features = ["macros", "rt-multi-thread"]}
 owo-colors = "4"
 futures-util = "0.3"
-
+hf-hub = {version = "0.4", features = ["tokio"]}
diff --git a/tiles/src/core/mod.rs b/tiles/src/core/mod.rs
@@ -1 +1,3 @@
+// to be deprecated and removed, the core stuff will be moved to tilekit sdk
+
 pub mod health;
diff --git a/tiles/src/lib.rs b/tiles/src/lib.rs
@@ -1,5 +1,5 @@
 pub mod core;
 pub mod runtime;
-
+pub mod utils;
 #[cfg(test)]
 mod tests {}
diff --git a/tiles/src/main.rs b/tiles/src/main.rs
@@ -1,7 +1,7 @@
 use std::error::Error;
 
 use clap::{Args, Parser, Subcommand};
-use tiles::runtime::{build_runtime, RunArgs};
+use tiles::runtime::{RunArgs, build_runtime};
 mod commands;
 #[derive(Debug, Parser)]
 #[command(name = "tiles")]
@@ -31,10 +31,9 @@ enum Commands {
 
 #[derive(Debug, Args)]
 struct RunFlags {
-    /// Number of chat retries before giving up (default: 6)
-    #[arg(short = 'r', long, default_value_t = 6)]
-    retry_count: u32,
-
+    /// Max times cli communicates with the model until it gets a proper reply for a user prompt
+    #[arg(short = 'r', long, default_value_t = 10)]
+    relay_count: u32,
     // Future flags go here:
     // #[arg(long, default_value_t = 6969)]
     // port: u16,
@@ -61,10 +60,13 @@ pub async fn main() -> Result<(), Box<dyn Error>> {
     let cli = Cli::parse();
     let runtime = build_runtime();
     match cli.command {
-        Commands::Run { modelfile_path, flags } => {
+        Commands::Run {
+            modelfile_path,
+            flags,
+        } => {
             let run_args = RunArgs {
                 modelfile_path,
-                retry_count: flags.retry_count,
+                relay_count: flags.relay_count,
             };
             commands::run(&runtime, run_args).await;
         }
diff --git a/tiles/src/runtime/mlx.rs b/tiles/src/runtime/mlx.rs
@@ -1,3 +1,5 @@
+use crate::runtime::RunArgs;
+use crate::utils::hf_model_downloader::*;
 use anyhow::{Context, Result};
 use futures_util::StreamExt;
 use owo_colors::OwoColorize;
@@ -12,7 +14,6 @@ use std::{env, fs};
 use std::{io, process::Command};
 use tilekit::modelfile::Modelfile;
 use tokio::time::sleep;
-
 pub struct MLXRuntime {}
 
 impl MLXRuntime {}
@@ -37,7 +38,7 @@ impl MLXRuntime {
         const DEFAULT_MODELFILE: &str = "FROM driaforall/mem-agent-mlx-4bit";
 
         // Parse modelfile
-        let modelfile_parse_result = if let Some(modelfile_str) = run_args.modelfile_path {
+        let modelfile_parse_result = if let Some(modelfile_str) = &run_args.modelfile_path {
             tilekit::modelfile::parse_from_file(modelfile_str.as_str())
         } else {
             tilekit::modelfile::parse(DEFAULT_MODELFILE)
@@ -53,7 +54,7 @@ impl MLXRuntime {
 
         let model = modelfile.from.as_ref().unwrap();
         if model.starts_with("driaforall/mem-agent") {
-            let _res = run_model_with_server(self, modelfile, run_args.retry_count).await;
+            let _res = run_model_with_server(self, modelfile, &run_args).await;
         } else {
             run_model_by_sub_process(modelfile);
         }
@@ -172,20 +173,27 @@ fn run_model_by_sub_process(modelfile: Modelfile) {
 async fn run_model_with_server(
     mlx_runtime: &MLXRuntime,
     modelfile: Modelfile,
-    retry_count: u32,
+    run_args: &RunArgs,
 ) -> reqwest::Result<()> {
     if !cfg!(debug_assertions) {
         let _res = mlx_runtime.start_server_daemon().await;
         let _ = wait_until_server_is_up().await;
     }
-    let stdin = io::stdin();
-    let mut stdout = io::stdout();
     // loading the model from mem-agent via daemon server
     let memory_path = get_memory_path()
         .context("Retrieving memory_path failed")
         .unwrap();
     let modelname = modelfile.from.as_ref().unwrap();
-    load_model(modelname, &memory_path).await.unwrap();
+    match load_model(modelname, &memory_path).await {
+        Ok(_) => start_repl(mlx_runtime, modelname, run_args).await,
+        Err(err) => println!("{}", err),
+    }
+    Ok(())
+}
+
+async fn start_repl(mlx_runtime: &MLXRuntime, modelname: &str, run_args: &RunArgs) {
+    let stdin = io::stdin();
+    let mut stdout = io::stdout();
     println!("Running in interactive mode");
     // TODO: Handle "enter" key press or any key press when repl is processing an input
     loop {
@@ -203,12 +211,12 @@ async fn run_model_with_server(
                 break;
             }
             _ => {
-                let mut remaining_count = retry_count;
+                let mut remaining_count = run_args.relay_count;
                 let mut g_reply: String = "".to_owned();
                 let mut python_code: String = "".to_owned();
                 loop {
                     if remaining_count > 0 {
-                        let chat_start = remaining_count == retry_count;
+                        let chat_start = remaining_count == run_args.relay_count;
                         if let Ok(response) = chat(input, modelname, chat_start, &python_code).await
                         {
                             if response.reply.is_empty() {
@@ -233,7 +241,6 @@ async fn run_model_with_server(
             }
         }
     }
-    Ok(())
 }
 
 async fn ping() -> Result<(), String> {
@@ -252,6 +259,8 @@ async fn load_model(model_name: &str, memory_path: &str) -> Result<(), String> {
         "model": model_name,
         "memory_path": memory_path
     });
+
+    //TODO: Fix the unwrap here
     let res = client
         .post("http://127.0.0.1:6969/start")
         .json(&body)
@@ -260,33 +269,26 @@ async fn load_model(model_name: &str, memory_path: &str) -> Result<(), String> {
         .unwrap();
     match res.status() {
         StatusCode::OK => Ok(()),
-        StatusCode::NOT_FOUND => download_model(model_name).await,
+        StatusCode::NOT_FOUND => {
+            println!("Downloading {}\n", model_name);
+            match pull_model(model_name).await {
+                Ok(_) => {
+                    println!("\nDownloading completed \n");
+                    Ok(())
+                }
+                Err(err) => Err(err),
+            }
+        }
         _ => {
             println!("err {:?}", res);
-            Ok(())
+            Err(format!(
+                "Failed to load model {} due to {:?}",
+                model_name, res
+            ))
         }
     }
 }
 
-async fn download_model(model_name: &str) -> Result<(), String> {
-    println!("Downloading the model {} ....", model_name);
-    let client = Client::new();
-    let body = json!({
-        "model": model_name
-    });
-    let res = client
-        .post("http://127.0.0.1:6969/download")
-        .json(&body)
-        .send()
-        .await
-        .unwrap();
-    if res.status() == 200 {
-        Ok(())
-    } else {
-        Err(String::from("Downloading model failed"))
-    }
-}
-
 async fn chat(
     input: &str,
     model_name: &str,
diff --git a/tiles/src/runtime/mod.rs b/tiles/src/runtime/mod.rs
@@ -7,7 +7,7 @@ pub mod mlx;
 
 pub struct RunArgs {
     pub modelfile_path: Option<String>,
-    pub retry_count: u32,
+    pub relay_count: u32,
     // Future flags go here
 }
 
diff --git a/tiles/src/utils/hf_model_downloader.rs b/tiles/src/utils/hf_model_downloader.rs
@@ -0,0 +1,84 @@
+/// Manages model snapshot downloading from HuggingFace
+use std::{env, path::PathBuf};
+
+use hf_hub::api::{
+    Siblings,
+    tokio::{ApiBuilder, ApiError},
+};
+
+/// Download the entire model (including snapshot) for the given model name
+pub async fn pull_model(model_name: &str) -> Result<(), String> {
+    snapshot_download(model_name).await
+}
+
+pub async fn snapshot_download(modelname: &str) -> Result<(), String> {
+    let allow_patterns = [
+        ".json",
+        ".txt",
+        ".safetensors",
+        ".md",
+        ".gitattributes",
+        "LICENSE",
+    ];
+    let api_build_result = ApiBuilder::new()
+        .with_progress(true)
+        .with_cache_dir(PathBuf::from(get_model_cache()))
+        .build();
+
+    match api_build_result {
+        Ok(api) => {
+            let repo = api.model(modelname.to_owned());
+            match repo.info().await {
+                Ok(repo_info) => {
+                    let filtered_siblings = repo_info
+                        .siblings
+                        .iter()
+                        .filter(|sibling| {
+                            allow_patterns
+                                .iter()
+                                .any(|pat| sibling.rfilename.ends_with(pat))
+                        })
+                        .collect::<Vec<&Siblings>>();
+
+                    for sibling in filtered_siblings {
+                        if repo.get(&sibling.rfilename).await.is_err() {
+                            return Err(format!(
+                                "{:?} failed to download, retry again",
+                                &sibling.rfilename,
+                            ));
+                        }
+                    }
+                }
+                Err(err) => return Err(format_hf_api_error(err)),
+            };
+        }
+        Err(err) => return Err(format_hf_api_error(err)),
+    }
+
+    Ok(())
+}
+
+fn format_hf_api_error(api_error: ApiError) -> String {
+    match api_error {
+        ApiError::RequestError(err) => err.to_string(),
+        ApiError::TooManyRetries(err) => err.to_string(),
+        _err => "Something unexpected happened, check your internet connection".to_owned(),
+    }
+}
+
+fn get_model_cache() -> String {
+    let default_cache = format!(
+        "{}/.cache/huggingface",
+        env::home_dir().unwrap().to_str().unwrap()
+    );
+    let cache_root = if let Ok(home) = env::var("HF_HOME") {
+        home.to_owned()
+    } else {
+        default_cache
+    };
+
+    format!("{}/hub", cache_root)
+}
+
+#[cfg(test)]
+mod tests {}
diff --git a/tiles/src/utils/mod.rs b/tiles/src/utils/mod.rs
@@ -0,0 +1 @@
+pub mod hf_model_downloader;

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1 +1,3 @@` |
|  | `1` | `+// to be deprecated and removed, the core stuff will be moved to tilekit sdk` |
|  | `2` | `+` |
| `1` | `3` | `pub mod health;` |


| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -7,7 +7,7 @@ pub mod mlx;` |
| `7` | `7` |  |
| `8` | `8` | `pub struct RunArgs {` |
| `9` | `9` | `pub modelfile_path: Option<String>,` |
| `10` |  | `- pub retry_count: u32,` |
|  | `10` | `+ pub relay_count: u32,` |
| `11` | `11` | `// Future flags go here` |
| `12` | `12` | `}` |
| `13` | `13` |  |