From 433bd0b5be85108e742e2fd98901d1eea33bb3f6 Mon Sep 17 00:00:00 2001
From: wyh <1914457309@qq.com>
Date: Wed, 15 Apr 2026 00:04:52 +0800
Subject: [PATCH 1/2] feat: add independent vision model config with fallback to main LLM

Add set_vision_llm command allowing users to configure a separate
vision-capable model (e.g. mimo-v2-omni, gpt-4o, qwen-vl-max) for image
analysis while keeping a cheaper text model for chat.

When no vision config is set, vision calls automatically fall back to
the main LLM config, maintaining full backward compatibility. The
fallback is applied per field: an unset vision model, host or API key
each inherits the corresponding main LLM value.

Changes:
- agent_config.h: add AGENT_CFG_KEY_VISION_* config keys
- llm_proxy.c: add vision static vars (loaded in llm_proxy_init),
  snapshot, setter with fallback
- llm_proxy.h: declare llm_snapshot_vision_config, llm_set_vision_model
- llm_vision.c: use vision-specific config in both vision entry points
- cmd_llm.c: add cmd_set_vision_llm with 4 presets (mimo/openai/qwen/glm)
- cmd_llm.h: declare cmd_set_vision_llm
- nsh_commands.c: register command, update help text and config_show

Also fix mimo preset model name from MiMo-v2-Flash to mimo-v2-flash
(lowercase) as required by api.xiaomimimo.com.

Co-Authored-By: Claude Opus 4.6 --- include/agent_config.h | 3 ++ src/channels/cmd_llm.c | 81 +++++++++++++++++++++++++++++++++- src/channels/cmd_llm.h | 1 + src/channels/nsh_commands.c | 6 +++ src/llm/llm_proxy.c | 88 +++++++++++++++++++++++++++++++++++++ src/llm/llm_proxy.h | 12 +++++ src/llm/llm_vision.c | 4 +- 7 files changed, 192 insertions(+), 3 deletions(-) diff --git a/include/agent_config.h b/include/agent_config.h index 32b28aa..15d8774 100644 --- a/include/agent_config.h +++ b/include/agent_config.h @@ -249,6 +249,9 @@ #define AGENT_CFG_KEY_FEISHU_USER_TOKEN "feishu_user_token" #define AGENT_CFG_KEY_LLM_HOST "llm_host" #define AGENT_CFG_KEY_LLM_PATH "llm_path" +#define AGENT_CFG_KEY_VISION_MODEL "vision_model" +#define AGENT_CFG_KEY_VISION_HOST "vision_host" +#define AGENT_CFG_KEY_VISION_API_KEY "vision_api_key" #define AGENT_CFG_KEY_GATEWAY_HOST "gateway_host" #define AGENT_CFG_KEY_GATEWAY_PORT "gateway_port" #define AGENT_CFG_KEY_GATEWAY_TOKEN "gateway_token" diff --git a/src/channels/cmd_llm.c b/src/channels/cmd_llm.c index a77cbf7..e7c2641 100644 --- a/src/channels/cmd_llm.c +++ b/src/channels/cmd_llm.c @@ -47,7 +47,7 @@ static const router_preset_t g_router_presets[] = { { "glm", "open.bigmodel.cn", "/api/paas/v4/chat/completions", "glm-4-flash", 1 }, { "openai", "api.openai.com", "/v1/chat/completions", "gpt-4o", 3 }, { "claude", "api.anthropic.com", "/v1/messages", "claude-sonnet-4-20250514", 3 }, - { "mimo", "api.xiaomimimo.com", "/v1/chat/completions", "MiMo-v2-Flash", 1 }, + { "mimo", "api.xiaomimimo.com", "/v1/chat/completions", "mimo-v2-flash", 1 }, { "openrouter", "openrouter.ai", "/api/v1/chat/completions", "openrouter/hunter-alpha", 2 }, { NULL, NULL, NULL, NULL, 0 } @@ -223,6 +223,85 @@ void cmd_set_llm(int argc, char** argv) } +/* ── cmd_set_vision_llm ──────────────────────────────────────── */ + +void cmd_set_vision_llm(int argc, char** argv) +{ + if (argc < 2) { + printf("Usage: set_vision_llm [api_key]\n" + " set_vision_llm 
[api_key]\n" + " set_vision_llm clear\n" + "\n" + "Set an independent vision model. If not configured,\n" + "vision calls inherit the main LLM config.\n" + "\n" + "Presets:\n" + " mimo - api.xiaomimimo.com (mimo-v2-omni)\n" + " openai - api.openai.com (gpt-4o)\n" + " qwen - dashscope.aliyuncs.com (qwen-vl-max)\n" + " glm - open.bigmodel.cn (glm-4v-flash)\n" + "\n" + "Examples:\n" + " set_vision_llm mimo \n" + " set_vision_llm clear\n"); + return; + } + + const char* arg1 = argv[1]; + + /* clear: remove vision-specific config, fall back to main LLM */ + if (strcmp(arg1, "clear") == 0) { + llm_set_vision_model(NULL, NULL, NULL); + printf("Vision LLM config cleared (using main LLM).\n"); + return; + } + + const char* host = NULL; + const char* model = NULL; + const char* api_key = NULL; + + /* Check if arg1 is a preset name */ + static const struct { + const char* name; + const char* host; + const char* model; + } vision_presets[] = { + { "mimo", "api.xiaomimimo.com", "mimo-v2-omni" }, + { "openai", "api.openai.com", "gpt-4o" }, + { "qwen", "dashscope.aliyuncs.com", "qwen-vl-max" }, + { "glm", "open.bigmodel.cn", "glm-4v-flash" }, + { NULL, NULL, NULL } + }; + + bool found = false; + for (int i = 0; vision_presets[i].name; i++) { + if (strcmp(vision_presets[i].name, arg1) == 0) { + host = vision_presets[i].host; + model = vision_presets[i].model; + if (argc >= 3) + api_key = argv[2]; + found = true; + break; + } + } + + if (!found) { + /* Custom: arg1 = host, arg2 = model, arg3 = api_key */ + host = arg1; + if (argc >= 3) + model = argv[2]; + if (argc >= 4) + api_key = argv[3]; + } + + llm_set_vision_model(host, model, api_key); + + printf("Vision LLM: %s (model: %s)\n", host, model ? 
model : "(inherit)"); + if (api_key) + printf("Vision API key saved.\n"); +} + + /* ── list_models helpers ──────────────────────────────────────── */ #define LIST_MODELS_BUF_SIZE (256 * 1024) diff --git a/src/channels/cmd_llm.h b/src/channels/cmd_llm.h index 8fb5232..a9a0378 100644 --- a/src/channels/cmd_llm.h +++ b/src/channels/cmd_llm.h @@ -17,6 +17,7 @@ #pragma once void cmd_set_llm(int argc, char** argv); +void cmd_set_vision_llm(int argc, char** argv); void cmd_list_models(int argc, char** argv); void cmd_router_status(void); void cmd_router_profile(int argc, char** argv); diff --git a/src/channels/nsh_commands.c b/src/channels/nsh_commands.c index f750c4e..defb312 100644 --- a/src/channels/nsh_commands.c +++ b/src/channels/nsh_commands.c @@ -96,6 +96,7 @@ static void cmd_help(void) " set_feishu_app - Set Feishu app credentials\n" " set_feishu_user_token - Set Feishu user_access_token for doc APIs\n" " set_llm [model] [key] - Switch LLM backend (kimi/qwen/deepseek/glm/openai)\n" + " set_vision_llm [model] [key] - Set independent vision model\n" " list_models [--free] [keyword] - List available models (openrouter)\n" " memory_read - Read MEMORY.md\n" " memory_write - Write MEMORY.md (quote text)\n" @@ -462,6 +463,9 @@ static void cmd_config_show(void) SHOW_CFG("Model", AGENT_CFG_KEY_MODEL, false); SHOW_CFG("LLM Host", AGENT_CFG_KEY_LLM_HOST, false); SHOW_CFG("LLM Path", AGENT_CFG_KEY_LLM_PATH, false); + SHOW_CFG("Vision Model", AGENT_CFG_KEY_VISION_MODEL, false); + SHOW_CFG("Vision Host", AGENT_CFG_KEY_VISION_HOST, false); + SHOW_CFG("Vision Key", AGENT_CFG_KEY_VISION_API_KEY, true); SHOW_CFG("Proxy Host", AGENT_CFG_KEY_PROXY_HOST, false); SHOW_CFG("Proxy Port", AGENT_CFG_KEY_PROXY_PORT, false); SHOW_CFG("SerpAPI Key", AGENT_CFG_KEY_SERP_KEY, true); @@ -876,6 +880,8 @@ static void* cli_thread(void* arg) cmd_set_feishu_user_token(argc, argv); else if (strcmp(cmd, "set_llm") == 0) cmd_set_llm(argc, argv); + else if (strcmp(cmd, "set_vision_llm") == 0) + 
cmd_set_vision_llm(argc, argv); else if (strcmp(cmd, "list_models") == 0) cmd_list_models(argc, argv); else if (strcmp(cmd, "memory_read") == 0) diff --git a/src/llm/llm_proxy.c b/src/llm/llm_proxy.c index c682d90..0d4ba29 100644 --- a/src/llm/llm_proxy.c +++ b/src/llm/llm_proxy.c @@ -46,6 +46,11 @@ static char s_llm_host[128] = AGENT_LLM_API_HOST; static char s_llm_path[128] = AGENT_LLM_API_PATH; static char s_llm_port[8] = "443"; /* "443" for HTTPS, "80" etc for HTTP */ +/* Independent vision model config — empty means fall back to main LLM */ +static char s_vision_model[64] = { 0 }; +static char s_vision_host[128] = { 0 }; +static char s_vision_api_key[128] = { 0 }; + static pthread_mutex_t s_llm_lock = PTHREAD_MUTEX_INITIALIZER; /* Check if host uses OpenAI-compatible max_completion_tokens param */ @@ -124,6 +129,17 @@ int llm_proxy_init(void) if (claw_config_get("llm_port", tmp, sizeof(tmp)) == OK && tmp[0]) strncpy(s_llm_port, tmp, sizeof(s_llm_port) - 1); + /* Load optional vision model config — empty falls back to main LLM */ + memset(tmp, 0, sizeof(tmp)); + if (claw_config_get(AGENT_CFG_KEY_VISION_MODEL, tmp, sizeof(tmp)) == OK && tmp[0]) + strncpy(s_vision_model, tmp, sizeof(s_vision_model) - 1); + memset(tmp, 0, sizeof(tmp)); + if (claw_config_get(AGENT_CFG_KEY_VISION_HOST, tmp, sizeof(tmp)) == OK && tmp[0]) + strncpy(s_vision_host, tmp, sizeof(s_vision_host) - 1); + memset(tmp, 0, sizeof(tmp)); + if (claw_config_get(AGENT_CFG_KEY_VISION_API_KEY, tmp, sizeof(tmp)) == OK && tmp[0]) + strncpy(s_vision_api_key, tmp, sizeof(s_vision_api_key) - 1); + if (s_api_key[0]) syslog(LOG_INFO, "[%s] LLM proxy initialized (model: %s, host: %s)\n", TAG, s_model, s_llm_host); @@ -212,6 +228,78 @@ void llm_snapshot_config(char* model, size_t model_sz, pthread_mutex_unlock(&s_llm_lock); } +/* ── Vision config snapshot ──────────────────────────────────── */ + +void llm_snapshot_vision_config(char* model, size_t model_sz, + char* api_key, size_t key_sz, + char* host, size_t 
host_sz) +{ + pthread_mutex_lock(&s_llm_lock); + + /* Use vision-specific config when set, otherwise fall back to main LLM */ + if (s_vision_model[0]) + strncpy(model, s_vision_model, model_sz - 1); + else + strncpy(model, s_model, model_sz - 1); + model[model_sz - 1] = '\0'; + + if (s_vision_api_key[0]) + strncpy(api_key, s_vision_api_key, key_sz - 1); + else + strncpy(api_key, s_api_key, key_sz - 1); + api_key[key_sz - 1] = '\0'; + + if (s_vision_host[0]) + strncpy(host, s_vision_host, host_sz - 1); + else + strncpy(host, s_llm_host, host_sz - 1); + host[host_sz - 1] = '\0'; + + pthread_mutex_unlock(&s_llm_lock); +} + +/* ── Vision model setter ─────────────────────────────────────── */ + +int llm_set_vision_model(const char* host, const char* model, + const char* api_key) +{ + pthread_mutex_lock(&s_llm_lock); + + if (host && host[0]) { + claw_config_set(AGENT_CFG_KEY_VISION_HOST, host); + strncpy(s_vision_host, host, sizeof(s_vision_host) - 1); + s_vision_host[sizeof(s_vision_host) - 1] = '\0'; + } else { + config_del(AGENT_CFG_KEY_VISION_HOST); + s_vision_host[0] = '\0'; + } + + if (model && model[0]) { + claw_config_set(AGENT_CFG_KEY_VISION_MODEL, model); + strncpy(s_vision_model, model, sizeof(s_vision_model) - 1); + s_vision_model[sizeof(s_vision_model) - 1] = '\0'; + } else { + config_del(AGENT_CFG_KEY_VISION_MODEL); + s_vision_model[0] = '\0'; + } + + if (api_key && api_key[0]) { + claw_config_set(AGENT_CFG_KEY_VISION_API_KEY, api_key); + strncpy(s_vision_api_key, api_key, sizeof(s_vision_api_key) - 1); + s_vision_api_key[sizeof(s_vision_api_key) - 1] = '\0'; + } else { + config_del(AGENT_CFG_KEY_VISION_API_KEY); + s_vision_api_key[0] = '\0'; + } + + pthread_mutex_unlock(&s_llm_lock); + + syslog(LOG_INFO, "[%s] Vision LLM config updated: model=%s host=%s\n", + TAG, s_vision_model[0] ? s_vision_model : "(inherit)", + s_vision_host[0] ? 
s_vision_host : "(inherit)"); + return OK; +} + /* ── HTTP helpers ─────────────────────────────────────────── */ /** diff --git a/src/llm/llm_proxy.h b/src/llm/llm_proxy.h index dde3169..b92d012 100644 --- a/src/llm/llm_proxy.h +++ b/src/llm/llm_proxy.h @@ -94,6 +94,18 @@ int llm_chat_vision_raw(const char* prompt, const char* mime_type, char* response_buf, size_t buf_size); +/** Vision config snapshot: returns the vision-specific model/api_key/host. + * If vision model is not configured, falls back to the main LLM config. */ +void llm_snapshot_vision_config(char* model, size_t model_sz, + char* api_key, size_t key_sz, + char* host, size_t host_sz); + +/** Set independent vision model config. + * host/model/api_key: set non-NULL/non-empty to override, NULL to clear. + * Empty vision config falls back to main LLM config automatically. */ +int llm_set_vision_model(const char* host, const char* model, + const char* api_key); + #ifdef __cplusplus } #endif diff --git a/src/llm/llm_vision.c b/src/llm/llm_vision.c index fa337a5..549a15b 100644 --- a/src/llm/llm_vision.c +++ b/src/llm/llm_vision.c @@ -43,7 +43,7 @@ int llm_chat_vision(const char* prompt, const char* image_b64, size_t buf_size) { char model[64], api_key[128], llm_host[128]; - llm_snapshot_config(model, sizeof(model), + llm_snapshot_vision_config(model, sizeof(model), api_key, sizeof(api_key), llm_host, sizeof(llm_host)); @@ -215,7 +215,7 @@ int llm_chat_vision_raw(const char* prompt, char* response_buf, size_t buf_size) { char model[64], api_key[128], llm_host[128]; - llm_snapshot_config(model, sizeof(model), + llm_snapshot_vision_config(model, sizeof(model), api_key, sizeof(api_key), llm_host, sizeof(llm_host)); From e793eb84d60b572d1ed73fd030afd192a1b2db9e Mon Sep 17 00:00:00 2001 From: wyh <1914457309@qq.com> Date: Wed, 15 Apr 2026 23:23:11 +0800 Subject: [PATCH 2/2] fix: use claw_config_set to clear vision config and fix syslog race - Replace config_del with claw_config_set(key, "") per review 
feedback, using the public API instead of the non-public config_del function - Move syslog before pthread_mutex_unlock to avoid reading s_vision_model/s_vision_host after lock release Co-Authored-By: Claude Opus 4.6 --- src/llm/llm_proxy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/llm/llm_proxy.c b/src/llm/llm_proxy.c index 0d4ba29..48157cc 100644 --- a/src/llm/llm_proxy.c +++ b/src/llm/llm_proxy.c @@ -270,7 +270,7 @@ int llm_set_vision_model(const char* host, const char* model, strncpy(s_vision_host, host, sizeof(s_vision_host) - 1); s_vision_host[sizeof(s_vision_host) - 1] = '\0'; } else { - config_del(AGENT_CFG_KEY_VISION_HOST); + claw_config_set(AGENT_CFG_KEY_VISION_HOST, ""); s_vision_host[0] = '\0'; } @@ -279,7 +279,7 @@ int llm_set_vision_model(const char* host, const char* model, strncpy(s_vision_model, model, sizeof(s_vision_model) - 1); s_vision_model[sizeof(s_vision_model) - 1] = '\0'; } else { - config_del(AGENT_CFG_KEY_VISION_MODEL); + claw_config_set(AGENT_CFG_KEY_VISION_MODEL, ""); s_vision_model[0] = '\0'; } @@ -288,15 +288,15 @@ int llm_set_vision_model(const char* host, const char* model, strncpy(s_vision_api_key, api_key, sizeof(s_vision_api_key) - 1); s_vision_api_key[sizeof(s_vision_api_key) - 1] = '\0'; } else { - config_del(AGENT_CFG_KEY_VISION_API_KEY); + claw_config_set(AGENT_CFG_KEY_VISION_API_KEY, ""); s_vision_api_key[0] = '\0'; } - pthread_mutex_unlock(&s_llm_lock); - syslog(LOG_INFO, "[%s] Vision LLM config updated: model=%s host=%s\n", TAG, s_vision_model[0] ? s_vision_model : "(inherit)", s_vision_host[0] ? s_vision_host : "(inherit)"); + + pthread_mutex_unlock(&s_llm_lock); return OK; }