diff --git a/include/agent_config.h b/include/agent_config.h
index 32b28aa..15d8774 100644
--- a/include/agent_config.h
+++ b/include/agent_config.h
@@ -249,6 +249,9 @@
 #define AGENT_CFG_KEY_FEISHU_USER_TOKEN "feishu_user_token"
 #define AGENT_CFG_KEY_LLM_HOST "llm_host"
 #define AGENT_CFG_KEY_LLM_PATH "llm_path"
+#define AGENT_CFG_KEY_VISION_MODEL "vision_model"
+#define AGENT_CFG_KEY_VISION_HOST "vision_host"
+#define AGENT_CFG_KEY_VISION_API_KEY "vision_api_key"
 #define AGENT_CFG_KEY_GATEWAY_HOST "gateway_host"
 #define AGENT_CFG_KEY_GATEWAY_PORT "gateway_port"
 #define AGENT_CFG_KEY_GATEWAY_TOKEN "gateway_token"
diff --git a/src/channels/cmd_llm.c b/src/channels/cmd_llm.c
index a77cbf7..e7c2641 100644
--- a/src/channels/cmd_llm.c
+++ b/src/channels/cmd_llm.c
@@ -47,7 +47,7 @@ static const router_preset_t g_router_presets[] = {
     { "glm", "open.bigmodel.cn", "/api/paas/v4/chat/completions", "glm-4-flash", 1 },
     { "openai", "api.openai.com", "/v1/chat/completions", "gpt-4o", 3 },
     { "claude", "api.anthropic.com", "/v1/messages", "claude-sonnet-4-20250514", 3 },
-    { "mimo", "api.xiaomimimo.com", "/v1/chat/completions", "MiMo-v2-Flash", 1 },
+    { "mimo", "api.xiaomimimo.com", "/v1/chat/completions", "mimo-v2-flash", 1 },
     { "openrouter", "openrouter.ai", "/api/v1/chat/completions", "openrouter/hunter-alpha", 2 },
 
     { NULL, NULL, NULL, NULL, 0 }
@@ -223,6 +223,101 @@ void cmd_set_llm(int argc, char** argv)
 }
 
 
+/* ── cmd_set_vision_llm ──────────────────────────────────────── */
+
+/**
+ * CLI handler for `set_vision_llm`.
+ *
+ *   set_vision_llm <preset> [api_key]        select a built-in preset
+ *   set_vision_llm <host> [model] [api_key]  use a custom endpoint
+ *   set_vision_llm clear                     drop the vision override
+ *
+ * argv[1] is matched against the preset table first; any other value is
+ * taken as a custom host.  Fields that are omitted are passed to
+ * llm_set_vision_model() as NULL, which clears them so vision calls
+ * inherit the matching main-LLM setting.
+ */
+void cmd_set_vision_llm(int argc, char** argv)
+{
+    if (argc < 2) {
+        printf("Usage: set_vision_llm <preset> [api_key]\n"
+               "       set_vision_llm <host> [model] [api_key]\n"
+               "       set_vision_llm clear\n"
+               "\n"
+               "Set an independent vision model. If not configured,\n"
+               "vision calls inherit the main LLM config.\n"
+               "\n"
+               "Presets:\n"
+               "  mimo   - api.xiaomimimo.com (mimo-v2-omni)\n"
+               "  openai - api.openai.com (gpt-4o)\n"
+               "  qwen   - dashscope.aliyuncs.com (qwen-vl-max)\n"
+               "  glm    - open.bigmodel.cn (glm-4v-flash)\n"
+               "\n"
+               "Examples:\n"
+               "  set_vision_llm mimo <api_key>\n"
+               "  set_vision_llm clear\n");
+        return;
+    }
+
+    const char* arg1 = argv[1];
+
+    /* clear: remove vision-specific config, fall back to main LLM */
+    if (strcmp(arg1, "clear") == 0) {
+        llm_set_vision_model(NULL, NULL, NULL);
+        printf("Vision LLM config cleared (using main LLM).\n");
+        return;
+    }
+
+    const char* host = NULL;
+    const char* model = NULL;
+    const char* api_key = NULL;
+
+    /* Check if arg1 is a preset name */
+    static const struct {
+        const char* name;
+        const char* host;
+        const char* model;
+    } vision_presets[] = {
+        { "mimo", "api.xiaomimimo.com", "mimo-v2-omni" },
+        { "openai", "api.openai.com", "gpt-4o" },
+        { "qwen", "dashscope.aliyuncs.com", "qwen-vl-max" },
+        { "glm", "open.bigmodel.cn", "glm-4v-flash" },
+        { NULL, NULL, NULL }
+    };
+
+    bool found = false;
+    for (int i = 0; vision_presets[i].name; i++) {
+        if (strcmp(vision_presets[i].name, arg1) == 0) {
+            host = vision_presets[i].host;
+            model = vision_presets[i].model;
+            if (argc >= 3)
+                api_key = argv[2];
+            found = true;
+            break;
+        }
+    }
+
+    if (!found) {
+        /* Custom: arg1 = host, arg2 = model, arg3 = api_key */
+        host = arg1;
+        if (argc >= 3)
+            model = argv[2];
+        if (argc >= 4)
+            api_key = argv[3];
+    }
+
+    /* Negative return means nothing was stored (a value was too long) */
+    if (llm_set_vision_model(host, model, api_key) < 0) {
+        printf("Failed to set vision LLM: host/model/api_key too long.\n");
+        return;
+    }
+
+    printf("Vision LLM: %s (model: %s)\n", host, model ? model : "(inherit)");
+    if (api_key)
+        printf("Vision API key saved.\n");
+}
+
+
 /* ── list_models helpers ──────────────────────────────────────── */
 
 #define LIST_MODELS_BUF_SIZE (256 * 1024)
diff --git a/src/channels/cmd_llm.h b/src/channels/cmd_llm.h
index 8fb5232..a9a0378 100644
--- a/src/channels/cmd_llm.h
+++ b/src/channels/cmd_llm.h
@@ -17,6 +17,7 @@
 #pragma once
 
 void cmd_set_llm(int argc, char** argv);
+void cmd_set_vision_llm(int argc, char** argv);
 void cmd_list_models(int argc, char** argv);
 void cmd_router_status(void);
 void cmd_router_profile(int argc, char** argv);
diff --git a/src/channels/nsh_commands.c b/src/channels/nsh_commands.c
index f750c4e..defb312 100644
--- a/src/channels/nsh_commands.c
+++ b/src/channels/nsh_commands.c
@@ -96,6 +96,7 @@ static void cmd_help(void)
         " set_feishu_app - Set Feishu app credentials\n"
         " set_feishu_user_token - Set Feishu user_access_token for doc APIs\n"
         " set_llm [model] [key] - Switch LLM backend (kimi/qwen/deepseek/glm/openai)\n"
+        " set_vision_llm <preset|host> [model] [key] - Set independent vision model\n"
         " list_models [--free] [keyword] - List available models (openrouter)\n"
         " memory_read - Read MEMORY.md\n"
         " memory_write - Write MEMORY.md (quote text)\n"
@@ -462,6 +463,9 @@ static void cmd_config_show(void)
     SHOW_CFG("Model", AGENT_CFG_KEY_MODEL, false);
     SHOW_CFG("LLM Host", AGENT_CFG_KEY_LLM_HOST, false);
     SHOW_CFG("LLM Path", AGENT_CFG_KEY_LLM_PATH, false);
+    SHOW_CFG("Vision Model", AGENT_CFG_KEY_VISION_MODEL, false);
+    SHOW_CFG("Vision Host", AGENT_CFG_KEY_VISION_HOST, false);
+    SHOW_CFG("Vision Key", AGENT_CFG_KEY_VISION_API_KEY, true);
     SHOW_CFG("Proxy Host", AGENT_CFG_KEY_PROXY_HOST, false);
     SHOW_CFG("Proxy Port", AGENT_CFG_KEY_PROXY_PORT, false);
     SHOW_CFG("SerpAPI Key", AGENT_CFG_KEY_SERP_KEY, true);
@@ -876,6 +880,8 @@ static void* cli_thread(void* arg)
             cmd_set_feishu_user_token(argc, argv);
         else if (strcmp(cmd, "set_llm") == 0)
             cmd_set_llm(argc, argv);
+        else if (strcmp(cmd, "set_vision_llm") == 0)
+            cmd_set_vision_llm(argc, argv);
         else if (strcmp(cmd, "list_models") == 0)
             cmd_list_models(argc, argv);
         else if (strcmp(cmd, "memory_read") == 0)
diff --git a/src/llm/llm_proxy.c b/src/llm/llm_proxy.c
index c682d90..48157cc 100644
--- a/src/llm/llm_proxy.c
+++ b/src/llm/llm_proxy.c
@@ -46,6 +46,11 @@ static char s_llm_host[128] = AGENT_LLM_API_HOST;
 static char s_llm_path[128] = AGENT_LLM_API_PATH;
 static char s_llm_port[8] = "443"; /* "443" for HTTPS, "80" etc for HTTP */
 
+/* Independent vision model config — empty means fall back to main LLM */
+static char s_vision_model[64] = { 0 };
+static char s_vision_host[128] = { 0 };
+static char s_vision_api_key[128] = { 0 };
+
 static pthread_mutex_t s_llm_lock = PTHREAD_MUTEX_INITIALIZER;
 
 /* Check if host uses OpenAI-compatible max_completion_tokens param */
@@ -124,6 +129,17 @@ int llm_proxy_init(void)
     if (claw_config_get("llm_port", tmp, sizeof(tmp)) == OK && tmp[0])
         strncpy(s_llm_port, tmp, sizeof(s_llm_port) - 1);
 
+    /* Load optional vision model config — empty falls back to main LLM */
+    memset(tmp, 0, sizeof(tmp));
+    if (claw_config_get(AGENT_CFG_KEY_VISION_MODEL, tmp, sizeof(tmp)) == OK && tmp[0])
+        strncpy(s_vision_model, tmp, sizeof(s_vision_model) - 1);
+    memset(tmp, 0, sizeof(tmp));
+    if (claw_config_get(AGENT_CFG_KEY_VISION_HOST, tmp, sizeof(tmp)) == OK && tmp[0])
+        strncpy(s_vision_host, tmp, sizeof(s_vision_host) - 1);
+    memset(tmp, 0, sizeof(tmp));
+    if (claw_config_get(AGENT_CFG_KEY_VISION_API_KEY, tmp, sizeof(tmp)) == OK && tmp[0])
+        strncpy(s_vision_api_key, tmp, sizeof(s_vision_api_key) - 1);
+
     if (s_api_key[0])
         syslog(LOG_INFO, "[%s] LLM proxy initialized (model: %s, host: %s)\n",
             TAG, s_model, s_llm_host);
@@ -212,6 +228,107 @@ void llm_snapshot_config(char* model, size_t model_sz,
     pthread_mutex_unlock(&s_llm_lock);
 }
 
+/* ── Vision config snapshot ──────────────────────────────────── */
+
+/* Copy src into dst (capacity dst_sz), always NUL-terminating; a NULL or
+ * zero-sized destination is left untouched. */
+static void vision_copy_field(char* dst, size_t dst_sz, const char* src)
+{
+    if (dst == NULL || dst_sz == 0)
+        return;
+    strncpy(dst, src, dst_sz - 1);
+    dst[dst_sz - 1] = '\0';
+}
+
+/**
+ * Copy the effective vision model, API key and host into caller buffers.
+ *
+ * Each field is resolved independently under s_llm_lock: the vision-specific
+ * value is used when it is non-empty, otherwise the main LLM value.  Values
+ * longer than the destination are truncated.
+ *
+ * NOTE(review): because the fallback is per field, a vision host configured
+ * without a vision API key is paired with the main LLM API key.
+ */
+void llm_snapshot_vision_config(char* model, size_t model_sz,
+                                char* api_key, size_t key_sz,
+                                char* host, size_t host_sz)
+{
+    pthread_mutex_lock(&s_llm_lock);
+
+    vision_copy_field(model, model_sz,
+                      s_vision_model[0] ? s_vision_model : s_model);
+    vision_copy_field(api_key, key_sz,
+                      s_vision_api_key[0] ? s_vision_api_key : s_api_key);
+    vision_copy_field(host, host_sz,
+                      s_vision_host[0] ? s_vision_host : s_llm_host);
+
+    pthread_mutex_unlock(&s_llm_lock);
+}
+
+/* ── Vision model setter ─────────────────────────────────────── */
+
+/* Non-zero when value is NULL or fits, with its NUL, in cap bytes. */
+static int vision_value_fits(const char* value, size_t cap)
+{
+    return value == NULL || strlen(value) < cap;
+}
+
+/* Persist one vision field under cfg_key and mirror it into the cache dst;
+ * NULL or "" stores an empty value, which means "inherit from main LLM".
+ * Caller holds s_llm_lock and has already checked that value fits. */
+static void vision_store_field(const char* cfg_key, char* dst, size_t dst_sz,
+                               const char* value)
+{
+    if (value == NULL)
+        value = "";
+    claw_config_set(cfg_key, value);
+    strncpy(dst, value, dst_sz - 1);
+    dst[dst_sz - 1] = '\0';
+}
+
+/**
+ * Set (or clear) the independent vision model configuration.
+ *
+ * Each of host, model and api_key is written to the config store and to the
+ * in-memory copy; NULL or "" clears that field so vision calls fall back to
+ * the main LLM value.
+ *
+ * Returns OK on success.  Returns -1 without changing anything when a value
+ * does not fit its buffer: truncating it would leave the running config
+ * different from the persisted one and the API key different from the input.
+ *
+ * NOTE(review): the result of claw_config_set() is not checked here.
+ */
+int llm_set_vision_model(const char* host, const char* model,
+                         const char* api_key)
+{
+    /* Validate before taking the lock or touching persistent config */
+    if (!vision_value_fits(host, sizeof(s_vision_host))
+        || !vision_value_fits(model, sizeof(s_vision_model))
+        || !vision_value_fits(api_key, sizeof(s_vision_api_key))) {
+        syslog(LOG_ERR, "[%s] Vision LLM config rejected: value too long\n",
+               TAG);
+        return -1;
+    }
+
+    pthread_mutex_lock(&s_llm_lock);
+
+    vision_store_field(AGENT_CFG_KEY_VISION_HOST, s_vision_host,
+                       sizeof(s_vision_host), host);
+    vision_store_field(AGENT_CFG_KEY_VISION_MODEL, s_vision_model,
+                       sizeof(s_vision_model), model);
+    vision_store_field(AGENT_CFG_KEY_VISION_API_KEY, s_vision_api_key,
+                       sizeof(s_vision_api_key), api_key);
+
+    syslog(LOG_INFO, "[%s] Vision LLM config updated: model=%s host=%s\n",
+           TAG, s_vision_model[0] ? s_vision_model : "(inherit)",
+           s_vision_host[0] ? s_vision_host : "(inherit)");
+
+    pthread_mutex_unlock(&s_llm_lock);
+    return OK;
+}
+
 /* ── HTTP helpers ─────────────────────────────────────────── */
 
 /**
diff --git a/src/llm/llm_proxy.h b/src/llm/llm_proxy.h
index dde3169..b92d012 100644
--- a/src/llm/llm_proxy.h
+++ b/src/llm/llm_proxy.h
@@ -94,6 +94,19 @@ int llm_chat_vision_raw(const char* prompt,
                         const char* mime_type,
                         char* response_buf, size_t buf_size);
 
+/** Vision config snapshot: returns the effective vision model/api_key/host.
+ *  Each field with no vision-specific value falls back to the main LLM config. */
+void llm_snapshot_vision_config(char* model, size_t model_sz,
+                                char* api_key, size_t key_sz,
+                                char* host, size_t host_sz);
+
+/** Set independent vision model config.
+ *  host/model/api_key: set non-NULL/non-empty to override, NULL to clear.
+ *  Empty vision config falls back to main LLM config automatically.
+ *  Returns OK on success, -1 (nothing stored) if a value is too long. */
+int llm_set_vision_model(const char* host, const char* model,
+                         const char* api_key);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/llm/llm_vision.c b/src/llm/llm_vision.c
index fa337a5..549a15b 100644
--- a/src/llm/llm_vision.c
+++ b/src/llm/llm_vision.c
@@ -43,7 +43,7 @@ int llm_chat_vision(const char* prompt, const char* image_b64,
     size_t buf_size)
 {
     char model[64], api_key[128], llm_host[128];
-    llm_snapshot_config(model, sizeof(model),
+    llm_snapshot_vision_config(model, sizeof(model),
         api_key, sizeof(api_key),
         llm_host, sizeof(llm_host));
 
@@ -215,7 +215,7 @@ int llm_chat_vision_raw(const char* prompt,
     char* response_buf, size_t buf_size)
 {
     char model[64], api_key[128], llm_host[128];
-    llm_snapshot_config(model, sizeof(model),
+    llm_snapshot_vision_config(model, sizeof(model),
         api_key, sizeof(api_key),
         llm_host, sizeof(llm_host));
 