Skip to content

Commit 78c3583

Browse files
authored
Fix memory resolution regression for multimodal Gemini models (infiniflow#14209)
### What problem does this PR solve?

Fixes infiniflow#14206. This issue is a regression. PR infiniflow#9520 previously changed Gemini models from `image2text` to `chat` to fix chat-side resolution, but PR infiniflow#13073 later restored those Gemini entries to `image2text` during model-list updates, which reintroduced the bug.

The underlying problem is that Gemini models are multimodal and advertise both `CHAT` and `IMAGE2TEXT`, while tenant model resolution still depends on a single stored `model_type`. That makes chat-only flows such as memory extraction fragile when a compatible model is stored as `image2text`.

This PR fixes the issue at the model resolution layer instead of changing `llm_factories.json` again:

- keep the stored tenant model type unchanged
- try exact `model_type` lookup first
- if no exact match is found, fall back only when the model metadata shows the requested capability is supported
- coerce the runtime config to the requested type for chat callers
- fail fast in memory creation instead of silently persisting `tenant_llm_id=0`

This preserves existing multimodal and `image2text` behavior while restoring chat compatibility for memory-related flows.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

### Testing

- Re-checked the current memory creation and memory message extraction paths against the updated resolution logic
- Verified locally that a Gemini-style tenant model stored as `image2text` but tagged with `CHAT` can still be resolved for `chat`
- Verified `get_model_config_by_type_and_name(..., CHAT, ...)` returns a chat-compatible runtime config
- Verified `get_model_config_by_id(..., CHAT)` also returns a chat-compatible runtime config
- Verified strict resolution still fails when the model metadata does not advertise chat capability
1 parent 9c7c105 commit 78c3583

2 files changed

Lines changed: 15 additions & 3 deletions

File tree

api/apps/restful_apis/memory_api.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import time
1919

2020
from quart import request
21-
from common.constants import RetCode
21+
from common.constants import LLMType, RetCode
2222
from common.exceptions import ArgumentException, NotFoundException
2323
from api.apps import login_required, current_user
2424
from api.utils.api_utils import validate_request, get_request_json, get_error_argument_result, get_json_result
@@ -33,9 +33,13 @@ async def create_memory():
3333
timing_enabled = os.getenv("RAGFLOW_API_TIMING")
3434
t_start = time.perf_counter() if timing_enabled else None
3535
req = await get_request_json()
36-
req = ensure_tenant_model_id_for_params(current_user.id, req)
3736
t_parsed = time.perf_counter() if timing_enabled else None
3837
try:
38+
req = ensure_tenant_model_id_for_params(current_user.id, req)
39+
if not req.get("tenant_llm_id"):
40+
raise ArgumentException(
41+
f"Tenant Model with name {req['llm_id']} and type {LLMType.CHAT.value} not found"
42+
)
3943
memory_info = {
4044
"name": req["name"],
4145
"memory_type": req["memory_type"],

api/utils/tenant_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515
#
1616
from common.constants import LLMType
17+
from common.exceptions import ArgumentException
1718
from api.db.services.tenant_llm_service import TenantLLMService
1819

1920
_KEY_TO_MODEL_TYPE = {
@@ -25,13 +26,20 @@
2526
"tts_id": LLMType.TTS,
2627
}
2728

28-
def ensure_tenant_model_id_for_params(tenant_id: str, param_dict: dict) -> dict:
29+
def ensure_tenant_model_id_for_params(tenant_id: str, param_dict: dict, *, strict: bool = False) -> dict:
2930
for key in ["llm_id", "embd_id", "asr_id", "img2txt_id", "rerank_id", "tts_id"]:
3031
if param_dict.get(key) and not param_dict.get(f"tenant_{key}"):
3132
model_type = _KEY_TO_MODEL_TYPE.get(key)
3233
tenant_model = TenantLLMService.get_api_key(tenant_id, param_dict[key], model_type)
34+
if not tenant_model and model_type == LLMType.CHAT:
35+
tenant_model = TenantLLMService.get_api_key(tenant_id, param_dict[key])
3336
if tenant_model:
3437
param_dict.update({f"tenant_{key}": tenant_model.id})
3538
else:
39+
if strict:
40+
model_type_val = model_type.value if hasattr(model_type, "value") else model_type
41+
raise ArgumentException(
42+
f"Tenant Model with name {param_dict[key]} and type {model_type_val} not found"
43+
)
3644
param_dict.update({f"tenant_{key}": 0})
3745
return param_dict

0 commit comments

Comments (0)