diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index cadf76c2aa8..27da95fb7f8 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -782,7 +782,7 @@ def decorate_answer(answer): return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()} if langfuse_tracer: - langfuse_generation = langfuse_tracer.start_generation( + langfuse_generation = langfuse_tracer.start_observation(as_type="generation", trace_context=trace_context, name="chat", model=llm_model_config["llm_name"], input={"prompt": prompt, "prompt4citation": prompt4citation, "messages": msg} ) diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 6058c6b69f7..60090bb0409 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -94,7 +94,7 @@ def bind_tools(self, toolcall_session, tools): def encode(self, texts: list): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.model_config["llm_name"], input={"texts": texts}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="encode", model=self.model_config["llm_name"], input={"texts": texts}) safe_texts = [] for text in texts: @@ -119,7 +119,7 @@ def encode(self, texts: list): def encode_queries(self, query: str): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode_queries", model=self.model_config["llm_name"], input={"query": query}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="encode_queries", model=self.model_config["llm_name"], input={"query": query}) emd, used_tokens = self.mdl.encode_queries(query) if self.model_config["llm_factory"] == "Builtin": @@ -135,7 +135,7 @@ def encode_queries(self, query: str): def similarity(self, query: str, texts: list): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="similarity", model=self.model_config["llm_name"], input={"query": query, "texts": texts}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="similarity", model=self.model_config["llm_name"], input={"query": query, "texts": texts}) sim, used_tokens = self.mdl.similarity(query, texts) if not TenantLLMService.increase_usage_by_id(self.model_config["id"], used_tokens): @@ -149,7 +149,7 @@ def similarity(self, query: str, texts: list): def describe(self, image, max_tokens=300): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="describe", metadata={"model": self.model_config["llm_name"]}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="describe", metadata={"model": self.model_config["llm_name"]}) txt, used_tokens = self.mdl.describe(image) if not TenantLLMService.increase_usage_by_id(self.model_config["id"], used_tokens): @@ -163,7 +163,7 @@ def describe(self, image, max_tokens=300): def describe_with_prompt(self, image, prompt): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="describe_with_prompt", metadata={"model": self.model_config["llm_name"], "prompt": prompt}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="describe_with_prompt", metadata={"model": self.model_config["llm_name"], "prompt": prompt}) txt, used_tokens = self.mdl.describe_with_prompt(image, prompt) if not TenantLLMService.increase_usage_by_id(self.model_config["id"], used_tokens): @@ -177,7 +177,7 @@ def describe_with_prompt(self, image, prompt): def transcription(self, audio): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="transcription", metadata={"model": self.model_config["llm_name"]}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="transcription", metadata={"model": self.model_config["llm_name"]}) txt, used_tokens = self.mdl.transcription(audio) if not TenantLLMService.increase_usage_by_id(self.model_config["id"], used_tokens): @@ -194,7 +194,7 @@ def stream_transcription(self, audio): supports_stream = hasattr(mdl, "stream_transcription") and callable(getattr(mdl, "stream_transcription")) if supports_stream: if self.langfuse: - generation = self.langfuse.start_generation( + generation = self.langfuse.start_observation(as_type="generation", trace_context=self.trace_context, name="stream_transcription", metadata={"model": self.model_config["llm_name"]}, @@ -228,7 +228,7 @@ def stream_transcription(self, audio): return if self.langfuse: - generation = self.langfuse.start_generation( + generation = self.langfuse.start_observation(as_type="generation", trace_context=self.trace_context, name="stream_transcription", metadata={"model": self.model_config["llm_name"]}, @@ -253,7 +253,7 @@ def stream_transcription(self, audio): def tts(self, text: str) -> Generator[bytes, None, None]: if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="tts", input={"text": text}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="tts", input={"text": text}) for chunk in self.mdl.tts(text): if isinstance(chunk, int): @@ -376,7 +376,7 @@ async def async_chat(self, system: str, history: list, gen_conf: dict = {}, **kw generation = None if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat", model=self.model_config["llm_name"], input={"system": system, "history": history}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="chat", model=self.model_config["llm_name"], input={"system": system, "history": history}) chat_partial = partial(base_fn, system, history, gen_conf) use_kwargs = self._clean_param(chat_partial, **kwargs) @@ -417,7 +417,7 @@ async def async_chat_streamly(self, system: str, history: list, gen_conf: dict = generation = None if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat_streamly", model=self.model_config["llm_name"], input={"system": system, "history": history}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="chat_streamly", model=self.model_config["llm_name"], input={"system": system, "history": history}) if stream_fn: chat_partial = partial(stream_fn, system, history, gen_conf) @@ -460,7 +460,7 @@ async def async_chat_streamly_delta(self, system: str, history: list, gen_conf: generation = None if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat_streamly", model=self.model_config["llm_name"], input={"system": system, "history": history}) + generation = self.langfuse.start_observation(trace_context=self.trace_context, as_type="generation", name="chat_streamly", model=self.model_config["llm_name"], input={"system": system, "history": history}) if stream_fn: chat_partial = partial(stream_fn, system, history, gen_conf) diff --git a/pyproject.toml b/pyproject.toml index dcd2898faff..b1bdeb1f6bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ dependencies = [ "infinity-emb>=0.0.66,<0.0.67", "jira==3.10.5", "json-repair==0.35.0", - "langfuse>=2.60.0", + "langfuse>=4.0.1", "mammoth>=1.11.0", "markdown==3.6", "markdown-to-json==2.1.1", diff --git a/uv.lock b/uv.lock index d355ed7fa9f..73a1be72e20 100644 --- a/uv.lock +++ b/uv.lock @@ -6721,7 +6721,7 @@ requires-dist = [ { name = "infinity-sdk", specifier = "==0.7.0.dev5" }, { name = "jira", specifier = "==3.10.5" }, { name = "json-repair", specifier = "==0.35.0" }, - { name = "langfuse", specifier = ">=2.60.0" }, + { name = "langfuse", specifier = ">=4.0.1" }, { name = "litellm", specifier = "~=1.82.0,!=1.82.7,!=1.82.8" }, { name = "mammoth", specifier = ">=1.11.0" }, { name = "markdown", specifier = "==3.6" },