Skip to content

Commit 4b06e17

Browse files
committed
Move token related functions to common
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
1 parent 44f2d6f commit 4b06e17

25 files changed

+529
-78
lines changed

api/db/services/dialog_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from rag.nlp.search import index_name
4242
from rag.prompts.generator import chunks_format, citation_prompt, cross_languages, full_question, kb_prompt, keyword_extraction, message_fit_in, \
4343
gen_meta_filter, PROMPT_JINJA_ENV, ASK_SUMMARY
44-
from rag.utils import num_tokens_from_string
44+
from common.token_utils import num_tokens_from_string
4545
from rag.utils.tavily_conn import Tavily
4646
from common.string_utils import remove_redundant_spaces
4747

api/db/services/llm_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import inspect
1717
import logging
1818
import re
19-
from rag.utils import num_tokens_from_string
19+
from common.token_utils import num_tokens_from_string
2020
from functools import partial
2121
from typing import Generator
2222
from api.db.db_models import LLM

common/token_utils.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#
2+
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
18+
import os
import tiktoken

from common.file_utils import get_project_base_directory

# Redirect tiktoken's cache to the project root — presumably so a pre-seeded
# BPE cache shipped with the repo is found instead of fetching it at runtime;
# TODO confirm the cache files actually live there.
tiktoken_cache_dir = get_project_base_directory()
os.environ["TIKTOKEN_CACHE_DIR"] = tiktoken_cache_dir
# encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Shared module-level encoder used by every helper below (cl100k_base vocabulary).
encoder = tiktoken.get_encoding("cl100k_base")
27+
28+
29+
def num_tokens_from_string(string: str) -> int:
    """Return how many cl100k_base tokens *string* encodes to.

    Best-effort: any encoding failure yields 0 rather than propagating.
    """
    try:
        return len(encoder.encode(string))
    except Exception:
        return 0
36+
37+
def total_token_count_from_response(resp):
    """Best-effort extraction of the total token count from an LLM response.

    Recognized shapes, in order of precedence:
      * objects exposing ``resp.usage.total_tokens``
      * objects exposing ``resp.usage_metadata.total_tokens``
      * mappings with ``resp["usage"]["total_tokens"]``
      * mappings with ``resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]``
      * mappings with ``resp["meta"]["tokens"]["input_tokens"] + ...["output_tokens"]``

    Returns:
        The token count, or 0 when *resp* is None or carries no recognized
        counter. This helper must never raise.
    """
    if resp is None:
        return 0

    # Attribute-style responses (SDK objects).
    usage = getattr(resp, "usage", None)
    if usage is not None and hasattr(usage, "total_tokens"):
        try:
            return usage.total_tokens
        except Exception:
            pass

    usage_meta = getattr(resp, "usage_metadata", None)
    if usage_meta is not None and hasattr(usage_meta, "total_tokens"):
        try:
            return usage_meta.total_tokens
        except Exception:
            pass

    # Mapping-style responses. The original membership tests ('usage' in resp)
    # were unguarded, so a plain object lacking the attributes above crashed
    # with TypeError instead of falling through to 0 — guard the whole section.
    try:
        if "usage" in resp:
            usage = resp["usage"]
            if "total_tokens" in usage:
                return usage["total_tokens"]
            if "input_tokens" in usage and "output_tokens" in usage:
                return usage["input_tokens"] + usage["output_tokens"]
        if "meta" in resp and "tokens" in resp["meta"]:
            tokens = resp["meta"]["tokens"]
            if "input_tokens" in tokens and "output_tokens" in tokens:
                return tokens["input_tokens"] + tokens["output_tokens"]
    except (TypeError, KeyError):
        # resp is not mapping-like, or a value had an unexpected shape.
        pass

    return 0
71+
72+
73+
def truncate(string: str, max_len: int) -> str:
    """Return the text decoded from at most the first *max_len* tokens of *string*."""
    token_ids = encoder.encode(string)
    clipped = token_ids[:max_len]
    return encoder.decode(clipped)
76+

deepdoc/parser/txt_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import re
1818

1919
from deepdoc.parser.utils import get_text
20-
from rag.nlp import num_tokens_from_string
20+
from common.token_utils import num_tokens_from_string
2121

2222

2323
class RAGFlowTxtParser:

graphrag/general/community_reports_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from graphrag.general.leiden import add_community_info2graph
2222
from rag.llm.chat_model import Base as CompletionLLM
2323
from graphrag.utils import perform_variable_replacements, dict_has_keys_with_types, chat_limiter
24-
from rag.utils import num_tokens_from_string
24+
from common.token_utils import num_tokens_from_string
2525
import trio
2626

2727

graphrag/general/extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
)
3939
from rag.llm.chat_model import Base as CompletionLLM
4040
from rag.prompts.generator import message_fit_in
41-
from rag.utils import truncate
41+
from common.token_utils import truncate
4242

4343
GRAPH_FIELD_SEP = "<SEP>"
4444
DEFAULT_ENTITY_TYPES = ["organization", "person", "geo", "event", "category"]

graphrag/general/graph_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_limiter, split_string_by_multi_markers
1717
from rag.llm.chat_model import Base as CompletionLLM
1818
import networkx as nx
19-
from rag.utils import num_tokens_from_string
19+
from common.token_utils import num_tokens_from_string
2020

2121
DEFAULT_TUPLE_DELIMITER = "<|>"
2222
DEFAULT_RECORD_DELIMITER = "##"

graphrag/general/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ async def run_graphrag_for_kb(
165165
return {"ok_docs": [], "failed_docs": [], "total_docs": 0, "total_chunks": 0, "seconds": 0.0}
166166

167167
def load_doc_chunks(doc_id: str) -> list[str]:
168-
from rag.utils import num_tokens_from_string
168+
from common.token_utils import num_tokens_from_string
169169

170170
chunks = []
171171
current_chunk = ""

graphrag/general/mind_map_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from rag.llm.chat_model import Base as CompletionLLM
2828
import markdown_to_json
2929
from functools import reduce
30-
from rag.utils import num_tokens_from_string
30+
from common.token_utils import num_tokens_from_string
3131

3232

3333
@dataclass

graphrag/light/graph_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from graphrag.light.graph_prompt import PROMPTS
1818
from graphrag.utils import chat_limiter, pack_user_ass_to_openai_messages, split_string_by_multi_markers
1919
from rag.llm.chat_model import Base as CompletionLLM
20-
from rag.utils import num_tokens_from_string
20+
from common.token_utils import num_tokens_from_string
2121

2222

2323
@dataclass

0 commit comments

Comments
 (0)