11import enum
2- from datetime import datetime
3- from typing import Any , Optional , Type
4- from uuid import UUID
2+ from functools import lru_cache
53
6- from sqlalchemy import DateTime , func
4+ from typing import Optional , Type
75from sqlmodel import (
86 Field ,
97 Column ,
1513from tidb_vector .sqlalchemy import VectorType
1614from llama_index .core .schema import TextNode
1715
18- from app .core .config import settings
1916from app .models .document import Document
2017from app .models .knowledge_base import KnowledgeBase
18+ from app .models .knowledge_base_scoped .table_naming import get_kb_vector_dims
19+ from app .utils .namespace import format_namespace
2120from .base import UpdatableBaseModel , UUIDBaseModel
22- from app .models .knowledge_base_scoped .registry import get_kb_scoped_registry
23- from .knowledge_base_scoped .table_naming import (
24- get_kb_chunks_table_name ,
25- get_kb_vector_dims ,
26- )
27- from app .models .patch .sql_model import SQLModel as PatchSQLModel
28- from ..utils .uuid6 import uuid7
2921
3022
3123class KgIndexStatus (str , enum .Enum ):
@@ -36,69 +28,26 @@ class KgIndexStatus(str, enum.Enum):
3628 FAILED = "failed"
3729
3830
39- # Notice: DO NOT forget to modify the definition in `get_kb_chunk_model` to
40- # keep the table structure on both sides consistent.
41- class Chunk (UUIDBaseModel , UpdatableBaseModel , table = True ):
42- hash : str = Field (max_length = 64 )
43- text : str = Field (sa_column = Column (Text ))
44- meta : dict | list = Field (default = {}, sa_column = Column (JSON ))
45- embedding : Any = Field (
46- sa_column = Column (
47- VectorType (settings .EMBEDDING_DIMS ), comment = "hnsw(distance=cosine)"
48- )
49- )
50- document_id : int = Field (foreign_key = "documents.id" , nullable = True )
51- document : "Document" = SQLRelationship (
52- sa_relationship_kwargs = {
53- "lazy" : "joined" ,
54- "primaryjoin" : "Chunk.document_id == Document.id" ,
55- },
56- )
57- relations : dict | list = Field (default = {}, sa_column = Column (JSON ))
58- source_uri : str = Field (max_length = 512 , nullable = True )
59-
60- # TODO: Add vector_index_status, vector_index_result column, vector index should be optional in the future.
61-
62- # TODO: Rename to kg_index_status, kg_index_result column.
63- index_status : KgIndexStatus = KgIndexStatus .NOT_STARTED
64- index_result : str = Field (sa_column = Column (Text , nullable = True ))
65-
66- __tablename__ = "chunks"
67-
68- def to_llama_text_node (self ) -> TextNode :
69- return TextNode (
70- id_ = self .id .hex ,
71- text = self .text ,
72- embedding = list (self .embedding ),
73- metadata = self .meta ,
74- )
75-
76-
7731def get_kb_chunk_model (kb : KnowledgeBase ) -> Type [SQLModel ]:
7832 vector_dimension = get_kb_vector_dims (kb )
79- chunks_table_name = get_kb_chunks_table_name (kb )
80- ctx = get_kb_scoped_registry (kb )
33+ return get_dynamic_chunk_model (vector_dimension , str (kb .id ))
8134
82- if ctx .chunk_model :
83- return ctx .chunk_model
8435
85- class KBChunk (PatchSQLModel , table = True , registry = ctx .registry ):
86- __tablename__ = chunks_table_name
87- __table_args__ = {"extend_existing" : True }
36+ @lru_cache (maxsize = None )
37+ def get_dynamic_chunk_model (
38+ vector_dimension : int ,
39+ namespace : Optional [str ] = None ,
40+ ) -> Type [SQLModel ]:
41+ namespace = format_namespace (namespace )
42+ chunk_table_name = f"chunks_{ namespace } "
43+ chunk_model_name = f"Chunk_{ namespace } _{ vector_dimension } "
8844
89- id : UUID = Field (
90- primary_key = True , index = True , nullable = False , default_factory = uuid7
91- )
45+ class Chunk (UUIDBaseModel , UpdatableBaseModel ):
9246 hash : str = Field (max_length = 64 )
9347 text : str = Field (sa_column = Column (Text ))
94- meta : dict | list = Field (default = {}, sa_column = Column (JSON ))
95- embedding : Any = Field (
96- sa_column = Column (
97- VectorType (vector_dimension ), comment = "hnsw(distance=cosine)"
98- )
99- )
48+ meta : dict = Field (default = {}, sa_column = Column (JSON ))
49+ embedding : list [float ] = Field (sa_type = VectorType (vector_dimension ))
10050 document_id : int = Field (foreign_key = "documents.id" , nullable = True )
101- document : "Document" = SQLRelationship ()
10251 relations : dict | list = Field (default = {}, sa_column = Column (JSON ))
10352 source_uri : str = Field (max_length = 512 , nullable = True )
10453
@@ -108,16 +57,6 @@ class KBChunk(PatchSQLModel, table=True, registry=ctx.registry):
10857 index_status : KgIndexStatus = KgIndexStatus .NOT_STARTED
10958 index_result : str = Field (sa_column = Column (Text , nullable = True ))
11059
111- created_at : Optional [datetime ] = Field (
112- default = None ,
113- sa_column = Column (DateTime (timezone = True ), server_default = func .now ()),
114- )
115- updated_at : Optional [datetime ] = Field (
116- default = None ,
117- sa_type = DateTime (timezone = True ),
118- sa_column_kwargs = {"server_default" : func .now (), "onupdate" : func .now ()},
119- )
120-
12160 def to_llama_text_node (self ) -> TextNode :
12261 return TextNode (
12362 id_ = self .id .hex ,
@@ -126,5 +65,23 @@ def to_llama_text_node(self) -> TextNode:
12665 metadata = self .meta ,
12766 )
12867
129- ctx .chunk_model = KBChunk
130- return KBChunk
68+ chunk_model = type (
69+ chunk_model_name ,
70+ (Chunk ,),
71+ {
72+ "__tablename__" : chunk_table_name ,
73+ "__table_args__" : {"extend_existing" : True },
74+ "__annotations__" : {
75+ "document" : Document ,
76+ },
77+ "document" : SQLRelationship (
78+ sa_relationship_kwargs = {
79+ "lazy" : "joined" ,
80+ "primaryjoin" : f"{ chunk_model_name } .document_id == Document.id" ,
81+ },
82+ ),
83+ },
84+ table = True ,
85+ )
86+
87+ return chunk_model
0 commit comments