33
44import sqlalchemy as sa
55from pydantic import model_validator
6- from sqlalchemy import Column , DateTime , ForeignKey , Index , UniqueConstraint
6+ from sqlalchemy import JSON , Column , DateTime , ForeignKey , Index , Text , UniqueConstraint
77from sqlmodel import Field , Relationship , SQLModel
88
99
@@ -13,11 +13,11 @@ class MemoryBaseBase(SQLModel):
1313 user_id : UUID = Field (index = True )
1414 threshold : int = Field (default = 50 )
1515 auto_capture : bool = Field (default = True )
16- # Preprocessing config — accepted in payload but logic deferred to future scope
1716 embedding_model : str = Field (default = "" )
1817 preprocessing : bool = Field (default = False )
1918 preproc_model : str | None = Field (default = None )
2019 preproc_instructions : str | None = Field (default = None )
20+ preproc_kill_phrase : str | None = Field (default = None )
2121
2222
2323class MemoryBase (MemoryBaseBase , table = True ): # type: ignore[call-arg]
@@ -42,10 +42,17 @@ class MemoryBaseCreate(MemoryBaseBase):
4242 user_id : UUID | None = None # Derived from auth token in the endpoint; not required in request body
4343
@model_validator(mode="after")
def preprocessing_defaults(self) -> "MemoryBaseCreate":
    """Validate and complete the preprocessing settings of a create payload.

    Nothing is checked or changed when ``preprocessing`` is falsy. When it
    is enabled:

    * ``preproc_model`` must be set, otherwise the payload is rejected;
    * a missing or empty ``preproc_kill_phrase`` is replaced by
      ``DEFAULT_KILL_PHRASE`` so the deterministic gate is still available
      to callers that did not supply their own phrase.

    Returns:
        The same instance, possibly with ``preproc_kill_phrase`` filled in.

    Raises:
        ValueError: if preprocessing is enabled without ``preproc_model``.
    """
    # Every rule below applies only when preprocessing is switched on.
    if not self.preprocessing:
        return self

    if not self.preproc_model:
        msg = "preproc_model is required when preprocessing is enabled"
        raise ValueError(msg)

    if not self.preproc_kill_phrase:
        # Imported lazily so the model module stays free of service-layer deps.
        from langflow.services.memory_base.preprocessing import DEFAULT_KILL_PHRASE

        self.preproc_kill_phrase = DEFAULT_KILL_PHRASE

    return self
5057
5158
@@ -56,6 +63,7 @@ class MemoryBaseUpdate(SQLModel):
5663 preprocessing : bool | None = None
5764 preproc_model : str | None = None
5865 preproc_instructions : str | None = None
66+ preproc_kill_phrase : str | None = None
5967
6068
6169class MemoryBaseRead (MemoryBaseBase ):
@@ -197,3 +205,66 @@ class MessageIngestionRecord(SQLModel, table=True): # type: ignore[call-arg]
197205 # Denormalized from MessageTable.session_id — immutable, avoids JOIN on the hot query path
198206 session_id : str = Field (sa_column = Column (sa .String (), nullable = False ))
199207 ingested_at : datetime = Field (sa_column = Column (DateTime (timezone = True ), nullable = False ))
208+
209+
class MemoryBasePreprocessingOutput(SQLModel, table=True):  # type: ignore[call-arg]
    """Stored result of one preprocessing batch, recorded before the KB write.

    Each row holds the LLM-distilled output for a single batch. The ``status``
    column moves through these values:

    * ``processed`` — the LLM produced output and the Chroma write is still
      pending. The cursor is NOT advanced. The next ingestion job for the
      session reuses this row and retries only the Chroma write (the LLM is
      not invoked again).
    * ``ingested`` — the Chroma write is confirmed, the cursor has advanced,
      and the row is visible in the get-messages view.
    * ``skipped`` — the LLM emitted the kill phrase. There is no Chroma write
      and no ``output_text``, but the cursor advances so the same batch is
      not re-evaluated.
    """

    __tablename__ = "memory_base_preprocessing_output"
    __table_args__ = (
        # (memory base, session, status) lookups ordered by creation time.
        Index("ix_mbpo_pending", "memory_base_id", "session_id", "status", "created_at"),
        # Per-session listing ordered by creation time, regardless of status.
        Index("ix_mbpo_listing", "memory_base_id", "session_id", "created_at"),
        Index("ix_mbpo_job_id", "job_id"),
    )

    id: UUID = Field(default_factory=uuid4, primary_key=True)

    # Owning memory base; rows are removed together with it (ON DELETE CASCADE).
    memory_base_id: UUID = Field(
        sa_column=Column(sa.Uuid(), ForeignKey("memory_base.id", ondelete="CASCADE"), nullable=False),
    )

    # Denormalized — immutable for the row's lifetime.
    session_id: str = Field(sa_column=Column(sa.String(), nullable=False))

    # Optional link to a job row; reset to NULL when that job is deleted.
    job_id: UUID | None = Field(
        default=None,
        sa_column=Column(sa.Uuid(), ForeignKey("job.job_id", ondelete="SET NULL"), nullable=True),
    )

    # One of the status values described in the class docstring.
    status: str = Field(sa_column=Column(sa.String(), nullable=False))

    # Distilled text; nullable because skipped batches carry no output.
    output_text: str | None = Field(default=None, sa_column=Column(Text(), nullable=True))

    # Canonical batch identity — JSON list of message UUIDs as strings.
    source_message_ids: list = Field(default_factory=list, sa_column=Column(JSON(), nullable=False))

    model_used: str = Field(sa_column=Column(sa.String(), nullable=False))

    # Both timestamps default to the current time in UTC (timezone-aware).
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        sa_column=Column(DateTime(timezone=True), nullable=False),
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        sa_column=Column(DateTime(timezone=True), nullable=False),
    )
0 commit comments