File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -216,9 +216,17 @@ class ParsedText:
216216 warnings : list [str ]
217217
def __post_init__(self) -> None:
    """Ensure ``self.metadata["id"]`` holds a usable document ID.

    The ID is treated as absent when the ``"id"`` key is missing from
    ``self.metadata``, or when its value is ``None``, a pandas
    missing value (``pd.isna``), or a blank/whitespace-only string.
    In that case a warning is appended to ``self.warnings`` and logged,
    and the ID is replaced with the SHA-256 hex digest of the UTF-8
    encoded ``self.text``.
    """
    # Use .get() rather than indexing: a metadata dict with no "id" key
    # at all must fall through to hash generation, not raise KeyError.
    existing_id = self.metadata.get("id")
    id_is_empty = (
        existing_id is None
        or pd.isna(existing_id)
        or (isinstance(existing_id, str) and not existing_id.strip())
    )
    if id_is_empty:
        warning_msg = (
            "No valid document ID found. Generating one from content hash."
        )
        self.warnings.append(warning_msg)
        logger.warning(warning_msg)
        self.metadata["id"] = hashlib.sha256(self.text.encode("utf-8")).hexdigest()
You can’t perform that action at this time.
0 commit comments