Skip to content

Commit d363b41

Browse files
authored
fix: check document id after pre-processing (#263)
Because the document id might only be computed during pre-processing.
1 parent e747cdf commit d363b41

1 file changed

Lines changed: 6 additions & 7 deletions

File tree

app/indexing.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -311,13 +311,6 @@ def from_result(self, result: FetcherResult) -> FetcherResult:
311311
if data is None:
312312
# unexpected !
313313
return FetcherResult(status=FetcherStatus.OTHER, document=None)
314-
id_field_name = self.config.index.id_field_name
315-
316-
_id = data.get(id_field_name)
317-
if _id is None or _id in self.config.document_denylist:
318-
# We don't process the document if it has no ID or if it's in the
319-
# denylist
320-
return FetcherResult(status=FetcherStatus.SKIP, document=None)
321314

322315
processed_result = (
323316
self.preprocessor.preprocess(data)
@@ -326,6 +319,8 @@ def from_result(self, result: FetcherResult) -> FetcherResult:
326319
else result
327320
)
328321

322+
id_field_name = self.config.index.id_field_name
323+
_id = data.get(id_field_name)
329324
if processed_result.status == FetcherStatus.REMOVED:
330325
return FetcherResult(
331326
status=FetcherStatus.REMOVED,
@@ -336,6 +331,10 @@ def from_result(self, result: FetcherResult) -> FetcherResult:
336331
or processed_result.document is None
337332
):
338333
return processed_result
334+
elif _id is None or _id in self.config.document_denylist:
335+
# We don't add the document if it has no ID or if it's in the
336+
# denylist
337+
return FetcherResult(status=FetcherStatus.SKIP, document={"_id": _id})
339338

340339
processed_data = processed_result.document
341340

0 commit comments

Comments
 (0)