Skip to content

Commit 6a54bd5

Browse files
feat: add ConversionStatus.TIMEOUT to differentiate from page failures
Add a dedicated TIMEOUT status so downstream consumers can distinguish between partial results caused by document_timeout being reached and individual page conversion failures (which remain PARTIAL_SUCCESS). Fixes #3205. Signed-off-by: Joaquin Hui <joaquinhui1995@gmail.com>
1 parent f283484 commit 6a54bd5

File tree

7 files changed

+11
-7
lines changed

7 files changed

+11
-7
lines changed

docling/datamodel/base_models.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ class ConversionStatus(str, Enum):
5151
FAILURE = "failure"
5252
SUCCESS = "success"
5353
PARTIAL_SUCCESS = "partial_success"
54+
TIMEOUT = "timeout"
5455
SKIPPED = "skipped"
5556

5657

docling/datamodel/pipeline_options.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1050,7 +1050,7 @@ class PipelineOptions(BaseOptions):
10501050
Field(
10511051
description=(
10521052
"Maximum processing time in seconds before aborting document conversion. When exceeded, the pipeline "
1053-
"stops processing and returns partial results with PARTIAL_SUCCESS status. If None, no timeout is "
1053+
"stops processing and returns partial results with TIMEOUT status. If None, no timeout is "
10541054
"enforced. Recommended: 90-120 seconds for production systems."
10551055
),
10561056
examples=[10.0, 20.0],

docling/document_converter.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -449,6 +449,7 @@ def convert_all(
449449
if raises_on_error and conv_res.status not in {
450450
ConversionStatus.SUCCESS,
451451
ConversionStatus.PARTIAL_SUCCESS,
452+
ConversionStatus.TIMEOUT,
452453
}:
453454
error_details = ""
454455
if conv_res.errors:

docling/document_extractor.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,7 @@ def extract_all(
181181
if raises_on_error and ext_res.status not in {
182182
ConversionStatus.SUCCESS,
183183
ConversionStatus.PARTIAL_SUCCESS,
184+
ConversionStatus.TIMEOUT,
184185
}:
185186
raise ConversionError(
186187
f"Extraction failed for: {ext_res.input.file} with status: {ext_res.status}"

docling/pipeline/base_pipeline.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
272272
_log.warning(
273273
f"Document processing time ({total_elapsed_time:.3f} seconds) exceeded the specified timeout of {self.pipeline_options.document_timeout:.3f} seconds"
274274
)
275-
conv_res.status = ConversionStatus.PARTIAL_SUCCESS
275+
conv_res.status = ConversionStatus.TIMEOUT
276276
break
277277
total_pages_processed += len(page_batch)
278278
_log.debug(
@@ -318,7 +318,7 @@ def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
318318
if status in [
319319
ConversionStatus.PENDING,
320320
ConversionStatus.STARTED,
321-
]: # preserves ConversionStatus.PARTIAL_SUCCESS
321+
]: # preserves ConversionStatus.PARTIAL_SUCCESS and TIMEOUT
322322
status = ConversionStatus.SUCCESS
323323

324324
for page in conv_res.pages:

docling/pipeline/standard_pdf_pipeline.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -764,8 +764,9 @@ def _integrate_results(
764764
)
765765
conv_res.errors.append(error_item)
766766
if timeout_exceeded and proc.total_expected > 0:
767-
# Timeout exceeded: set PARTIAL_SUCCESS if any pages were attempted
768-
conv_res.status = ConversionStatus.PARTIAL_SUCCESS
767+
# Timeout exceeded: use dedicated TIMEOUT status so downstream
768+
# consumers can distinguish this from individual page failures.
769+
conv_res.status = ConversionStatus.TIMEOUT
769770
elif proc.is_complete_failure:
770771
conv_res.status = ConversionStatus.FAILURE
771772
elif proc.is_partial_success:

tests/test_options.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ def test_document_timeout(test_doc_path):
129129
}
130130
)
131131
result = converter.convert(test_doc_path)
132-
assert result.status == ConversionStatus.PARTIAL_SUCCESS, (
132+
assert result.status == ConversionStatus.TIMEOUT, (
133133
"Expected document timeout to be used"
134134
)
135135

@@ -142,7 +142,7 @@ def test_document_timeout(test_doc_path):
142142
}
143143
)
144144
result = converter.convert(test_doc_path)
145-
assert result.status == ConversionStatus.PARTIAL_SUCCESS, (
145+
assert result.status == ConversionStatus.TIMEOUT, (
146146
"Expected document timeout to be used"
147147
)
148148

0 commit comments

Comments
 (0)