Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class ConversionStatus(str, Enum):
FAILURE = "failure"
SUCCESS = "success"
PARTIAL_SUCCESS = "partial_success"
+ TIMEOUT = "timeout"
SKIPPED = "skipped"


Expand Down
2 changes: 1 addition & 1 deletion docling/datamodel/pipeline_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ class PipelineOptions(BaseOptions):
Field(
description=(
"Maximum processing time in seconds before aborting document conversion. When exceeded, the pipeline "
- "stops processing and returns partial results with PARTIAL_SUCCESS status. If None, no timeout is "
+ "stops processing and returns partial results with TIMEOUT status. If None, no timeout is "
"enforced. Recommended: 90-120 seconds for production systems."
),
examples=[10.0, 20.0],
Expand Down
1 change: 1 addition & 0 deletions docling/document_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def convert_all(
if raises_on_error and conv_res.status not in {
ConversionStatus.SUCCESS,
ConversionStatus.PARTIAL_SUCCESS,
+ ConversionStatus.TIMEOUT,
}:
error_details = ""
if conv_res.errors:
Expand Down
1 change: 1 addition & 0 deletions docling/document_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ def extract_all(
if raises_on_error and ext_res.status not in {
ConversionStatus.SUCCESS,
ConversionStatus.PARTIAL_SUCCESS,
+ ConversionStatus.TIMEOUT,
}:
raise ConversionError(
f"Extraction failed for: {ext_res.input.file} with status: {ext_res.status}"
Expand Down
4 changes: 2 additions & 2 deletions docling/pipeline/base_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
_log.warning(
f"Document processing time ({total_elapsed_time:.3f} seconds) exceeded the specified timeout of {self.pipeline_options.document_timeout:.3f} seconds"
)
- conv_res.status = ConversionStatus.PARTIAL_SUCCESS
+ conv_res.status = ConversionStatus.TIMEOUT
break
total_pages_processed += len(page_batch)
_log.debug(
Expand Down Expand Up @@ -318,7 +318,7 @@ def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
if status in [
ConversionStatus.PENDING,
ConversionStatus.STARTED,
- ]: # preserves ConversionStatus.PARTIAL_SUCCESS
+ ]: # preserves ConversionStatus.PARTIAL_SUCCESS and TIMEOUT
status = ConversionStatus.SUCCESS

for page in conv_res.pages:
Expand Down
5 changes: 3 additions & 2 deletions docling/pipeline/standard_pdf_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,8 +764,9 @@ def _integrate_results(
)
conv_res.errors.append(error_item)
if timeout_exceeded and proc.total_expected > 0:
- # Timeout exceeded: set PARTIAL_SUCCESS if any pages were attempted
- conv_res.status = ConversionStatus.PARTIAL_SUCCESS
+ # Timeout exceeded: use dedicated TIMEOUT status so downstream
+ # consumers can distinguish this from individual page failures.
+ conv_res.status = ConversionStatus.TIMEOUT
elif proc.is_complete_failure:
conv_res.status = ConversionStatus.FAILURE
elif proc.is_partial_success:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_document_timeout(test_doc_path):
}
)
result = converter.convert(test_doc_path)
- assert result.status == ConversionStatus.PARTIAL_SUCCESS, (
+ assert result.status == ConversionStatus.TIMEOUT, (
"Expected document timeout to be used"
)

Expand All @@ -142,7 +142,7 @@ def test_document_timeout(test_doc_path):
}
)
result = converter.convert(test_doc_path)
- assert result.status == ConversionStatus.PARTIAL_SUCCESS, (
+ assert result.status == ConversionStatus.TIMEOUT, (
"Expected document timeout to be used"
)

Expand Down
Loading