diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index ef3135df5d..970b68f163 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -51,6 +51,7 @@ class ConversionStatus(str, Enum): FAILURE = "failure" SUCCESS = "success" PARTIAL_SUCCESS = "partial_success" + TIMEOUT = "timeout" SKIPPED = "skipped" diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index dd15b6b7d5..25d77823b6 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -1050,7 +1050,7 @@ class PipelineOptions(BaseOptions): Field( description=( "Maximum processing time in seconds before aborting document conversion. When exceeded, the pipeline " - "stops processing and returns partial results with PARTIAL_SUCCESS status. If None, no timeout is " + "stops processing and returns partial results with TIMEOUT status. If None, no timeout is " "enforced. Recommended: 90-120 seconds for production systems." ), examples=[10.0, 20.0], diff --git a/docling/document_converter.py b/docling/document_converter.py index 37765413b6..ee4dd28b82 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -449,6 +449,7 @@ def convert_all( if raises_on_error and conv_res.status not in { ConversionStatus.SUCCESS, ConversionStatus.PARTIAL_SUCCESS, + ConversionStatus.TIMEOUT, }: error_details = "" if conv_res.errors: diff --git a/docling/document_extractor.py b/docling/document_extractor.py index ae66f9e472..86fa1cbd89 100644 --- a/docling/document_extractor.py +++ b/docling/document_extractor.py @@ -181,6 +181,7 @@ def extract_all( if raises_on_error and ext_res.status not in { ConversionStatus.SUCCESS, ConversionStatus.PARTIAL_SUCCESS, + ConversionStatus.TIMEOUT, }: raise ConversionError( f"Extraction failed for: {ext_res.input.file} with status: {ext_res.status}" diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 356ca477d9..20bff59062 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -272,7 +272,7 @@ def _build_document(self, conv_res: ConversionResult) -> ConversionResult: _log.warning( f"Document processing time ({total_elapsed_time:.3f} seconds) exceeded the specified timeout of {self.pipeline_options.document_timeout:.3f} seconds" ) - conv_res.status = ConversionStatus.PARTIAL_SUCCESS + conv_res.status = ConversionStatus.TIMEOUT break total_pages_processed += len(page_batch) _log.debug( @@ -318,7 +318,7 @@ def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus: if status in [ ConversionStatus.PENDING, ConversionStatus.STARTED, - ]: # preserves ConversionStatus.PARTIAL_SUCCESS + ]: # preserves ConversionStatus.PARTIAL_SUCCESS and TIMEOUT status = ConversionStatus.SUCCESS for page in conv_res.pages: diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index f567a9e370..3bc5546d79 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -764,8 +764,9 @@ def _integrate_results( ) conv_res.errors.append(error_item) if timeout_exceeded and proc.total_expected > 0: - # Timeout exceeded: set PARTIAL_SUCCESS if any pages were attempted - conv_res.status = ConversionStatus.PARTIAL_SUCCESS + # Timeout exceeded: use dedicated TIMEOUT status so downstream + # consumers can distinguish this from individual page failures. + conv_res.status = ConversionStatus.TIMEOUT elif proc.is_complete_failure: conv_res.status = ConversionStatus.FAILURE elif proc.is_partial_success: diff --git a/tests/test_options.py b/tests/test_options.py index 76ccb2718b..b0cc1f2369 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -129,7 +129,7 @@ def test_document_timeout(test_doc_path): } ) result = converter.convert(test_doc_path) - assert result.status == ConversionStatus.PARTIAL_SUCCESS, ( + assert result.status == ConversionStatus.TIMEOUT, ( "Expected document timeout to be used" ) @@ -142,7 +142,7 @@ def test_document_timeout(test_doc_path): } ) result = converter.convert(test_doc_path) - assert result.status == ConversionStatus.PARTIAL_SUCCESS, ( + assert result.status == ConversionStatus.TIMEOUT, ( "Expected document timeout to be used" )