Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
ConfigDict,
Field,
FieldSerializationInfo,
PrivateAttr,
computed_field,
field_serializer,
field_validator,
model_validator,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -505,10 +507,50 @@ class ConfidenceReport(PageConfidenceScores):
pages: dict[int, PageConfidenceScores] = Field(
default_factory=lambda: defaultdict(PageConfidenceScores)
)
_mean_score_override: ScoreValue = PrivateAttr(default=np.nan)
_low_score_override: ScoreValue = PrivateAttr(default=np.nan)

@staticmethod
def _coerce_override_score(value: Any) -> ScoreValue:
    """Convert a raw override value into a ``ScoreValue``.

    ``None`` and strings that, once stripped and lower-cased, equal
    ``""``, ``"nan"``, ``"none"`` or ``"null"`` are treated as "no value"
    and become NaN. Anything else is handed to ``ScoreValue`` unchanged,
    so conversion errors raised there propagate to the caller.
    """
    missing_markers = ("", "nan", "none", "null")
    is_missing = value is None or (
        isinstance(value, str) and value.strip().lower() in missing_markers
    )
    return ScoreValue(np.nan if is_missing else value)

@model_validator(mode="wrap")
@classmethod
def _accept_flat_confidence_scores(cls, value, handler):
    """Validate input that may carry flat ``mean_score`` / ``low_score`` values.

    When ``value`` is a dict, its ``mean_score`` and ``low_score`` entries
    are coerced with ``_coerce_override_score``. Those two keys, together
    with ``mean_grade`` and ``low_grade``, are then left out of the copy
    passed to ``handler``; the caller's dict is not mutated. Non-dict input
    is forwarded to ``handler`` untouched.

    The coerced scores are stored on the private override attributes only
    when the validated model has no pages; otherwise they are discarded.
    """
    mean_override = ScoreValue(np.nan)
    low_override = ScoreValue(np.nan)
    payload = value

    if isinstance(value, dict):
        # Coerce before filtering so a bad override fails prior to validation.
        mean_override = cls._coerce_override_score(value.get("mean_score"))
        low_override = cls._coerce_override_score(value.get("low_score"))
        flat_keys = ("mean_score", "low_score", "mean_grade", "low_grade")
        payload = {
            key: item for key, item in value.items() if key not in flat_keys
        }

    report = handler(payload)
    if report.pages:
        # Per-page scores are present; the flat overrides are not applied.
        return report

    report._mean_score_override = mean_override
    report._low_score_override = low_override
    return report

@computed_field # type: ignore
@property
def mean_score(self) -> ScoreValue:
if not np.isnan(self._mean_score_override):
return self._mean_score_override
if not self.pages:
return super().mean_score
return ScoreValue(
np.nanmean(
[c.mean_score for c in self.pages.values()],
Expand All @@ -518,6 +560,10 @@ def mean_score(self) -> ScoreValue:
@computed_field # type: ignore
@property
def low_score(self) -> ScoreValue:
if not np.isnan(self._low_score_override):
return self._low_score_override
if not self.pages:
return super().low_score
return ScoreValue(
np.nanmean(
[c.low_score for c in self.pages.values()],
Expand Down
11 changes: 11 additions & 0 deletions docling/service_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from docling.backend.noop_backend import NoOpBackend
from docling.datamodel.base_models import (
ConfidenceReport,
ConversionStatus,
DoclingComponentType,
ErrorItem,
Expand Down Expand Up @@ -367,6 +368,11 @@ def _build_conversion_result(
status=payload.status,
errors=payload.errors,
timings=payload.timings,
confidence=ConfidenceReport.model_validate(
{}
if payload.confidence is None
else payload.confidence.model_dump(mode="json")
),
document=document,
)

Expand Down Expand Up @@ -1756,6 +1762,11 @@ def _build_conversion_result_from_artifact_item(
status=item.status,
errors=item.errors,
timings=item.timings,
confidence=ConfidenceReport.model_validate(
{}
if item.confidence is None
else item.confidence.model_dump(mode="json")
),
document=document,
)

Expand Down
1 change: 1 addition & 0 deletions tests/test_service_client_sdk_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def _convert_payload(source_name: str) -> SimpleNamespace:
status=ConversionStatus.SUCCESS,
errors=[],
timings={},
confidence=None,
document=SimpleNamespace(
filename=None,
json_content=DoclingDocument(name=PurePath(source_name).stem),
Expand Down
Loading