Skip to content

Commit 5016e2a

Browse files
committed
Merge branch 'main' of github.com:raznem/parsera
2 parents 4c316e0 + 78952db commit 5016e2a

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

parsera/engine/simple_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ async def run(self, content: str, attributes: dict[str, str]) -> list[dict]:
109109
]
110110
```
111111
112-
If no data is found return empty json:
112+
If no data is found return empty list:
113113
```json
114114
[]
115115
```

parsera/engine/structured_extractor.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pydantic import BaseModel, Field, create_model
99

1010
from parsera.engine.chunks_extractor import ChunksTabularExtractor
11+
from parsera.utils import has_any_non_none_values
1112

1213

1314
class AttributeData(BaseModel):
@@ -67,7 +68,10 @@ async def extract(
6768
]
6869
structured_output = await self.structured_model.ainvoke(messages)
6970
output_dict = structured_output.model_dump(mode="json")
70-
return output_dict["data"]
71+
if has_any_non_none_values(output_dict["data"]):
72+
return output_dict["data"]
73+
else:
74+
return []
7175

7276
async def merge_all_data(
7377
self, all_data: list[list[dict]], attributes: dict[str, str]
@@ -107,7 +111,7 @@ def create_schema(self, attributes: dict[str, dict[str, Any]]) -> Type[BaseModel
107111
)
108112

109113
class ListSchemaModel(BaseModel):
110-
data: List[RecordModel]
114+
data: List[RecordModel] = Field(default_factory=list)
111115

112116
return ListSchemaModel
113117

parsera/utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,34 @@ def getinstance(*args, **kwargs):
77
return instances[class_]
88

99
return getinstance
10+
11+
12+
def has_any_non_none_values(input_data: dict | list) -> bool:
13+
"""
14+
Recursively checks if an arbitrarily nested dictionary or list contains any non-None values.
15+
16+
Args:
17+
input_data (dict or list): The dictionary or list to check. Can be nested.
18+
19+
Returns:
20+
bool: True if the data structure (or any nested structure) contains at least one value
21+
that is not None, False otherwise (i.e., all values are None or containers
22+
that ultimately only contain None).
23+
"""
24+
if isinstance(input_data, dict):
25+
for value in input_data.values():
26+
if value is not None:
27+
if not isinstance(value, (dict, list)):
28+
return True
29+
elif has_any_non_none_values(value):
30+
return True
31+
32+
elif isinstance(input_data, list):
33+
for item in input_data:
34+
if item is not None:
35+
if not isinstance(item, (dict, list)):
36+
return True
37+
elif has_any_non_none_values(item):
38+
return True
39+
40+
return False

0 commit comments

Comments
 (0)