Skip to content

Commit 1bbdc56

Browse files
authored
Update Unstructured Provider, Fix Type Error in SDK (#2103)
* Fix SDK type error
* Update dashboard tag
1 parent c8dd670 commit 1bbdc56

File tree

7 files changed: 25 additions and 20 deletions

7 files changed

+25
-20
lines changed

docker/compose.full.swarm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ services:
386386
tag: backend
387387

388388
r2r-dashboard:
389-
image: sciphiai/r2r-dashboard:1.0.2
389+
image: sciphiai/r2r-dashboard:1.0.3
390390
environment:
391391
- NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272}
392392
- NEXT_PUBLIC_HATCHET_DASHBOARD_URL=${HATCHET_DASHBOARD_URL:-http://localhost:${R2R_HATCHET_DASHBOARD_PORT:-7274}}

docker/compose.full.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ services:
193193
tag: backend
194194

195195
r2r-dashboard:
196-
image: sciphiai/r2r-dashboard:1.0.2
196+
image: sciphiai/r2r-dashboard:1.0.3
197197
env_file:
198198
- ./env/r2r-dashboard.env
199199
ports:

docker/compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ services:
5050
- host.docker.internal:host-gateway
5151

5252
r2r-dashboard:
53-
image: sciphiai/r2r-dashboard:1.0.2
53+
image: sciphiai/r2r-dashboard:1.0.3
5454
env_file:
5555
- ./env/r2r-dashboard.env
5656
ports:

py/core/providers/ingestion/r2r/base.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,6 @@ async def parse(
259259
logger.info(
260260
f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}"
261261
)
262-
# TODO - Cleanup this approach to be less hardcoded
263-
# if (
264-
# document.document_type != DocumentType.PDF
265-
# or parser_overrides[DocumentType.PDF.value] != "zerox"
266-
# ):
267-
# raise ValueError(
268-
# "Only Zerox PDF parser override is available."
269-
# )
270-
271262
if parser_overrides[DocumentType.PDF.value] == "zerox":
272263
# Collect content from VLMPDFParser
273264
async for chunk in self.parsers[

py/core/providers/ingestion/unstructured/base.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -290,12 +290,26 @@ async def parse(
290290
logger.info(
291291
f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}"
292292
)
293-
async for element in self.parse_fallback(
294-
file_content,
295-
ingestion_config=ingestion_config,
296-
parser_name=f"zerox_{DocumentType.PDF.value}",
297-
):
298-
elements.append(element)
293+
if parser_overrides[document.document_type.value] == "zerox":
294+
async for element in self.parse_fallback(
295+
file_content,
296+
ingestion_config=ingestion_config,
297+
parser_name=f"zerox_{DocumentType.PDF.value}",
298+
):
299+
logger.warning(
300+
f"Using parser_override for {document.document_type}"
301+
)
302+
elements.append(element)
303+
elif parser_overrides[document.document_type.value] == "ocr":
304+
async for element in self.parse_fallback(
305+
file_content,
306+
ingestion_config=ingestion_config,
307+
parser_name=f"ocr_{DocumentType.PDF.value}",
308+
):
309+
logger.warning(
310+
f"Using OCR parser_override for {document.document_type}"
311+
)
312+
elements.append(element)
299313

300314
elif document.document_type in self.R2R_FALLBACK_PARSERS.keys():
301315
logger.info(

py/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "r2r"
7-
version = "3.5.7"
7+
version = "3.5.8"
88
description = "SciPhi R2R"
99
readme = "README.md"
1010
license = {text = "MIT"}

py/sdk/sync_methods/documents.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def create(
6262
dict | IngestionMode
6363
] = None, # Type hint clarifies IngestionMode enum usage
6464
run_with_orchestration: Optional[bool] = True,
65-
ingestion_mode: IngestionMode = IngestionMode.custom, # Added enum parameter as per router
65+
ingestion_mode: IngestionMode | str = IngestionMode.custom,
6666
) -> WrappedIngestionResponse:
6767
"""Create a new document from either a file, raw text, or chunks.
6868

0 commit comments

Comments
 (0)