Skip to content

Commit c1620fa

Browse files
authored
handle neo4j client error when initializing vector store (#421)
* handle neo4j client error when initializing vector store
* include text
* .
1 parent fccf67c commit c1620fa

File tree

4 files changed: 25 additions and 7 deletions.

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## 0.5.13
2+
3+
### Fixes
4+
5+
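* **Handle schema conflict on neo4j** When creating the vector index raises a `ClientError` with code `Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists`, log that the index already exists instead of failing; other client errors are re-raised.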
6+
7+
### Fixes
18

29
## 0.5.12
310

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.12" # pragma: no cover
1+
__version__ = "0.5.13" # pragma: no cover

unstructured_ingest/v2/processes/connectors/neo4j.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def _create_lexical_graph(self, elements: list[dict], document_node: _Node) -> "
154154
self._add_entities(element, graph, element_node)
155155

156156
if self._is_chunk(element):
157-
for origin_element in format_and_truncate_orig_elements(element):
157+
for origin_element in format_and_truncate_orig_elements(element, include_text=True):
158158
origin_element_node = self._create_element_node(origin_element)
159159

160160
graph.add_edge(
@@ -327,21 +327,29 @@ async def _create_uniqueness_constraints(self, client: AsyncDriver) -> None:
327327
async def _create_vector_index(
328328
self, client: AsyncDriver, dimensions: int, similarity_function: SimilarityFunction
329329
) -> None:
330+
import neo4j.exceptions
331+
330332
label = Label.CHUNK
331333
logger.info(
332334
f"Creating index on nodes labeled '{label.value}' if it does not already exist."
333335
)
334336
index_name = f"{label.value.lower()}_vector"
335-
await client.execute_query(
336-
f"""
337+
try:
338+
await client.execute_query(
339+
f"""
337340
CREATE VECTOR INDEX {index_name} IF NOT EXISTS
338341
FOR (n:{label.value}) ON n.embedding
339342
OPTIONS {{indexConfig: {{
340343
`vector.similarity_function`: '{similarity_function}',
341344
`vector.dimensions`: {dimensions}}}
342345
}}
343346
"""
344-
)
347+
)
348+
except neo4j.exceptions.ClientError as e:
349+
if e.code == "Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists":
350+
logger.info(f"Index on nodes labeled '{label.value}' already exists.")
351+
else:
352+
raise
345353

346354
async def _delete_old_data_if_exists(self, file_data: FileData, client: AsyncDriver) -> None:
347355
logger.info(f"Deleting old data for the record '{file_data.identifier}' (if present).")

unstructured_ingest/v2/processes/connectors/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ def conform_string_to_dict(value: Any) -> dict:
3131
raise ValidationError(f"Input could not be mapped to a valid dict: {value}")
3232

3333

34-
def format_and_truncate_orig_elements(element: dict) -> list[dict[str, Any]]:
34+
def format_and_truncate_orig_elements(
35+
element: dict, include_text: bool = False
36+
) -> list[dict[str, Any]]:
3537
"""
3638
This function is used to format and truncate the orig_elements field in the metadata.
3739
This is used to remove the text field and other larger fields from the orig_elements
@@ -42,7 +44,8 @@ def format_and_truncate_orig_elements(element: dict) -> list[dict[str, Any]]:
4244
orig_elements = []
4345
if raw_orig_elements is not None:
4446
for element in elements_from_base64_gzipped_json(raw_orig_elements):
45-
element.pop("text", None)
47+
if not include_text:
48+
element.pop("text", None)
4649
for prop in (
4750
"image_base64",
4851
"text_as_html",

0 commit comments

Comments
 (0)