Skip to content

Commit a8a9e3b

Browse files
committed
updating integration tests
1 parent 3a4dc55 commit a8a9e3b

1 file changed

Lines changed: 74 additions & 39 deletions

File tree

integrations/supabase/src/haystack_integrations/document_stores/supabase/groonga_document_store.py

Lines changed: 74 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,8 @@
99
from haystack import default_from_dict, default_to_dict, logging
1010
from haystack.dataclasses import Document
1111
from haystack.document_stores.errors import DuplicateDocumentError
12-
from haystack.errors import FilterError
1312
from haystack.document_stores.types import DocumentStore, DuplicatePolicy
13+
from haystack.errors import FilterError
1414
from haystack.utils.auth import Secret, deserialize_secrets_inplace
1515
from postgrest import CountMethod
1616

@@ -156,98 +156,133 @@ def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Docume
156156
result = query.execute()
157157
return [self._to_haystack_document(row) for row in result.data if isinstance(row, dict)]
158158

159+
@staticmethod
def _meta_col(field: str, value: Any) -> str:
    """
    Choose the PostgREST column expression for a filter field.

    Fields without the ``meta.`` prefix are returned unchanged. For meta fields the
    JSONB accessor (``meta->key``) is used when the value is a number, or a list that
    holds at least one number and otherwise only numbers and ``None``, so that
    PostgREST performs a numeric comparison. Every other value (strings, booleans,
    ``None``, empty lists, mixed lists, and lists holding only ``None``) gets the
    text accessor (``meta->>key``), which returns the JSON value as text.

    :param field: Haystack field name, e.g. ``"meta.year"`` or ``"content"``.
    :param value: The filter value the column will be compared against.
    :returns: The column expression to pass to the PostgREST query builder.
    """
    prefix = "meta."
    if not field.startswith(prefix):
        return field
    key = field[len(prefix):]

    def is_number(item: Any) -> bool:
        # bool is a subclass of int, so it has to be excluded explicitly.
        return isinstance(item, (int, float)) and not isinstance(item, bool)

    if isinstance(value, list):
        # None entries are ignored, but a list with no real number in it must not be
        # treated as numeric: all() over an empty sequence is vacuously True.
        present = [item for item in value if item is not None]
        numeric = bool(present) and all(is_number(item) for item in present)
    else:
        numeric = is_number(value)

    return f"meta->{key}" if numeric else f"meta->>{key}"
177+
178+
@staticmethod
def _normalize_value(value: Any) -> Any:
    """
    Convert Python booleans to lowercase strings compatible with the JSONB text accessor.

    A bare boolean becomes ``"true"`` or ``"false"``. Booleans inside a list are
    converted the same way, because list values that contain booleans are compared
    through the text accessor as well. All other values are returned unchanged.

    :param value: A filter value of any type.
    :returns: The normalized value; lists are returned as a new list.
    """
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, list):
        return [("true" if item else "false") if isinstance(item, bool) else item for item in value]
    return value
184+
159185
@staticmethod
160186
def _apply_filters(query: Any, filters: dict[str, Any]) -> Any:
161187
"""
162188
Applies Haystack filters to a PostgREST query builder.
163189
164-
Supports AND logical operator and all standard comparison operators.
165-
OR is supported for simple (non-nested) equality/comparison conditions.
190+
Supports AND, OR, NOT logical operators and all standard comparison operators.
191+
OR and NOT are supported for simple (non-nested) conditions only.
166192
167193
:param query: The Supabase query builder.
168194
:param filters: Haystack filter dict.
169195
:returns: The query with filters applied.
170-
:raises FilterError: For unsupported operators or invalid value types.
196+
:raises FilterError: For unsupported operators, invalid value types, or malformed filters.
171197
"""
172198
if not filters:
173199
return query
174200

175-
# Simple comparison: {"field": "...", "operator": "...", "value": "..."}
176201
if "field" in filters:
177202
return SupabaseGroongaDocumentStore._apply_condition(query, filters)
178203

179-
op = filters.get("operator", "AND")
180-
conditions = filters.get("conditions", [])
204+
if "operator" not in filters:
205+
msg = "Logical filter must include an 'operator' key ('AND', 'OR', 'NOT')."
206+
raise FilterError(msg)
207+
208+
if "conditions" not in filters:
209+
msg = "Logical filter must include a 'conditions' key."
210+
raise FilterError(msg)
211+
212+
op = filters["operator"]
213+
conditions = filters["conditions"]
181214

182215
if op == "AND":
183216
for cond in conditions:
184217
query = SupabaseGroongaDocumentStore._apply_filters(query, cond)
185218
return query
186219

187-
if op == "OR":
220+
if op in ("OR", "NOT"):
221+
neg_map = {"==": "neq", "!=": "eq", ">": "lte", ">=": "lt", "<": "gte", "<=": "gt"}
222+
pg_op_map = {"==": "eq", "!=": "neq", ">": "gt", ">=": "gte", "<": "lt", "<=": "lte"}
223+
op_map = neg_map if op == "NOT" else pg_op_map
188224
parts = []
189225
for cond in conditions:
190226
if "field" not in cond:
191-
msg = "Nested logical operators inside OR are not supported."
227+
msg = f"Nested logical operators inside {op} are not supported."
228+
raise FilterError(msg)
229+
cond_field = cond.get("field", "")
230+
cond_op = cond.get("operator", "")
231+
cond_value = cond.get("value")
232+
if cond_op not in op_map:
233+
msg = f"Operator '{cond_op}' inside {op} filter is not supported."
192234
raise FilterError(msg)
193-
parts.append(SupabaseGroongaDocumentStore._condition_to_or_part(cond))
235+
col = SupabaseGroongaDocumentStore._meta_col(cond_field, cond_value)
236+
norm = SupabaseGroongaDocumentStore._normalize_value(cond_value)
237+
parts.append(f"{col}.{op_map[cond_op]}.{norm}")
194238
return query.or_(",".join(parts))
195239

196-
msg = f"Filter operator '{op}' is not supported. Supported logical operators: AND, OR."
240+
msg = f"Filter operator '{op}' is not supported. Supported logical operators: AND, OR, NOT."
197241
raise FilterError(msg)
198242

199243
@staticmethod
200-
def _condition_to_or_part(condition: dict[str, Any]) -> str:
244+
def _apply_condition(query: Any, condition: dict[str, Any]) -> Any:
201245
field: str = condition.get("field", "")
202-
op: str = condition.get("operator", "==")
203-
value = condition.get("value")
204-
col = f"meta->>{field[len('meta.'):]}" if field.startswith("meta.") else field
205-
pg_op = {"==": "eq", "!=": "neq", ">": "gt", ">=": "gte", "<": "lt", "<=": "lte"}
206-
if op not in pg_op:
207-
msg = f"Operator '{op}' inside OR filter is not supported."
246+
247+
if "operator" not in condition:
248+
msg = "Comparison filter must include an 'operator' key."
208249
raise FilterError(msg)
209-
return f"{col}.{pg_op[op]}.{value}"
210250

211-
@staticmethod
212-
def _apply_condition(query: Any, condition: dict[str, Any]) -> Any:
213-
field: str = condition.get("field", "")
214-
op: str = condition.get("operator", "==")
215-
value = condition.get("value")
251+
if "value" not in condition:
252+
msg = "Comparison filter must include a 'value' key."
253+
raise FilterError(msg)
216254

217-
# PostgREST JSONB text accessor: meta->>key (no quotes around key name)
218-
col = f"meta->>{field[len('meta.'):]}" if field.startswith("meta.") else field
255+
op: str = condition["operator"]
256+
value = condition["value"]
257+
258+
col = SupabaseGroongaDocumentStore._meta_col(field, value)
259+
norm = SupabaseGroongaDocumentStore._normalize_value(value)
219260

220261
if op == "==":
221-
return query.is_(col, "null") if value is None else query.eq(col, value)
262+
return query.is_(col, "null") if norm is None else query.eq(col, norm)
222263

223264
if op == "!=":
224-
return query.not_.is_(col, "null") if value is None else query.neq(col, value)
265+
return query.not_.is_(col, "null") if norm is None else query.neq(col, norm)
225266

226267
if op in (">", ">=", "<", "<="):
227268
if isinstance(value, list):
228269
msg = f"Filter operator '{op}' does not support list values."
229270
raise FilterError(msg)
230271
if value is None:
231-
# No document satisfies an ordering comparison against NULL.
232272
return query.eq("id", "")
233273
if isinstance(value, str):
234274
try:
235275
_datetime.fromisoformat(value)
236276
except ValueError:
237277
msg = f"Filter operator '{op}' does not support plain string values. Use a numeric or ISO date value."
238278
raise FilterError(msg)
239-
# ISO date strings sort correctly as text; no cast needed.
240-
col_cmp = col
241-
else:
242-
# Numeric value: cast JSONB text to numeric for correct ordering.
243-
col_cmp = f"{col}::numeric"
244279
if op == ">":
245-
return query.gt(col_cmp, value)
280+
return query.gt(col, norm)
246281
if op == ">=":
247-
return query.gte(col_cmp, value)
282+
return query.gte(col, norm)
248283
if op == "<":
249-
return query.lt(col_cmp, value)
250-
return query.lte(col_cmp, value)
284+
return query.lt(col, norm)
285+
return query.lte(col, norm)
251286

252287
if op == "in":
253288
if not isinstance(value, list):
@@ -404,7 +439,7 @@ def _groonga_retrieval(
404439

405440
result = self._client.rpc(
406441
"groonga_search",
407-
{"query_text": query, "table": self.table_name, "top_k": top_k},
442+
{"query_text": query, "table_name": self.table_name, "top_k": top_k},
408443
).execute()
409444

410445
data = result.data if isinstance(result.data, list) else []

0 commit comments

Comments
 (0)