|
9 | 9 | from haystack import default_from_dict, default_to_dict, logging |
10 | 10 | from haystack.dataclasses import Document |
11 | 11 | from haystack.document_stores.errors import DuplicateDocumentError |
12 | | -from haystack.errors import FilterError |
13 | 12 | from haystack.document_stores.types import DocumentStore, DuplicatePolicy |
| 13 | +from haystack.errors import FilterError |
14 | 14 | from haystack.utils.auth import Secret, deserialize_secrets_inplace |
15 | 15 | from postgrest import CountMethod |
16 | 16 |
|
@@ -156,98 +156,133 @@ def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Docume |
156 | 156 | result = query.execute() |
157 | 157 | return [self._to_haystack_document(row) for row in result.data if isinstance(row, dict)] |
158 | 158 |
|
@staticmethod
def _meta_col(field: str, value: Any) -> str:
    """
    Choose the PostgREST column expression for a filter field.

    Fields that do not start with ``meta.`` are returned unchanged. For meta fields the
    accessor depends on the type of ``value``:

    - JSONB accessor (``meta->key``) for numeric values, so that PostgREST performs
      correct numeric comparison.
    - Text accessor (``meta->>key``) for strings, booleans, None, and mixed lists, which
      returns the JSON value as text.

    A list counts as numeric only when it contains at least one non-None element and every
    non-None element is an int or float. Empty lists and lists made up solely of None
    therefore use the text accessor.

    :param field: Filter field name, optionally prefixed with ``meta.``.
    :param value: The filter value the column will be compared against.
    :returns: The column expression to use in the PostgREST filter.
    """
    prefix = "meta."
    if not field.startswith(prefix):
        return field
    key = field[len(prefix):]

    def is_number(item: Any) -> bool:
        # bool is a subclass of int, so it has to be excluded explicitly.
        return isinstance(item, (int, float)) and not isinstance(item, bool)

    if isinstance(value, list):
        present = [item for item in value if item is not None]
        # Require at least one real number: all() over an empty sequence is vacuously
        # true and must not classify [] or [None] as numeric.
        numeric = bool(present) and all(is_number(item) for item in present)
    else:
        numeric = is_number(value)

    return f"meta->{key}" if numeric else f"meta->>{key}"
| 177 | + |
@staticmethod
def _normalize_value(value: Any) -> Any:
    """
    Map Python booleans to the lowercase strings used with the JSONB text accessor.

    :param value: Any filter value.
    :returns: ``"true"`` or ``"false"`` for a bool; every other value is returned unchanged.
    """
    if not isinstance(value, bool):
        return value
    # A bool indexes the pair as 0 (False) or 1 (True).
    return ("false", "true")[value]
| 184 | + |
@staticmethod
def _apply_filters(query: Any, filters: dict[str, Any]) -> Any:
    """
    Applies Haystack filters to a PostgREST query builder.

    Supports AND, OR, NOT logical operators and all standard comparison operators.
    OR and NOT are supported for simple (non-nested) conditions only.

    AND applies each condition to the query in turn. OR is sent as a single PostgREST
    ``or`` filter. NOT is treated as "not all conditions hold": each condition is negated
    and the negated terms are combined with ``or``.

    :param query: The Supabase query builder.
    :param filters: Haystack filter dict.
    :returns: The query with filters applied.
    :raises FilterError: For unsupported operators, invalid value types, or malformed filters.
    """
    if not filters:
        return query

    # A dict carrying a "field" key is a single comparison, not a logical group.
    if "field" in filters:
        return SupabaseGroongaDocumentStore._apply_condition(query, filters)

    if "operator" not in filters:
        msg = "Logical filter must include an 'operator' key ('AND', 'OR', 'NOT')."
        raise FilterError(msg)

    if "conditions" not in filters:
        msg = "Logical filter must include a 'conditions' key."
        raise FilterError(msg)

    op = filters["operator"]
    conditions = filters["conditions"]

    if not isinstance(conditions, (list, tuple)):
        msg = "Logical filter 'conditions' must be a list of filter dicts."
        raise FilterError(msg)

    if op == "AND":
        for cond in conditions:
            query = SupabaseGroongaDocumentStore._apply_filters(query, cond)
        return query

    if op in ("OR", "NOT"):
        if not conditions:
            # An empty group would otherwise be sent to the server as an empty "or=()".
            msg = f"Logical filter '{op}' must contain at least one condition."
            raise FilterError(msg)
        parts = [SupabaseGroongaDocumentStore._logical_condition_part(cond, op) for cond in conditions]
        return query.or_(",".join(parts))

    msg = f"Filter operator '{op}' is not supported. Supported logical operators: AND, OR, NOT."
    raise FilterError(msg)

@staticmethod
def _logical_condition_part(cond: dict[str, Any], logical_op: str) -> str:
    """
    Render one comparison condition as a term of a PostgREST ``or`` filter string.

    :param cond: A simple comparison filter with ``field``, ``operator`` and ``value`` keys.
    :param logical_op: The enclosing logical operator, ``"OR"`` or ``"NOT"``. With ``"NOT"``
        the comparison is replaced by its negation.
    :returns: A ``column.operator.value`` term.
    :raises FilterError: For nested logical filters, unsupported operators, a missing
        ``value`` key, list values, or None combined with an ordering operator.
    """
    pg_op_map = {"==": "eq", "!=": "neq", ">": "gt", ">=": "gte", "<": "lt", "<=": "lte"}
    neg_map = {"==": "neq", "!=": "eq", ">": "lte", ">=": "lt", "<": "gte", "<=": "gt"}
    negate = logical_op == "NOT"
    op_map = neg_map if negate else pg_op_map

    if not isinstance(cond, dict) or "field" not in cond:
        msg = f"Nested logical operators inside {logical_op} are not supported."
        raise FilterError(msg)

    cond_op = cond.get("operator", "")
    if cond_op not in op_map:
        msg = f"Operator '{cond_op}' inside {logical_op} filter is not supported."
        raise FilterError(msg)

    if "value" not in cond:
        msg = "Comparison filter must include a 'value' key."
        raise FilterError(msg)
    cond_value = cond["value"]

    if isinstance(cond_value, list):
        # A Python list repr is not a valid scalar operand in the filter string.
        msg = f"Operator '{cond_op}' inside {logical_op} filter does not support list values."
        raise FilterError(msg)

    col = SupabaseGroongaDocumentStore._meta_col(cond["field"], cond_value)

    if cond_value is None:
        if cond_op not in ("==", "!="):
            msg = f"Operator '{cond_op}' inside {logical_op} filter does not support None values."
            raise FilterError(msg)
        # NULL must be tested with "is"; interpolating None would compare against
        # the literal text "None".
        wants_null = (cond_op == "==") != negate
        return f"{col}.is.null" if wants_null else f"{col}.not.is.null"

    # NOTE(review): negated comparisons follow SQL NULL semantics, so rows where the
    # field is missing may not match a NOT filter - confirm this is the intended behaviour.
    norm = SupabaseGroongaDocumentStore._normalize_value(cond_value)
    if isinstance(norm, str) and any(ch in norm for ch in ',.:()"\\'):
        # Values containing PostgREST reserved characters must be double-quoted so they
        # are not read as term separators; backslash and quote are backslash-escaped.
        escaped = norm.replace("\\", "\\\\").replace('"', '\\"')
        norm = f'"{escaped}"'
    return f"{col}.{op_map[cond_op]}.{norm}"
198 | 242 |
|
199 | 243 | @staticmethod |
200 | | - def _condition_to_or_part(condition: dict[str, Any]) -> str: |
| 244 | + def _apply_condition(query: Any, condition: dict[str, Any]) -> Any: |
201 | 245 | field: str = condition.get("field", "") |
202 | | - op: str = condition.get("operator", "==") |
203 | | - value = condition.get("value") |
204 | | - col = f"meta->>{field[len('meta.'):]}" if field.startswith("meta.") else field |
205 | | - pg_op = {"==": "eq", "!=": "neq", ">": "gt", ">=": "gte", "<": "lt", "<=": "lte"} |
206 | | - if op not in pg_op: |
207 | | - msg = f"Operator '{op}' inside OR filter is not supported." |
| 246 | + |
| 247 | + if "operator" not in condition: |
| 248 | + msg = "Comparison filter must include an 'operator' key." |
208 | 249 | raise FilterError(msg) |
209 | | - return f"{col}.{pg_op[op]}.{value}" |
210 | 250 |
|
211 | | - @staticmethod |
212 | | - def _apply_condition(query: Any, condition: dict[str, Any]) -> Any: |
213 | | - field: str = condition.get("field", "") |
214 | | - op: str = condition.get("operator", "==") |
215 | | - value = condition.get("value") |
| 251 | + if "value" not in condition: |
| 252 | + msg = "Comparison filter must include a 'value' key." |
| 253 | + raise FilterError(msg) |
216 | 254 |
|
217 | | - # PostgREST JSONB text accessor: meta->>key (no quotes around key name) |
218 | | - col = f"meta->>{field[len('meta.'):]}" if field.startswith("meta.") else field |
| 255 | + op: str = condition["operator"] |
| 256 | + value = condition["value"] |
| 257 | + |
| 258 | + col = SupabaseGroongaDocumentStore._meta_col(field, value) |
| 259 | + norm = SupabaseGroongaDocumentStore._normalize_value(value) |
219 | 260 |
|
220 | 261 | if op == "==": |
221 | | - return query.is_(col, "null") if value is None else query.eq(col, value) |
| 262 | + return query.is_(col, "null") if norm is None else query.eq(col, norm) |
222 | 263 |
|
223 | 264 | if op == "!=": |
224 | | - return query.not_.is_(col, "null") if value is None else query.neq(col, value) |
| 265 | + return query.not_.is_(col, "null") if norm is None else query.neq(col, norm) |
225 | 266 |
|
226 | 267 | if op in (">", ">=", "<", "<="): |
227 | 268 | if isinstance(value, list): |
228 | 269 | msg = f"Filter operator '{op}' does not support list values." |
229 | 270 | raise FilterError(msg) |
230 | 271 | if value is None: |
231 | | - # No document satisfies an ordering comparison against NULL. |
232 | 272 | return query.eq("id", "") |
233 | 273 | if isinstance(value, str): |
234 | 274 | try: |
235 | 275 | _datetime.fromisoformat(value) |
236 | 276 | except ValueError: |
237 | 277 | msg = f"Filter operator '{op}' does not support plain string values. Use a numeric or ISO date value." |
238 | 278 | raise FilterError(msg) |
239 | | - # ISO date strings sort correctly as text; no cast needed. |
240 | | - col_cmp = col |
241 | | - else: |
242 | | - # Numeric value: cast JSONB text to numeric for correct ordering. |
243 | | - col_cmp = f"{col}::numeric" |
244 | 279 | if op == ">": |
245 | | - return query.gt(col_cmp, value) |
| 280 | + return query.gt(col, norm) |
246 | 281 | if op == ">=": |
247 | | - return query.gte(col_cmp, value) |
| 282 | + return query.gte(col, norm) |
248 | 283 | if op == "<": |
249 | | - return query.lt(col_cmp, value) |
250 | | - return query.lte(col_cmp, value) |
| 284 | + return query.lt(col, norm) |
| 285 | + return query.lte(col, norm) |
251 | 286 |
|
252 | 287 | if op == "in": |
253 | 288 | if not isinstance(value, list): |
@@ -404,7 +439,7 @@ def _groonga_retrieval( |
404 | 439 |
|
405 | 440 | result = self._client.rpc( |
406 | 441 | "groonga_search", |
407 | | - {"query_text": query, "table": self.table_name, "top_k": top_k}, |
| 442 | + {"query_text": query, "table_name": self.table_name, "top_k": top_k}, |
408 | 443 | ).execute() |
409 | 444 |
|
410 | 445 | data = result.data if isinstance(result.data, list) else [] |
|
0 commit comments