Skip to content

Commit 5b69c98

Browse files
authored
Merge pull request #72 from fredi-sap/main
Use type SqlOperand for SQL serialization
2 parents efa6cb4 + 8e9020f commit 5b69c98

2 files changed

Lines changed: 320 additions & 150 deletions

File tree

langchain_hana/vectorstores/create_where_clause.py

Lines changed: 168 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,95 @@
33

44
logger = logging.getLogger(__name__)
55

6-
def is_date_value(value: Any) -> bool:
7-
return isinstance(value, dict) and ("type" in value) and (value["type"] == "date")
86

9-
def _determine_typed_sql_placeholder(value): # type: ignore[no-untyped-def]
7+
class FilterOperand:
    """Represents a filter operand with type information for validation and error messages.

    Accepted inputs are plain ``bool``/``int``/``float``/``str`` values and
    date dictionaries of the form ``{"type": "date", "date": <value>}``.

    Attributes:
        value: The user-supplied value (for dates: the content of the
            ``"date"`` key).
        the_type: One of ``"bool"``, ``"int"``, ``"float"``, ``"str"`` or
            ``"date"``.

    Raises:
        ValueError: If the value is of an unsupported kind, or a date
            dictionary has a missing or empty ``"date"`` entry.
    """

    # Checked in order: bool must come first because bool is a subclass of int.
    _PLAIN_TYPES = ((bool, "bool"), (int, "int"), (float, "float"), (str, "str"))

    def __init__(self, value: object) -> None:
        # Classify by isinstance against the base types instead of using
        # type(value).__name__: a subclass instance would otherwise be stored
        # under an unknown type name that no later branch recognizes.
        for plain_type, type_name in self._PLAIN_TYPES:
            if isinstance(value, plain_type):
                self.value = value
                self.the_type = type_name
                return

        if isinstance(value, dict) and value.get("type") == "date":
            if "date" not in value:
                raise ValueError(f"Date operand missing 'date' key: {value!r}")
            self.value = value["date"]
            if not self.value:
                raise ValueError("Date operand with empty value")
            self.the_type = "date"
            return

        raise ValueError(f"Operand cannot be created from {value!r}")

    def __str__(self) -> str:
        return f"{self.value!r} ({self.the_type})"

    def __repr__(self) -> str:
        # Same rendering as __str__ so operands inside lists read well in error messages.
        return str(self)
29+
30+
31+
class SqlOperand:
    """SQL operand with placeholder and value for parameterized queries.

    Attributes:
        the_type: SQL type name chosen for the operand
            (``"BOOLEAN"``, ``"DOUBLE"``, ``"NVARCHAR"`` or ``"DATE"``).
        placeholder: Typed placeholder expression such as ``"TO_DOUBLE(?)"``.
        value: The parameter value to bind to the placeholder.
    """

    def __init__(self, operand: "FilterOperand"):
        """Construct SqlOperand from a FilterOperand.

        Raises:
            ValueError: If a numeric operand cannot be represented as a float.
            AssertionError: If ``operand.the_type`` is not a known type name.
        """
        if operand.the_type == "bool":
            self.the_type = "BOOLEAN"
            self.placeholder = "TO_BOOLEAN(?)"
            # Booleans are bound as the strings "true"/"false".
            self.value = "true" if operand.value else "false"
        elif operand.the_type in ("int", "float"):
            self.the_type = "DOUBLE"
            self.placeholder = "TO_DOUBLE(?)"
            try:
                self.value = float(operand.value)
            except OverflowError as e:
                # Keep filter validation failures uniform: callers of the
                # filter code deal in ValueError, not OverflowError.
                raise ValueError(
                    f"Numeric operand {operand.value!r} is too large for a DOUBLE"
                ) from e
        elif operand.the_type == "str":
            self.the_type = "NVARCHAR"
            self.placeholder = "TO_NVARCHAR(?)"
            self.value = operand.value
        elif operand.the_type == "date":
            self.the_type = "DATE"
            self.placeholder = "TO_DATE(?)"
            self.value = operand.value
        else:
            # This should not happen if FilterOperand is constructed correctly.
            raise AssertionError(f"Unreachable. {operand=}")

    def __str__(self) -> str:
        # We do not want to print internal types.
        # Users of langchain should see their input value in error messages.
        # Raise explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, which would turn this guard into a confusing
        # "__str__ returned non-string" TypeError.
        raise AssertionError(
            "SqlOperand must not be formatted; format the FilterOperand instead"
        )
1260

13-
# Handle plain values.
14-
if the_type is bool:
15-
return "TO_BOOLEAN(?)", "true" if value else "false"
16-
if the_type in (int, float):
17-
return "TO_DOUBLE(?)", value
18-
if the_type is str:
19-
return "TO_NVARCHAR(?)", value
2061

21-
# Handle container types: only allowed for dates.
22-
if is_date_value(value):
23-
return "TO_DATE(?)", value["date"]
24-
25-
# If we reach this point, the value type is not supported.
26-
raise ValueError(f"Unsupported filter value type: {the_type}, value: {value}")
62+
def _determine_filter_operands(operator: str, operands: any) -> list[FilterOperand]:
63+
"""Check that operands is a list and return list of FilterOperands."""
64+
if not isinstance(operands, (list, tuple)):
65+
raise ValueError(f"Operator {operator} expects list/tuple of operands, but got {operands}")
66+
if len(operands) == 0:
67+
raise ValueError(f"Operator {operator} expects at least 1 operand")
68+
return [_determine_single_filter_operand(operator, op) for op in operands]
69+
70+
71+
def _determine_single_filter_operand(operator: str, operands: any) -> FilterOperand:
72+
"""Check that operands is a single value (not list/tuple) and return FilterOperand."""
73+
if isinstance(operands, (list, tuple)):
74+
raise ValueError(
75+
f"Operator {operator} expects a single operand, but got {type(operands).__name__}: {operands}"
76+
)
77+
try:
78+
return FilterOperand(operands)
79+
except ValueError as e:
80+
error_message = str(e)
81+
raise ValueError(f"Operator {operator}: {error_message}")
82+
83+
def _sql_serialize_logical_clauses(
84+
sql_operator: str, sql_clauses: list[str]
85+
) -> str:
86+
if sql_operator not in ("AND", "OR"):
87+
raise ValueError(f"{sql_operator=}, is not in ('AND', 'OR')")
88+
if not sql_clauses:
89+
raise ValueError("sql_clauses is empty")
90+
if not all(sql_clauses):
91+
raise ValueError(f"Empty sql clause in {sql_clauses=}")
92+
if len(sql_clauses) == 1:
93+
return sql_clauses[0]
94+
return f" {sql_operator} ".join([f"({clause})" for clause in sql_clauses])
2795

2896
class CreateWhereClause:
2997
def __init__(self, hanaDb: Any) -> None:
@@ -61,8 +129,7 @@ def _create_where_clause(self, filter: dict) -> Tuple[str, List]:
61129
# Value is a column operator.
62130
if len(value) != 1:
63131
raise ValueError(
64-
"Expecting a single entry 'operator: operands'"
65-
f", but got {value=}"
132+
f"Filter expects a single 'operator: operands' entry, but got {value}"
66133
)
67134
operator, operands = list(value.items())[0]
68135
ret_sql_clause, ret_query_tuple = (
@@ -72,154 +139,131 @@ def _create_where_clause(self, filter: dict) -> Tuple[str, List]:
72139
# Value is plain NULL.
73140
ret_sql_clause = f"{self._create_selector(key)} IS NULL"
74141
ret_query_tuple = []
75-
elif is_date_value(value) or isinstance(value, (int, float, str, bool)):
76-
# Value represents a typed SQL value.
77-
# _determine_typed_sql_placeholder throws for illegal types.
78-
placeholder, value = (
79-
_determine_typed_sql_placeholder(value)
80-
)
81-
ret_sql_clause = f"{self._create_selector(key)} = {placeholder}"
82-
ret_query_tuple = [value]
83142
else:
84-
raise ValueError(
85-
f"Invalid filter value with {key=}, {value=}"
86-
)
143+
# Value represents a typed SQL value (implicit $eq operator).
144+
try:
145+
operand = FilterOperand(value)
146+
except ValueError:
147+
raise ValueError(
148+
f"Implicit operator $eq received unsupported operand: {value!r}"
149+
)
150+
sql_operand = SqlOperand(operand)
151+
ret_sql_clause = f"{self._create_selector(key)} = {sql_operand.placeholder}"
152+
ret_query_tuple = [sql_operand.value]
87153
statements.append(ret_sql_clause)
88154
parameters += ret_query_tuple
89-
return CreateWhereClause._sql_serialize_logical_clauses(
90-
"AND", statements
91-
), parameters
155+
return _sql_serialize_logical_clauses("AND", statements), parameters
92156

93157
def _sql_serialize_column_operation(
    self, column: str, operator: str, operands: Any
) -> Tuple[str, List]:
    """Serialize one ``column: {operator: operands}`` filter entry.

    Args:
        column: Name of the column the operator applies to.
        operator: One of ``$contains``, ``$like``, ``$between``, ``$in``,
            ``$nin``, ``$eq``, ``$ne``, ``$gt``, ``$gte``, ``$lt``, ``$lte``.
        operands: Raw operand(s) as given in the filter.

    Returns:
        A tuple of the SQL fragment (with typed placeholders) and the list
        of parameter values to bind, in placeholder order.

    Raises:
        ValueError: If the operator is unknown or the operands do not match
            what the operator expects.
    """
    if operator == "$contains":
        operand = _determine_single_filter_operand(operator, operands)
        if operand.the_type != "str" or not operand.value:
            raise ValueError(f"Operator $contains expects a non-empty string operand, but got {operand!r}")
        sql_operand = SqlOperand(operand)
        # $contains uses the quoted raw column name, not the selector built below.
        statement = (
            f"SCORE({sql_operand.placeholder} IN (\"{column}\" EXACT SEARCH MODE 'text')) > 0"
        )
        return statement, [sql_operand.value]

    selector = self._create_selector(column)

    if operator == "$like":
        operand = _determine_single_filter_operand(operator, operands)
        if operand.the_type != "str":
            raise ValueError(f"Operator $like expects a string operand, but got {operand}")
        sql_operand = SqlOperand(operand)
        statement = f"{selector} LIKE {sql_operand.placeholder}"
        return statement, [sql_operand.value]

    if operator == "$between":
        filter_operands = _determine_filter_operands(operator, operands)
        if len(filter_operands) != 2:
            raise ValueError(f"Operator $between expects 2 operands, but got {filter_operands}")
        from_operand, to_operand = filter_operands
        if from_operand.the_type != to_operand.the_type:
            raise ValueError(f"Operator $between expects operands of the same type, but got {filter_operands}")
        # Both operands share a type here, so checking one is enough; bool is excluded.
        if from_operand.the_type not in ("int", "float", "str", "date"):
            raise ValueError(f"Operator $between expects operand types (int, float, str, date), but got {filter_operands}")
        sql_from = SqlOperand(from_operand)
        sql_to = SqlOperand(to_operand)
        statement = (
            f"{selector} BETWEEN {sql_from.placeholder} AND {sql_to.placeholder}"
        )
        return statement, [sql_from.value, sql_to.value]

    if operator in ("$in", "$nin"):
        sql_operator = {
            "$in": "IN",
            "$nin": "NOT IN",
        }[operator]
        filter_operands = _determine_filter_operands(operator, operands)
        # All list entries must have the same operand type as the first one.
        for op in filter_operands:
            if op.the_type != filter_operands[0].the_type:
                raise ValueError(f"Operator {operator} expects operands of the same type, but got {operands}")
        sql_operands = [SqlOperand(op) for op in filter_operands]
        sql_placeholders = [sql_operand.placeholder for sql_operand in sql_operands]
        sql_values = [sql_operand.value for sql_operand in sql_operands]
        statement = f"{selector} {sql_operator} ({', '.join(sql_placeholders)})"
        return statement, sql_values

    if operator in ("$eq", "$ne"):
        # Allow null checks for equality operators.
        if operands is None:
            sql_operation = {
                "$eq": "IS NULL",
                "$ne": "IS NOT NULL",
            }[operator]
            statement = f"{selector} {sql_operation}"
            return statement, []
        sql_operator = {
            "$eq": "=",
            "$ne": "<>",
        }[operator]
        operand = _determine_single_filter_operand(operator, operands)
        sql_operand = SqlOperand(operand)
        statement = f"{selector} {sql_operator} {sql_operand.placeholder}"
        return statement, [sql_operand.value]

    if operator in ("$gt", "$gte", "$lt", "$lte"):
        operand = _determine_single_filter_operand(operator, operands)

        # Check if the operand type is allowed for comparison operators.
        if operand.the_type not in ("int", "float", "str", "date"):
            raise ValueError(
                f"Operator {operator} expects operand of type int/float/str/date, but got {operand}"
            )

        sql_operator = {
            "$gt": ">",
            "$gte": ">=",
            "$lt": "<",
            "$lte": "<=",
        }[operator]
        sql_operand = SqlOperand(operand)
        statement = f"{selector} {sql_operator} {sql_operand.placeholder}"
        return statement, [sql_operand.value]

    # Unknown operation if we reach this point.
    raise ValueError(f"Operator {operator} is not supported")
197244

198245
def _sql_serialize_logical_operation(
    self, operator: str, operands: List
) -> Tuple[str, List]:
    """Serialize a ``$and``/``$or`` filter over a list of sub-filters.

    Each sub-filter is turned into a WHERE clause via
    ``_create_where_clause``; the clauses are then combined with the SQL
    keyword for the operator.

    Returns:
        A tuple of the combined SQL fragment and the concatenated parameter
        values of all sub-filters, in order.

    Raises:
        ValueError: If ``operands`` is not a list, has fewer than two
            entries, or the operator is neither ``$and`` nor ``$or``.
    """
    if not isinstance(operands, list):
        raise ValueError(f"Operator {operator} expects a list of operands, but got {operands!r}")
    if len(operands) < 2:
        raise ValueError(f"Operator {operator} expects at least 2 operands, but got {operands!r}")
    # Operand shape is validated before the operator itself, as before.
    if operator not in ("$and", "$or"):
        raise ValueError(f"Operator {operator} is not supported")

    clauses = []
    parameters = []
    for sub_filter in operands:
        clause, clause_parameters = self._create_where_clause(sub_filter)
        clauses.append(clause)
        parameters += clause_parameters

    keyword = "AND" if operator == "$and" else "OR"
    return _sql_serialize_logical_clauses(keyword, clauses), parameters
223267

224268
def _create_selector(self, column: str) -> str:
225269
if column in self.specific_metadata_columns:

0 commit comments

Comments
 (0)