Skip to content

Commit f47fde1

Browse files
authored
feat: Add full-text index type support (#51)
* Add full-text index type support
* Add full-text index type support

---------

Co-authored-by: xxsc0529 <xxsc0529@users.noreply.github.com>
1 parent f7d265a commit f47fde1

File tree

3 files changed

+406
-14
lines changed

3 files changed

+406
-14
lines changed

pyobvector/client/fts_index_param.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,58 @@
11
"""A module to specify fts index parameters"""
22
from enum import Enum
3-
from typing import List, Optional
3+
from typing import List, Optional, Union
44

55
class FtsParser(Enum):
6+
"""Built-in full-text search parser types supported by OceanBase"""
67
IK = 0
78
NGRAM = 1
9+
NGRAM2 = 2 # NGRAM2 parser (supported from V4.3.5 BP2+)
10+
BASIC_ENGLISH = 3 # Basic English parser
11+
JIEBA = 4 # jieba parser
812

913

1014
class FtsIndexParam:
15+
"""Full-text search index parameter.
16+
17+
Args:
18+
index_name: Index name
19+
field_names: List of field names to create full-text index on
20+
parser_type: Parser type, can be FtsParser enum or string (for custom parsers)
21+
If None, uses default Space parser
22+
"""
1123
def __init__(
1224
self,
1325
index_name: str,
1426
field_names: List[str],
15-
parser_type: Optional[FtsParser],
27+
parser_type: Optional[Union[FtsParser, str]] = None,
1628
):
1729
self.index_name = index_name
1830
self.field_names = field_names
1931
self.parser_type = parser_type
2032

21-
def param_str(self) -> str | None:
22-
if self.parser_type == FtsParser.IK:
23-
return "ik"
24-
if self.parser_type == FtsParser.NGRAM:
25-
return "ngram"
33+
def param_str(self) -> Optional[str]:
34+
"""Convert parser type to string format for SQL."""
35+
if self.parser_type is None:
36+
return None # Default Space parser, no need to specify
37+
38+
if isinstance(self.parser_type, str):
39+
# Custom parser name (e.g., "thai_ftparser")
40+
return self.parser_type.lower()
41+
42+
if isinstance(self.parser_type, FtsParser):
43+
if self.parser_type == FtsParser.IK:
44+
return "ik"
45+
if self.parser_type == FtsParser.NGRAM:
46+
return "ngram"
47+
if self.parser_type == FtsParser.NGRAM2:
48+
return "ngram2"
49+
if self.parser_type == FtsParser.BASIC_ENGLISH:
50+
return "beng"
51+
if self.parser_type == FtsParser.JIEBA:
52+
return "jieba"
53+
# Raise exception for unrecognized FtsParser enum values
54+
raise ValueError(f"Unrecognized FtsParser enum value: {self.parser_type}")
55+
2656
return None
2757

2858
def __iter__(self):
@@ -34,7 +64,7 @@ def __iter__(self):
3464
def __str__(self):
3565
return str(dict(self))
3666

37-
def __eq__(self, other: None):
67+
def __eq__(self, other: object) -> bool:
3868
if isinstance(other, self.__class__):
3969
return dict(self) == dict(other)
4070

pyobvector/client/ob_vec_json_table_client.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -884,12 +884,19 @@ def _handle_jtable_dml_select(
884884
identifier.args['quoted'] = False
885885
col.args['table'] = identifier
886886

887-
join_clause = parse_one(f"from t1, {json_table_str}")
888-
join_node = join_clause.args['joins'][0]
889-
if 'joins' in ast.args.keys():
890-
ast.args['joins'].append(join_node)
891-
else:
892-
ast.args['joins'] = [join_node]
887+
# Manually create the JOIN node for json_table
888+
# In some versions of sqlglot, comma-separated tables may not be parsed as
889+
# explicit JOINS, so we directly parse the json_table expression and create a JOIN node
890+
# explicitly
891+
json_table_expr = parse_one(json_table_str, dialect="oceanbase")
892+
893+
join_node = exp.Join()
894+
join_node.args['this'] = json_table_expr
895+
join_node.args['kind'] = None # CROSS JOIN (implicit join with comma)
896+
897+
if 'joins' not in ast.args:
898+
ast.args['joins'] = []
899+
ast.args['joins'].append(join_node)
893900

894901
if real_user_id:
895902
extra_filter_str = f"{JSON_TABLE_DATA_TABLE_NAME}.user_id = '{real_user_id}' AND {JSON_TABLE_DATA_TABLE_NAME}.jtable_name = '{table_name}'"

0 commit comments

Comments
 (0)