1+ import logging
2+ import json
13from typing import List
24
35from sub_platforms .sql_opt .videx .videx_metadata import VidexTableStats
46from sub_platforms .sql_opt .videx .model .videx_strategy import VidexModelBase , VidexStrategy
57from sub_platforms .sql_opt .videx .model .videx_model_innodb import VidexModelInnoDB
6- from sub_platforms .sql_opt .videx .videx_metadata import VidexTableStats , VidexDBTaskStats
8+ from sub_platforms .sql_opt .videx .videx_metadata import VidexDBTaskStats
9+ from sub_platforms .sql_opt .videx .videx_pg_metadata import PGVidexTableStats
710from sub_platforms .sql_opt .videx .model .videx_strategy import VidexStrategy
811from sub_platforms .sql_opt .videx .videx_utils import IndexRangeCond
912from sub_platforms .sql_opt .pg_meta import PGTable
1013
class VidexModelPG(VidexModelBase):
    """Statistics model for PostgreSQL, backed by one table's ``PGVidexTableStats``.

    Each instance wraps the metadata and column statistics of a single table
    and answers statistic requests from them. Hooks that have no meaning for
    PostgreSQL return fixed placeholder values.
    """

    def __init__(self, table_stats: PGVidexTableStats, **kwargs):
        """Bind the model to the statistics of a single table.

        Args:
            table_stats: Metadata and column statistics of the table served
                by this model.
            **kwargs: Accepted for call-site compatibility; not used here.
        """
        super().__init__(None, VidexStrategy.postgresql)
        self.table_stats: PGVidexTableStats = table_stats

    def scan_time(self, req_json_item: dict) -> float:
        """Return a placeholder scan time; not used for PostgreSQL."""
        return 0.0

    def get_memory_buffer_size(self, req_json_item: dict) -> int:
        """Return a placeholder buffer size; not used for PostgreSQL."""
        return -1

    def info_low(self, req_json_item: dict) -> int:
        """Return a placeholder value; not used for PostgreSQL."""
        return 0

    def cardinality(self, idx_range_cond: IndexRangeCond) -> int:
        """Return a placeholder cardinality of 0 for any range condition."""
        return 0

    def ndv(self, index_name, field_list: List[str]) -> int:
        """Return the number of distinct values for a single column.

        Args:
            index_name: Unused by this implementation.
            field_list: Column names. Only a single-column list is supported.

        Returns:
            ``int(stadistinct)`` of the column when its statistics exist,
            otherwise 0. Multi-column (or empty) lists always yield 0.
        """
        if len(field_list) != 1:
            # TODO: try to fetch from pg_statistic_ext for multi-column NDV;
            # moreover, support an NDV learned model (e.g. PLM4NDV).
            return 0

        col_stats_info = self.table_stats.table_statistic.statistic_dict.get(field_list[0])
        if col_stats_info is None:
            return 0
        # NOTE(review): PostgreSQL documents negative stadistinct values as a
        # fraction of the row count; int() truncates those (e.g. -0.5 -> 0).
        # Confirm whether consumers expect the raw value instead.
        return int(col_stats_info.stadistinct)

    @staticmethod
    def _find_colname(req_json_item: dict):
        """Return the first non-empty column name in the request, or None.

        Looks through ``req_json_item["data"]`` for an entry whose
        ``item_type`` is ``"colname"`` and reads ``properties["name"]``.
        """
        data_items = req_json_item.get("data")
        if not isinstance(data_items, list):
            return None
        for item in data_items:
            # Skip malformed entries instead of failing on ``.get``.
            if not isinstance(item, dict) or item.get("item_type") != "colname":
                continue
            colname = (item.get("properties") or {}).get("name")
            if colname:
                return colname
        return None

    def get_relation_stats(self, req_json_item: dict) -> dict:
        """Return the column statistics requested by ``req_json_item``.

        Args:
            req_json_item: Request whose ``"data"`` list contains an entry
                with ``item_type == "colname"`` and ``properties["name"]``
                set to the column name.

        Returns:
            A dict with the keys ``stanullfrac``, ``stawidth``,
            ``stainherit``, ``stadistinct`` and ``slots``. An empty dict is
            returned (after a warning is logged) when the table has no
            statistics, the request carries no column name, or the column
            has no statistics.
        """
        table_meta = self.table_stats.table_meta
        if self.table_stats.table_statistic is None:
            logging.warning(f"Table statistic is None for "
                            f"db_name: {table_meta.dbname}, "
                            f"table_name: {table_meta.table_name}")
            return {}

        # The helper always yields a value (possibly None), so a request
        # without a usable "data" list reaches the warning below instead of
        # failing on an unbound local.
        colname = self._find_colname(req_json_item)
        if not colname:
            logging.warning("Column name missing in request: %s", req_json_item)
            return {}

        col_stats_info = self.table_stats.table_statistic.statistic_dict.get(colname)
        if col_stats_info is None:
            logging.warning(f"Column statistic not found for "
                            f"db_name: {table_meta.dbname}, "
                            f"table_name: {table_meta.table_name}, "
                            f"req_json_item {req_json_item}")
            return {}

        ndv_value = self.ndv(None, [colname])

        slots_payload = []
        for slot in (col_stats_info.slots or []):
            if hasattr(slot, "model_dump"):
                # Objects exposing model_dump serialize themselves.
                slots_payload.append(slot.model_dump(exclude_none=True))
            else:
                slots_payload.append({
                    "kind": getattr(slot, "kind", None),
                    "op": getattr(slot, "op", None),
                    "coll": getattr(slot, "coll", None),
                    "numbers": getattr(slot, "numbers", None),
                    "values": getattr(slot, "values", None),
                })

        res = {
            "stanullfrac": col_stats_info.stanullfrac,
            "stawidth": col_stats_info.stawidth,
            "stainherit": col_stats_info.stainherit,
            "stadistinct": ndv_value,
            "slots": slots_payload,
        }
        logging.info(f"Get pg column statistic for "
                     f"db_name: {table_meta.dbname}, "
                     f"table_name: {table_meta.table_name}, "
                     f"column {colname}: {res}")
        return res

    def table_block_relation_estimate_size(self, req_json_item: dict) -> dict:
        """Return the table-level size statistics of the bound table.

        Args:
            req_json_item: Unused; the table is taken from ``self.table_stats``.

        Returns:
            A dict with ``relpages``, ``reltuples``, ``relallvisible`` and
            ``relhasindex`` (True when the table has at least one index).
        """
        table: PGTable = self.table_stats.table_meta
        logging.info(f"Start to get pg table block relation estimate size for "
                     f"db_name: {table.dbname}, "
                     f"table_name: {table.table_name}")
        res = {
            "relpages": table.relpages,
            "reltuples": table.reltuples,
            "relallvisible": table.relallvisible,
            # bool() also copes with a missing (None) index list.
            "relhasindex": bool(table.indexes),
        }
        logging.info(f"Get pg table block relation estimate size for "
                     f"db_name: {table.dbname}, "
                     f"table_name: {table.table_name}: {res}")
        return res

    def get_index_stats(self, req_json_item: dict) -> dict:
        """Index statistics are not supported yet.

        Raises:
            NotImplementedError: Always. Raised rather than returned, so the
                caller never receives an exception object in place of a dict.
        """
        raise NotImplementedError("get_index_stats is not implemented yet.")
0 commit comments