Skip to content

Feature: metadata_scan supports checking the metadata of index table now #21800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
109 changes: 101 additions & 8 deletions pkg/sql/colexec/table_function/metadata_scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
package table_function

import (
"fmt"
"github.com/matrixorigin/matrixone/pkg/vectorindex/sqlexec"
"go.uber.org/zap"

"strings"
"unicode"

"github.com/matrixorigin/matrixone/pkg/logutil"
"github.com/matrixorigin/matrixone/pkg/perfcounter"
Expand Down Expand Up @@ -51,17 +56,73 @@ func metadataScanPrepare(proc *process.Process, tableFunction *TableFunction) (t
return &metadataScanState{}, err
}

func getIndexTableNameByIndexName(proc *process.Process, dbname, tablename, indexname string) (string, error) {
var indexTableName string

e := proc.Ctx.Value(defines.EngineKey{}).(engine.Engine)
db, err := e.Database(proc.Ctx, dbname, proc.GetTxnOperator())
if err != nil {
return "", moerr.NewInternalError(proc.Ctx, "get database failed in metadata scan")
}

rel, err := db.Relation(proc.Ctx, tablename, nil)
if err != nil {
return "", err
}
tableid := rel.GetTableID(proc.Ctx)
logutil.Info("relID", zap.Uint64("value", tableid))

sql := fmt.Sprintf("SELECT distinct(index_table_name) FROM mo_catalog.mo_indexes WHERE table_id = '%d' AND name = '%s'", tableid, indexname)
result, err := sqlexec.RunSql(proc, sql)
if err != nil {
return "", err
}
for _, batch := range result.Batches {
logutil.Info("Batch debug",
zap.Int("vec_count", len(batch.Vecs)),
zap.Strings("vector_types", func() []string {
types := make([]string, len(batch.Vecs))
for i, v := range batch.Vecs {
types[i] = v.GetType().String()
}
return types
}()),
)
if len(batch.Vecs) == 0 {
continue
}
vec := batch.Vecs[0]
for row := 0; row < vec.Length(); row++ {
if !vec.IsNull(uint64(row)) {
indexTableName = vec.GetStringAt(row)
logutil.Info("Index table name", zap.String("value", indexTableName))
}
}
}
return indexTableName, nil
}

func (s *metadataScanState) start(tf *TableFunction, proc *process.Process, nthRow int, analyzer process.Analyzer) error {
s.startPreamble(tf, proc, nthRow)

source := tf.ctr.argVecs[0]
col := tf.ctr.argVecs[1]
dbname, tablename, colname, err := handleDataSource(source, col)
logutil.Infof("db: %s, table: %s, col: %s in metadataScan", dbname, tablename, colname)
dbname, tablename, indexname, colname, err := handleDataSource(source, col)
logutil.Infof("db: %s, table: %s, index: %s, col: %s in metadataScan", dbname, tablename, indexname, colname)
if err != nil {
return err
}

// When the source format is db_name.table_name.$index_name
// metadata_scan actually returns the metadata of the index table
if indexname != "" {
indexTableName, err := getIndexTableNameByIndexName(proc, dbname, tablename, indexname)
if err != nil {
return err
}
tablename = indexTableName
}

// Oh my
e := proc.Ctx.Value(defines.EngineKey{}).(engine.Engine)
db, err := e.Database(proc.Ctx, dbname, proc.GetTxnOperator())
Expand Down Expand Up @@ -94,15 +155,47 @@ func (s *metadataScanState) start(tf *TableFunction, proc *process.Process, nthR
return nil
}

func handleDataSource(source, col *vector.Vector) (string, string, string, error) {
func handleDataSource(source, col *vector.Vector) (string, string, string, string, error) {
if source.Length() != 1 || col.Length() != 1 {
return "", "", "", moerr.NewInternalErrorNoCtx("wrong input len")
return "", "", "", "", moerr.NewInternalErrorNoCtx("wrong input len")
}
sourceStr := source.GetStringAt(0)
parts := strings.Split(sourceStr, ".")
switch len(parts) {
// Old source format: db_name.table_name
case 2:
dbname, tablename := parts[0], parts[1]
if !isValidIdentifier(dbname) || !isValidIdentifier(tablename) {
return "", "", "", "", moerr.NewInternalErrorNoCtx("invalid db or table name format")
}
return dbname, tablename, "", col.GetStringAt(0), nil
// Newly supported source format: db_name.table_name.$index_name
case 3:
dbname, tablename, indexPart := parts[0], parts[1], parts[2]
if !isValidIdentifier(dbname) || !isValidIdentifier(tablename) {
return "", "", "", "", moerr.NewInternalErrorNoCtx("invalid db or table name format")
}
if len(indexPart) == 0 || indexPart[0] != '$' || !isValidIdentifier(indexPart[1:]) {
return "", "", "", "", moerr.NewInternalErrorNoCtx("index name must start with $ and follow identifier rules")
}
indexName := indexPart[1:]
return dbname, tablename, indexName, col.GetStringAt(0), nil

default:
return "", "", "", "", moerr.NewInternalErrorNoCtx("source must be in db_name.table_name or db_name.table_name.$index_name format")
}
strs := strings.Split(source.GetStringAt(0), ".")
if len(strs) != 2 {
return "", "", "", moerr.NewInternalErrorNoCtx("wrong len of db and tbl input")
}

func isValidIdentifier(s string) bool {
if s == "" {
return false
}
for _, c := range s {
if !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '_' {
return false
}
}
return strs[0], strs[1], col.GetStringAt(0), nil
return true
}

func fillMetadataInfoBat(opBat *batch.Batch, proc *process.Process, tableFunction *TableFunction, info *plan.MetadataScanInfo) error {
Expand Down
166 changes: 166 additions & 0 deletions pkg/sql/colexec/table_function/metadata_scan_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2025 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package table_function

import (
"github.com/lni/goutils/leaktest"
"github.com/matrixorigin/matrixone/pkg/common/moerr"
"testing"

"github.com/matrixorigin/matrixone/pkg/common/mpool"
"github.com/matrixorigin/matrixone/pkg/container/types"
"github.com/matrixorigin/matrixone/pkg/container/vector"
"github.com/stretchr/testify/require"
)

type mock struct {
name string
source *vector.Vector
col *vector.Vector
expected_db string
expected_tbl string
expected_idx string
expected_col string
expected_err error
}

func TestHandleDataSource(t *testing.T) {
defer leaktest.AfterTest(t)()
mp := mpool.MustNewZero()

testCases := []mock{
{
name: "invalid_source_length",
source: createStringVector(t, mp, []string{"db", "table"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("wrong input len"),
},
{
name: "invalid_col_length",
source: createStringVector(t, mp, []string{"db.table"}),
col: createStringVector(t, mp, []string{"col1", "col2"}),
expected_err: moerr.NewInternalErrorNoCtx("wrong input len"),
},
{
name: "valid_old_format",
source: createStringVector(t, mp, []string{"db1.table1"}),
col: createStringVector(t, mp, []string{"col1"}),
expected_db: "db1",
expected_tbl: "table1",
expected_col: "col1",
},
{
name: "invalid_old_format_identifier",
source: createStringVector(t, mp, []string{"123db.table!"}),
col: createStringVector(t, mp, []string{"col1"}),
expected_err: moerr.NewInternalErrorNoCtx("invalid db or table name format"),
},
{
name: "valid_new_format",
source: createStringVector(t, mp, []string{"db2.table2.$index1"}),
col: createStringVector(t, mp, []string{"col2"}),
expected_db: "db2",
expected_tbl: "table2",
expected_idx: "index1",
expected_col: "col2",
},
{
name: "invalid_new_format_index1",
source: createStringVector(t, mp, []string{"db.table.index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("index name must start with $ and follow identifier rules"),
},
{
name: "invalid_new_format_index2",
source: createStringVector(t, mp, []string{"db.table.$$index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("index name must start with $ and follow identifier rules"),
},
{
name: "invalid_new_format_identifier",
source: createStringVector(t, mp, []string{"db.table.#index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("index name must start with $ and follow identifier rules"),
},
{
name: "invalid_format_1_part",
source: createStringVector(t, mp, []string{"justdb"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("source must be in db_name.table_name or db_name.table_name.$index_name format"),
},
{
name: "invalid_format_4_parts",
source: createStringVector(t, mp, []string{"db.table.extra.$index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("source must be in db_name.table_name or db_name.table_name.$index_name format"),
},
{
name: "empty_dbname",
source: createStringVector(t, mp, []string{".table.$index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("invalid db or table name format"),
},
{
name: "whitespace_in_source",
source: createStringVector(t, mp, []string{"db .table.$index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_err: moerr.NewInternalErrorNoCtx("invalid db or table name format"),
},
{
name: "underscore_identifier",
source: createStringVector(t, mp, []string{"_db.table.$_index"}),
col: createStringVector(t, mp, []string{"col"}),
expected_db: "_db",
expected_tbl: "table",
expected_idx: "_index",
expected_col: "col",
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
defer func() {
if tc.source != nil {
tc.source.Free(mp)
}
if tc.col != nil {
tc.col.Free(mp)
}
}()

db, tbl, idx, col, err := handleDataSource(tc.source, tc.col)

if tc.expected_err != nil {
require.Error(t, err)
require.Contains(t, err.Error(), tc.expected_err.Error())
} else {
require.NoError(t, err)
require.Equal(t, tc.expected_db, db)
require.Equal(t, tc.expected_tbl, tbl)
require.Equal(t, tc.expected_idx, idx)
require.Equal(t, tc.expected_col, col)
}
})
}
}

func createStringVector(t *testing.T, mp *mpool.MPool, values []string) *vector.Vector {
vec := vector.NewVec(types.T_varchar.ToType())
for _, v := range values {
err := vector.AppendBytes(vec, []byte(v), false, mp)
require.NoError(t, err)
}
return vec
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
select * from metadata_scan('table_func_metadata_scan_idx.no_exist_table', '*') g;
SQL parser error: table "no_exist_table" does not exist
drop table if exists t;
create table t(a int, b varchar, c float, d decimal(10, 8), e float(5, 2));
insert into t values(1, null, 1.1, 1, 1.11);
insert into t values(2, "abc", 2.0, 2, 2.22);
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
select count(*) from t;
count(*)
8192
select mo_ctl("dn", "flush", "table_func_metadata_scan_idx.t");
mo_ctl(dn, flush, table_func_metadata_scan_idx.t)
{\n "method": "Flush",\n "result": [\n {\n "returnStr": "OK"\n }\n ]\n}\n
CREATE INDEX idx_a ON t(a);
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.$idx_a", "*")g;
col_name
__mo_index_idx_col
__mo_index_pri_col
CREATE INDEX idx_c_d ON t(c, d);
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.$idx_c_d", "*")g;
col_name
__mo_index_idx_col
__mo_index_pri_col
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.$idx_invalid", "*")g;
SQL parser error: table "" does not exist
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.idx_invalid", "*")g;
internal error: index name must start with $ and follow identifier rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
select * from metadata_scan('table_func_metadata_scan_idx.no_exist_table', '*') g;
drop table if exists t;
create table t(a int, b varchar, c float, d decimal(10, 8), e float(5, 2));
insert into t values(1, null, 1.1, 1, 1.11);
insert into t values(2, "abc", 2.0, 2, 2.22);
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
insert into t select * from t;
select count(*) from t;
-- @ignore:0
select mo_ctl("dn", "flush", "table_func_metadata_scan_idx.t");
CREATE INDEX idx_a ON t(a);
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.$idx_a", "*")g;
CREATE INDEX idx_c_d ON t(c, d);
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.$idx_c_d", "*")g;
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.$idx_invalid", "*")g;
select distinct(col_name) from metadata_scan("table_func_metadata_scan_idx.t.idx_invalid", "*")g;
Loading