From b60f9c66660d85741d1e05c1391982e5cb1c2652 Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:19:28 +0800 Subject: [PATCH 01/10] feat: watch db support sharding --story=121164695 --- .../admin-server/get_sharding_db_config.md | 4 +- .../admin-server/update_sharding_db_config.md | 4 +- src/common/tablenames.go | 3 + src/storage/dal/mongo/local/db.go | 5 - src/storage/dal/mongo/local/error.go | 4 - src/storage/dal/mongo/local/mongo.go | 21 +- src/storage/dal/mongo/local/txn_manager.go | 9 +- src/storage/dal/mongo/sharding/mongo.go | 364 +++++++++++------- src/storage/dal/mongo/sharding/types.go | 17 +- src/storage/driver/mongodb/mongodb.go | 12 +- 10 files changed, 258 insertions(+), 185 deletions(-) diff --git a/docs/apidoc/inner/admin-server/get_sharding_db_config.md b/docs/apidoc/inner/admin-server/get_sharding_db_config.md index 135656ca7a..ced0777b0a 100644 --- a/docs/apidoc/inner/admin-server/get_sharding_db_config.md +++ b/docs/apidoc/inner/admin-server/get_sharding_db_config.md @@ -16,7 +16,7 @@ GET /migrate/v3/find/system/sharding_db_config "permission": null, "data": { "master_db": "masteruuid", - "for_new_tenant": "slave1uuid", + "for_new_data": "slave1uuid", "slave_db": { "slave1uuid": { "name": "slave1", @@ -53,7 +53,7 @@ GET /migrate/v3/find/system/sharding_db_config | 参数名称 | 参数类型 | 描述 | |----------------|-------------------|----------------------------| | master_db | string | 主库唯一标识 | -| for_new_tenant | string | 指定新增租户数据写入哪个库,存储这个数据库的唯一标识 | +| for_new_data | string | 指定新增租户数据写入哪个库,存储这个数据库的唯一标识 | | slave_db | map[string]object | 从库唯一标识->从库配置的映射 | #### data.slave_db[key] diff --git a/docs/apidoc/inner/admin-server/update_sharding_db_config.md b/docs/apidoc/inner/admin-server/update_sharding_db_config.md index e10df28b50..2b9507aac7 100644 --- a/docs/apidoc/inner/admin-server/update_sharding_db_config.md +++ b/docs/apidoc/inner/admin-server/update_sharding_db_config.md @@ -10,7 +10,7 @@ PUT /migrate/v3/update/system/sharding_db_config | 参数名称 | 参数类型 | 必选 | 描述 | |-----------------|-------------------|----|---------------------------------------------| -| for_new_tenant | string | 否 | 指定新增租户数据写入哪个库。对于存量数据库指定它的唯一标识。对于新增的从库指定它的名称 | +| for_new_data | string | 否 | 指定新增租户数据写入哪个库。对于存量数据库指定它的唯一标识。对于新增的从库指定它的名称 | | create_slave_db | object array | 否 | 新增的从库配置数组 | | update_slave_db | map[string]object | 否 | 更新的从库唯一标识->从库配置的映射 | @@ -41,7 +41,7 @@ PUT /migrate/v3/update/system/sharding_db_config ```json { - "for_new_tenant": "slave1uuid", + "for_new_data": "slave1uuid", "create_slave_db": [ { "name": "slave2", diff --git a/src/common/tablenames.go b/src/common/tablenames.go index 1a2d1c4120..fd45c683fd 100644 --- a/src/common/tablenames.go +++ b/src/common/tablenames.go @@ -120,6 +120,9 @@ const ( // BKTableNameObjectBaseMapping object base mapping table BKTableNameObjectBaseMapping = "ObjectBaseMapping" + + // BKTableNameWatchDBRelation is the db and watch db relation table + BKTableNameWatchDBRelation = "WatchDBRelation" ) // AllTables is all table names, not include the sharding tables which is created dynamically, diff --git a/src/storage/dal/mongo/local/db.go b/src/storage/dal/mongo/local/db.go index 5a0cd8c2f6..6d38baad9a 100644 --- a/src/storage/dal/mongo/local/db.go +++ b/src/storage/dal/mongo/local/db.go @@ -21,7 +21,6 @@ import ( "context" "configcenter/src/common/metadata" - "configcenter/src/storage/dal/redis" "configcenter/src/storage/dal/types" ) @@ -60,8 +59,4 @@ type DB interface { CommitTransaction(context.Context, 
*metadata.TxnCapable) error // AbortTransaction 取消事务 AbortTransaction(context.Context, *metadata.TxnCapable) (bool, error) - - // InitTxnManager TxnID management of initial transaction - // TODO 后续放到TenantDB里,只用初始化一次 - InitTxnManager(r redis.Client) error } diff --git a/src/storage/dal/mongo/local/error.go b/src/storage/dal/mongo/local/error.go index e7d45ef925..8b27bdc444 100644 --- a/src/storage/dal/mongo/local/error.go +++ b/src/storage/dal/mongo/local/error.go @@ -21,7 +21,6 @@ import ( "context" "configcenter/src/common/metadata" - "configcenter/src/storage/dal/redis" "configcenter/src/storage/dal/types" ) @@ -82,9 +81,6 @@ func (e *errDB) AbortTransaction(_ context.Context, _ *metadata.TxnCapable) (boo return false, e.err } -// InitTxnManager return error for method chaining -func (e *errDB) InitTxnManager(_ redis.Client) error { return e.err } - type errColl struct { err error } diff --git a/src/storage/dal/mongo/local/mongo.go b/src/storage/dal/mongo/local/mongo.go index a9e202fbc8..12ce0a3f65 100644 --- a/src/storage/dal/mongo/local/mongo.go +++ b/src/storage/dal/mongo/local/mongo.go @@ -26,7 +26,7 @@ import ( "configcenter/src/common/blog" "configcenter/src/common/json" "configcenter/src/common/metadata" - "configcenter/src/storage/dal/redis" + "configcenter/src/common/util/table" "configcenter/src/storage/dal/types" "github.com/tidwall/gjson" @@ -344,12 +344,6 @@ func checkMongodbVersion(db string, client *mongo.Client) error { return nil } -// InitTxnManager TxnID management of initial transaction -// TODO remove this -func (c *Mongo) InitTxnManager(r redis.Client) error { - return c.tm.InitTxnManager(r) -} - // Close replica client func (c *Mongo) Close() error { c.cli.Client().Disconnect(context.TODO()) @@ -418,6 +412,12 @@ func (c *Mongo) GetDBClient() *mongo.Client { func (c *Mongo) GetDBName() string { return c.cli.DBName() } + +// GetMongoClient get mongo client +func (c *Mongo) GetMongoClient() *MongoClient { + return c.cli +} + func (c *Mongo) redirectTable(tableName string) string { if common.IsObjectInstShardingTable(tableName) { tableName = common.BKTableNameBaseInst @@ -576,6 +576,11 @@ func (c *Mongo) DropTable(ctx context.Context, collName string) error { // CreateTable 创建集合 TODO test func (c *Mongo) CreateTable(ctx context.Context, collName string) error { + opts := make([]*options.CreateCollectionOptions, 0) + if table.NeedPreImageTable(collName) { + opts = append(opts, options.CreateCollection().SetChangeStreamPreAndPostImages(bson.M{"enabled": true})) + } + if c.enableSharding { var err error collName, err = c.convColl(collName) @@ -583,7 +588,7 @@ func (c *Mongo) CreateTable(ctx context.Context, collName string) error { return err } } - return c.cli.Database().RunCommand(ctx, map[string]interface{}{"create": collName}).Err() + return c.cli.Database().CreateCollection(ctx, collName, opts...) 
} // RenameTable 更新集合名称 diff --git a/src/storage/dal/mongo/local/txn_manager.go b/src/storage/dal/mongo/local/txn_manager.go index 4db5db42b6..0bd1f04734 100644 --- a/src/storage/dal/mongo/local/txn_manager.go +++ b/src/storage/dal/mongo/local/txn_manager.go @@ -86,13 +86,6 @@ type TxnManager struct { cache redis.Client } -// InitTxnManager is to init txn manager, set the redis storage -// TODO remove this -func (t *TxnManager) InitTxnManager(r redis.Client) error { - t.cache = r - return nil -} - // GetTxnNumber TODO func (t *TxnManager) GetTxnNumber(sessionID string) (int64, error) { key := sessionKey(sessionID).genKey(t.dbID) @@ -305,7 +298,7 @@ func (t *TxnManager) setTxnError(sessionID sessionKey, txnErr error) { func (t *TxnManager) GetTxnError(sessionID sessionKey) TxnErrorType { key := sessionID.genErrKey(t.dbID) errorType, err := t.cache.Get(context.Background(), key).Result() - if err != nil && redis.IsNilErr(err) { + if err != nil && !redis.IsNilErr(err) { blog.Errorf("get txn error failed, err: %v, session id: %s", err, sessionID) return UnknownType } diff --git a/src/storage/dal/mongo/sharding/mongo.go b/src/storage/dal/mongo/sharding/mongo.go index 1e49c9d8d1..a4c3c096a9 100644 --- a/src/storage/dal/mongo/sharding/mongo.go +++ b/src/storage/dal/mongo/sharding/mongo.go @@ -35,98 +35,190 @@ import ( // ShardingMongoManager is the sharding db manager for mongo type ShardingMongoManager struct { + *shardingMongoClient + // conf is the mongo client config + conf *local.MongoCliConf +} + +// NewShardingMongo returns new sharding db manager for mongo +func NewShardingMongo(config local.MongoConf, timeout time.Duration, crypto cryptor.Cryptor) (ShardingDB, error) { + clientInfo, masterMongo, err := newShardingMongoClient(config, timeout, crypto) + if err != nil { + return nil, err + } + + sharding := &ShardingMongoManager{ + shardingMongoClient: clientInfo, + conf: &local.MongoCliConf{DisableInsert: config.DisableInsert}, + } + + sharding.conf.IDGenStep, err = masterMongo.InitIDGenerator(context.Background()) + if err != nil { + return nil, err + } + + err = tenant.Init(&tenant.Options{DB: sharding.IgnoreTenant()}) + if err != nil { + return nil, err + } + + if err = sharding.RefreshTenantDBMap(); err != nil { + return nil, err + } + + // loop refresh tenant to db relation + go func() { + for { + time.Sleep(time.Minute) + if err = sharding.RefreshTenantDBMap(); err != nil { + blog.Errorf("refresh tenant to db relation failed, err: %v", err) + continue + } + } + }() + + return sharding, nil +} + +// shardingMongoClient is the common structure that stores all sharding db mongo client info +type shardingMongoClient struct { // masterCli is the client for master mongodb, master mongodb stores the platform data and some tenant data masterCli *local.MongoClient - // newTenantCli is the client for mongodb that new tenant data will be stored into - newTenantCli *local.MongoClient + // newDataCli is the client for mongodb that new data without specified db will be stored into + newDataCli *local.MongoClient // tenantCli is the tenant id to mongodb client map tenantCli map[string]*local.MongoClient // dbClientMap is the db uuid to mongodb client map dbClientMap map[string]*local.MongoClient // tm is the transaction manager tm *local.ShardingTxnManager - // conf is the mongo client config - conf *local.MongoCliConf } -// NewShardingMongo returns new sharding db manager for mongo -func NewShardingMongo(config local.MongoConf, timeout time.Duration, crypto cryptor.Cryptor) (ShardingDB, error) { 
+func newShardingMongoClient(config local.MongoConf, timeout time.Duration, crypto cryptor.Cryptor) ( + *shardingMongoClient, *local.Mongo, error) { + // connect master mongodb masterCli, err := local.NewMongoClient(true, "", &config, timeout) if err != nil { - return nil, fmt.Errorf("new master mongo client failed, err: %v", err) + return nil, nil, fmt.Errorf("new master mongo client failed, err: %v", err) } - sharding := &ShardingMongoManager{ - masterCli: masterCli, - tenantCli: make(map[string]*local.MongoClient), - tm: new(local.ShardingTxnManager), - conf: &local.MongoCliConf{DisableInsert: config.DisableInsert}, + clientInfo := &shardingMongoClient{ + masterCli: masterCli, + newDataCli: nil, + tenantCli: make(map[string]*local.MongoClient), + dbClientMap: nil, + tm: new(local.ShardingTxnManager), } - masterMongo, err := local.NewMongo(masterCli, new(local.TxnManager), sharding.conf, + masterMongo, err := local.NewMongo(masterCli, new(local.TxnManager), &local.MongoCliConf{IDGenStep: 1}, &local.MongoOptions{IgnoreTenant: true}) if err != nil { - return nil, fmt.Errorf("new master mongo db client failed, err: %v", err) - } - - ctx := context.Background() - sharding.conf.IDGenStep, err = masterMongo.InitIDGenerator(ctx) - if err != nil { - return nil, err + return nil, nil, fmt.Errorf("new master mongo db client failed, err: %v", err) } // get sharding db config - shardingConf, err := getShardingDBConfig(ctx, masterMongo) + shardingConf, err := getShardingDBConfig(context.Background(), masterMongo) if err != nil { - return nil, err + return nil, nil, err } // fill mongo client info - sharding.masterCli.SetUUID(shardingConf.MasterDB) - - sharding.dbClientMap = map[string]*local.MongoClient{shardingConf.MasterDB: sharding.masterCli} + clientInfo.masterCli.SetUUID(shardingConf.MasterDB) + clientInfo.dbClientMap = map[string]*local.MongoClient{shardingConf.MasterDB: clientInfo.masterCli} for slaveUUID, mongoConf := range shardingConf.SlaveDB { // decrypt slave mongodb uri mongoConf.URI, err = crypto.Decrypt(mongoConf.URI) if err != nil { - return nil, fmt.Errorf("decrypt %s slave mongo uri failed, err: %v", slaveUUID, err) + return nil, nil, fmt.Errorf("decrypt %s slave mongo uri failed, err: %v", slaveUUID, err) } client, err := local.NewMongoClient(false, slaveUUID, &mongoConf, timeout) if err != nil { - return nil, fmt.Errorf("new %s slave mongo client failed, err: %v", slaveUUID, err) + return nil, nil, fmt.Errorf("new %s slave mongo client failed, err: %v", slaveUUID, err) } - sharding.dbClientMap[slaveUUID] = client + clientInfo.dbClientMap[slaveUUID] = client } - newTenantCli, exists := sharding.dbClientMap[shardingConf.ForNewTenant] + newDataCli, exists := clientInfo.dbClientMap[shardingConf.ForNewData] if !exists { - return nil, fmt.Errorf("add new tenant db %s config not found", shardingConf.ForNewTenant) + return nil, nil, fmt.Errorf("add new tenant db %s config not found", shardingConf.ForNewData) } - sharding.newTenantCli = newTenantCli + clientInfo.newDataCli = newDataCli - err = tenant.Init(&tenant.Options{DB: sharding.IgnoreTenant()}) + return clientInfo, masterMongo, nil +} + +// newTenantDB new db client for tenant +func (c *shardingMongoClient) newTenantDB(tenant string, conf *local.MongoCliConf) local.DB { + if tenant == "" { + return local.NewErrDB(errors.New("tenant is not set")) + } + + client, exists := c.tenantCli[tenant] + if !exists { + return local.NewErrDB(fmt.Errorf("tenant %s not exists", tenant)) + } + + if client.Disabled() { + return 
local.NewErrDB(fmt.Errorf("db client %s is disabled", client.UUID())) + } + + txnManager, err := c.tm.DB(client.UUID()) if err != nil { - return nil, err + return local.NewErrDB(err) } - if err = sharding.RefreshTenantDBMap(); err != nil { - return nil, err + db, err := local.NewMongo(client, txnManager, conf, &local.MongoOptions{Tenant: tenant}) + if err != nil { + return local.NewErrDB(err) } + return db +} - // loop refresh tenant to db relation - go func() { - for { - time.Sleep(time.Minute) - if err = sharding.RefreshTenantDBMap(); err != nil { - blog.Errorf("refresh tenant to db relation failed, err: %v", err) - continue - } +// newIgnoreTenantDB new master db client that do not use tenant +func (c *shardingMongoClient) newIgnoreTenantDB(conf *local.MongoCliConf) local.DB { + txnManager, err := c.tm.DB(c.masterCli.UUID()) + if err != nil { + return local.NewErrDB(err) + } + + db, err := local.NewMongo(c.masterCli, txnManager, conf, &local.MongoOptions{IgnoreTenant: true}) + if err != nil { + return local.NewErrDB(err) + } + return db +} + +// ping all sharding db clients +func (c *shardingMongoClient) ping() error { + for uuid, client := range c.dbClientMap { + err := client.Client().Ping(context.Background(), nil) + if err != nil { + return fmt.Errorf("ping db %s failed, err: %v", uuid, err) } - }() + } + return nil +} - return sharding, nil +// execForAllDB execute handler for all db clients +func (c *shardingMongoClient) execForAllDB(handler func(db local.DB) error, conf *local.MongoCliConf) error { + for uuid, client := range c.dbClientMap { + txnManager, err := c.tm.DB(client.UUID()) + if err != nil { + return fmt.Errorf("get txn manager failed, err: %v", err) + } + + db, err := local.NewMongo(client, txnManager, conf, &local.MongoOptions{IgnoreTenant: true}) + if err != nil { + return fmt.Errorf("generate %s db client failed, err: %v", uuid, err) + } + + if err = handler(db); err != nil { + return fmt.Errorf("execute for db %s failed, err: %v", uuid, err) + } + } + return nil } // getShardingDBConfig get sharding db config @@ -142,10 +234,10 @@ func getShardingDBConfig(ctx context.Context, c *local.Mongo) (*ShardingDBConf, // generate new sharding db config and save it if not exists, new tenant will be added to master db by default newUUID := uuid.NewString() conf = &ShardingDBConf{ - ID: common.ShardingDBConfID, - MasterDB: newUUID, - ForNewTenant: newUUID, - SlaveDB: make(map[string]local.MongoConf), + ID: common.ShardingDBConfID, + MasterDB: newUUID, + ForNewData: newUUID, + SlaveDB: make(map[string]local.MongoConf), } if err = c.Table(common.BKTableNameSystem).Insert(ctx, conf); err != nil { return nil, fmt.Errorf("insert new sharding db config failed, err: %v", err) @@ -187,7 +279,7 @@ func (m *ShardingMongoManager) Shard(opt ShardOpts) local.DB { // NewTenantCli returns the new tenant db client func (m *ShardingMongoManager) NewTenantCli(tenant string) (local.DB, string, error) { - client := m.newTenantCli + client := m.newDataCli txnManager, err := m.tm.DB(client.UUID()) if err != nil { return nil, "", err @@ -197,48 +289,17 @@ func (m *ShardingMongoManager) NewTenantCli(tenant string) (local.DB, string, er if err != nil { return nil, "", err } - return db, m.newTenantCli.UUID(), nil + return db, m.newDataCli.UUID(), nil } // Tenant returns the db client for tenant func (m *ShardingMongoManager) Tenant(tenant string) local.DB { - if tenant == "" { - return local.NewErrDB(errors.New("tenant is not set")) - } - - client, exists := m.tenantCli[tenant] - if !exists { - 
return local.NewErrDB(fmt.Errorf("tenant %s not exists", tenant)) - } - - if client.Disabled() { - return local.NewErrDB(fmt.Errorf("db client %s is disabled", client.UUID())) - } - - txnManager, err := m.tm.DB(client.UUID()) - if err != nil { - return local.NewErrDB(err) - } - - db, err := local.NewMongo(client, txnManager, m.conf, &local.MongoOptions{Tenant: tenant}) - if err != nil { - return local.NewErrDB(err) - } - return db + return m.shardingMongoClient.newTenantDB(tenant, m.conf) } // IgnoreTenant returns the master db client that do not use tenant func (m *ShardingMongoManager) IgnoreTenant() local.DB { - txnManager, err := m.tm.DB(m.masterCli.UUID()) - if err != nil { - return local.NewErrDB(err) - } - - db, err := local.NewMongo(m.masterCli, txnManager, m.conf, &local.MongoOptions{IgnoreTenant: true}) - if err != nil { - return local.NewErrDB(err) - } - return db + return m.shardingMongoClient.newIgnoreTenantDB(m.conf) } // InitTxnManager TxnID management of initial transaction @@ -248,69 +309,86 @@ func (m *ShardingMongoManager) InitTxnManager(r redis.Client) error { // Ping all sharding db clients func (m *ShardingMongoManager) Ping() error { - for uuid, client := range m.dbClientMap { - err := client.Client().Ping(context.Background(), nil) - if err != nil { - return fmt.Errorf("ping db %s failed, err: %v", uuid, err) - } - } - return nil + return m.shardingMongoClient.ping() } // ExecForAllDB execute handler for all db clients func (m *ShardingMongoManager) ExecForAllDB(handler func(db local.DB) error) error { - for uuid, client := range m.dbClientMap { - txnManager, err := m.tm.DB(client.UUID()) - if err != nil { - return fmt.Errorf("get txn manager failed, err: %v", err) - } - - db, err := local.NewMongo(client, txnManager, m.conf, &local.MongoOptions{IgnoreTenant: true}) - if err != nil { - return fmt.Errorf("generate %s db client failed, err: %v", uuid, err) - } - - if err = handler(db); err != nil { - return fmt.Errorf("execute for db %s failed, err: %v", uuid, err) - } - } - return nil + return m.shardingMongoClient.execForAllDB(handler, m.conf) } -// DisableDBShardingMongo is the disabled db sharding mongo db manager, right now only watch db sharding is disabled -type DisableDBShardingMongo struct { - client *local.MongoClient - tm *local.TxnManager - conf *local.MongoCliConf +// WatchMongo is the watch mongo db manager +type WatchMongo struct { + *shardingMongoClient + // dbWatchDBMap is the db uuid to watch db uuid map + dbWatchDBMap map[string]string } -// NewDisableDBShardingMongo returns new disabled db sharding mongo db manager -func NewDisableDBShardingMongo(config local.MongoConf, timeout time.Duration) (ShardingDB, error) { - client, err := local.NewMongoClient(true, "", &config, timeout) +// NewWatchMongo returns new watch mongo db manager +func NewWatchMongo(config local.MongoConf, timeout time.Duration, crypto cryptor.Cryptor) (ShardingDB, error) { + clientInfo, masterMongo, err := newShardingMongoClient(config, timeout, crypto) if err != nil { - return nil, fmt.Errorf("new mongo client failed, err: %v", err) + return nil, err } - db := &DisableDBShardingMongo{ - client: client, - tm: new(local.TxnManager), - conf: &local.MongoCliConf{DisableInsert: config.DisableInsert}, + sharding := &WatchMongo{ + shardingMongoClient: clientInfo, + dbWatchDBMap: make(map[string]string), } - masterMongo, err := local.NewMongo(client, new(local.TxnManager), db.conf, &local.MongoOptions{IgnoreTenant: true}) + // generate db uuid to watch db uuid map + relations := 
make([]WatchDBRelation, 0) + err = masterMongo.Table(common.BKTableNameWatchDBRelation).Find(nil).All(context.Background(), &relations) if err != nil { - return nil, fmt.Errorf("new master mongo db client failed, err: %v", err) + return nil, fmt.Errorf("get db and watch db relation failed, err: %v", err) } - db.conf.IDGenStep, err = masterMongo.InitIDGenerator(context.Background()) - if err != nil { + + for _, relation := range relations { + sharding.dbWatchDBMap[relation.DB] = relation.WatchDB + } + + // refresh tenant to db relation + if err = sharding.refreshTenantDBMap(); err != nil { return nil, err } - return db, nil + go func() { + for { + time.Sleep(time.Minute) + if err = sharding.refreshTenantDBMap(); err != nil { + blog.Errorf("refresh tenant to db relation failed, err: %v", err) + continue + } + } + }() + + return sharding, nil +} + +func (m *WatchMongo) refreshTenantDBMap() error { + tenantDBMap := make(map[string]string) + for _, relation := range tenant.GetAllTenants() { + watchDBUUID, exists := m.dbWatchDBMap[relation.Database] + if exists { + tenantDBMap[relation.TenantID] = watchDBUUID + } + } + + tenantCli := make(map[string]*local.MongoClient) + for tenant, db := range tenantDBMap { + client, exists := m.dbClientMap[db] + if !exists { + return fmt.Errorf("tenant %s related db %s config not found", tenant, db) + } + tenantCli[tenant] = client + } + + m.tenantCli = tenantCli + return nil } // Shard returns the sharded db client -func (m *DisableDBShardingMongo) Shard(opt ShardOpts) local.DB { +func (m *WatchMongo) Shard(opt ShardOpts) local.DB { if opt.IsIgnoreTenant() { return m.IgnoreTenant() } @@ -318,38 +396,26 @@ func (m *DisableDBShardingMongo) Shard(opt ShardOpts) local.DB { } // Tenant returns the db client for tenant -func (m *DisableDBShardingMongo) Tenant(tenant string) local.DB { - if tenant == "" { - return local.NewErrDB(errors.New("tenant is not set")) - } - - db, err := local.NewMongo(m.client, m.tm, m.conf, &local.MongoOptions{Tenant: tenant}) - if err != nil { - return local.NewErrDB(err) - } - return db +func (m *WatchMongo) Tenant(tenant string) local.DB { + return m.shardingMongoClient.newTenantDB(tenant, &local.MongoCliConf{IDGenStep: 1}) } // IgnoreTenant returns the master db client that do not use tenant -func (m *DisableDBShardingMongo) IgnoreTenant() local.DB { - db, err := local.NewMongo(m.client, m.tm, m.conf, &local.MongoOptions{IgnoreTenant: true}) - if err != nil { - return local.NewErrDB(err) - } - return db +func (m *WatchMongo) IgnoreTenant() local.DB { + return m.shardingMongoClient.newIgnoreTenantDB(&local.MongoCliConf{IDGenStep: 1}) } // InitTxnManager TxnID management of initial transaction -func (m *DisableDBShardingMongo) InitTxnManager(r redis.Client) error { - return m.tm.InitTxnManager(r) +func (m *WatchMongo) InitTxnManager(_ redis.Client) error { + return fmt.Errorf("watch db do not support transaction") } -// Ping db client -func (m *DisableDBShardingMongo) Ping() error { - return m.client.Client().Ping(context.Background(), nil) +// Ping all sharding db clients +func (m *WatchMongo) Ping() error { + return m.shardingMongoClient.ping() } // ExecForAllDB execute handler for all db clients -func (m *DisableDBShardingMongo) ExecForAllDB(handler func(db local.DB) error) error { - return handler(m.IgnoreTenant()) +func (m *WatchMongo) ExecForAllDB(handler func(db local.DB) error) error { + return m.shardingMongoClient.execForAllDB(handler, &local.MongoCliConf{IDGenStep: 1}) } diff --git a/src/storage/dal/mongo/sharding/types.go 
b/src/storage/dal/mongo/sharding/types.go index 5d5bb12094..2d3231a594 100644 --- a/src/storage/dal/mongo/sharding/types.go +++ b/src/storage/dal/mongo/sharding/types.go @@ -62,8 +62,17 @@ func (s *ShardOptions) IsIgnoreTenant() bool { // ShardingDBConf is sharding mongodb config type ShardingDBConf struct { - ID string `bson:"_id"` - MasterDB string `bson:"master_db"` - ForNewTenant string `bson:"for_new_tenant"` - SlaveDB map[string]local.MongoConf `bson:"slave_db"` + ID string `bson:"_id"` + // MasterDB is the master db uuid generated by system + MasterDB string `bson:"master_db"` + // ForNewData is the uuid for db that new data without specified db will be stored into + ForNewData string `bson:"for_new_data"` + // SlaveDB is the slave db uuid to mongodb config map + SlaveDB map[string]local.MongoConf `bson:"slave_db"` +} + +// WatchDBRelation is db and watch db relation +type WatchDBRelation struct { + DB string `bson:"db"` + WatchDB string `bson:"watch_db"` } diff --git a/src/storage/driver/mongodb/mongodb.go b/src/storage/driver/mongodb/mongodb.go index 2aa40072cc..a1820ca2af 100644 --- a/src/storage/driver/mongodb/mongodb.go +++ b/src/storage/driver/mongodb/mongodb.go @@ -141,9 +141,15 @@ func SetShardingCli(prefix string, config *mongo.Config, cryptoConf *cryptor.Con return nil } -// SetDisableDBShardingCli set mongodb client that disables db sharding with prefix -func SetDisableDBShardingCli(prefix string, config *mongo.Config) error { - shardingDB, err := sharding.NewDisableDBShardingMongo(config.GetMongoConf(), time.Minute) +// SetWatchCli set mongodb client that disables db sharding with prefix +func SetWatchCli(prefix string, config *mongo.Config, cryptoConf *cryptor.Config) error { + crypto, err := cryptor.NewCrypto(cryptoConf) + if err != nil { + blog.Errorf("new %s mongo crypto failed, err: %v", prefix, err) + return errors.NewCCError(common.CCErrCommResourceInitFailed, "init mongo crypto failed") + } + + shardingDB, err := sharding.NewWatchMongo(config.GetMongoConf(), time.Minute, crypto) if err != nil { blog.Errorf("new %s disable db sharding mongo client failed, err: %v", prefix, err) return errors.NewCCError(common.CCErrCommResourceInitFailed, "init disable db sharding mongo client failed") From 4d4ea65bf704a0cafec96ed1bb0129b9e6103b5b Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:25:52 +0800 Subject: [PATCH 02/10] feat: support watch by database --story=120905990 --- src/storage/stream/event/event.go | 11 +- src/storage/stream/event/list.go | 263 ++++++++++---- src/storage/stream/event/list_watch.go | 84 ++--- src/storage/stream/event/utils.go | 236 ++++++++++--- src/storage/stream/event/watch.go | 460 ++++++++++++++----------- src/storage/stream/loop/loop.go | 2 - src/storage/stream/loop/loop_watch.go | 193 ++++++----- src/storage/stream/stream.go | 9 +- src/storage/stream/types/types.go | 234 ++++++++----- 9 files changed, 944 insertions(+), 548 deletions(-) delete mode 100644 src/storage/stream/loop/loop.go diff --git a/src/storage/stream/event/event.go b/src/storage/stream/event/event.go index e7d87b7444..6d256e5a86 100644 --- a/src/storage/stream/event/event.go +++ b/src/storage/stream/event/event.go @@ -10,18 +10,19 @@ * limitations under the License. 
*/ -// Package event TODO +// Package event defines event watch logics package event import "go.mongodb.org/mongo-driver/mongo" -// Event TODO +// Event is the struct for event watch type Event struct { database string + DBName string client *mongo.Client } -// NewEvent TODO -func NewEvent(client *mongo.Client, db string) (*Event, error) { - return &Event{client: client, database: db}, nil +// NewEvent new Event +func NewEvent(client *mongo.Client, db, dbName string) (*Event, error) { + return &Event{client: client, database: db, DBName: dbName}, nil } diff --git a/src/storage/stream/event/list.go b/src/storage/stream/event/list.go index 2700be2f05..08042852b9 100644 --- a/src/storage/stream/event/list.go +++ b/src/storage/stream/event/list.go @@ -14,14 +14,18 @@ package event import ( "context" - "fmt" "reflect" + "sync" "time" + "configcenter/pkg/filter" + "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/json" + "configcenter/src/common/mapstr" "configcenter/src/storage/stream/types" - "github.com/tidwall/gjson" + + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo/options" ) @@ -32,123 +36,244 @@ func (e *Event) List(ctx context.Context, opts *types.ListOptions) (ch chan *typ if err := opts.CheckSetDefault(); err != nil { return nil, err } - // prepare for list all the data. - totalCnt, err := e.client.Database(e.database). - Collection(opts.Collection). - CountDocuments(ctx, opts.Filter) + + // list collections + collections, err := e.client.Database(e.database).ListCollectionNames(ctx, bson.M{}) if err != nil { - return nil, fmt.Errorf("count db %s, collection: %s with filter: %+v failed, err: %v", - e.database, opts.Collection, opts.Filter, err) + blog.Errorf("list db: %s collections failed, err :%v", e.database, err) + return nil, err + } + + collOpts := make(map[string]types.WatchCollOptions) + for id, collOpt := range opts.CollOpts { + collOpts[id] = types.WatchCollOptions{CollectionOptions: collOpt} + } + + listOpts := &listOptions{ + collections: collections, + collOptsInfo: parseCollOpts(collOpts), + pageSize: opts.PageSize, } eventChan := make(chan *types.Event, types.DefaultEventChanSize) go func() { - e.lister(ctx, false, totalCnt, opts, eventChan) + e.lister(ctx, opts.WithRetry, listOpts, eventChan) }() return eventChan, nil } -// lister is try to list data with filter. withRetry is to control whether you need to retry list when an error encountered. -func (e *Event) lister(ctx context.Context, withRetry bool, cnt int64, opts *types.ListOptions, ch chan *types.Event) { +type listOptions struct { + collections []string + collOptsInfo *parsedCollOptsInfo + pageSize *int +} - pageSize := *opts.PageSize +// lister try to list data with filter. withRetry controls whether you need to retry list when an error encountered. 
+func (e *Event) lister(ctx context.Context, withRetry bool, opts *listOptions, ch chan *types.Event) { reset := func() { // sleep a while and retry later time.Sleep(3 * time.Second) } - for start := 0; start < int(cnt); start += pageSize { - findOpts := new(options.FindOptions) - findOpts.SetSkip(int64(start)) - findOpts.SetLimit(int64(pageSize)) + var wg sync.WaitGroup + needReturn := false + pipeline := make(chan struct{}, 10) + for _, collection := range opts.collections { + taskIDs, findOpts, cond, needSkip, err := e.parseCollListOpts(collection, opts) + if err != nil { + if withRetry { + continue + } + return + } + + if needSkip { + continue + } + + // list data from this collection + pipeline <- struct{}{} + wg.Add(1) + listOpt := &listOneCollOptions{collection: collection, taskIDs: taskIDs, filter: cond, + findOpts: findOpts, ch: ch, withRetry: withRetry, reset: reset} + go func(listOpt *listOneCollOptions) { + defer func() { + wg.Done() + <-pipeline + }() + + collNeedReturn := e.listOneColl(ctx, listOpt) + if collNeedReturn { + needReturn = true + } + }(listOpt) + + if needReturn { + return + } + } + + wg.Wait() + + // tell the user that the list operation has already done. + // we only send for once. + ch <- &types.Event{ + OperationType: types.ListDone, + } +} + +// get collection related task ids, find options and filters +func (e *Event) parseCollListOpts(collection string, opts *listOptions) ([]string, *options.FindOptions, mapstr.MapStr, + bool, error) { + taskIDs, fields, filters := make([]string, 0), make([]string, 0), make([]filter.RuleFactory, 0) + needAllFilter, needAllFields := false, false + for collRegex, regex := range opts.collOptsInfo.collRegexMap { + if !regex.MatchString(collection) { + continue + } + + taskIDs = append(taskIDs, opts.collOptsInfo.collRegexTasksMap[collRegex]...) + if opts.collOptsInfo.collCondMap[collRegex] == nil { + needAllFilter = true + } else if !needAllFilter { + filters = append(filters, opts.collOptsInfo.collCondMap[collRegex]) + } + if len(opts.collOptsInfo.collFieldsMap[collRegex]) == 0 { + needAllFields = true + } else if !needAllFields { + fields = append(fields, opts.collOptsInfo.collFieldsMap[collRegex]...) 
+ } + } + + if len(taskIDs) == 0 { + return nil, nil, nil, true, nil + } + + findOpts := new(options.FindOptions) + findOpts.SetLimit(int64(*opts.pageSize)) + if !needAllFields && len(fields) != 0 { projection := make(map[string]int) - if len(opts.Fields) != 0 { - for _, field := range opts.Fields { - if len(field) <= 0 { - continue - } - projection[field] = 1 + for _, field := range fields { + if len(field) <= 0 { + continue } - findOpts.Projection = projection + projection[field] = 1 } + projection["_id"] = 1 + findOpts.Projection = projection + } + + cond := make(mapstr.MapStr) + if !needAllFilter && len(filters) != 0 { + expr := filter.Expression{RuleFactory: &filter.CombinedRule{Condition: filter.Or, Rules: filters}} + var err error + cond, err = expr.ToMgo() + if err != nil { + return nil, nil, nil, false, err + } + } + + return taskIDs, findOpts, cond, false, nil +} + +type listOneCollOptions struct { + collection string + taskIDs []string + filter mapstr.MapStr + findOpts *options.FindOptions + taskTypeMap map[string]reflect.Type + taskFilterMap map[string]*filter.Expression + ch chan *types.Event + withRetry bool + reset func() +} + +type mongoID struct { + Oid primitive.ObjectID `bson:"_id"` +} +// listOneColl try to list data with filter from one collection, returns if list operation needs to exit +func (e *Event) listOneColl(ctx context.Context, opts *listOneCollOptions) bool { + collInfo, err := parseCollInfo(opts.collection) + if err != nil { + blog.Errorf("parse collection info for list operation failed, opt: %+v, err: %v", *opts, err) + return false + } + + for { retry: cursor, err := e.client.Database(e.database). - Collection(opts.Collection). - Find(ctx, opts.Filter, findOpts) + Collection(opts.collection). + Find(ctx, opts.filter, opts.findOpts) if err != nil { - blog.Errorf("list watch operation, but list db: %s, collection: %s failed, will *retry later*, err: %v", - e.database, opts.Collection, err) - reset() + blog.Errorf("list db: %s, coll: %s failed, will *retry later*, err: %v", e.database, opts.collection, err) + opts.reset() continue } + hasData := false for cursor.Next(ctx) { + hasData = true select { case <-ctx.Done(): blog.Errorf("received stopped lister signal, stop list db: %s, collection: %s, err: %v", e.database, - opts.Collection, ctx.Err()) - return + opts.collection, ctx.Err()) + return true default: - } - // create a new event struct for use - result := reflect.New(reflect.TypeOf(opts.EventStruct)).Elem() - err := cursor.Decode(result.Addr().Interface()) - if err != nil { - blog.Errorf("list watch operation, but list db: %s, collection: %s with cursor failed, will *retry later*, err: %v", - e.database, opts.Collection, err) - + rawDoc := bson.Raw{} + if err := cursor.Decode(&rawDoc); err != nil { + blog.Errorf("list db: %s, coll: %s with cursor failed, err: %v", e.database, opts.collection, err) cursor.Close(ctx) - if !withRetry { - blog.Warnf("list watch operation, but list db: %s, collection: %s with cursor failed, will exit list immediately.", - e.database, opts.Collection) - close(ch) - return + if !opts.withRetry { + blog.Warnf("list db: %s, coll: %s failed, will exit list immediately", e.database, opts.collection) + close(opts.ch) + return true } - reset() + opts.reset() goto retry } - byt, _ := json.Marshal(result.Addr().Interface()) - oid := gjson.GetBytes(byt, "_id").String() + oidInfo := new(mongoID) + if err := bson.Unmarshal(rawDoc, &oidInfo); err != nil { + blog.Errorf("decode mongodb oid failed, err: %v, data: %s", err, rawDoc) + 
continue + } + opts.filter["_id"] = mapstr.MapStr{common.BKDBGT: oidInfo.Oid} + + for _, taskID := range opts.taskIDs { + parsed, isValid := parseDataForTask(rawDoc, taskID, opts.taskFilterMap, opts.taskTypeMap) + if !isValid { + continue + } - // send the event now - ch <- &types.Event{ - Oid: oid, - Document: result.Interface(), - OperationType: types.Lister, - DocBytes: byt, + parsed.Oid = oidInfo.Oid.Hex() + parsed.OperationType = types.Lister + parsed.CollectionInfo = collInfo + opts.ch <- parsed } } if err := cursor.Err(); err != nil { - blog.Errorf("list watch operation, but list db: %s, collection: %s with cursor failed, will *retry later*, err: %v", - e.database, opts.Collection, err) + blog.Errorf("list db: %s, coll: %s with cursor failed, err: %v", e.database, opts.collection, err) cursor.Close(ctx) - if !withRetry { - blog.Warnf("list watch operation, but list db: %s, collection: %s with cursor failed, will exit list immediately.", - e.database, opts.Collection) - close(ch) - return + if !opts.withRetry { + blog.Warnf("list db: %s, coll: %s failed, will exit list immediately", e.database, opts.collection) + close(opts.ch) + return true } - reset() + opts.reset() goto retry } cursor.Close(ctx) - } - // tell the user that the list operation has already done. - // we only send for once. - ch <- &types.Event{ - Oid: "", - Document: reflect.New(reflect.TypeOf(opts.EventStruct)).Elem().Interface(), - OperationType: types.ListDone, + if !hasData { + return false + } } - } diff --git a/src/storage/stream/event/list_watch.go b/src/storage/stream/event/list_watch.go index 9e178c9ba9..7603a74bd9 100644 --- a/src/storage/stream/event/list_watch.go +++ b/src/storage/stream/event/list_watch.go @@ -18,19 +18,28 @@ import ( "configcenter/src/common/blog" "configcenter/src/storage/stream/types" + + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" ) -// ListWatch TODO +// ListWatch list all data and watch change stream events func (e *Event) ListWatch(ctx context.Context, opts *types.ListWatchOptions) (*types.Watcher, error) { if err := opts.CheckSetDefault(); err != nil { return nil, err } + // list collections + collections, err := e.client.Database(e.database).ListCollectionNames(ctx, bson.M{}) + if err != nil { + blog.Errorf("list db: %s collections failed, err :%v", e.database, err) + return nil, err + } + eventChan := make(chan *types.Event, types.DefaultEventChanSize) go func() { - pipeline, streamOptions := generateOptions(&opts.Options) + pipeline, streamOptions, collOptsInfo := generateOptions(&opts.Options) // TODO: should use the mongodb cluster timestamp, if the time is not synchronise with // mongodb cluster time, then we may have to lost some events. @@ -50,70 +59,25 @@ func (e *Event) ListWatch(ctx context.Context, opts *types.ListWatchOptions) (*t // we watch the stream at first, so that we can know if we can watch success. // and, we do not read the event stream immediately, we wait until all the data // has been listed from database. - stream, err := e.client.Database(e.database). - Collection(opts.Collection). - Watch(ctx, pipeline, streamOptions) - if err != nil && isFatalError(err) { - // TODO: send alarm immediately. - blog.Errorf("mongodb watch collection: %s got a fatal error, skip resume token and retry, err: %v", - opts.Collection, err) - // reset the resume token, because we can not use the former resume token to watch success for now. 
- streamOptions.StartAfter = nil - opts.StartAfterToken = nil - // cause we have already got a fatal error, we can not try to watch from where we lost. - // so re-watch from 1 minutes ago to avoid lost events. - // Note: apparently, we may got duplicate events with this re-watch - startAtTime := uint32(time.Now().Unix()) - 60 - streamOptions.StartAtOperationTime = &primitive.Timestamp{ - T: startAtTime, - I: 0, - } - opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} - - if opts.WatchFatalErrorCallback != nil { - err := opts.WatchFatalErrorCallback(types.TimeStamp{Sec: startAtTime}) - if err != nil { - blog.Errorf("do watch fatal error callback for coll %s failed, err: %v", opts.Collection, err) - } - } - - stream, err = e.client. - Database(e.database). - Collection(opts.Collection). - Watch(ctx, pipeline, streamOptions) - } - + stream, streamOptions, watchOpts, err := e.watch(ctx, pipeline, streamOptions, &opts.Options) if err != nil { blog.Fatalf("mongodb watch failed with conf: %+v, err: %v", *opts, err) } - // prepare for list all the data. - totalCnt, err := e.client.Database(e.database). - Collection(opts.Collection). - CountDocuments(ctx, opts.Filter) - if err != nil { - // close the event stream. - stream.Close(ctx) - - blog.Fatalf("count db %s, collection: %s with filter: %+v failed, err: %v", - e.database, opts.Collection, opts.Filter, err) - } - - listOptions := &types.ListOptions{ - Filter: opts.Filter, - EventStruct: opts.EventStruct, - Collection: opts.Collection, - PageSize: opts.PageSize, + listOpts := &listOptions{ + collections: collections, + collOptsInfo: collOptsInfo, + pageSize: opts.PageSize, } go func() { // list all the data from the collection and send it as an event now. - e.lister(ctx, true, totalCnt, listOptions, eventChan) + e.lister(ctx, true, listOpts, eventChan) select { case <-ctx.Done(): - blog.Errorf("received stopped watch signal, stop list db: %s, collection: %s, err: %v", e.database, - opts.Collection, ctx.Err()) + blog.Errorf("received stopped watch signal, stop list db: %s, name: %s, err: %v", e.database, e.DBName, + ctx.Err()) return default: @@ -121,7 +85,15 @@ func (e *Event) ListWatch(ctx context.Context, opts *types.ListWatchOptions) (*t // all the data has already listed and send the event. // now, it's time to watch the event stream. - e.loopWatch(ctx, &opts.Options, streamOptions, stream, pipeline, eventChan) + loopOpts := &loopWatchOpts{ + Options: watchOpts, + streamOptions: streamOptions, + stream: stream, + pipeline: pipeline, + eventChan: eventChan, + collOptsInfo: collOptsInfo, + } + e.loopWatch(ctx, loopOpts) }() }() diff --git a/src/storage/stream/event/utils.go b/src/storage/stream/event/utils.go index f2ce40f350..583aaa848b 100644 --- a/src/storage/stream/event/utils.go +++ b/src/storage/stream/event/utils.go @@ -14,67 +14,48 @@ package event import ( "reflect" + "regexp" + "strings" + "configcenter/pkg/filter" + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/json" "configcenter/src/storage/stream/types" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" ) -var ( - esType = reflect.TypeOf(types.EventStream{}) -) - -// newEventStruct construct a change stream event data structure -// which can help us to adjust different kind of collection structure. 
-func newEventStruct(typ reflect.Type) reflect.Value { - f := reflect.StructOf([]reflect.StructField{ - { - Name: "EventStream", - Type: esType, - Anonymous: true, - Tag: `bson:",inline"`, - }, - { - Name: "FullDocument", - Type: typ, - Anonymous: false, - Tag: `bson:"fullDocument"`, - }, - }) - return reflect.New(f).Elem() -} - const fullDocPrefix = "fullDocument." var eventFields = []string{"_id", "operationType", "clusterTime", "ns", "documentKey", "updateDescription"} -func generateOptions(opts *types.Options) (mongo.Pipeline, *options.ChangeStreamOptions) { +func generateOptions(opts *types.Options) (mongo.Pipeline, *options.ChangeStreamOptions, *parsedCollOptsInfo) { + collOptsInfo := parseCollOpts(opts.CollOpts) - fields := make([]bson.E, 0) - if opts.OperationType != nil { - fields = append(fields, bson.E{Key: "operationType", Value: *opts.OperationType}) - } + allFilters := genWatchFilter(collOptsInfo.collCondMap, collOptsInfo.collOpTypeMap) - if opts.Collection == "" { - fields = append(fields, bson.E{Key: "ns.coll", Value: opts.CollectionFilter}) - } - - if opts.Filter != nil { - for k, v := range opts.Filter { - fields = append(fields, bson.E{Key: fullDocPrefix + k, Value: v}) + // if any options needs all fields, do not filter fields + allFields := make([]string, 0) + for _, fields := range collOptsInfo.collFieldsMap { + if len(fields) == 0 { + allFields = make([]string, 0) + break } + allFields = append(allFields, fields...) } var pipeline mongo.Pipeline - if len(fields) != 0 { - pipeline = []bson.D{{{Key: "$match", Value: fields}}} + if len(allFilters) != 0 { + pipeline = []bson.D{{{Key: "$match", Value: allFilters}}} } - if len(opts.Fields) != 0 { + if len(allFields) != 0 { project := make(map[string]int) - for _, f := range opts.Fields { + for _, f := range allFields { project[fullDocPrefix+f] = 1 } @@ -91,6 +72,8 @@ func generateOptions(opts *types.Options) (mongo.Pipeline, *options.ChangeStream if *opts.MajorityCommitted { major := options.UpdateLookup streamOptions.FullDocument = &major + preImage := options.WhenAvailable + streamOptions.FullDocumentBeforeChange = &preImage } else { def := options.Default streamOptions.FullDocument = &def @@ -122,5 +105,176 @@ func generateOptions(opts *types.Options) (mongo.Pipeline, *options.ChangeStream var batchSize int32 = 2000 streamOptions.BatchSize = &batchSize - return pipeline, streamOptions + return pipeline, streamOptions, collOptsInfo +} + +// parsedCollOptsInfo is the parsed watch task and collection info generated by collection options +type parsedCollOptsInfo struct { + // taskTypeMap is watch task id to event data type map + taskTypeMap map[string]reflect.Type + // taskFilterMap is watch task id to filter map + taskFilterMap map[string]*filter.Expression + // collRegexMap is collection regex string value to collection regex expression map + collRegexMap map[string]*regexp.Regexp + // collRegexTasksMap is collection regex to watch task ids map + collRegexTasksMap map[string][]string + // collCondMap is collection regex to merged data filter condition map + collCondMap map[string]*filter.Expression + // collOpTypeMap is collection regex to merged operation types map + collOpTypeMap map[string][]types.OperType + // collFieldsMap is collection regex to merged fields map + collFieldsMap map[string][]string +} + +// parseCollOpts parse collection options to parsedCollOptsInfo +func parseCollOpts(collOpts map[string]types.WatchCollOptions) *parsedCollOptsInfo { + // generate watch task and collection mapping info by collection 
filter watch options + info := &parsedCollOptsInfo{ + taskTypeMap: make(map[string]reflect.Type), + taskFilterMap: make(map[string]*filter.Expression), + collRegexMap: make(map[string]*regexp.Regexp), + collRegexTasksMap: make(map[string][]string), + collCondMap: make(map[string]*filter.Expression), + collOpTypeMap: make(map[string][]types.OperType), + collFieldsMap: make(map[string][]string), + } + + for id, opt := range collOpts { + // generate watch task and collection mapping info + info.taskTypeMap[id] = reflect.Indirect(reflect.ValueOf(opt.EventStruct)).Type() + info.taskFilterMap[id] = opt.Filter + regex := opt.CollectionFilter.Regex + info.collRegexMap[regex] = regexp.MustCompile(regex) + info.collRegexTasksMap[regex] = append(info.collRegexTasksMap[regex], id) + + // merge collection condition with the same collection regex + cond := opt.Filter + collCond, exists := info.collCondMap[regex] + if exists { + if collCond == nil { + cond = nil + } else if cond != nil { + cond = &filter.Expression{RuleFactory: &filter.CombinedRule{Condition: filter.Or, + Rules: []filter.RuleFactory{collCond, cond}}} + } + } + info.collCondMap[regex] = cond + + // select all operation type if any options needs all types, otherwise, return types specified by all options + collOpTypes, exists := info.collOpTypeMap[regex] + if !exists || len(collOpTypes) != 0 { + if opt.OperationType == nil { + info.collOpTypeMap[regex] = make([]types.OperType, 0) + } else { + info.collOpTypeMap[regex] = append(collOpTypes, *opt.OperationType) + } + } + + // select all fields if any options needs all fields, otherwise, return fields specified by all options + collFields, exists := info.collFieldsMap[regex] + if !exists || len(collFields) != 0 { + if len(opt.Fields) == 0 { + info.collFieldsMap[regex] = make([]string, 0) + } else { + info.collFieldsMap[regex] = append(collFields, opt.Fields...) 
+ } + } + } + + return info +} + +// genWatchFilter generate watch filter by collection to condition and operation type map +func genWatchFilter(collCondMap map[string]*filter.Expression, collOpTypeMap map[string][]types.OperType) bson.D { + allFilters := make([]bson.D, 0) + noFilterCollRegexes := make([]string, 0) + for regex, cond := range collCondMap { + // if the collection regex has no condition and no operation type filter, add to noFilterCollRegexes + if len(collOpTypeMap[regex]) == 0 && cond == nil { + noFilterCollRegexes = append(noFilterCollRegexes, regex) + continue + } + + // generate filter for collection regex with special condition + filters := bson.D{{Key: "ns.coll", Value: bson.M{common.BKDBLIKE: regex}}} + + if len(collOpTypeMap[regex]) > 0 { + filters = append(filters, bson.E{Key: "operationType", Value: bson.M{common.BKDBIN: collOpTypeMap[regex]}}) + } + + if cond != nil { + mongoFilter, err := cond.ToMgo() + if err != nil { + blog.Errorf("convert coll(%s) filter(%s) to mongo filter failed, err: %v, skip", regex, cond, err) + continue + } + for k, v := range mongoFilter { + filters = append(filters, bson.E{Key: fullDocPrefix + k, Value: v}) + } + } + + allFilters = append(allFilters, filters) + } + + // merge all no filter collection regexes to one collection regex filter + if len(noFilterCollRegexes) != 0 { + allFilters = append(allFilters, []bson.E{ + {Key: "ns.coll", Value: bson.M{common.BKDBLIKE: strings.Join(noFilterCollRegexes, "|")}}, + }) + } + + if len(allFilters) == 0 { + return make(bson.D, 0) + } + if len(allFilters) == 1 { + return allFilters[0] + } + return bson.D{{Key: common.BKDBOR, Value: allFilters}} +} + +// parseCollInfo parse collection info +func parseCollInfo(collection string) (types.CollectionInfo, error) { + tenantID, parsedColl, err := common.SplitTenantTableName(collection) + if err != nil { + blog.Errorf("split collection %s failed, err: %v", collection, err) + return types.CollectionInfo{}, err + } + + return types.CollectionInfo{ + Collection: collection, + ParsedColl: parsedColl, + TenantID: tenantID, + }, nil +} + +// parseDataForTask parse event data for task, returns event data and matched flag +func parseDataForTask(rawDoc bson.Raw, taskID string, taskFilterMap map[string]*filter.Expression, + taskTypeMap map[string]reflect.Type) (*types.Event, bool) { + + // decode event data and get json value + doc := reflect.New(taskTypeMap[taskID]).Interface() + if err := bson.Unmarshal(rawDoc, doc); err != nil { + blog.Errorf("decode to struct: %T failed, err: %v, data: %s", doc, err, rawDoc) + return nil, false + } + byt, _ := json.Marshal(doc) + + // check if event data matches watch filter + if expr, exists := taskFilterMap[taskID]; exists && expr != nil { + matched, err := expr.Match(filter.JsonString(byt)) + if err != nil { + blog.Errorf("check if event data(%s) matches watch filter(%s) failed, err: %v", string(byt), expr, err) + return nil, false + } + + if !matched { + return nil, false + } + } + + return &types.Event{ + Document: doc, + DocBytes: byt, + TaskID: taskID, + }, true } diff --git a/src/storage/stream/event/watch.go b/src/storage/stream/event/watch.go index 1d53149e11..fa7d3d97c1 100644 --- a/src/storage/stream/event/watch.go +++ b/src/storage/stream/event/watch.go @@ -15,19 +15,19 @@ package event import ( "context" "errors" - "reflect" "strings" + "sync" "time" "configcenter/src/common/blog" - "configcenter/src/common/json" "configcenter/src/storage/stream/types" + "go.mongodb.org/mongo-driver/bson/primitive" 
"go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" ) -// Watch TODO +// Watch mongodb change stream events func (e *Event) Watch(ctx context.Context, opts *types.WatchOptions) (*types.Watcher, error) { if err := opts.CheckSetDefault(); err != nil { return nil, err @@ -35,61 +35,12 @@ func (e *Event) Watch(ctx context.Context, opts *types.WatchOptions) (*types.Wat eventChan := make(chan *types.Event, types.DefaultEventChanSize) go func() { - pipeline, streamOptions := generateOptions(&opts.Options) - - blog.InfoJSON("start watch with pipeline: %s, options: %s, stream options: %s", pipeline, opts, streamOptions) - - var stream *mongo.ChangeStream - var err error - - if opts.Collection != "" { - stream, err = e.client. - Database(e.database). - Collection(opts.Collection). - Watch(ctx, pipeline, streamOptions) - } else { - stream, err = e.client. - Database(e.database). - Watch(ctx, pipeline, streamOptions) - } + pipeline, streamOptions, collOptsInfo := generateOptions(&opts.Options) - if err != nil && isFatalError(err) { - // TODO: send alarm immediately. - blog.Errorf("mongodb watch collection: %s got a fatal error, skip resume token and retry, err: %v", - opts.Collection, err) - // reset the resume token, because we can not use the former resume token to watch success for now. - streamOptions.StartAfter = nil - opts.StartAfterToken = nil - // cause we have already got a fatal error, we can not try to watch from where we lost. - // so re-watch from 1 minutes ago to avoid lost events. - // Note: apparently, we may got duplicate events with this re-watch - startAtTime := uint32(time.Now().Unix()) - 60 - streamOptions.StartAtOperationTime = &primitive.Timestamp{ - T: startAtTime, - I: 0, - } - opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} - - if opts.WatchFatalErrorCallback != nil { - err := opts.WatchFatalErrorCallback(types.TimeStamp{Sec: startAtTime}) - if err != nil { - blog.Errorf("do watch fatal error callback for coll %s failed, err: %v", opts.Collection, err) - } - } - - blog.InfoJSON("start watch with pipeline: %s, options: %s, stream options: %s", pipeline, opts, streamOptions) - if opts.Collection != "" { - stream, err = e.client. - Database(e.database). - Collection(opts.Collection). - Watch(ctx, pipeline, streamOptions) - } else { - stream, err = e.client. - Database(e.database). 
- Watch(ctx, pipeline, streamOptions) - } - } + blog.InfoJSON("start watch db %s with pipeline: %s, options: %s, stream options: %s", e.DBName, pipeline, opts, + streamOptions) + stream, streamOptions, watchOpts, err := e.watch(ctx, pipeline, streamOptions, &opts.Options) if err != nil { if errors.Is(err, context.Canceled) { // if error is context cancelled, then loop watch will exit at the same time @@ -98,8 +49,15 @@ func (e *Event) Watch(ctx context.Context, opts *types.WatchOptions) (*types.Wat blog.Fatalf("mongodb watch failed with conf: %+v, err: %v", *opts, err) } - go e.loopWatch(ctx, &opts.Options, streamOptions, stream, pipeline, eventChan) - + loopOpts := &loopWatchOpts{ + Options: watchOpts, + streamOptions: streamOptions, + stream: stream, + pipeline: pipeline, + eventChan: eventChan, + collOptsInfo: collOptsInfo, + } + go e.loopWatch(ctx, loopOpts) }() watcher := &types.Watcher{ @@ -108,173 +66,108 @@ func (e *Event) Watch(ctx context.Context, opts *types.WatchOptions) (*types.Wat return watcher, nil } -func (e *Event) loopWatch(ctx context.Context, - opts *types.Options, - streamOptions *options.ChangeStreamOptions, - stream *mongo.ChangeStream, - pipeline mongo.Pipeline, - eventChan chan *types.Event) { +func (e *Event) watch(ctx context.Context, pipeline mongo.Pipeline, streamOptions *options.ChangeStreamOptions, + opts *types.Options) (*mongo.ChangeStream, *options.ChangeStreamOptions, *types.Options, error) { + + stream, err := e.client. + Database(e.database). + Watch(ctx, pipeline, streamOptions) + + if err != nil && isFatalError(err) { + // TODO: send alarm immediately. + blog.Errorf("mongodb watch db: %s got a fatal error, skip resume token and retry, err: %v", e.DBName, err) + // reset the resume token, because we can not use the former resume token to watch success for now. + streamOptions.StartAfter = nil + opts.StartAfterToken = nil + // cause we have already got a fatal error, we can not try to watch from where we lost. + // so re-watch from 1 minutes ago to avoid lost events. + // Note: apparently, we may got duplicate events with this re-watch + startAtTime := uint32(time.Now().Unix()) - 60 + streamOptions.StartAtOperationTime = &primitive.Timestamp{ + T: startAtTime, + I: 0, + } + opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} + + if opts.WatchFatalErrorCallback != nil { + err := opts.WatchFatalErrorCallback(types.TimeStamp{Sec: startAtTime}) + if err != nil { + blog.Errorf("do watch fatal error callback for db %s failed, err: %v", e.DBName, err) + } + } + + blog.InfoJSON("start watch db %s with pipeline: %s, options: %s, stream options: %s", e.DBName, pipeline, + opts, streamOptions) + stream, err = e.client. + Database(e.database). + Watch(ctx, pipeline, streamOptions) + } + + return stream, streamOptions, opts, err +} + +type loopWatchOpts struct { + *types.Options + streamOptions *options.ChangeStreamOptions + stream *mongo.ChangeStream + pipeline mongo.Pipeline + eventChan chan *types.Event + currentToken types.EventToken + collOptsInfo *parsedCollOptsInfo +} + +func (e *Event) loopWatch(ctx context.Context, opts *loopWatchOpts) { retry := false - currentToken := types.EventToken{Data: ""} - typ := reflect.Indirect(reflect.ValueOf(opts.EventStruct)).Type() + opts.currentToken = types.EventToken{Data: ""} - e.setCleaner(ctx, eventChan, opts.Collection) + e.setCleaner(ctx, opts.eventChan) + + // init collection to task ids map + collTasksMap := make(map[string][]string) for { // no events, try cancel watch here. 
select { case <-ctx.Done(): - blog.Warnf("received stopped loop watch signal, stop watch db: %s, collection: %s, err: %v", e.database, - opts.Collection, ctx.Err()) + blog.Warnf("received stopped loop watch signal, stop watch db: %s, name: %s, err: %v", e.database, e.DBName, + ctx.Err()) - if stream != nil { - stream.Close(context.Background()) + if opts.stream != nil { + opts.stream.Close(context.Background()) } return default: - } if retry { - time.Sleep(5 * time.Second) - if len(currentToken.Data) != 0 { - // if error occurs, then retry watch and start from the last token. - // so that we can continue the event from where it just broken. - streamOptions.StartAtOperationTime = nil - streamOptions.SetStartAfter(currentToken) - } - - // if start at operation time and start after token is both set, use resume token instead of start time - if streamOptions.StartAtOperationTime != nil && streamOptions.StartAfter != nil { - blog.Infof("resume token and time is both set, discard the resume time, option: %+v", streamOptions) - streamOptions.StartAtOperationTime = nil - } - - blog.InfoJSON("retry watch with pipeline: %s, opts: %s, stream opts: %s", pipeline, opts, streamOptions) - - var err error - if opts.Collection != "" { - stream, err = e.client. - Database(e.database). - Collection(opts.Collection). - Watch(ctx, pipeline, streamOptions) - } else { - stream, err = e.client. - Database(e.database). - Watch(ctx, pipeline, streamOptions) - } - if err != nil { - if isFatalError(err) { - // TODO: send alarm immediately. - blog.Errorf("mongodb watch collection: %s got a fatal error, skip resume token and retry, err: %v", - opts.Collection, err) - // reset the resume token, because we can not use the former resume token to watch success for now. - streamOptions.StartAfter = nil - opts.StartAfterToken = nil - // because we have already got a fatal error, we can not try to watch from where we lost. - // so re-watch from 1 minutes ago to avoid lost events. - // Note: apparently, we may got duplicate events with this re-watch - startAtTime := uint32(time.Now().Unix()) - 60 - streamOptions.StartAtOperationTime = &primitive.Timestamp{ - T: startAtTime, - I: 0, - } - opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} - currentToken.Data = "" - - if opts.WatchFatalErrorCallback != nil { - err := opts.WatchFatalErrorCallback(types.TimeStamp{Sec: startAtTime}) - if err != nil { - blog.Errorf("do watch fatal error callback for coll %s failed, err: %v", opts.Collection, err) - } - } - } - - blog.ErrorJSON("mongodb watch %s failed with opts: %s, pipeline: %s, streamOpts: %s, err: %s", - opts.Collection, opts, pipeline, streamOptions, err) - - retry = true + opts, retry = e.retryWatch(ctx, opts) + if retry { continue } - - // re-watch success, now we clean start at operation time options - streamOptions.StartAtOperationTime = nil } - for stream.Next(ctx) { + for opts.stream.Next(ctx) { // still have events, try cancel steam here. 
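+			// (each decoded change document is fanned out to all matching watch tasks
+			// before the next one is read from the stream, see parseEvent below.)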
select { case <-ctx.Done(): - blog.Warnf("received stopped loop watch signal, stop loop next, watch db: %s, collection: %s, err: %v", - e.database, opts.Collection, ctx.Err()) - stream.Close(context.Background()) + blog.Warnf("received stopped loop watch signal, stop loop next, watch db: %s, db name: %s, err: %v", + e.database, e.DBName, ctx.Err()) + opts.stream.Close(context.Background()) return default: - } - newStruct := newEventStruct(typ) - if err := stream.Decode(newStruct.Addr().Interface()); err != nil { - blog.Errorf("watch collection %s, but decode to event struct: %v failed, err: %v", - opts.Collection, reflect.TypeOf(opts.EventStruct), err) - continue - } - - base := newStruct.Field(0).Interface().(types.EventStream) - - // if we received a invalid event, which is caused by collection drop, rename or drop database operation, - // we have to try re-watch again. otherwise, this may cause this process CPU high because of continue - // for loop cursor. - // https://docs.mongodb.com/manual/reference/change-events/#invalidate-event - if base.OperationType == types.Invalidate { - blog.ErrorJSON("mongodb watch received a invalid event, will retry watch again, options: %s", *opts) - - // clean the last resume token to force the next try watch from the beginning. otherwise we will - // receive the invalid event again. - streamOptions.StartAfter = nil - opts.StartAfterToken = nil - // cause we have already got a fatal error, we can not try to watch from where we lost. - // so re-watch from 1 minutes ago to avoid lost events. - // Note: apparently, we may got duplicate events with this re-watch - startAtTime := uint32(time.Now().Unix()) - 60 - streamOptions.StartAtOperationTime = &primitive.Timestamp{ - T: startAtTime, - I: 0, - } - opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} - currentToken.Data = "" - - stream.Close(ctx) - retry = true + opts, retry = e.handleStreamEvent(ctx, opts, collTasksMap) + if retry { break } - - currentToken.Data = base.Token.Data - byt, _ := json.Marshal(newStruct.Field(1).Addr().Interface()) - - eventChan <- &types.Event{ - Oid: base.DocumentKey.ID.Hex(), - OperationType: base.OperationType, - Document: newStruct.Field(1).Addr().Interface(), - DocBytes: byt, - Collection: base.Namespace.Collection, - ClusterTime: types.TimeStamp{ - Sec: base.ClusterTime.T, - Nano: base.ClusterTime.I, - }, - Token: base.Token, - ChangeDesc: &types.ChangeDescription{ - UpdatedFields: base.UpdateDesc.UpdatedFields, - RemovedFields: base.UpdateDesc.RemovedFields, - }, - } } - if err := stream.Err(); err != nil { + if err := opts.stream.Err(); err != nil { blog.ErrorJSON("mongodb watch encountered a error, conf: %s, err: %s", *opts, err) - stream.Close(ctx) + opts.stream.Close(ctx) retry = true continue } @@ -283,19 +176,18 @@ func (e *Event) loopWatch(ctx context.Context, // setCleaner set up a monitor to close the cursor when the context is canceled. // this is useful to release stream resource when this watch is canceled outside with context is canceled. 
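+// it also consumes at most one buffered event, so that a sender blocked on the event
+// channel can return to stream.Next and release the underlying change stream.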
-func (e *Event) setCleaner(ctx context.Context, eventChan chan *types.Event, coll string) { +func (e *Event) setCleaner(ctx context.Context, eventChan chan *types.Event) { go func() { select { case <-ctx.Done(): - blog.Warnf("received stopped loop watch collection: %s signal, close cursor now, err: %v", - coll, ctx.Err()) + blog.Warnf("received stopped loop watch db: %s signal, close cursor now, err: %v", e.DBName, ctx.Err()) // even though we may already close the stream, but there may still have events in the stream's // batch cursor, so we need to consume a event, so that we can release the stream resource select { // try consume a event, so that stream.Next(ctx) can be called to release the stream resources. case <-eventChan: - blog.Warnf("received stopped loop watch collection: %s signal, consumed a event", coll) + blog.Warnf("received stopped loop watch db: %s signal, consumed a event", e.DBName) default: // no events, and stream resource will be recycled in the next round. @@ -306,8 +198,178 @@ func (e *Event) setCleaner(ctx context.Context, eventChan chan *types.Event, col }() } -// isFatalError TODO -// if watch encountered a fatal error, we should watch without resume token, which means from now. +func (e *Event) retryWatch(ctx context.Context, opts *loopWatchOpts) (*loopWatchOpts, bool) { + streamOptions := opts.streamOptions + + time.Sleep(5 * time.Second) + if len(opts.currentToken.Data) != 0 { + // if error occurs, then retry watch and start from the last token. + // so that we can continue the event from where it just broken. + streamOptions.StartAtOperationTime = nil + streamOptions.SetStartAfter(opts.currentToken) + } + + // if start at operation time and start after token is both set, use resume token instead of start time + if streamOptions.StartAtOperationTime != nil && streamOptions.StartAfter != nil { + blog.Infof("resume token and time is both set, discard the resume time, option: %+v", streamOptions) + streamOptions.StartAtOperationTime = nil + } + + blog.InfoJSON("retry watch db %s with pipeline: %s, opts: %s, stream opts: %s", e.DBName, opts.pipeline, + opts.Options, streamOptions) + + var err error + opts.stream, err = e.client. + Database(e.database). + Watch(ctx, opts.pipeline, streamOptions) + if err != nil { + if isFatalError(err) { + // TODO: send alarm immediately. + blog.Errorf("mongodb watch db: %s got a fatal error, skip resume token and retry, err: %v", + e.DBName, err) + // reset the resume token, because we can not use the former resume token to watch success for now. + streamOptions.StartAfter = nil + opts.StartAfterToken = nil + // because we have already got a fatal error, we can not try to watch from where we lost. + // so re-watch from 1 minutes ago to avoid lost events. 
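+			// for example, a fatal error at 12:00:00 makes the stream restart from 11:59:00.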
+ // Note: apparently, we may got duplicate events with this re-watch + startAtTime := uint32(time.Now().Unix()) - 60 + streamOptions.StartAtOperationTime = &primitive.Timestamp{ + T: startAtTime, + I: 0, + } + opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} + opts.currentToken.Data = "" + + if opts.WatchFatalErrorCallback != nil { + err := opts.WatchFatalErrorCallback(types.TimeStamp{Sec: startAtTime}) + if err != nil { + blog.Errorf("do watch fatal error callback for db %s failed, err: %v", e.DBName, err) + } + } + } + + blog.ErrorJSON("mongodb watch db %s failed with opts: %s, pipeline: %s, streamOpts: %s, err: %s", + e.DBName, opts, opts.pipeline, streamOptions, err) + return opts, true + } + + // re-watch success, now we clean start at operation time options + streamOptions.StartAtOperationTime = nil + return opts, false +} + +func (e *Event) handleStreamEvent(ctx context.Context, opts *loopWatchOpts, collTasksMap map[string][]string) ( + *loopWatchOpts, bool) { + + event := new(types.RawEvent) + if err := opts.stream.Decode(event); err != nil { + blog.Errorf("watch db %s, but decode to raw event struct failed, err: %v", e.DBName, err) + return opts, true + } + + // if we received a invalid event, which is caused by collection drop, rename or drop database operation, + // we have to try re-watch again. otherwise, this may cause this process CPU high because of continue + // for loop cursor. + // https://docs.mongodb.com/manual/reference/change-events/#invalidate-event + if event.EventStream.OperationType == types.Invalidate { + blog.ErrorJSON("mongodb watch received a invalid event, will retry watch again, options: %s", *opts) + + // clean the last resume token to force the next try watch from the beginning. otherwise we will + // receive the invalid event again. + opts.streamOptions.StartAfter = nil + opts.StartAfterToken = nil + // cause we have already got a fatal error, we can not try to watch from where we lost. + // so re-watch from 1 minutes ago to avoid lost events. + // Note: apparently, we may got duplicate events with this re-watch + startAtTime := uint32(time.Now().Unix()) - 60 + opts.streamOptions.StartAtOperationTime = &primitive.Timestamp{ + T: startAtTime, + I: 0, + } + opts.StartAtTime = &types.TimeStamp{Sec: startAtTime} + opts.currentToken.Data = "" + + opts.stream.Close(ctx) + return opts, true + } + + opts.currentToken.Data = event.EventStream.Token.Data + + e.parseEvent(event, opts.eventChan, opts.collOptsInfo, collTasksMap) + + return opts, false +} + +func (e *Event) parseEvent(event *types.RawEvent, eventChan chan *types.Event, collOptsInfo *parsedCollOptsInfo, + collTasksMap map[string][]string) { + + base := event.EventStream + + collInfo, err := parseCollInfo(base.Namespace.Collection) + if err != nil { + blog.Errorf("parse event(%+v) collection info failed, err: %v", base, err) + return + } + + // get the event task ids matching the collection name + taskIDs, exists := collTasksMap[base.Namespace.Collection] + if !exists { + for collRegex, regex := range collOptsInfo.collRegexMap { + if regex.MatchString(base.Namespace.Collection) { + taskIDs = append(taskIDs, collOptsInfo.collRegexTasksMap[collRegex]...) 
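+				// a collection may match several task regexes, so all matching task
+				// ids are collected and each of them will receive the event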
+ } + } + collTasksMap[base.Namespace.Collection] = taskIDs + } + + if len(taskIDs) == 0 { + blog.Errorf("watch db %s, but get invalid event not matching any task, base: %+v", e.DBName, base) + return + } + + // decode the event data to the event data struct, use pre data for delete event + rawDoc := event.FullDoc + if base.OperationType == types.Delete || event.FullDoc == nil { + rawDoc = event.PreFullDoc + } + + if rawDoc == nil { + blog.Errorf("watch db %s, but get invalid event with no detail, base: %+v", e.DBName, base) + return + } + + var wg sync.WaitGroup + for _, taskID := range taskIDs { + wg.Add(1) + go func(taskID string) { + defer wg.Done() + + parsed, isValid := parseDataForTask(rawDoc, taskID, collOptsInfo.taskFilterMap, collOptsInfo.taskTypeMap) + if !isValid { + return + } + + parsed.Oid = base.DocumentKey.ID.Hex() + parsed.OperationType = base.OperationType + parsed.CollectionInfo = collInfo + parsed.ClusterTime = types.TimeStamp{ + Sec: base.ClusterTime.T, + Nano: base.ClusterTime.I, + } + parsed.Token = base.Token + parsed.ChangeDesc = &types.ChangeDescription{ + UpdatedFields: base.UpdateDesc.UpdatedFields, + RemovedFields: base.UpdateDesc.RemovedFields, + } + + eventChan <- parsed + }(taskID) + } + wg.Wait() +} + +// isFatalError if watch encountered a fatal error, we should watch without resume token, which means from now. // errors like: // https://jira.mongodb.org/browse/SERVER-44610 // https://jira.mongodb.org/browse/SERVER-44733 diff --git a/src/storage/stream/loop/loop.go b/src/storage/stream/loop/loop.go deleted file mode 100644 index 545b01f441..0000000000 --- a/src/storage/stream/loop/loop.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package loop TODO -package loop diff --git a/src/storage/stream/loop/loop_watch.go b/src/storage/stream/loop/loop_watch.go index ec200a4c67..066ea02f93 100644 --- a/src/storage/stream/loop/loop_watch.go +++ b/src/storage/stream/loop/loop_watch.go @@ -10,6 +10,7 @@ * limitations under the License. */ +// Package loop defines loop watch logics package loop import ( @@ -45,23 +46,17 @@ func (lw *LoopsWatch) WithOne(opts *types.LoopOneOptions) error { return err } - startToken, err := opts.TokenHandler.GetStartWatchToken(context.Background()) + watchOpt, err := lw.updateStartTokenInfo(&opts.LoopOptions) if err != nil { - blog.Errorf("%s job, run loop watch %s, but get start token failed, err: %v", opts.Name, - opts.WatchOpt.Collection, err) return err } - - // update the start token. - if len(startToken) != 0 { - opts.WatchOpt.StartAfterToken = &types.EventToken{Data: startToken} - } + watchOpt.WatchFatalErrorCallback = opts.TokenHandler.ResetWatchToken var cancel func() var cancelCtx context.Context cancelCtx, cancel = context.WithCancel(context.Background()) - watcher, err := lw.streamWatch.Watch(cancelCtx, opts.WatchOpt) + watcher, err := lw.streamWatch.Watch(cancelCtx, watchOpt) if err != nil { blog.Errorf("%s job, run loop, but watch failed, err: %v", opts.Name, err) cancel() @@ -87,6 +82,25 @@ func (lw *LoopsWatch) WithOne(opts *types.LoopOneOptions) error { return nil } +func (lw *LoopsWatch) updateStartTokenInfo(opts *types.LoopOptions) (*types.WatchOptions, error) { + startToken, err := opts.TokenHandler.GetStartWatchToken(context.Background()) + if err != nil { + blog.Errorf("%s job, loop watch db %s, but get start watch token failed, err: %v", opts.Name, + lw.streamWatch.DBName, err) + return nil, err + } + + // update the start token. 
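+	// both the resume token and the start-at time are carried over; the retry path
+	// discards the timestamp when both are set (see retryWatch in the event package).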
+ if len(startToken.Token) != 0 { + opts.WatchOpt.StartAfterToken = &types.EventToken{Data: startToken.Token} + } + if startToken.StartAtTime != nil { + opts.WatchOpt.StartAtTime = startToken.StartAtTime + } + + return opts.WatchOpt, nil +} + // WithBatch allows users to watch events with batch. func (lw *LoopsWatch) WithBatch(opts *types.LoopBatchOptions) error { if err := opts.Validate(); err != nil { @@ -94,23 +108,17 @@ func (lw *LoopsWatch) WithBatch(opts *types.LoopBatchOptions) error { return err } - startToken, err := opts.TokenHandler.GetStartWatchToken(context.Background()) + watchOpt, err := lw.updateStartTokenInfo(&opts.LoopOptions) if err != nil { - blog.Errorf("%s job, run loop watch batch %s, but get start token failed, err: %v", opts.Name, - opts.WatchOpt.Collection, err) return err } - - // update the start token. - if len(startToken) != 0 { - opts.WatchOpt.StartAfterToken = &types.EventToken{Data: startToken} - } + watchOpt.WatchFatalErrorCallback = opts.TokenHandler.ResetWatchToken var cancel func() var cancelCtx context.Context cancelCtx, cancel = context.WithCancel(context.Background()) - watcher, err := lw.streamWatch.Watch(cancelCtx, opts.WatchOpt) + watcher, err := lw.streamWatch.Watch(cancelCtx, watchOpt) if err != nil { blog.Errorf("%s job, run loop, but watch failed, err: %v", opts.Name, err) cancel() @@ -166,23 +174,16 @@ func (lw *LoopsWatch) watchRetry(cancel context.CancelFunc, cancel() // use the last token to resume so that we can start again from where we stopped. - lastToken, err := opts.TokenHandler.GetStartWatchToken(ctx) + watchOpt, err := lw.updateStartTokenInfo(opts) if err != nil { - blog.Errorf("%s job, run loop watch, but get last event token failed, err: %v", opts.Name, err) // notify retry signal, exit loop close(retrySignal) continue } + opts.WatchOpt = watchOpt - blog.Errorf("%s job, the former watch loop: %s failed, start retry again from token: %s.", - opts.Name, opts.WatchOpt.Collection, lastToken) - - // set start after token if needed. - if len(lastToken) != 0 { - // we have already received the new event and handle it success, - // so we need to use this token. otherwise, we should still use the w.watchOpt.StartAfterToken - opts.WatchOpt.StartAfterToken = &types.EventToken{Data: lastToken} - } + blog.Errorf("%s job, the former watch loop: %s failed, start retry again from token: %+v.", opts.Name, + lw.streamWatch.DBName, watchOpt.StartAfterToken) var cancelCtx context.Context cancelCtx, cancel = context.WithCancel(ctx) @@ -199,7 +200,8 @@ func (lw *LoopsWatch) watchRetry(cancel context.CancelFunc, // start handle loop jobs go doHandler(cancelCtx, watcher, retrySignal) - blog.Warnf("%s job, retry loop %s from token: %s success.", opts.Name, opts.WatchOpt.Collection, lastToken) + blog.Warnf("%s job, retry loop %s from token: %+v success.", opts.Name, lw.streamWatch.DBName, + watchOpt.StartAfterToken) } } } @@ -219,23 +221,20 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context, } for { - reWatch, loop := observer.canLoop() if reWatch { // stop the tick to release resource. ticker.Stop() - blog.Warnf("%s job, master status has changed, try to re-watch again, collection:%s", opts.Name, - opts.WatchOpt.Collection) - + blog.Warnf("%s job, master status has changed, try to re-watch again, db:%s", opts.Name, + lw.streamWatch.DBName) // trigger re-watch action now. 
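+			// closing retrySignal wakes watchRetry, which rebuilds the watcher starting
+			// from the token stored by the TokenHandler (see updateStartTokenInfo).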
close(retrySignal) - // exit the for loop return } if !loop { - blog.V(5).Infof("%s job, loop %s event, but not master, skip.", opts.Name, opts.WatchOpt.Collection) + blog.V(5).Infof("%s job, loop %s event, but not master, skip.", opts.Name, lw.streamWatch.DBName) time.Sleep(5 * time.Second) continue } @@ -247,18 +246,15 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context, case <-ctxWithCancel.Done(): // stop the tick to release resource. ticker.Stop() - - blog.Warnf("%s job, received cancel loop watch %s signal, exit loop.", opts.Name, - opts.WatchOpt.Collection) + blog.Warnf("%s job, received cancel loop watch %s signal, exit loop.", opts.Name, lw.streamWatch.DBName) // exist the goroutine return case one := <-watcher.EventChan: batchEvents = append(batchEvents, one) - if blog.V(4) { blog.Infof("%s job, received %s event, detail: %s, op-time: %s, rid: %s", opts.Name, - opts.WatchOpt.Collection, one.String(), one.ClusterTime.String(), one.ID()) + lw.streamWatch.DBName, one.String(), one.ClusterTime.String(), one.ID()) } // calculate event count, try to get more event for a batch @@ -266,14 +262,12 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context, // continue to get more events continue } - case <-ticker.C: // handle with batch event. if len(batchEvents) == 0 { // ticks, but no events received, loop next round to get events. continue } - case <-opts.StopNotifier: ticker.Stop() blog.Warnf("received stop %s loop watch job notify, stopping now.", opts.Name) @@ -284,50 +278,62 @@ func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context, break } - // for safety guarantee - if len(batchEvents) == 0 { - continue + if lw.handleBatchEvents(ctxWithCancel, batchEvents, opts, retryObserver, retrySignal) { + return } + } +} - first := batchEvents[0] +// handleBatchEvents handle batch events, returns if the loop watch needs retry +func (lw *LoopsWatch) handleBatchEvents(ctx context.Context, batchEvents []*types.Event, opts *types.LoopBatchOptions, + retryObserver *retryHandler, retrySignal chan struct{}) bool { - blog.Infof("%s job, received %s batch %d events, first op-time: %s rid: %s.", opts.Name, opts.WatchOpt.Collection, - len(batchEvents), first.ClusterTime.String(), first.ID()) + // for safety guarantee + if len(batchEvents) == 0 { + return false + } - retry := opts.EventHandler.DoBatch(batchEvents) - if retry { + first := batchEvents[0] - if retryObserver.canStillRetry() { - blog.Warnf("%s job, received %s %d events in batch, but do batch failed, retry now, rid: %s", opts.Name, - opts.WatchOpt.Collection, len(batchEvents), first.ID()) - // an error occurred, we need to retry it later. - // tell the schedule to re-watch again. - close(retrySignal) - // exist this goroutine. - return - } + blog.Infof("%s job, received %s batch %d events, first op-time: %s rid: %s.", opts.Name, lw.streamWatch.DBName, + len(batchEvents), first.ClusterTime.String(), first.ID()) - blog.Warnf("%s job, collection %s batch watch retry exceed max count, skip, rid: %s.", opts.Name, - opts.WatchOpt.Collection, first.ID()) - // save the event token now. + retry := opts.EventHandler.DoBatch(batchEvents) + if retry { + if retryObserver.canStillRetry() { + blog.Warnf("%s job, received %s %d events in batch, but do batch failed, retry now, rid: %s", opts.Name, + lw.streamWatch.DBName, len(batchEvents), first.ID()) + // an error occurred, we need to retry it later. + // tell the schedule to re-watch again. + close(retrySignal) + // exit this goroutine. 
+ return true } - // reset retry counter so that the previous retry count will not affect the next event - retryObserver.resetRetryCounter() + blog.Warnf("%s job, collection %s batch watch retry exceed max count, skip, rid: %s.", opts.Name, + lw.streamWatch.DBName, first.ID()) + // save the event token now. + } - last := batchEvents[len(batchEvents)-1] - // update the last watched token for resume usage. - if err := opts.TokenHandler.SetLastWatchToken(ctxWithCancel, last.Token.Data); err != nil { - blog.Errorf("%s job, loop watch %s event, but set last token failed, err: %v, rid: %s, retry later.", - opts.Name, opts.WatchOpt.Collection, err, first.ID()) + // reset retry counter so that the previous retry count will not affect the next event + retryObserver.resetRetryCounter() - // retry later. - close(retrySignal) - // exist this goroutine - return - } + last := batchEvents[len(batchEvents)-1] + // update the last watched token for resume usage. + lastToken := &types.TokenInfo{ + Token: last.Token.Data, + StartAtTime: &last.ClusterTime, } - + if err := opts.TokenHandler.SetLastWatchToken(ctx, lastToken); err != nil { + blog.Errorf("%s job, loop watch %s event, but set last token failed, err: %v, rid: %s, retry later.", + opts.Name, lw.streamWatch.DBName, err, first.ID()) + + // retry later. + close(retrySignal) + // exit this goroutine + return true + } + return false } // tryLoopWithOne try handle event one by one @@ -346,19 +352,17 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context, select { case <-ctxWithCancel.Done(): blog.Warnf("%s job, received cancel loop watch %s signal, exit loop, exit loop", opts.Name, - opts.WatchOpt.Collection) + lw.streamWatch.DBName) return - case <-opts.StopNotifier: blog.Warnf("received stop %s loop watch job notify, stopping now.", opts.Name) return - default: } reWatch, loop := observer.canLoop() if reWatch { - blog.Warnf("%s job, master status has changed, try to re-watch %s again", opts.Name, opts.WatchOpt.Collection) + blog.Warnf("%s job, master status has changed, try to re-watch %s again", opts.Name, lw.streamWatch.DBName) // trigger re-watch action now. close(retrySignal) // exit the for loop @@ -366,12 +370,12 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context, } if !loop { - blog.Infof("%s job, received %s %s event, but not master, skip. details: %s, rid: %s", - opts.Name, opts.WatchOpt.Collection, one.OperationType, one.String(), one.ID()) + blog.Infof("%s job, received %s %s event, but not master, skip. details: %s, rid: %s", opts.Name, + lw.streamWatch.DBName, one.OperationType, one.String(), one.ID()) continue } - blog.Infof("%s job, received %s event, type: %s, op-time: %s rid: %s", opts.Name, opts.WatchOpt.Collection, + blog.Infof("%s job, received %s event, type: %s, op-time: %s rid: %s", opts.Name, lw.streamWatch.DBName, one.OperationType, one.ClusterTime.String(), one.ID()) if blog.V(4) { @@ -381,7 +385,7 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context, retry := lw.tryOne(one, opts) if retry { if retryObserver.canStillRetry() { - blog.Warnf("%s job, retry watch %s later. rid: %s", opts.Name, opts.WatchOpt.Collection, one.ID()) + blog.Warnf("%s job, retry watch %s later. rid: %s", opts.Name, lw.streamWatch.DBName, one.ID()) // an error occurred, we need to retry it later. // tell the schedule to re-watch again. 
close(retrySignal) @@ -390,7 +394,7 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context, } blog.Warnf("%s job, retry %s event exceed max count, skip, detail: %s, rid: %s", opts.Name, - opts.WatchOpt.Collection, one.String(), one.ID()) + lw.streamWatch.DBName, one.String(), one.ID()) // save the event token now. } @@ -398,10 +402,13 @@ func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context, retryObserver.resetRetryCounter() // update the last watched token for resume usage. - if err := opts.TokenHandler.SetLastWatchToken(ctxWithCancel, one.Token.Data); err != nil { + lastToken := &types.TokenInfo{ + Token: one.Token.Data, + StartAtTime: &one.ClusterTime, + } + if err := opts.TokenHandler.SetLastWatchToken(ctxWithCancel, lastToken); err != nil { blog.Errorf("%s job, loop watch %s event, but set last watched token failed, err: %v, rid: %s, "+ - "retry later.", - opts.WatchOpt.Collection, err, one.ID()) + "retry later.", lw.streamWatch.DBName, err, one.ID()) // retry later. close(retrySignal) @@ -418,7 +425,7 @@ func (lw *LoopsWatch) tryOne(e *types.Event, opts *types.LoopOneOptions) (retry retry := opts.EventHandler.DoAdd(e) if retry { blog.Warnf("%s job, received %s %s event, but do add job failed, retry now, rid: %s", opts.Name, - opts.WatchOpt.Collection, e.OperationType, e.ID()) + lw.streamWatch.DBName, e.OperationType, e.ID()) return retry } @@ -427,7 +434,7 @@ func (lw *LoopsWatch) tryOne(e *types.Event, opts *types.LoopOneOptions) (retry retry := opts.EventHandler.DoUpdate(e) if retry { blog.Warnf("%s job, received %s %s event, but do update job failed, retry now, rid: %s", opts.Name, - opts.WatchOpt.Collection, e.OperationType, e.ID()) + lw.streamWatch.DBName, e.OperationType, e.ID()) return retry } @@ -436,19 +443,19 @@ func (lw *LoopsWatch) tryOne(e *types.Event, opts *types.LoopOneOptions) (retry retry := opts.EventHandler.DoDelete(e) if retry { blog.Warnf("%s job, received %s %s event, but do delete job failed, retry now, rid: %s", opts.Name, - opts.WatchOpt.Collection, e.OperationType, e.ID()) + lw.streamWatch.DBName, e.OperationType, e.ID()) return retry } case types.Invalidate: blog.Errorf("%s job, watch %s event, received invalid operation type, doc: %s, rid: %s", opts.Name, - opts.WatchOpt.Collection, e.DocBytes, e.ID()) + lw.streamWatch.DBName, e.DocBytes, e.ID()) return false default: blog.Errorf("%s job, watch %s event, received unsupported operation type, doc: %s, rid: %s", opts.Name, - opts.WatchOpt.Collection, e.DocBytes, e.ID()) + lw.streamWatch.DBName, e.DocBytes, e.ID()) return false } diff --git a/src/storage/stream/stream.go b/src/storage/stream/stream.go index c20f6841ba..e2ebf148c4 100644 --- a/src/storage/stream/stream.go +++ b/src/storage/stream/stream.go @@ -10,7 +10,7 @@ * limitations under the License. */ -// Package stream TODO +// Package stream defines mongodb change stream logics package stream import ( @@ -29,8 +29,7 @@ import ( "go.mongodb.org/mongo-driver/x/mongo/driver/connstring" ) -// Interface TODO -// Stream Interface defines all the functionality it have. +// Interface defines all the functionality it has. 
type Interface interface { List(ctx context.Context, opts *types.ListOptions) (ch chan *types.Event, err error) Watch(ctx context.Context, opts *types.WatchOptions) (*types.Watcher, error) @@ -68,14 +67,14 @@ func newEvent(conf local.MongoConf) (*event.Event, error) { return nil, err } - event, err := event.NewEvent(client, connStr.Database) + event, err := event.NewEvent(client, connStr.Database, conf.Name) if err != nil { return nil, fmt.Errorf("new event failed, err: %v", err) } return event, nil } -// LoopInterface TODO +// LoopInterface is the interface for event loop stream. type LoopInterface interface { WithOne(opts *types.LoopOneOptions) error WithBatch(opts *types.LoopBatchOptions) error diff --git a/src/storage/stream/types/types.go b/src/storage/stream/types/types.go index f9318f0997..81bf311204 100644 --- a/src/storage/stream/types/types.go +++ b/src/storage/stream/types/types.go @@ -10,7 +10,7 @@ * limitations under the License. */ -// Package types TODO +// Package types defines event stream types package types import ( @@ -18,8 +18,12 @@ import ( "errors" "fmt" "reflect" + "regexp" "time" + "configcenter/pkg/filter" + "configcenter/src/common" + "github.com/tidwall/gjson" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/bsontype" @@ -66,35 +70,29 @@ const ( ListDone OperType = "listerDone" ) -// ListOptions TODO +// ListOptions is the option to list data from specified collections by filter type ListOptions struct { - // Filter helps you filter out which kind of data's change event you want - // to receive, such as the filter : - // {"bk_obj_id":"biz"} means you can only receives the data that has this kv. - // Note: the filter's key must be a exist document key filed in the collection's - // document - Filter map[string]interface{} - - // list the documents only with these fields. - Fields []string - - // EventStruct is the point data struct that the event decoded into. - // Note: must be a point value. - EventStruct interface{} - - // Collection defines which collection you want you watch. - Collection string + // CollOpts is the watch task id to list data options for different collections + CollOpts map[string]CollectionOptions // Step defines the list step when the client try to list all the data defines in the // namespace. 
default value is `DefaultListStep`, value range [200,2000] PageSize *int + + // WithRetry defines whether the list operation needs to retry when failed + WithRetry bool } -// CheckSetDefault TODO +// CheckSetDefault validate list options, and set default value for not set fields func (opts *ListOptions) CheckSetDefault() error { - if reflect.ValueOf(opts.EventStruct).Kind() != reflect.Ptr || - reflect.ValueOf(opts.EventStruct).IsNil() { - return fmt.Errorf("invalid EventStruct field, must be a pointer and not nil") + if len(opts.CollOpts) == 0 { + return errors.New("invalid Namespace field, database and collection can not be empty") + } + + for id, opt := range opts.CollOpts { + if err := opt.Validate(); err != nil { + return fmt.Errorf("collection options[%s] is invalid, err: %v", id, err) + } } if opts.PageSize != nil { @@ -104,14 +102,83 @@ func (opts *ListOptions) CheckSetDefault() error { } else { opts.PageSize = &defaultListPageSize } + return nil +} + +// CollectionOptions is the options for collections with the same watch filter +type CollectionOptions struct { + // CollectionFilter helps you filter out which kind of collection's change event you want to receive, + // such as the filter : {"$regex":"_HostBase$"} means you can only receive events from collections + // that ends with the suffix _HostBase + CollectionFilter *CollectionFilter + + // Filter helps you filter out which kind of data's change event you want to receive, + // such as the filter: {"bk_obj_id":"biz"} means you can only receive the data that has this kv. + // Note: the filter's key must be an exist document key filed in the collection's document + Filter *filter.Expression + + // Fields defines which fields will be returned along with the events + // this is optional, if not set, all the fields will be returned. + Fields []string + + // EventStruct is the point data struct that the event decoded into. + // Note: must be a point value. 
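+	// e.g. new(map[string]interface{}), or a pointer to the concrete struct the
+	// caller wants each event decoded into.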
+ EventStruct interface{} +} + +// Validate CollectionOptions +func (opts *CollectionOptions) Validate() error { + if reflect.ValueOf(opts.EventStruct).Kind() != reflect.Ptr || + reflect.ValueOf(opts.EventStruct).IsNil() { + return errors.New("invalid EventStruct field, must be a pointer and not nil") + } - if len(opts.Collection) == 0 { + if opts.CollectionFilter == nil { return errors.New("invalid Namespace field, database and collection can not be empty") } + + if err := opts.CollectionFilter.Validate(); err != nil { + return err + } + + if opts.Filter != nil { + validOpt := filter.NewDefaultExprOpt(nil) + validOpt.IgnoreRuleFields = true + return opts.Filter.Validate(validOpt) + } + return nil +} + +// CollectionFilter is the collection filter for watch +type CollectionFilter struct { + Regex string +} + +// Validate CollectionFilter +func (c *CollectionFilter) Validate() error { + if c.Regex == "" { + return errors.New("collection filter has no regex") + } + + _, err := regexp.Compile(c.Regex) + if err != nil { + return fmt.Errorf("collection filter regex %s is invalid, err: %v", c.Regex, err) + } + return nil } -// Options TODO +// ToMongo convert to mongodb filter +func (c *CollectionFilter) ToMongo() interface{} { + return bson.M{common.BKDBLIKE: c.Regex} +} + +// Match checks if the collection name matches the filter +func (c *CollectionFilter) Match(coll string) bool { + return regexp.MustCompile(c.Regex).MatchString(coll) +} + +// Options is the options for watch change stream operation type Options struct { // reference doc: // https://docs.mongodb.com/manual/reference/method/db.collection.watch/#change-stream-with-full-document-update-lookup @@ -124,29 +191,8 @@ type Options struct { // default value is 1000ms MaxAwaitTime *time.Duration - // OperationType describe which kind of operation you want to watch, - // such as a "insert" operation or a "replace" operation. - // If you don't set, it will means watch all kinds of operations. - OperationType *OperType - - // Filter helps you filter out which kind of data's change event you want - // to receive, such as the filter : - // {"bk_obj_id":"biz"} means you can only receives the data that has this kv. - // Note: the filter's key must be a exist document key filed in the collection's - // document - Filter map[string]interface{} - - // CollectionFilter helps you filter out which kind of collection's change event you want to receive, - // such as the filter : {"$regex":"^cc_ObjectBase"} means you can only receive events from collections - // starts with the prefix cc_ObjectBase - CollectionFilter interface{} - - // EventStruct is the point data struct that the event decoded into. - // Note: must be a point value. - EventStruct interface{} - - // Collection defines which collection you want you watch. - Collection string + // CollOpts is the watch task id to watch options for different collections + CollOpts map[string]WatchCollOptions // StartAfterToken describe where you want to watch the event. // Note: the returned event doesn't contains the token represented, @@ -160,20 +206,20 @@ type Options struct { // WatchFatalErrorCallback the function to be called when watch failed with a fatal error // reset the resume token and set the start time for next watch in case it use the mistaken token again WatchFatalErrorCallback func(startAtTime TimeStamp) error `json:"-"` - - // Fields defines which fields will be returned along with the events - // this is optional, if not set, all the fields will be returned. 
- Fields []string } var defaultMaxAwaitTime = time.Second -// CheckSetDefault TODO -// CheckSet check the legal of each option, and set the default value +// CheckSetDefault check the legal of each option, and set the default value func (opts *Options) CheckSetDefault() error { - if reflect.ValueOf(opts.EventStruct).Kind() != reflect.Ptr || - reflect.ValueOf(opts.EventStruct).IsNil() { - return fmt.Errorf("invalid EventStruct field, must be a pointer and not nil") + if len(opts.CollOpts) == 0 { + return errors.New("invalid Namespace field, database and collection can not be empty") + } + + for i, opt := range opts.CollOpts { + if err := opt.Validate(); err != nil { + return fmt.Errorf("collection options[%s] is invalid, err: %v", i, err) + } } if opts.MajorityCommitted == nil { @@ -184,13 +230,19 @@ func (opts *Options) CheckSetDefault() error { if opts.MaxAwaitTime == nil { opts.MaxAwaitTime = &defaultMaxAwaitTime } - - if len(opts.Collection) == 0 && opts.CollectionFilter == nil { - return errors.New("invalid Namespace field, database and collection can not be empty") - } return nil } +// WatchCollOptions is the watch options for collections with the same watch filter +type WatchCollOptions struct { + // OperationType describe which kind of operation you want to watch, + // such as an "insert" operation or a "replace" operation. + // If you don't set, it will means watch all kinds of operations. + OperationType *OperType + + CollectionOptions +} + // TimeStamp TODO type TimeStamp struct { // the most significant 32 bits are a time_t value (seconds since the Unix epoch) @@ -282,7 +334,9 @@ type Event struct { Document interface{} DocBytes []byte OperationType OperType - Collection string + CollectionInfo + // TaskID is the task id of the event, which is used to distribute event to event watch task + TaskID string // The timestamp from the oplog entry associated with the event. ClusterTime TimeStamp @@ -294,6 +348,16 @@ type Event struct { ChangeDesc *ChangeDescription } +// CollectionInfo is the collection info of the event +type CollectionInfo struct { + // Collection is the original collection name + Collection string + // ParsedColl is the parsed collection name without tenant id + ParsedColl string + // TenantID is the tenant ID separated from the collection name + TenantID string +} + // ChangeDescription TODO type ChangeDescription struct { // updated details's value is the current value, not the previous value. @@ -311,15 +375,13 @@ func (e *Event) ID() string { return fmt.Sprintf("%s-%d-%d", e.Oid, e.ClusterTime.Sec, e.ClusterTime.Nano) } -// EventToken TODO -// mongodb change stream token, which represent a event's identity. +// EventToken mongodb change stream token, which represent a event's identity. 
type EventToken struct { // Hex value of document's _id Data string `bson:"_data"` } -// EventStream TODO -// reference: +// EventStream reference: // https://docs.mongodb.com/manual/reference/change-events/ type EventStream struct { Token EventToken `bson:"_id"` @@ -350,6 +412,13 @@ type UpdateDescription struct { RemovedFields []string `json:"removedFields" bson:"removedFields"` } +// RawEvent is the change stream event struct with raw event data +type RawEvent struct { + EventStream `bson:",inline"` + FullDoc bson.Raw `bson:"fullDocument"` + PreFullDoc bson.Raw `bson:"fullDocumentBeforeChange"` +} + // EventInfo is mongodb event info type EventInfo struct { UpdatedFields map[string]interface{} `json:"update_fields,omitempty"` @@ -388,10 +457,19 @@ func GetEventDetail(detailStr *string) *string { return &detail } -// TokenHandler TODO +// TokenHandler is the token handler interface type TokenHandler interface { - SetLastWatchToken(ctx context.Context, token string) error - GetStartWatchToken(ctx context.Context) (token string, err error) + SetLastWatchToken(ctx context.Context, token *TokenInfo) error + GetStartWatchToken(ctx context.Context) (token *TokenInfo, err error) + ResetWatchToken(startAtTime TimeStamp) error +} + +// TokenInfo is the watch token info +type TokenInfo struct { + Token string `bson:"token"` + StartAtTime *TimeStamp `bson:"start_at_time"` + // TenantID is used for tenant watch task token stores in platform table, do not set this field + TenantID string `bson:"tenant_id"` } // LoopOptions TODO @@ -439,11 +517,11 @@ func (lo *LoopOneOptions) Validate() error { if lo.RetryOptions != nil { if lo.RetryOptions.MaxRetryCount <= 0 { - lo.RetryOptions.MaxRetryCount = defaultRetryCount + lo.RetryOptions.MaxRetryCount = DefaultRetryCount } if lo.RetryOptions.RetryDuration == 0 { - lo.RetryOptions.RetryDuration = defaultRetryDuration + lo.RetryOptions.RetryDuration = DefaultRetryDuration } if lo.RetryOptions.RetryDuration < 500*time.Millisecond { @@ -451,8 +529,8 @@ func (lo *LoopOneOptions) Validate() error { } } else { lo.RetryOptions = &RetryOptions{ - MaxRetryCount: defaultRetryCount, - RetryDuration: defaultRetryDuration, + MaxRetryCount: DefaultRetryCount, + RetryDuration: DefaultRetryDuration, } } @@ -474,8 +552,8 @@ type LoopBatchOptions struct { const ( defaultBatchSize = 200 - defaultRetryCount = 10 - defaultRetryDuration = 1 * time.Second + DefaultRetryCount = 10 + DefaultRetryDuration = 1 * time.Second ) // Validate TODO @@ -502,11 +580,11 @@ func (lo *LoopBatchOptions) Validate() error { if lo.RetryOptions != nil { if lo.RetryOptions.MaxRetryCount <= 0 { - lo.RetryOptions.MaxRetryCount = defaultRetryCount + lo.RetryOptions.MaxRetryCount = DefaultRetryCount } if lo.RetryOptions.RetryDuration == 0 { - lo.RetryOptions.RetryDuration = defaultRetryDuration + lo.RetryOptions.RetryDuration = DefaultRetryDuration } if lo.RetryOptions.RetryDuration < 200*time.Millisecond { @@ -514,8 +592,8 @@ func (lo *LoopBatchOptions) Validate() error { } } else { lo.RetryOptions = &RetryOptions{ - MaxRetryCount: defaultRetryCount, - RetryDuration: defaultRetryDuration, + MaxRetryCount: DefaultRetryCount, + RetryDuration: DefaultRetryDuration, } } From c4070d7a336650d5800c6770e043e8b409c60012 Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:27:09 +0800 Subject: [PATCH 03/10] feat: add event watch task logics --story=121104198 --- src/storage/stream/task/task.go | 364 ++++++++++++++++++++++++++ src/storage/stream/task/token.go | 193 
++++++++++++++ src/storage/stream/task/util.go | 69 +++++ src/storage/stream/task/watch_task.go | 198 ++++++++++++++ src/storage/stream/types/task.go | 177 +++++++++++++ 5 files changed, 1001 insertions(+) create mode 100644 src/storage/stream/task/task.go create mode 100644 src/storage/stream/task/token.go create mode 100644 src/storage/stream/task/util.go create mode 100644 src/storage/stream/task/watch_task.go create mode 100644 src/storage/stream/types/task.go diff --git a/src/storage/stream/task/task.go b/src/storage/stream/task/task.go new file mode 100644 index 0000000000..ba3c7f3b98 --- /dev/null +++ b/src/storage/stream/task/task.go @@ -0,0 +1,364 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. + */ + +// Package task defines event watch task logics +package task + +import ( + "context" + "fmt" + "time" + + "configcenter/src/apimachinery/discovery" + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/util" + "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/stream/event" + "configcenter/src/storage/stream/loop" + "configcenter/src/storage/stream/types" +) + +// Task is the event watch task that contains all resource watch tasks +type Task struct { + // eventMap is the db uuid to event instance map + eventMap map[string]*event.Event + // loopWatch is the db uuid to loop watch instance map + loopWatch map[string]*loop.LoopsWatch + // dbClients is the db uuid to db client map + dbClients map[string]local.DB + // watchClients is the db uuid to watch client map + watchClients map[string]*local.Mongo + // watchTasks is the task name to watch task map + watchTasks map[string]*watchTask + + // these options are used to generate loop watch options + majorityCommitted *bool + maxAwaitTime *time.Duration + + // stopNotifier is used when user need to stop loop events and release related resources. + // It's a optional option. when it's not set(as is nil), then the loop will not exit forever. + // Otherwise, user can use it to stop loop events. + // When a user want to stop the loop, the only thing that a user need to do is to just + // **close** this stop notifier channel. + // Attention: + // Close this notifier channel is the only way to stop loop correctly. + // Do not send data to this channel. 
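+	//
+	// An illustrative way to wire it up (variable names here are only examples):
+	//   stopCh := make(chan struct{})
+	//   t, err := task.New(db, watchDB, isMaster, &types.NewTaskOptions{StopNotifier: stopCh})
+	//   ...
+	//   close(stopCh) // stops all loop watch tasks started by t.Start()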
+ stopNotifier <-chan struct{} +} + +// New create a new watch task instance +func New(db, watchDB dal.Dal, isMaster discovery.ServiceManageInterface, opts *types.NewTaskOptions) (*Task, error) { + if err := opts.Validate(); err != nil { + blog.Errorf("validate new task options(%+v) failed, err: %v", opts, err) + return nil, err + } + + t := &Task{ + eventMap: make(map[string]*event.Event), + loopWatch: make(map[string]*loop.LoopsWatch), + dbClients: make(map[string]local.DB), + watchClients: make(map[string]*local.Mongo), + watchTasks: make(map[string]*watchTask), + stopNotifier: opts.StopNotifier, + } + + watchDBRelation, err := genWatchDBRelationMap(watchDB) + if err != nil { + return nil, err + } + + // generate watch db uuid to watch db client map + watchDBClientMap := make(map[string]*local.Mongo) + err = watchDB.ExecForAllDB(func(db local.DB) error { + dbClient, ok := db.(*local.Mongo) + if !ok { + return fmt.Errorf("watch db is not an instance of local mongo") + } + watchDBClientMap[dbClient.GetMongoClient().UUID()] = dbClient + return nil + }) + if err != nil { + blog.Errorf("get all watch db client failed, err: %v", err) + return nil, err + } + + // generate db uuid to db client & watch db client & loop watch instance map + err = db.ExecForAllDB(func(db local.DB) error { + dbClient, ok := db.(*local.Mongo) + if !ok { + return fmt.Errorf("db to be watched is not an instance of local mongo") + } + mongoClient := dbClient.GetMongoClient() + uuid := mongoClient.UUID() + + watchDBUUID, exists := watchDBRelation[uuid] + if !exists { + blog.Warnf("db %s has no watch db", uuid) + return nil + } + + watchClient, exists := watchDBClientMap[watchDBUUID] + if !exists { + return fmt.Errorf("db %s related watch db %s is invalid", uuid, watchDBUUID) + } + t.watchClients[uuid] = watchClient + t.dbClients[uuid] = dbClient + + eventInst, err := event.NewEvent(mongoClient.Client(), mongoClient.DBName(), uuid) + if err != nil { + return fmt.Errorf("new event for db %s failed, err: %v", uuid, err) + } + t.eventMap[uuid] = eventInst + + loopWatch, err := loop.NewLoopWatch(eventInst, isMaster) + if err != nil { + return fmt.Errorf("new loop watch for db %s failed, err: %v", uuid, err) + } + t.loopWatch[uuid] = loopWatch + return nil + }) + if err != nil { + blog.Errorf("generate db uuid related map failed, err: %v", err) + return nil, err + } + + return t, nil +} + +// AddLoopOneTask add a loop watch task that handles one event at one time +func (t *Task) AddLoopOneTask(opts *types.LoopOneTaskOptions) error { + if err := opts.Validate(); err != nil { + blog.Errorf("validate loop batch task options(%s) failed, err: %v", opts.Name, err) + return err + } + + batchOpts := &types.LoopBatchTaskOptions{ + WatchTaskOptions: opts.WatchTaskOptions, + BatchSize: 1, + EventHandler: &types.TaskBatchHandler{ + DoBatch: func(dbInfo *types.DBInfo, es []*types.Event) bool { + for _, e := range es { + var retry bool + switch e.OperationType { + case types.Insert: + retry = opts.EventHandler.DoAdd(dbInfo, e) + case types.Update, types.Replace: + retry = opts.EventHandler.DoUpdate(dbInfo, e) + case types.Delete: + retry = opts.EventHandler.DoDelete(dbInfo, e) + default: + blog.Warnf("received unsupported operation type for %s job, doc: %s", opts.Name, e.DocBytes) + continue + } + if retry { + return retry + } + } + return false + }, + }, + } + + return t.addWatchTask(batchOpts, false) +} + +// AddLoopBatchTask add a loop watch task that handles batch events +func (t *Task) AddLoopBatchTask(opts 
*types.LoopBatchTaskOptions) error { + if err := opts.Validate(); err != nil { + blog.Errorf("validate loop batch task options(%s) failed, err: %v", opts.Name, err) + return err + } + return t.addWatchTask(opts, false) +} + +// AddListWatchTask add a list watch task +func (t *Task) AddListWatchTask(opts *types.LoopBatchTaskOptions) error { + if err := opts.Validate(); err != nil { + blog.Errorf("validate list watch task options(%s) failed, err: %v", opts.Name, err) + return err + } + return t.addWatchTask(opts, true) +} + +func (t *Task) addWatchTask(opts *types.LoopBatchTaskOptions, needList bool) error { + _, exists := t.watchTasks[opts.Name] + if exists { + return fmt.Errorf("loop watch task %s already exists", opts.Name) + } + + if opts.MajorityCommitted != nil && *opts.MajorityCommitted { + t.majorityCommitted = opts.MajorityCommitted + } + if opts.MaxAwaitTime != nil && (t.maxAwaitTime == nil || *opts.MaxAwaitTime > *t.maxAwaitTime) { + t.maxAwaitTime = opts.MaxAwaitTime + } + + t.watchTasks[opts.Name] = &watchTask{ + name: opts.Name, + collOptions: opts.CollOpts, + eventHandler: opts.EventHandler, + tokenHandler: opts.TokenHandler, + needList: needList, + retryOptions: opts.RetryOptions, + batchSize: opts.BatchSize, + } + + return nil +} + +// Start execute all watch tasks +func (t *Task) Start() error { + if len(t.watchTasks) == 0 { + return nil + } + + // generate task name to collection options map and db uuid to task name to db watch tasks map by watch task info + collOptions := make(map[string]types.WatchCollOptions) + listCollOptions := make(map[string]types.CollectionOptions) + dbWatchTasks := make(map[string]map[string]*dbWatchTask) + var batchSize int + for taskName, task := range t.watchTasks { + collOptions[taskName] = *task.collOptions + if task.needList { + listCollOptions[taskName] = task.collOptions.CollectionOptions + } + if task.batchSize > batchSize { + batchSize = task.batchSize + } + for uuid, dbClient := range t.dbClients { + dbTask, err := newDBWatchTask(task, &types.DBInfo{ + UUID: uuid, + WatchDB: t.watchClients[uuid], + CcDB: dbClient, + }) + if err != nil { + return err + } + if _, exists := dbWatchTasks[uuid]; !exists { + dbWatchTasks[uuid] = make(map[string]*dbWatchTask) + } + dbWatchTasks[uuid][taskName] = dbTask + } + } + + // list data for all list watch tasks + if len(listCollOptions) > 0 { + err := t.startList(listCollOptions, batchSize, dbWatchTasks) + if err != nil { + return err + } + } + + // loop watch all db events for all tasks + err := t.startLoopWatch(collOptions, dbWatchTasks, batchSize) + if err != nil { + return err + } + + // run watch tasks for all dbs + for _, dbTaskMap := range dbWatchTasks { + for _, dbTask := range dbTaskMap { + dbTask.start(t.stopNotifier) + } + } + + return nil +} + +func (t *Task) startList(listCollOptions map[string]types.CollectionOptions, batchSize int, + dbWatchTasks map[string]map[string]*dbWatchTask) error { + + for uuid, eventInst := range t.eventMap { + ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + opt := &types.ListOptions{ + CollOpts: listCollOptions, + PageSize: &batchSize, + WithRetry: true, + } + listCh, err := eventInst.List(ctx, opt) + if err != nil { + blog.Errorf("list db %s failed, err: %v, options: %+v", uuid, err, *opt) + return err + } + + go func(uuid string) { + for e := range listCh { + task, exists := dbWatchTasks[uuid][e.TaskID] + if !exists { + blog.Warnf("loop watch task %s not exists, event: %+v", e.TaskID, *e) + continue + } + task.listChan <- 
e + } + }(uuid) + } + return nil +} + +func (t *Task) startLoopWatch(collOptions map[string]types.WatchCollOptions, + dbWatchTasks map[string]map[string]*dbWatchTask, batchSize int) error { + + for uuid, loopWatch := range t.loopWatch { + uuid := uuid + tokenHandler, err := newDBTokenHandler(uuid, t.watchClients[uuid], dbWatchTasks[uuid]) + if err != nil { + return err + } + opts := &types.LoopBatchOptions{ + LoopOptions: types.LoopOptions{ + Name: uuid, + WatchOpt: &types.WatchOptions{ + Options: types.Options{ + MajorityCommitted: t.majorityCommitted, + MaxAwaitTime: t.maxAwaitTime, + CollOpts: collOptions, + }, + }, + TokenHandler: tokenHandler, + RetryOptions: &types.RetryOptions{ + MaxRetryCount: types.DefaultRetryCount, + RetryDuration: types.DefaultRetryDuration, + }, + StopNotifier: t.stopNotifier, + }, + EventHandler: &types.BatchHandler{DoBatch: func(es []*types.Event) (retry bool) { + taskLastTokenMap := make(map[string]string) + for _, e := range es { + task, exists := dbWatchTasks[uuid][e.TaskID] + if !exists { + blog.Warnf("loop watch task %s not exists, event: %+v", e.TaskID, *e) + continue + } + task.eventChan <- e + taskLastTokenMap[e.TaskID] = e.Token.Data + } + tokenHandler.setTaskLastTokenInfo(taskLastTokenMap) + return false + }}, + BatchSize: batchSize, + } + + err = loopWatch.WithBatch(opts) + if err != nil { + blog.Errorf("start loop watch for db failed, err: %v", err) + return err + } + } + return nil +} diff --git a/src/storage/stream/task/token.go b/src/storage/stream/task/token.go new file mode 100644 index 0000000000..8eca422e19 --- /dev/null +++ b/src/storage/stream/task/token.go @@ -0,0 +1,193 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package task + +import ( + "context" + "sync" + + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/driver/mongodb" + "configcenter/src/storage/stream/types" +) + +type dbTokenHandler struct { + uuid string + watchDB local.DB + + lastToken *types.TokenInfo + taskMap map[string]*dbWatchTask + lastTokenInfo map[string]string + mu sync.RWMutex +} + +// newDBTokenHandler new token handler for db watch task +func newDBTokenHandler(uuid string, watchDB local.DB, taskMap map[string]*dbWatchTask) (*dbTokenHandler, error) { + handler := &dbTokenHandler{ + uuid: uuid, + watchDB: watchDB, + taskMap: taskMap, + lastTokenInfo: make(map[string]string), + } + + lastToken, err := handler.GetStartWatchToken(context.Background()) + if err != nil { + return nil, err + } + handler.lastToken = lastToken + + tokenChan := make(chan struct{}) + + for taskID, task := range taskMap { + if task.lastToken != nil { + handler.lastTokenInfo[taskID] = task.lastToken.Token + } + task.tokenChan = tokenChan + } + + go func() { + for _ = range tokenChan { + handler.setLastWatchToken() + } + }() + return handler, nil +} + +func (d *dbTokenHandler) setTaskLastTokenInfo(taskLastTokenMap map[string]string) { + d.mu.Lock() + defer d.mu.Unlock() + for taskID, token := range taskLastTokenMap { + d.lastTokenInfo[taskID] = token + } +} + +func (d *dbTokenHandler) setLastWatchToken() { + // update last token for db to the earliest last token of all db watch tasks + // this token specifies the last event that all db watch tasks has handled + var lastToken *types.TokenInfo + allFinished := false + + for taskID, task := range d.taskMap { + token := task.lastToken + + // if token is nil, skip it + if token == nil { + continue + } + + isFinished := true + d.mu.RLock() + if token.Token < d.lastTokenInfo[taskID] { + isFinished = false + } + d.mu.RUnlock() + + if lastToken == nil { + lastToken = token + allFinished = isFinished + continue + } + + if allFinished { + // if all other tasks are finished but this task is not finished, use the last token of the unfinished task + if !isFinished { + allFinished = false + lastToken = token + continue + } + + // if all tasks are finished, use the last token of the latest finished task + if lastToken.Token < token.Token { + lastToken = token + } + continue + } + + // if not all tasks are finished, skip the finished tasks + if isFinished { + continue + } + if lastToken.Token > token.Token { + // use the last token of the earliest unfinished task + lastToken = token + } + } + + // if no events are handled, do not update the last token + if lastToken == nil || lastToken.Token == "" || lastToken.Token <= d.lastToken.Token { + return + } + + filter := map[string]interface{}{ + "_id": d.uuid, + } + + data := map[string]interface{}{ + common.BKTokenField: lastToken.Token, + common.BKStartAtTimeField: lastToken.StartAtTime, + } + + if err := d.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { + blog.Errorf("set db %s last watch token failed, err: %v, data: %+v", d.uuid, err, data) + return + } + d.lastToken = lastToken +} + +// SetLastWatchToken set last watch token for db watch task +func (d *dbTokenHandler) SetLastWatchToken(ctx context.Context, token *types.TokenInfo) error { + return nil +} + +// GetStartWatchToken get start watch token of db watch task +func (d *dbTokenHandler) GetStartWatchToken(ctx context.Context) (*types.TokenInfo, error) { + filter := 
map[string]interface{}{ + "_id": d.uuid, + } + + data := new(types.TokenInfo) + err := d.watchDB.Table(common.BKTableNameWatchToken).Find(filter).One(ctx, data) + if err != nil { + if !mongodb.IsNotFoundError(err) { + blog.Errorf("get db %s last watch token failed, err: %v", d.uuid, err) + return nil, err + } + return new(types.TokenInfo), nil + } + return data, nil +} + +// ResetWatchToken set watch token to empty and set the start watch time to the given one for next watch +func (d *dbTokenHandler) ResetWatchToken(startAtTime types.TimeStamp) error { + filter := map[string]interface{}{ + "_id": d.uuid, + } + + data := map[string]interface{}{ + common.BKTokenField: "", + common.BKStartAtTimeField: startAtTime, + } + + if err := d.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { + blog.Errorf("reset db %s watch token failed, err: %v, data: %+v", d.uuid, err, data) + return err + } + return nil +} diff --git a/src/storage/stream/task/util.go b/src/storage/stream/task/util.go new file mode 100644 index 0000000000..b9cd098d6b --- /dev/null +++ b/src/storage/stream/task/util.go @@ -0,0 +1,69 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package task + +import ( + "context" + "fmt" + + "configcenter/src/common" + "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/sharding" + "configcenter/src/storage/stream/types" +) + +// genWatchDBRelationMap generate db uuid to watch db uuid map +func genWatchDBRelationMap(db dal.Dal) (map[string]string, error) { + ctx := context.Background() + masterDB := db.Shard(sharding.NewShardOpts().WithIgnoreTenant()) + + relations := make([]sharding.WatchDBRelation, 0) + if err := masterDB.Table(common.BKTableNameWatchDBRelation).Find(nil).All(ctx, &relations); err != nil { + return nil, fmt.Errorf("get db and watch db relation failed, err: %v", err) + } + + watchDBRelation := make(map[string]string) + for _, relation := range relations { + watchDBRelation[relation.DB] = relation.WatchDB + } + return watchDBRelation, nil +} + +// compareToken compare event with token, returns if event is greater than the token +func compareToken(event *types.Event, token *types.TokenInfo) bool { + if token == nil { + return true + } + + if token.Token != "" { + return event.Token.Data > token.Token + } + + if token.StartAtTime == nil { + return true + } + + if event.ClusterTime.Sec > token.StartAtTime.Sec { + return true + } + + if event.ClusterTime.Sec == token.StartAtTime.Sec && event.ClusterTime.Nano > token.StartAtTime.Nano { + return true + } + return false +} diff --git a/src/storage/stream/task/watch_task.go b/src/storage/stream/task/watch_task.go new file mode 100644 index 0000000000..0caf7379a6 --- /dev/null +++ b/src/storage/stream/task/watch_task.go @@ -0,0 +1,198 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package task + +import ( + "context" + "time" + + "configcenter/src/common/blog" + "configcenter/src/storage/stream/types" +) + +// watchTask is the resource watch task +type watchTask struct { + // name is the watch task name that uniquely identifies the watch task + name string + // collOptions is the watch collection options + collOptions *types.WatchCollOptions + // eventHandler is the batch event handler + eventHandler *types.TaskBatchHandler + // tokenHandler is the token handler + tokenHandler types.TaskTokenHandler + // needList defines whether to list all data before watch + needList bool + + retryOptions *types.RetryOptions + batchSize int +} + +type dbWatchTask struct { + *watchTask + dbInfo *types.DBInfo + eventChan chan *types.Event + listChan chan *types.Event + lastToken *types.TokenInfo + tokenChan chan struct{} +} + +// maxUnhandledEventLimit if the number of unhandled events exceeds this value, block the event watch process +const maxUnhandledEventLimit = 2000 + +func newDBWatchTask(task *watchTask, dbInfo *types.DBInfo) (*dbWatchTask, error) { + lastToken, err := task.tokenHandler.GetStartWatchToken(context.Background(), dbInfo.UUID, dbInfo.WatchDB) + if err != nil { + blog.Errorf("get task %s db %s last watch token failed, err: %v", task.name, dbInfo.UUID, err) + return nil, err + } + + return &dbWatchTask{ + watchTask: task, + dbInfo: dbInfo, + eventChan: make(chan *types.Event, maxUnhandledEventLimit+task.batchSize), + listChan: make(chan *types.Event, task.batchSize), + lastToken: lastToken, + }, nil +} + +// start execute watch task +func (t *dbWatchTask) start(stopNotifier <-chan struct{}) { + go func() { + // list all data before watch if this task is a list watch task + if t.needList { + t.lastToken = &types.TokenInfo{ + StartAtTime: &types.TimeStamp{ + Sec: uint32(time.Now().Unix()), + }, + } + + events := make([]*types.Event, 0) + for event := range t.listChan { + events = append(events, event) + if len(events) == t.batchSize { + t.eventHandler.DoBatch(t.dbInfo, events) + } + if event.OperationType == types.ListDone { + break + } + } + if len(events) > 0 { + t.eventHandler.DoBatch(t.dbInfo, events) + } + } + + ticker := time.NewTicker(50 * time.Millisecond) + for { + // get events to be handled + events := make([]*types.Event, 0) + for { + select { + case one := <-t.eventChan: + // skip previous event with smaller token + if !compareToken(one, t.lastToken) { + blog.V(4).Infof("%s-%s job, skip previous event(%s)", t.name, t.dbInfo.UUID, one.String()) + continue + } + events = append(events, one) + if len(events) < t.batchSize { + continue + } + case <-ticker.C: + if len(events) == 0 { + continue + } + case <-stopNotifier: + ticker.Stop() + return + } + break + } + + // handle events + t.handleEvents(events) + } + }() +} + +func (t *dbWatchTask) handleEvents(events []*types.Event) { + ctx := context.Background() + first, last := events[0], events[len(events)-1] + rid := first.ID() + blog.Infof("%s-%s job, received %d events, first op-time: %s, fist token: %s, rid: %s", t.name, t.dbInfo.UUID, + len(events), first.ClusterTime.String(), first.Token.Data, rid) + + needRetry := false + retryCnt := 0 + for { + // get start watch token after retry to avoid conflict with another watch task + if needRetry { + time.Sleep(t.retryOptions.RetryDuration) + lastToken, err := t.tokenHandler.GetStartWatchToken(ctx, t.dbInfo.UUID, t.dbInfo.WatchDB) + if err != nil { + blog.Errorf("get task %s db %s token failed, err: %v, rid: %s", t.name, t.dbInfo.UUID, err, rid) + 
time.Sleep(t.retryOptions.RetryDuration) + continue + } + t.lastToken = lastToken + + // if current token is greater than last token, return + if !compareToken(last, lastToken) { + return + } + + // remove events with smaller token that are already handled + index := 0 + for i, event := range events { + if compareToken(event, lastToken) { + break + } + index = i + 1 + } + events = events[index:] + } + + // handle events, if all events are handled, just update last watch token + if len(events) > 0 { + needRetry = t.eventHandler.DoBatch(t.dbInfo, events) + if needRetry { + if retryCnt < t.retryOptions.MaxRetryCount { + retryCnt++ + continue + } + } + } + + // update last watch token, retry if failed + lastToken := &types.TokenInfo{ + Token: last.Token.Data, + StartAtTime: &last.ClusterTime, + } + if err := t.tokenHandler.SetLastWatchToken(ctx, t.dbInfo.UUID, t.dbInfo.WatchDB, lastToken); err != nil { + blog.Errorf("set task %s db %s last watch token(%+v) failed, err: %v, rid: %s", t.name, t.dbInfo.UUID, + *lastToken, err, rid) + needRetry = true + continue + } + t.lastToken = lastToken + select { + case t.tokenChan <- struct{}{}: + default: + } + return + } +} diff --git a/src/storage/stream/types/task.go b/src/storage/stream/types/task.go new file mode 100644 index 0000000000..ea82375dc9 --- /dev/null +++ b/src/storage/stream/types/task.go @@ -0,0 +1,177 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package types + +import ( + "context" + "errors" + "time" + + "configcenter/src/storage/dal/mongo/local" +) + +// NewTaskOptions is the new task options +type NewTaskOptions struct { + StopNotifier <-chan struct{} +} + +// Validate NewTaskOptions +func (o *NewTaskOptions) Validate() error { + if o.StopNotifier == nil { + // if not set, then set never stop loop as default + o.StopNotifier = make(<-chan struct{}) + } + return nil +} + +// TaskTokenHandler is the token handler for db watch task +type TaskTokenHandler interface { + SetLastWatchToken(ctx context.Context, uuid string, watchDB local.DB, token *TokenInfo) error + GetStartWatchToken(ctx context.Context, uuid string, watchDB local.DB) (*TokenInfo, error) +} + +// WatchTaskOptions is the common options for watch task +type WatchTaskOptions struct { + Name string + CollOpts *WatchCollOptions + TokenHandler TaskTokenHandler + RetryOptions *RetryOptions + MajorityCommitted *bool + MaxAwaitTime *time.Duration +} + +// Validate WatchTaskOptions +func (o *WatchTaskOptions) Validate() error { + if len(o.Name) == 0 { + return errors.New("watch task name is not set") + } + + if o.CollOpts == nil { + return errors.New("watch task coll options is not set") + } + + if err := o.CollOpts.Validate(); err != nil { + return err + } + + if o.TokenHandler == nil { + return errors.New("token handler is not set") + } + + if o.TokenHandler.SetLastWatchToken == nil || o.TokenHandler.GetStartWatchToken == nil { + return errors.New("some token handler functions is not set") + } + + if o.RetryOptions != nil { + if o.RetryOptions.MaxRetryCount <= 0 { + o.RetryOptions.MaxRetryCount = DefaultRetryCount + } + + if o.RetryOptions.RetryDuration == 0 { + o.RetryOptions.RetryDuration = DefaultRetryDuration + } + + if o.RetryOptions.RetryDuration < 500*time.Millisecond { + return errors.New("invalid retry duration, can not less than 500ms") + } + } else { + o.RetryOptions = &RetryOptions{ + MaxRetryCount: DefaultRetryCount, + RetryDuration: DefaultRetryDuration, + } + } + + return nil +} + +// LoopOneTaskOptions is the options for loop watch events one by one operation of one task +type LoopOneTaskOptions struct { + *WatchTaskOptions + EventHandler *TaskOneHandler +} + +// Validate LoopOneTaskOptions +func (o *LoopOneTaskOptions) Validate() error { + if o.WatchTaskOptions == nil { + return errors.New("common watch task options is not set") + } + + if err := o.WatchTaskOptions.Validate(); err != nil { + return err + } + + if o.EventHandler == nil { + return errors.New("event handler is not set") + } + + if o.EventHandler.DoAdd == nil || o.EventHandler.DoUpdate == nil || o.EventHandler.DoDelete == nil { + return errors.New("some event handler functions is not set") + } + return nil +} + +// TaskOneHandler is the watch task's event handler that handles events one by one +type TaskOneHandler struct { + DoAdd func(dbInfo *DBInfo, event *Event) (retry bool) + DoUpdate func(dbInfo *DBInfo, event *Event) (retry bool) + DoDelete func(dbInfo *DBInfo, event *Event) (retry bool) +} + +// LoopBatchTaskOptions is the options for loop watch batch events operation of one task +type LoopBatchTaskOptions struct { + *WatchTaskOptions + BatchSize int + EventHandler *TaskBatchHandler +} + +// Validate LoopBatchTaskOptions +func (o *LoopBatchTaskOptions) Validate() error { + if o.WatchTaskOptions == nil { + return errors.New("common watch task options is not set") + } + + if err := o.WatchTaskOptions.Validate(); err != nil { + return err + } + + if o.BatchSize <= 0 { + return 
errors.New("batch size is invalid") + } + + if o.EventHandler == nil { + return errors.New("event handler is not set") + } + + if o.EventHandler.DoBatch == nil { + return errors.New("event handler DoBatch function is not set") + } + return nil +} + +// TaskBatchHandler is the watch task's batch events handler +type TaskBatchHandler struct { + DoBatch func(dbInfo *DBInfo, es []*Event) bool +} + +// DBInfo is the db info for watch task +type DBInfo struct { + // UUID is the cc db uuid + UUID string + WatchDB *local.Mongo + CcDB local.DB +} From 69822acc278c36086ca664552b68279cb53949c0 Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:30:57 +0800 Subject: [PATCH 04/10] feat: change del archive to change stream pre image --story=121037708 --- docs/db/other.md | 17 ---- src/common/index/collections/delarchive.go | 86 ------------------ .../index/collections/deprecatedindexname.go | 5 -- src/common/metadata/cache.go | 10 --- src/common/util/table/table.go | 88 ++++++++----------- .../upgrader/y3.15.202411071530/indexes.go | 28 ------ .../y3.15.202411071530/table_index.go | 1 - src/storage/dal/mongo/local/collection.go | 71 --------------- 8 files changed, 39 insertions(+), 267 deletions(-) delete mode 100644 src/common/index/collections/delarchive.go diff --git a/docs/db/other.md b/docs/db/other.md index e701a524d6..707f3ee890 100644 --- a/docs/db/other.md +++ b/docs/db/other.md @@ -36,23 +36,6 @@ | _id | ObjectId | 数据唯一ID | | host_snap | NumberLong | gse数据入库的stream_to_id | -## cc_DelArchive - -#### 作用 - -用于归档被删除的数据 - -#### 表结构 - -| 字段 | 类型 | 描述 | -|-------------|----------|--------| -| _id | ObjectId | 数据唯一ID | -| oid | String | 事件ID | -| coll | String | 所操作的表 | -| detail | Object | 操作数据详情 | -| create_time | ISODate | 创建时间 | -| last_time | ISODate | 最后更新时间 | - ## cc_idgenerator #### 作用 diff --git a/src/common/index/collections/delarchive.go b/src/common/index/collections/delarchive.go deleted file mode 100644 index 6a6e6ee695..0000000000 --- a/src/common/index/collections/delarchive.go +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package collections - -import ( - "configcenter/src/common" - "configcenter/src/storage/dal/types" - - "go.mongodb.org/mongo-driver/bson" -) - -func init() { - registerIndexes(common.BKTableNameDelArchive, commDelArchiveIndexes) - registerIndexes(common.BKTableNameKubeDelArchive, commKubeDelArchiveIndexes) -} - -var commDelArchiveIndexes = []types.Index{ - { - Name: common.CCLogicUniqueIdxNamePrefix + "oid_coll", - Keys: bson.D{{"oid", 1}, {"coll", 1}}, - Unique: true, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "coll", - Keys: bson.D{{"coll", 1}}, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "oid", - Keys: bson.D{{"oid", 1}}, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "time", - Keys: bson.D{{common.FieldTypeTime, -1}}, - Background: true, - ExpireAfterSeconds: 7 * 24 * 60 * 60, - PartialFilterExpression: make(map[string]interface{}), - }, -} - -var commKubeDelArchiveIndexes = []types.Index{ - { - Name: common.CCLogicUniqueIdxNamePrefix + "coll_oid", - Keys: bson.D{ - {"coll", 1}, - {"oid", 1}, - }, - Unique: true, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "coll", - Keys: bson.D{{"coll", 1}}, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "oid", - Keys: bson.D{{"oid", 1}}, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "time", - Keys: bson.D{{common.FieldTypeTime, -1}}, - Background: true, - ExpireAfterSeconds: 2 * 24 * 60 * 60, - PartialFilterExpression: make(map[string]interface{}), - }, -} diff --git a/src/common/index/collections/deprecatedindexname.go b/src/common/index/collections/deprecatedindexname.go index e5150f7e37..d43f4007c3 100644 --- a/src/common/index/collections/deprecatedindexname.go +++ b/src/common/index/collections/deprecatedindexname.go @@ -111,11 +111,6 @@ var deprecatedIndexName = map[string][]string{ common.BKTableNameTopoGraphics: { "scope_type_1_scope_id_1_node_type_1_bk_obj_id_1_bk_inst_id_1", }, - common.BKTableNameDelArchive: { - "oid_1", - "idx_oid_coll", - "idx_coll", - }, common.BKTableNameServiceInstance: { "idx_bkBizID", "idx_serviceTemplateID", diff --git a/src/common/metadata/cache.go b/src/common/metadata/cache.go index e47a5ed8c0..ca79767e85 100644 --- a/src/common/metadata/cache.go +++ b/src/common/metadata/cache.go @@ -12,8 +12,6 @@ package metadata -import "time" - // SearchHostWithInnerIPOption 通过IP查找host details请求参数 type SearchHostWithInnerIPOption struct { InnerIP string `json:"bk_host_innerip"` @@ -44,14 +42,6 @@ type ListWithIDOption struct { Fields []string `json:"fields"` } -// DeleteArchive TODO -type DeleteArchive struct { - Oid string `json:"oid" bson:"oid"` - Coll string `json:"coll" bson:"coll"` - Time time.Time `json:"time" bson:"time"` - Detail interface{} `json:"detail" bson:"detail"` -} - // ListHostWithPage TODO // list hosts with page in cache, which page info is in redis cache. // store in a zset. 
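// Editor's note: an illustrative, hedged sketch, not part of this patch. With
// changeStreamPreAndPostImages enabled on a collection (see the NeedPreImageTable
// helper introduced in the table.go hunk below), the last state of a deleted document
// is carried on the change stream event itself, which is what allows this patch to
// drop the cc_DelArchive machinery. The package name, collection handle and event
// struct below are assumptions made only for illustration, using the official
// mongo-driver API.
package preimagesketch

import (
	"context"

	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)

// watchDeletesWithPreImage opens a change stream that asks the server to attach the
// document pre-image to each event, then reads deleted documents from those events.
func watchDeletesWithPreImage(ctx context.Context, coll *mongo.Collection) error {
	opts := options.ChangeStream().SetFullDocumentBeforeChange(options.WhenAvailable)
	cs, err := coll.Watch(ctx, mongo.Pipeline{}, opts)
	if err != nil {
		return err
	}
	defer cs.Close(ctx)

	for cs.Next(ctx) {
		var ev struct {
			OperationType            string   `bson:"operationType"`
			FullDocumentBeforeChange bson.Raw `bson:"fullDocumentBeforeChange"`
		}
		if err := cs.Decode(&ev); err != nil {
			return err
		}
		if ev.OperationType == "delete" {
			// ev.FullDocumentBeforeChange holds the deleted document's last state,
			// replacing the previous lookup into the delete archive collection.
			_ = ev.FullDocumentBeforeChange
		}
	}
	return cs.Err()
}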
diff --git a/src/common/util/table/table.go b/src/common/util/table/table.go index 4b11bc3a6e..b6981aa18c 100644 --- a/src/common/util/table/table.go +++ b/src/common/util/table/table.go @@ -24,64 +24,54 @@ import ( kubetypes "configcenter/src/kube/types" ) -var delArchiveCollMap = map[string]string{ - common.BKTableNameModuleHostConfig: common.BKTableNameDelArchive, - common.BKTableNameBaseHost: common.BKTableNameDelArchive, - common.BKTableNameBaseApp: common.BKTableNameDelArchive, - common.BKTableNameBaseSet: common.BKTableNameDelArchive, - common.BKTableNameBaseModule: common.BKTableNameDelArchive, - common.BKTableNameSetTemplate: common.BKTableNameDelArchive, - common.BKTableNameBaseProcess: common.BKTableNameDelArchive, - common.BKTableNameProcessInstanceRelation: common.BKTableNameDelArchive, - common.BKTableNameBaseBizSet: common.BKTableNameDelArchive, - common.BKTableNameBasePlat: common.BKTableNameDelArchive, - common.BKTableNameBaseProject: common.BKTableNameDelArchive, - fullsynccond.BKTableNameFullSyncCond: common.BKTableNameDelArchive, +var delArchiveCollMap = map[string]struct{}{ + common.BKTableNameModuleHostConfig: {}, + common.BKTableNameBaseHost: {}, + common.BKTableNameBaseApp: {}, + common.BKTableNameBaseSet: {}, + common.BKTableNameBaseModule: {}, + common.BKTableNameSetTemplate: {}, + common.BKTableNameBaseProcess: {}, + common.BKTableNameProcessInstanceRelation: {}, + common.BKTableNameBaseBizSet: {}, + common.BKTableNameBasePlat: {}, + common.BKTableNameBaseProject: {}, + fullsynccond.BKTableNameFullSyncCond: {}, - common.BKTableNameBaseInst: common.BKTableNameDelArchive, - common.BKTableNameMainlineInstance: common.BKTableNameDelArchive, - common.BKTableNameInstAsst: common.BKTableNameDelArchive, + common.BKTableNameBaseInst: {}, + common.BKTableNameMainlineInstance: {}, + common.BKTableNameInstAsst: {}, - common.BKTableNameServiceInstance: common.BKTableNameDelArchive, + common.BKTableNameServiceInstance: {}, - kubetypes.BKTableNameBaseCluster: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseNode: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseNamespace: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseWorkload: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseDeployment: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseStatefulSet: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseDaemonSet: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameGameDeployment: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameGameStatefulSet: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseCronJob: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseJob: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBasePodWorkload: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseCustom: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBasePod: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameBaseContainer: common.BKTableNameKubeDelArchive, - kubetypes.BKTableNameNsSharedClusterRel: common.BKTableNameKubeDelArchive, + kubetypes.BKTableNameBaseCluster: {}, + kubetypes.BKTableNameBaseNode: {}, + kubetypes.BKTableNameBaseNamespace: {}, + kubetypes.BKTableNameBaseWorkload: {}, + kubetypes.BKTableNameBaseDeployment: {}, + kubetypes.BKTableNameBaseStatefulSet: {}, + kubetypes.BKTableNameBaseDaemonSet: {}, + kubetypes.BKTableNameGameDeployment: {}, + kubetypes.BKTableNameGameStatefulSet: {}, + kubetypes.BKTableNameBaseCronJob: {}, + 
kubetypes.BKTableNameBaseJob: {}, + kubetypes.BKTableNameBasePodWorkload: {}, + kubetypes.BKTableNameBaseCustom: {}, + kubetypes.BKTableNameBasePod: {}, + kubetypes.BKTableNameBaseContainer: {}, + kubetypes.BKTableNameNsSharedClusterRel: {}, } -// GetDelArchiveTable get delete archive table -func GetDelArchiveTable(table string) (string, bool) { - delArchiveTable, exists := delArchiveCollMap[table] +// NeedPreImageTable check if table needs to enable change stream pre-image +func NeedPreImageTable(table string) bool { + _, exists := delArchiveCollMap[table] if exists { - return delArchiveTable, true + return true } if !common.IsObjectShardingTable(table) { - return "", false + return false } - return common.BKTableNameDelArchive, true -} - -// GetDelArchiveFields get delete archive fields by table -func GetDelArchiveFields(table string) []string { - switch table { - case common.BKTableNameServiceInstance: - return []string{common.BKFieldID} - } - - return make([]string, 0) + return true } diff --git a/src/scene_server/admin_server/upgrader/y3.15.202411071530/indexes.go b/src/scene_server/admin_server/upgrader/y3.15.202411071530/indexes.go index e572c1d819..9ac1e9ed87 100644 --- a/src/scene_server/admin_server/upgrader/y3.15.202411071530/indexes.go +++ b/src/scene_server/admin_server/upgrader/y3.15.202411071530/indexes.go @@ -377,34 +377,6 @@ var ( PartialFilterExpression: make(map[string]interface{}), }, } - delArchiveIndexes = []types.Index{ - { - Name: common.CCLogicUniqueIdxNamePrefix + "OID_coll", - Keys: bson.D{{"oid", 1}, {"coll", 1}}, - Unique: true, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "coll", - Keys: bson.D{{"coll", 1}}, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "OID", - Keys: bson.D{{"oid", 1}}, - Background: true, - PartialFilterExpression: make(map[string]interface{}), - }, - { - Name: common.CCLogicIndexNamePrefix + "time", - Keys: bson.D{{"time", -1}}, - Background: true, - ExpireAfterSeconds: 7 * 24 * 60 * 60, - PartialFilterExpression: make(map[string]interface{}), - }, - } deploymentBaseIndexes = []types.Index{ { Name: common.CCLogicUniqueIdxNamePrefix + "ID", diff --git a/src/scene_server/admin_server/upgrader/y3.15.202411071530/table_index.go b/src/scene_server/admin_server/upgrader/y3.15.202411071530/table_index.go index 57147379c6..16155116dc 100644 --- a/src/scene_server/admin_server/upgrader/y3.15.202411071530/table_index.go +++ b/src/scene_server/admin_server/upgrader/y3.15.202411071530/table_index.go @@ -52,7 +52,6 @@ var tableIndexMap = map[string][]daltypes.Index{ common.BKTableNameBasePlat: platBaseIndexes, common.BKTableNameBaseSet: setBaseIndexes, common.BKTableNameBaseProcess: processIndexes, - common.BKTableNameDelArchive: delArchiveIndexes, common.BKTableNameKubeDelArchive: kubeDelArchiveIndexes, common.BKTableNameModuleHostConfig: moduleHostConfigIndexes, common.BKTableNameHostFavorite: nil, diff --git a/src/storage/dal/mongo/local/collection.go b/src/storage/dal/mongo/local/collection.go index 0ce82fad6e..673f72c0f0 100644 --- a/src/storage/dal/mongo/local/collection.go +++ b/src/storage/dal/mongo/local/collection.go @@ -26,14 +26,11 @@ import ( "time" "configcenter/src/common" - "configcenter/src/common/metadata" "configcenter/src/common/util" - "configcenter/src/common/util/table" "configcenter/src/storage/dal/types" dtype "configcenter/src/storage/types" 
"go.mongodb.org/mongo-driver/bson" - "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "go.mongodb.org/mongo-driver/mongo/readpref" @@ -245,10 +242,6 @@ func (c *Collection) DeleteMany(ctx context.Context, filter types.Filter) (uint6 var deleteCount uint64 err = c.tm.AutoRunWithTxn(ctx, c.cli.Client(), func(ctx context.Context) error { - if err := c.tryArchiveDeletedDoc(ctx, filter); err != nil { - mtc.collectErrorCount(c.collName, deleteOper) - return err - } deleteRet, err := c.cli.Database().Collection(c.collName).DeleteMany(ctx, filter) if err != nil { mtc.collectErrorCount(c.collName, deleteOper) @@ -262,70 +255,6 @@ func (c *Collection) DeleteMany(ctx context.Context, filter types.Filter) (uint6 return deleteCount, err } -func (c *Collection) tryArchiveDeletedDoc(ctx context.Context, filter types.Filter) error { - delArchiveTable, exists := table.GetDelArchiveTable(c.collName) - if !exists { - // do not archive the delete docs - return nil - } - - filter, err := c.addTenantID(filter) - if err != nil { - return err - } - - // only archive the specified fields for delete docs - var findOpts *options.FindOptions - fields := table.GetDelArchiveFields(c.collName) - if len(fields) > 0 { - projection := map[string]int{"_id": 1} - for _, field := range fields { - projection[field] = 1 - } - findOpts = &options.FindOptions{Projection: projection} - } - - docs := make([]bson.D, 0) - cursor, err := c.cli.Database().Collection(c.collName).Find(ctx, filter, findOpts) - if err != nil { - return err - } - - if err := cursor.All(ctx, &docs); err != nil { - return err - } - - if len(docs) == 0 { - return nil - } - - archives := make([]interface{}, len(docs)) - for idx, doc := range docs { - detail := make(bson.D, 0) - var oid string - for _, e := range doc { - if e.Key == "_id" { - rawOid, ok := e.Value.(primitive.ObjectID) - if !ok { - return errors.New("invalid object id") - } - oid = rawOid.Hex() - continue - } - detail = append(detail, e) - } - archives[idx] = metadata.DeleteArchive{ - Oid: oid, - Detail: detail, - Time: time.Now(), - Coll: c.collName, - } - } - - _, err = c.cli.Database().Collection(delArchiveTable).InsertMany(ctx, archives) - return err -} - // BatchCreateIndexes 批量创建索引 func (c *Collection) BatchCreateIndexes(ctx context.Context, indexes []types.Index) error { mtc.collectOperCount(c.collName, indexCreateOper) From 36ecdc2735cd209d4e3f525127d55f219cbc9773 Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:36:04 +0800 Subject: [PATCH 05/10] feat: use watch task to do flow watch --story=121164612 --- src/ac/parser/event.go | 13 - src/common/http/rest/kit.go | 20 + src/common/tablenames.go | 17 +- src/common/watch/cursor.go | 33 +- src/common/watch/types.go | 15 +- src/common/watch/watch.go | 2 +- .../cacheservice/app/server.go | 18 +- .../event/bsrelation/bsrelation.go | 10 +- .../event/bsrelation/converter.go | 479 ++++++++---------- .../cacheservice/event/bsrelation/event.go | 87 ++-- .../cacheservice/event/flow/details.go | 138 ----- .../cacheservice/event/flow/event.go | 113 ++--- .../cacheservice/event/flow/flow.go | 379 +++++++------- .../cacheservice/event/flow/handler.go | 100 +--- .../cacheservice/event/flow/inst_asst_flow.go | 48 +- .../cacheservice/event/flow/instance_flow.go | 479 +++++++++--------- .../cacheservice/event/flow/parser.go | 170 +++---- .../cacheservice/event/flow/workload_flow.go | 98 ---- .../event/identifier/converter.go | 259 
++-------- .../cacheservice/event/identifier/event.go | 30 +- .../event/identifier/identifier.go | 35 +- .../cacheservice/event/key.go | 136 +---- .../cacheservice/event/mix-event/flow.go | 233 ++++----- .../cacheservice/event/mix-event/handler.go | 131 ++--- .../cacheservice/event/util.go | 77 +-- .../cacheservice/event/watch/client.go | 271 ++-------- .../cacheservice/event/watch/watch.go | 11 +- .../cacheservice/service/health.go | 10 +- .../cacheservice/service/service.go | 32 +- 29 files changed, 1212 insertions(+), 2232 deletions(-) delete mode 100644 src/source_controller/cacheservice/event/flow/details.go delete mode 100644 src/source_controller/cacheservice/event/flow/workload_flow.go diff --git a/src/ac/parser/event.go b/src/ac/parser/event.go index cc1b8d56bd..49443b2c90 100644 --- a/src/ac/parser/event.go +++ b/src/ac/parser/event.go @@ -90,19 +90,6 @@ func (ps *parseStream) watch() *parseStream { } authResource.InstanceID = model.ID } - case watch.KubeWorkload: - body, err := ps.RequestCtx.getRequestBody() - if err != nil { - ps.err = err - return ps - } - - // use sub resource(corresponding to the kind of the workload) for authorization if it is set - // if sub resource is not set, verify authorization of the resource(which means all sub resources) - subResource := gjson.GetBytes(body, "bk_filter."+common.BKSubResourceField) - if subResource.Exists() { - authResource.InstanceIDEx = subResource.String() - } } ps.Attribute.Resources = append(ps.Attribute.Resources, authResource) diff --git a/src/common/http/rest/kit.go b/src/common/http/rest/kit.go index 8638007f3a..eba18ac8ff 100644 --- a/src/common/http/rest/kit.go +++ b/src/common/http/rest/kit.go @@ -68,6 +68,26 @@ func NewKit() *Kit { return NewKitFromHeader(headerutil.GenDefaultHeader(), errors.GetGlobalCCError()) } +// WithCtx set kit context +func (kit *Kit) WithCtx(ctx context.Context) *Kit { + kit.Ctx = ctx + return kit +} + +// WithTenant set kit tenant +func (kit *Kit) WithTenant(tenantID string) *Kit { + kit.TenantID = tenantID + httpheader.SetTenantID(kit.Header, tenantID) + return kit +} + +// WithRid set kit rid +func (kit *Kit) WithRid(rid string) *Kit { + kit.Rid = rid + httpheader.SetRid(kit.Header, rid) + return kit +} + // ShardOpts returns sharding options func (kit *Kit) ShardOpts() sharding.ShardOpts { return sharding.NewShardOpts().WithTenant(kit.TenantID) diff --git a/src/common/tablenames.go b/src/common/tablenames.go index fd45c683fd..05218b1413 100644 --- a/src/common/tablenames.go +++ b/src/common/tablenames.go @@ -284,9 +284,9 @@ var platformTableMap = map[string]struct{}{ BKTableNameTenantTemplate: {}, BKTableNamePlatformAuditLog: {}, BKTableNameWatchToken: {}, - BKTableNameLastWatchEvent: {}, BKTableNameAPITask: {}, BKTableNameAPITaskSyncHistory: {}, + BKTableNameWatchDBRelation: {}, } // IsPlatformTable returns if the target table is a platform table @@ -320,15 +320,22 @@ func GenTenantTableName(tenantID, tableName string) string { return fmt.Sprintf("%s_%s", tenantID, tableName) } -// SplitTenantTableName split tenant table name to table name and tenant id +// SplitTenantTableName split tenant table name to tenant id and table name func SplitTenantTableName(tenantTableName string) (string, string, error) { if IsPlatformTable(tenantTableName) { - return tenantTableName, "", nil + return "", tenantTableName, nil } - if !strings.Contains(tenantTableName, "_") { - return "", "", errors.New("tenant table name is invalid") + if strings.Contains(tenantTableName, "_"+BKObjectInstShardingTablePrefix) { 
+ sepIdx := strings.LastIndex(tenantTableName, "_"+BKObjectInstShardingTablePrefix) + return tenantTableName[:sepIdx], tenantTableName[sepIdx+1:], nil + } + + if strings.Contains(tenantTableName, "_"+BKObjectInstAsstShardingTablePrefix) { + sepIdx := strings.LastIndex(tenantTableName, "_"+BKObjectInstAsstShardingTablePrefix) + return tenantTableName[:sepIdx], tenantTableName[sepIdx+1:], nil } + sepIdx := strings.LastIndex(tenantTableName, "_") if sepIdx == -1 { return "", "", errors.New("tenant table name is invalid") diff --git a/src/common/watch/cursor.go b/src/common/watch/cursor.go index 1303a46f2f..e83a96904a 100644 --- a/src/common/watch/cursor.go +++ b/src/common/watch/cursor.go @@ -22,10 +22,8 @@ import ( "configcenter/src/common" "configcenter/src/common/blog" - kubetypes "configcenter/src/kube/types" "configcenter/src/storage/stream/types" - "github.com/tidwall/gjson" "go.mongodb.org/mongo-driver/bson/primitive" ) @@ -73,11 +71,6 @@ var ( BizSet: 14, BizSetRelation: 15, Plat: 16, - KubeCluster: 17, - KubeNode: 18, - KubeNamespace: 19, - KubeWorkload: 20, - KubePod: 21, Project: 22, } @@ -124,17 +117,6 @@ const ( Plat CursorType = "plat" // Project project event cursor type Project CursorType = "project" - // kube related cursor types - // KubeCluster cursor type - KubeCluster CursorType = "kube_cluster" - // KubeNode cursor type - KubeNode CursorType = "kube_node" - // KubeNamespace cursor type - KubeNamespace CursorType = "kube_namespace" - // KubeWorkload cursor type, including all workloads(e.g. deployment) with their type specified in sub-resource - KubeWorkload CursorType = "kube_workload" - // KubePod cursor type, its event detail is pod info with containers in it - KubePod CursorType = "kube_pod" ) // ToInt TODO @@ -161,8 +143,7 @@ func (ct *CursorType) ParseInt(typ int) { // ListCursorTypes returns all support CursorTypes. func ListCursorTypes() []CursorType { return []CursorType{Host, ModuleHostRelation, Biz, Set, Module, ObjectBase, Process, ProcessInstanceRelation, - HostIdentifier, MainlineInstance, InstAsst, BizSet, BizSetRelation, Plat, KubeCluster, KubeNode, KubeNamespace, - KubeWorkload, KubePod, Project} + HostIdentifier, MainlineInstance, InstAsst, BizSet, BizSetRelation, Plat, Project} } // Cursor is a self-defined token which is corresponding to the mongodb's resume token. @@ -339,11 +320,6 @@ var collEventCursorTypeMap = map[string]CursorType{ common.BKTableNameInstAsst: InstAsst, common.BKTableNameBaseBizSet: BizSet, common.BKTableNameBasePlat: Plat, - kubetypes.BKTableNameBaseCluster: KubeCluster, - kubetypes.BKTableNameBaseNode: KubeNode, - kubetypes.BKTableNameBaseNamespace: KubeNamespace, - kubetypes.BKTableNameBaseWorkload: KubeWorkload, - kubetypes.BKTableNameBasePod: KubePod, common.BKTableNameBaseProject: Project, } @@ -370,13 +346,6 @@ func GetEventCursor(coll string, e *types.Event, instID int64) (string, error) { // add unique key for common object instance. hCursor.UniqKey = strconv.FormatInt(instID, 10) - case KubeWorkload: - if instID <= 0 { - return "", errors.New("invalid kube workload id") - } - - // add unique key for kube workload, composed by workload type and id. 
- hCursor.UniqKey = fmt.Sprintf("%s:%d", gjson.GetBytes(e.DocBytes, kubetypes.KindField).String(), instID) } hCursorEncode, err := hCursor.Encode() diff --git a/src/common/watch/types.go b/src/common/watch/types.go index 65e1c33e7f..c4ba2f12e9 100644 --- a/src/common/watch/types.go +++ b/src/common/watch/types.go @@ -75,15 +75,16 @@ type ChainNode struct { InstanceID int64 `json:"inst_id,omitempty" bson:"inst_id,omitempty"` // SubResource the sub resource of the watched resource, eg. the object ID of the instance resource SubResource []string `json:"bk_sub_resource,omitempty" bson:"bk_sub_resource,omitempty"` - // TenantID the supplier account of the chain node's related event resource. - TenantID string `json:"tenant_id" bson:"tenant_id"` } // LastChainNodeData TODO type LastChainNodeData struct { - Coll string `json:"_id" bson:"_id"` - ID uint64 `json:"id" bson:"id"` - Token string `json:"token" bson:"token"` - Cursor string `json:"cursor" bson:"cursor"` - StartAtTime types.TimeStamp `json:"start_at_time,omitempty" bson:"start_at_time,omitempty"` + Coll string `json:"_id" bson:"_id"` + ID uint64 `json:"id" bson:"id"` + Cursor string `json:"cursor" bson:"cursor"` +} + +// GenDBWatchTokenID generate db watch token identifier by db uuid and collection name +func GenDBWatchTokenID(dbID, coll string) string { + return dbID + ":" + coll } diff --git a/src/common/watch/watch.go b/src/common/watch/watch.go index 84c87978de..defbb50ee5 100644 --- a/src/common/watch/watch.go +++ b/src/common/watch/watch.go @@ -71,7 +71,7 @@ func (w *WatchEventOptions) Validate(isInner bool) error { if len(w.Filter.SubResource) > 0 || len(w.Filter.SubResources) > 0 { switch w.Resource { - case ObjectBase, MainlineInstance, InstAsst, KubeWorkload: + case ObjectBase, MainlineInstance, InstAsst: default: return fmt.Errorf("%s event cannot have sub resource", w.Resource) } diff --git a/src/source_controller/cacheservice/app/server.go b/src/source_controller/cacheservice/app/server.go index 573a4453cd..3e0b71081c 100644 --- a/src/source_controller/cacheservice/app/server.go +++ b/src/source_controller/cacheservice/app/server.go @@ -22,6 +22,7 @@ import ( "configcenter/src/common/backbone" cc "configcenter/src/common/backbone/configcenter" "configcenter/src/common/blog" + "configcenter/src/common/errors" "configcenter/src/common/types" "configcenter/src/source_controller/cacheservice/app/options" cachesvr "configcenter/src/source_controller/cacheservice/service" @@ -85,6 +86,7 @@ func Run(ctx context.Context, cancel context.CancelFunc, op *options.ServerOptio } cacheSvr.Core = engine + errors.SetGlobalCCError(engine.CCErr) if err := initResource(cacheSvr); err != nil { return nil @@ -122,9 +124,19 @@ func initResource(cacheSvr *CacheServer) error { return err } - dbErr := mongodb.InitClient("", &cacheSvr.Config.Mongo) - if dbErr != nil { - blog.Errorf("failed to connect the db server, error info is %s", dbErr.Error()) + cryptoConf, err := cc.Crypto("crypto") + if err != nil { + blog.Errorf("get crypto config failed, err: %v", err) + return err + } + + if dbErr := mongodb.SetShardingCli("", &cacheSvr.Config.Mongo, cryptoConf); dbErr != nil { + blog.Errorf("failed to connect the db server, err: %v", dbErr) + return dbErr + } + + if dbErr := mongodb.SetWatchCli("watch", &cacheSvr.Config.WatchMongo, cryptoConf); dbErr != nil { + blog.Errorf("new watch db sharding client failed, err: %v", dbErr) return dbErr } diff --git a/src/source_controller/cacheservice/event/bsrelation/bsrelation.go 
b/src/source_controller/cacheservice/event/bsrelation/bsrelation.go index ba6c25a783..55f1584b2d 100644 --- a/src/source_controller/cacheservice/event/bsrelation/bsrelation.go +++ b/src/source_controller/cacheservice/event/bsrelation/bsrelation.go @@ -21,9 +21,7 @@ import ( "configcenter/src/common/blog" "configcenter/src/source_controller/cacheservice/event" mixevent "configcenter/src/source_controller/cacheservice/event/mix-event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) const ( @@ -32,12 +30,10 @@ const ( ) // NewBizSetRelation init and run biz set relation event watch -func NewBizSetRelation(watch stream.LoopInterface, watchDB *local.Mongo, ccDB dal.DB) error { +func NewBizSetRelation(task *task.Task) error { base := mixevent.MixEventFlowOptions{ MixKey: event.BizSetRelationKey, - Watch: watch, - WatchDB: watchDB, - CcDB: ccDB, + Task: task, EventLockKey: bizSetRelationLockKey, EventLockTTL: bizSetRelationLockTTL, } diff --git a/src/source_controller/cacheservice/event/bsrelation/converter.go b/src/source_controller/cacheservice/event/bsrelation/converter.go index 1e640b5434..8f8f65d539 100644 --- a/src/source_controller/cacheservice/event/bsrelation/converter.go +++ b/src/source_controller/cacheservice/event/bsrelation/converter.go @@ -18,13 +18,17 @@ import ( "sync" "time" + "configcenter/pkg/tenant" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/json" "configcenter/src/common/metadata" "configcenter/src/common/querybuilder" "configcenter/src/common/util" + "configcenter/src/storage/dal/mongo/sharding" dbtypes "configcenter/src/storage/dal/types" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" "github.com/tidwall/gjson" @@ -33,15 +37,15 @@ import ( // needCareBizFields struct to get id->type map of need cared biz fields that will result in biz set relation changes type needCareBizFields struct { - fieldMap map[string]string + fieldMap map[string]map[string]string lock sync.RWMutex } // Get get need cared biz fields -func (a *needCareBizFields) Get() map[string]string { +func (a *needCareBizFields) Get() map[string]map[string]string { a.lock.RLock() defer a.lock.RUnlock() - fieldMap := make(map[string]string, len(a.fieldMap)) + fieldMap := make(map[string]map[string]string, len(a.fieldMap)) for key, value := range a.fieldMap { fieldMap[key] = value } @@ -49,10 +53,10 @@ func (a *needCareBizFields) Get() map[string]string { } // Set set need cared biz fields -func (a *needCareBizFields) Set(fieldMap map[string]string) { +func (a *needCareBizFields) Set(tenantID string, fieldMap map[string]string) { a.lock.Lock() defer a.lock.Unlock() - a.fieldMap = fieldMap + a.fieldMap[tenantID] = fieldMap } // syncNeedCareBizFields refresh need cared biz fields every minutes @@ -60,18 +64,25 @@ func (b *bizSetRelation) syncNeedCareBizFields() { for { time.Sleep(time.Minute) - fields, err := b.getNeedCareBizFields(context.Background()) + err := tenant.ExecForAllTenants(func(tenantID string) error { + fields, err := b.getNeedCareBizFields(context.Background(), tenantID) + if err != nil { + blog.Errorf("run biz set relation watch, but get need care biz fields failed, err: %v", err) + return err + } + b.needCareBizFields.Set(tenantID, fields) + blog.V(5).Infof("run biz set relation watch, sync tenant %s need care biz fields done, fields: %+v", + tenantID, fields) + return nil + }) if 
err != nil { - blog.Errorf("run biz set relation watch, but get need care biz fields failed, err: %v", err) continue } - b.needCareBizFields.Set(fields) - blog.V(5).Infof("run biz set relation watch, sync need care biz fields done, fields: %+v", fields) } } // getNeedCareBizFields get need cared biz fields, including biz id and enum/organization type fields -func (b *bizSetRelation) getNeedCareBizFields(ctx context.Context) (map[string]string, error) { +func (b *bizSetRelation) getNeedCareBizFields(ctx context.Context, tenantID string) (map[string]string, error) { filter := map[string]interface{}{ common.BKObjIDField: common.BKInnerObjIDApp, common.BKPropertyTypeField: map[string]interface{}{ @@ -80,8 +91,8 @@ func (b *bizSetRelation) getNeedCareBizFields(ctx context.Context) (map[string]s } attributes := make([]metadata.Attribute, 0) - err := b.ccDB.Table(common.BKTableNameObjAttDes).Find(filter).Fields(common.BKPropertyIDField, - common.BKPropertyTypeField).All(ctx, &attributes) + err := mongodb.Shard(sharding.NewShardOpts().WithTenant(tenantID)).Table(common.BKTableNameObjAttDes).Find(filter). + Fields(common.BKPropertyIDField, common.BKPropertyTypeField).All(ctx, &attributes) if err != nil { blog.Errorf("get need care biz attributes failed, filter: %+v, err: %v", filter, err) return nil, err @@ -94,42 +105,30 @@ func (b *bizSetRelation) getNeedCareBizFields(ctx context.Context) (map[string]s return fieldMap, nil } -// rearrangeBizSetEvents TODO -// biz set events rearrange policy: -// 1. If update event's updated fields do not contain "bk_scope" field, we will drop this event. -// 2. Aggregate multiple same biz set's events to one event, so that we can decrease the amount of biz set relation -// events. Because we only care about which biz set's relation is changed, one event is enough for us. +// rearrangeBizSetEvents biz set events rearrange policy: +// 1. If update event's updated fields do not contain "bk_scope" field, we will drop this event. +// 2. Aggregate multiple same biz set's events to one event, so that we can decrease the amount of biz set relation +// events. Because we only care about which biz set's relation is changed, one event is enough for us. func (b *bizSetRelation) rearrangeBizSetEvents(es []*types.Event, rid string) ([]*types.Event, error) { // get last biz set event type, in order to rearrange biz set events later, policy: // 2. create + update: event is changed to create event with the update event detail // 3. update + delete: event is changed to delete event lastEventMap := make(map[string]*types.Event) - deletedOids := make([]string, 0) for i := len(es) - 1; i >= 0; i-- { e := es[i] - if _, exists := lastEventMap[e.Oid]; !exists { - lastEventMap[e.Oid] = e - - if e.OperationType == types.Delete { - deletedOids = append(deletedOids, e.Oid) - } + if _, exists := lastEventMap[genUniqueKey(e)]; !exists { + lastEventMap[genUniqueKey(e)] = e } } - // get deleted biz set detail from del archive before adding to the events. - oidDetailMap, err := b.getDeleteEventDetails(deletedOids, rid) - if err != nil { - return nil, err - } - hitEvents := make([]*types.Event, 0) // remind if a biz set events has already been hit, if yes, then skip this event. reminder := make(map[string]struct{}) for _, one := range es { - if _, yes := reminder[one.Oid]; yes { + if _, yes := reminder[genUniqueKey(one)]; yes { // this biz set event is already hit, then we aggregate the event with the former ones to only one event. // this is useful to decrease biz set relation events. 
blog.Infof("biz set event: %s is aggregated, rid: %s", one.ID(), rid) @@ -137,25 +136,30 @@ func (b *bizSetRelation) rearrangeBizSetEvents(es []*types.Event, rid string) ([ } // get the hit event(nil means no event is hit) and if other events of the biz set needs to be ignored - hitEvent, needIgnored := b.parseBizSetEvent(one, oidDetailMap, lastEventMap, rid) + hitEvent, needIgnored := b.parseBizSetEvent(one, lastEventMap, rid) if hitEvent != nil { hitEvents = append(hitEvents, hitEvent) } if needIgnored { - reminder[one.Oid] = struct{}{} + reminder[genUniqueKey(one)] = struct{}{} } } // refresh all biz ids cache if the events contains match all biz set, the cache is used to generate detail later + matchAllTenantIDs := make([]string, 0) for _, e := range hitEvents { if e.OperationType != types.Delete && gjson.Get(string(e.DocBytes), "bk_scope.match_all").Bool() { - err := b.refreshAllBizIDStr(rid) - if err != nil { + matchAllTenantIDs = append(matchAllTenantIDs, e.TenantID) + break + } + + matchAllTenantIDs = util.StrArrayUnique(matchAllTenantIDs) + for _, tenantID := range matchAllTenantIDs { + if err := b.refreshAllBizIDStr(tenantID, rid); err != nil { return nil, err } - break } } @@ -163,13 +167,13 @@ func (b *bizSetRelation) rearrangeBizSetEvents(es []*types.Event, rid string) ([ } // parseBizSetEvent parse biz set event, returns the hit event and if same oid events needs to be skipped -func (b *bizSetRelation) parseBizSetEvent(one *types.Event, oidDetailMap map[string][]byte, - lastEventMap map[string]*types.Event, rid string) (*types.Event, bool) { +func (b *bizSetRelation) parseBizSetEvent(one *types.Event, lastEventMap map[string]*types.Event, rid string) ( + *types.Event, bool) { switch one.OperationType { case types.Insert: // if events are in the order of create + update + delete, all events of this biz set will be ignored - lastEvent := lastEventMap[one.Oid] + lastEvent := lastEventMap[genUniqueKey(one)] if lastEvent.OperationType == types.Delete { return nil, true } @@ -183,17 +187,11 @@ func (b *bizSetRelation) parseBizSetEvent(one *types.Event, oidDetailMap map[str // insert event is added directly. return one, true case types.Delete: - // reset delete event detail to the value before deletion from del archive, then add to hit events. - doc, exist := oidDetailMap[one.Oid] - if !exist { - blog.Errorf("%s event delete detail[oid: %s] not exists, rid: %s", b.key.Collection(), one.Oid, rid) - return nil, false - } - one.DocBytes = doc + // delete event is added directly. return one, true case types.Update, types.Replace: // if events are in the order of update + delete, it is changed to delete event, update event is ignored - if lastEventMap[one.Oid].OperationType == types.Delete { + if lastEventMap[genUniqueKey(one)].OperationType == types.Delete { return nil, false } @@ -227,56 +225,19 @@ func (b *bizSetRelation) parseBizSetEvent(one *types.Event, oidDetailMap map[str return nil, false } -// delArchive delete event archived detail struct, with oid and detail -type delArchive struct { - oid string `bson:"oid"` - Detail map[string]interface{} `bson:"detail"` -} - -// rearrangeBizEvents TODO -// biz events rearrange policy: -// 1. Biz event is redirected to its related biz sets' events by traversing all biz sets and checking if the "bk_scope" -// field matches the biz's attribute. -// 2. Create and delete event's related biz set is judged by whether its scope contains the biz. -// 3. 
Update event's related biz set is judged by whether its scope contains the updated fields of the event. Since -// we can't get the previous value of the updated fields, we can't get the exact biz sets it was in before. -// 4. Aggregate multiple biz events with the same biz set to one event. +// rearrangeBizEvents biz events rearrange policy: +// 1. Biz event is redirected to its related biz sets' events by traversing all biz sets and checking if the "bk_scope" +// field matches the biz's attribute. +// 2. Create and delete event's related biz set is judged by whether its scope contains the biz. +// 3. Update event's related biz set is judged by whether its scope contains the updated fields of the event. Since +// we can't get the previous value of the updated fields, we can't get the exact biz sets it was in before. +// 4. Aggregate multiple biz events with the same biz set to one event. func (b *bizSetRelation) rearrangeBizEvents(es []*types.Event, rid string) ([]*types.Event, error) { - - // get delete event oids from delete events, and get deleted biz detail by oids to find matching biz sets. - deletedOids := make([]string, 0) - for _, one := range es { - if one.OperationType == types.Delete { - deletedOids = append(deletedOids, one.Oid) - } - } - - deletedDetailMap := make(map[string]map[string]interface{}) - if len(deletedOids) > 0 { - filter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: deletedOids}, - "coll": b.key.Collection(), - } - - docs := make([]delArchive, 0) - err := b.ccDB.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail").All(context.Background(), &docs) - if err != nil { - b.metrics.CollectMongoError() - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oids: %+v, err: %v, rid: %s", - b.key.Collection(), deletedOids, err, rid) - return nil, err - } - - for _, doc := range docs { - deletedDetailMap[doc.oid] = doc.Detail - } - } - // parse biz events to convert biz event parameter - params := b.parseBizEvents(es, deletedDetailMap, rid) + params := b.parseBizEvents(es, rid) // convert biz event to related biz set event whose relation is affected by biz event - bizSetEvents, err := b.convertBizEvent(params) + bizSetEvents, err := b.convertBizEvent(params, rid) if err != nil { return nil, err } @@ -285,76 +246,53 @@ func (b *bizSetRelation) rearrangeBizEvents(es []*types.Event, rid string) ([]*t } // parseBizEvents parse biz events to insert/delete event details and updated fields, used to generate biz set events -func (b *bizSetRelation) parseBizEvents(es []*types.Event, deletedDetailMap map[string]map[string]interface{}, - rid string) convertBizEventParams { +func (b *bizSetRelation) parseBizEvents(es []*types.Event, rid string) convertBizEventParams { // generate convert biz event to biz set event parameter params := convertBizEventParams{ es: es, - insertedAndDeletedBiz: make([]map[string]interface{}, 0), - insertedAndDeletedBizIndexMap: make(map[int]int, 0), - updatedFieldsIndexMap: make(map[string]int, 0), + insertedAndDeletedBiz: make(map[string][]map[string]interface{}), + insertedAndDeletedBizIndexMap: make(map[string]map[int]int), + updatedFieldsIndexMap: make(map[string]map[string]int), needCareFieldsMap: b.needCareBizFields.Get(), - rid: rid, } // parse biz event into inserted/deleted details, and updated fields to match biz sets whose relation changed. 
for index, one := range es { + tenantID := one.TenantID switch one.OperationType { - case types.Insert: + case types.Insert, types.Delete: // use document in insert event to find matching biz sets. - params.insertedAndDeletedBiz = append(params.insertedAndDeletedBiz, *one.Document.(*map[string]interface{})) - params.insertedAndDeletedBizIndexMap[len(params.insertedAndDeletedBiz)-1] = index - - case types.Delete: - // use document in delete event detail map to find matching biz sets. - detail, exists := deletedDetailMap[one.Oid] - if !exists { - blog.Errorf("%s event delete detail[oid: %s] not exists, rid: %s", b.key.Collection(), one.Oid, rid) - continue - } - params.insertedAndDeletedBiz = append(params.insertedAndDeletedBiz, detail) - params.insertedAndDeletedBizIndexMap[len(params.insertedAndDeletedBiz)-1] = index + params.convertInsertedAndDeletedBizEvent(tenantID, one.Document, index) case types.Update, types.Replace: // replace event's change description is empty, treat as updating all fields. if len(one.ChangeDesc.UpdatedFields) == 0 && len(one.ChangeDesc.RemovedFields) == 0 { - for field := range params.needCareFieldsMap { - if _, exists := params.updatedFieldsIndexMap[field]; !exists { - params.updatedFieldsIndexMap[field] = index - } + for field := range params.needCareFieldsMap[tenantID] { + params.convertUpdateBizEvent(tenantID, field, index) } continue } // get the updated/removed fields that need to be cared, ignores the rest. isIgnored := true + updateFields := make([]string, 0) if len(one.ChangeDesc.UpdatedFields) > 0 { // for biz set relation,archive biz is treated as delete event while recover is treated as insert event if one.ChangeDesc.UpdatedFields[common.BKDataStatusField] == string(common.DataStatusDisabled) || one.ChangeDesc.UpdatedFields[common.BKDataStatusField] == string(common.DataStatusEnable) { - - detail := *one.Document.(*map[string]interface{}) - params.insertedAndDeletedBiz = append(params.insertedAndDeletedBiz, detail) - params.insertedAndDeletedBizIndexMap[len(params.insertedAndDeletedBiz)-1] = index + params.convertInsertedAndDeletedBizEvent(tenantID, one.Document, index) continue } for field := range one.ChangeDesc.UpdatedFields { - if _, exists := params.needCareFieldsMap[field]; exists { - if _, exists := params.updatedFieldsIndexMap[field]; !exists { - params.updatedFieldsIndexMap[field] = index - } - isIgnored = false - } + updateFields = append(updateFields, field) } } - for _, field := range one.ChangeDesc.RemovedFields { - if _, exists := params.needCareFieldsMap[field]; exists { - if _, exists := params.updatedFieldsIndexMap[field]; !exists { - params.updatedFieldsIndexMap[field] = index - } + for _, field := range append(updateFields, one.ChangeDesc.RemovedFields...) 
{ + if _, exists := params.needCareFieldsMap[tenantID][field]; exists { + params.convertUpdateBizEvent(tenantID, field, index) isIgnored = false } } @@ -377,20 +315,39 @@ type convertBizEventParams struct { // es biz events es []*types.Event // insertedAndDeletedBiz inserted and deleted biz event details, handled in the same way - insertedAndDeletedBiz []map[string]interface{} + insertedAndDeletedBiz map[string][]map[string]interface{} // insertedAndDeletedBizIndexMap mapping of insertedAndDeletedBiz index to es index, used to locate origin event - insertedAndDeletedBizIndexMap map[int]int + insertedAndDeletedBizIndexMap map[string]map[int]int // updatedFieldsIndexMap mapping of updated fields to the first update event index, used to locate origin event - updatedFieldsIndexMap map[string]int + updatedFieldsIndexMap map[string]map[string]int // needCareFieldsMap need care biz fields that can be used in biz set scope - needCareFieldsMap map[string]string - // rid request id - rid string + needCareFieldsMap map[string]map[string]string } -// convertBizEvent convert biz event to related biz set event whose relation is affected by biz event -func (b *bizSetRelation) convertBizEvent(params convertBizEventParams) ([]*types.Event, error) { +func (params *convertBizEventParams) convertInsertedAndDeletedBizEvent(tenantID string, doc interface{}, index int) { + detail, ok := doc.(*map[string]interface{}) + if !ok || detail == nil { + return + } + params.insertedAndDeletedBiz[tenantID] = append(params.insertedAndDeletedBiz[tenantID], *detail) + if _, exists := params.insertedAndDeletedBizIndexMap[tenantID]; !exists { + params.insertedAndDeletedBizIndexMap[tenantID] = make(map[int]int) + } + params.insertedAndDeletedBizIndexMap[tenantID][len(params.insertedAndDeletedBiz)-1] = index +} + +func (params *convertBizEventParams) convertUpdateBizEvent(tenantID string, field string, index int) { + if _, exists := params.updatedFieldsIndexMap[tenantID]; !exists { + params.updatedFieldsIndexMap[tenantID] = make(map[string]int) + } + if _, exists := params.updatedFieldsIndexMap[tenantID][field]; !exists { + params.updatedFieldsIndexMap[tenantID][field] = index + } +} + +// convertBizEvent convert biz event to related biz set event whose relation is affected by biz event +func (b *bizSetRelation) convertBizEvent(params convertBizEventParams, rid string) ([]*types.Event, error) { bizSetEvents := make([]*types.Event, 0) if len(params.insertedAndDeletedBiz) == 0 && len(params.updatedFieldsIndexMap) == 0 { @@ -398,17 +355,24 @@ func (b *bizSetRelation) convertBizEvent(params convertBizEventParams) ([]*types } // get all biz sets to check if their scope matches the biz events, because filtering them in db is too complicated - bizSets, err := b.getAllBizSets(context.Background(), params.rid) + tenantIDs := make([]string, 0) + for tenantID := range params.insertedAndDeletedBiz { + tenantIDs = append(tenantIDs, tenantID) + } + for tenantID := range params.updatedFieldsIndexMap { + tenantIDs = append(tenantIDs, tenantID) + } + bizSetsMap, err := b.getAllBizSets(tenantIDs, rid) if err != nil { return nil, err } // get biz events' related biz sets - relatedBizSets, containsMatchAllBizSet := b.getRelatedBizSets(params, bizSets) + relatedBizSets, containsMatchAllBizSetTenants := b.getRelatedBizSets(params, bizSetsMap, rid) // refresh all biz ids cache if the events affected match all biz set, the cache is used to generate detail later - if containsMatchAllBizSet { - if err := b.refreshAllBizIDStr(params.rid); err != nil { + 
for _, tenantID := range containsMatchAllBizSetTenants { + if err := b.refreshAllBizIDStr(tenantID, rid); err != nil { return nil, err } } @@ -420,7 +384,7 @@ func (b *bizSetRelation) convertBizEvent(params convertBizEventParams) ([]*types for _, bizSet := range bizSetArr { doc, err := json.Marshal(bizSet.BizSetInst) if err != nil { - blog.Errorf("marshal biz set(%+v) failed, err: %v, rid: %s", bizSet.BizSetInst, err, params.rid) + blog.Errorf("marshal biz set(%+v) failed, err: %v, rid: %s", bizSet.BizSetInst, err, rid) return nil, err } @@ -429,9 +393,13 @@ func (b *bizSetRelation) convertBizEvent(params convertBizEventParams) ([]*types Document: bizSet.BizSetInst, DocBytes: doc, OperationType: types.Update, - Collection: common.BKTableNameBaseBizSet, - ClusterTime: bizEvent.ClusterTime, - Token: bizEvent.Token, + CollectionInfo: types.CollectionInfo{ + Collection: common.GenTenantTableName(bizEvent.TenantID, common.BKTableNameBaseBizSet), + TenantID: bizEvent.TenantID, + ParsedColl: common.BKTableNameBaseBizSet, + }, + ClusterTime: bizEvent.ClusterTime, + Token: bizEvent.Token, }) } } @@ -440,87 +408,99 @@ func (b *bizSetRelation) convertBizEvent(params convertBizEventParams) ([]*types } // getRelatedBizSets get biz events index to related biz sets map, which is used to generate biz set events -func (b *bizSetRelation) getRelatedBizSets(params convertBizEventParams, bizSets []bizSetWithOid) ( - map[int][]bizSetWithOid, bool) { +func (b *bizSetRelation) getRelatedBizSets(params convertBizEventParams, bizSetsMap map[string][]bizSetWithOid, + rid string) (map[int][]bizSetWithOid, []string) { - containsMatchAllBizSet := false + containsMatchAllBizSetTenants := make([]string, 0) relatedBizSets := make(map[int][]bizSetWithOid, 0) - for _, bizSet := range bizSets { - // for biz set that matches all biz, only insert and delete event will affect their relations - if bizSet.Scope.MatchAll { - if len(params.insertedAndDeletedBiz) > 0 { - eventIndex := params.insertedAndDeletedBizIndexMap[0] - relatedBizSets[eventIndex] = append(relatedBizSets[eventIndex], bizSet) - containsMatchAllBizSet = true + for tenantID, bizSets := range bizSetsMap { + for _, bizSet := range bizSets { + // for biz set that matches all biz, only insert and delete event will affect their relations + if bizSet.Scope.MatchAll { + if len(params.insertedAndDeletedBiz[tenantID]) > 0 { + eventIndex := params.insertedAndDeletedBizIndexMap[tenantID][0] + relatedBizSets[eventIndex] = append(relatedBizSets[eventIndex], bizSet) + containsMatchAllBizSetTenants = append(containsMatchAllBizSetTenants, tenantID) + } + continue } - continue - } - if bizSet.Scope.Filter == nil { - blog.Errorf("biz set(%+v) scope filter is empty, skip, rid: %s", bizSet, params.rid) - continue + if bizSet.Scope.Filter == nil { + blog.Errorf("biz set(%+v) scope filter is empty, skip, rid: %s", bizSet, rid) + continue + } + + firstEventIndex, matched := b.getFirstMatchedEvent(params, bizSet, tenantID, rid) + if matched { + relatedBizSets[firstEventIndex] = append(relatedBizSets[firstEventIndex], bizSet) + } } + } - var firstEventIndex int + return relatedBizSets, containsMatchAllBizSetTenants +} - // update biz event matches all biz sets whose scope contains the updated fields, get all matching fields. 
- matched := bizSet.Scope.Filter.MatchAny(func(r querybuilder.AtomRule) bool { - if index, exists := params.updatedFieldsIndexMap[r.Field]; exists { - if firstEventIndex == 0 || index < firstEventIndex { - firstEventIndex = index - } - return true - } - return false - }) +func (b *bizSetRelation) getFirstMatchedEvent(params convertBizEventParams, bizSet bizSetWithOid, tenantID string, + rid string) (int, bool) { - // check if biz set scope filter matches the inserted/removed biz - for index, biz := range params.insertedAndDeletedBiz { - // if the event index already exceeds the event index of matched update fields, stop checking - eventIndex := params.insertedAndDeletedBizIndexMap[index] - if firstEventIndex != 0 && eventIndex >= firstEventIndex { - break + var firstEventIndex int + + // update biz event matches all biz sets whose scope contains the updated fields, get all matching fields. + matched := bizSet.Scope.Filter.MatchAny(func(r querybuilder.AtomRule) bool { + updatedFieldsIndexMap, exists := params.updatedFieldsIndexMap[tenantID] + if !exists { + return false + } + if index, exists := updatedFieldsIndexMap[r.Field]; exists { + if firstEventIndex == 0 || index < firstEventIndex { + firstEventIndex = index } + return true + } + return false + }) - bizMatched := bizSet.Scope.Filter.Match(func(r querybuilder.AtomRule) bool { - // ignores the biz set filter rule that do not contain need care fields - propertyType, exists := params.needCareFieldsMap[r.Field] - if !exists { - blog.Errorf("biz set(%+v) filter rule contains ignored field, rid: %s", bizSet, params.rid) - return false - } + // check if biz set scope filter matches the inserted/removed biz + for index, biz := range params.insertedAndDeletedBiz[tenantID] { + // if the event index already exceeds the event index of matched update fields, stop checking + eventIndex := params.insertedAndDeletedBizIndexMap[tenantID][index] + if firstEventIndex != 0 && eventIndex >= firstEventIndex { + break + } - // ignores the biz that do not contain the field in filter rule - bizVal, exists := biz[r.Field] - if !exists { - blog.Infof("biz(%+v) do not contain rule field %s, rid: %s", biz, r.Field, params.rid) - return false - } + bizMatched := bizSet.Scope.Filter.Match(func(r querybuilder.AtomRule) bool { + // ignores the biz set filter rule that do not contain need care fields + propertyType, exists := params.needCareFieldsMap[tenantID][r.Field] + if !exists { + blog.Errorf("biz set(%+v) filter rule contains ignored field, rid: %s", bizSet, rid) + return false + } - switch r.Operator { - case querybuilder.OperatorEqual: - return matchEqualOper(r.Value, bizVal, propertyType, params.rid) - case querybuilder.OperatorIn: - return matchInOper(r.Value, bizVal, propertyType, params.rid) - default: - blog.Errorf("biz set(%+v) filter rule contains invalid operator, rid: %s", bizSet, params.rid) - return false - } - }) + // ignores the biz that do not contain the field in filter rule + bizVal, exists := biz[r.Field] + if !exists { + blog.Infof("biz(%+v) do not contain rule field %s, rid: %s", biz, r.Field, rid) + return false + } - if bizMatched { - firstEventIndex = eventIndex - matched = bizMatched - break + switch r.Operator { + case querybuilder.OperatorEqual: + return matchEqualOper(r.Value, bizVal, propertyType, rid) + case querybuilder.OperatorIn: + return matchInOper(r.Value, bizVal, propertyType, rid) + default: + blog.Errorf("biz set(%+v) filter rule contains invalid operator, rid: %s", bizSet, rid) + return false } - } + }) - if matched { - 
relatedBizSets[firstEventIndex] = append(relatedBizSets[firstEventIndex], bizSet) + if bizMatched { + firstEventIndex = eventIndex + matched = bizMatched + break } } - - return relatedBizSets, containsMatchAllBizSet + return firstEventIndex, matched } // matchEqualOper check if biz set scope filter rule with equal operator matches biz value @@ -594,69 +574,42 @@ type bizSetWithOid struct { metadata.BizSetInst `bson:",inline"` } -func (b *bizSetRelation) getAllBizSets(ctx context.Context, rid string) ([]bizSetWithOid, error) { +func (b *bizSetRelation) getAllBizSets(tenantIDs []string, rid string) (map[string][]bizSetWithOid, error) { const step = 500 - bizSets := make([]bizSetWithOid, 0) + bizSetsMap := make(map[string][]bizSetWithOid, 0) - cond := map[string]interface{}{} findOpts := dbtypes.NewFindOpts().SetWithObjectID(true) - for { - oneStep := make([]bizSetWithOid, 0) - err := b.ccDB.Table(common.BKTableNameBaseBizSet).Find(cond, findOpts).Fields(common.BKBizSetIDField, - common.BKBizSetScopeField).Limit(step).Sort(common.BKBizSetIDField).All(ctx, &oneStep) - if err != nil { - blog.Errorf("get biz set failed, err: %v, rid: %s", err, rid) - return nil, err - } + for _, tenantID := range tenantIDs { + kit := rest.NewKit().WithTenant(tenantID).WithRid(rid) + cond := map[string]interface{}{} - bizSets = append(bizSets, oneStep...) + for { + oneStep := make([]bizSetWithOid, 0) + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseBizSet).Find(cond, findOpts). + Fields(common.BKBizSetIDField, common.BKBizSetScopeField).Limit(step). + Sort(common.BKBizSetIDField).All(kit.Ctx, &oneStep) + if err != nil { + blog.Errorf("get biz set failed, err: %v, rid: %s", err, rid) + return nil, err + } - if len(oneStep) < step { - break - } + bizSetsMap[tenantID] = append(bizSetsMap[tenantID], oneStep...) 
- cond = map[string]interface{}{ - common.BKBizSetIDField: map[string]interface{}{common.BKDBGT: oneStep[len(oneStep)-1].BizSetID}, + if len(oneStep) < step { + break + } + + cond = map[string]interface{}{ + common.BKBizSetIDField: map[string]interface{}{common.BKDBGT: oneStep[len(oneStep)-1].BizSetID}, + } } } - return bizSets, nil + return bizSetsMap, nil } -// getDeleteEventDetails get delete events' oid to related detail map from db -func (b *bizSetRelation) getDeleteEventDetails(oids []string, rid string) (map[string][]byte, error) { - oidDetailMap := make(map[string][]byte) - - if len(oids) == 0 { - return oidDetailMap, nil - } - - filter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: oids}, - "coll": b.key.Collection(), - } - - docs := make([]map[string]interface{}, 0) - err := b.ccDB.Table(common.BKTableNameDelArchive).Find(filter).All(context.Background(), &docs) - if err != nil { - b.metrics.CollectMongoError() - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oids: %+v, err: %v, rid: %s", - b.key.Collection(), oids, err, rid) - return nil, err - } - - for _, doc := range docs { - oid := util.GetStrByInterface(doc["oid"]) - byt, err := json.Marshal(doc["detail"]) - if err != nil { - blog.Errorf("get archive deleted doc for collection %s, but marshal detail to bytes failed, oid: %s, "+ - "err: %v, rid: %s", b.key.Collection(), oid, err, rid) - return nil, err - } - oidDetailMap[oid] = byt - } - - return oidDetailMap, nil +func genUniqueKey(e *types.Event) string { + return e.Collection + "-" + e.Oid } diff --git a/src/source_controller/cacheservice/event/bsrelation/event.go b/src/source_controller/cacheservice/event/bsrelation/event.go index f9dd78c015..448b1bc807 100644 --- a/src/source_controller/cacheservice/event/bsrelation/event.go +++ b/src/source_controller/cacheservice/event/bsrelation/event.go @@ -19,6 +19,7 @@ import ( "strconv" "sync" + "configcenter/pkg/tenant" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/json" @@ -27,32 +28,36 @@ import ( "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" mixevent "configcenter/src/source_controller/cacheservice/event/mix-event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/stream" + "configcenter/src/storage/dal/mongo/sharding" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) // newBizSetRelation init and run biz set relation event watch with sub event key func newBizSetRelation(ctx context.Context, opts mixevent.MixEventFlowOptions) error { relation := bizSetRelation{ - watch: opts.Watch, - watchDB: opts.WatchDB, - ccDB: opts.CcDB, - mixKey: opts.MixKey, - key: opts.Key, - needCareBizFields: new(needCareBizFields), - allBizIDStr: new(allBizIDStr), - metrics: event.InitialMetrics(opts.Key.Collection(), "biz_set_relation"), + mixKey: opts.MixKey, + key: opts.Key, + needCareBizFields: &needCareBizFields{ + fieldMap: make(map[string]map[string]string), + }, + allBizIDStr: &allBizIDStr{data: make(map[string]string)}, + metrics: event.InitialMetrics(opts.Key.Collection(), "biz_set_relation"), } // get need care biz fields for biz event conversion, then sync it in goroutine - fields, err := relation.getNeedCareBizFields(context.Background()) + err := tenant.ExecForAllTenants(func(tenantID string) error { + fields, err := relation.getNeedCareBizFields(context.Background(), tenantID) + if err != nil { + blog.Errorf("run 
biz set relation watch, but get %s need care biz fields failed, err: %v", tenantID, err) + return err + } + relation.needCareBizFields.Set(tenantID, fields) + return nil + }) if err != nil { - blog.Errorf("run biz set relation watch, but get need care biz fields failed, err: %v", err) return err } - relation.needCareBizFields.Set(fields) go relation.syncNeedCareBizFields() @@ -66,11 +71,8 @@ func newBizSetRelation(ctx context.Context, opts mixevent.MixEventFlowOptions) e // bizSetRelation biz set relation event watch logic struct type bizSetRelation struct { - watch stream.LoopInterface - watchDB *local.Mongo mixKey event.Key key event.Key - ccDB dal.DB needCareBizFields *needCareBizFields allBizIDStr *allBizIDStr @@ -91,16 +93,18 @@ func (b *bizSetRelation) rearrangeEvents(rid string, es []*types.Event) ([]*type } // parseEvent parse event into chain node and detail, detail is biz set id and its related biz ids -func (b *bizSetRelation) parseEvent(e *types.Event, id uint64, rid string) (*watch.ChainNode, []byte, bool, error) { +func (b *bizSetRelation) parseEvent(e *types.Event, id uint64, rid string) (string, *watch.ChainNode, []byte, bool, + error) { + switch e.OperationType { case types.Insert, types.Update, types.Replace, types.Delete: case types.Invalidate: blog.Errorf("biz set relation event, received invalid event operation type, doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil default: blog.Errorf("biz set relation event, received unsupported event operation type: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } name := b.key.Name(e.DocBytes) @@ -108,7 +112,7 @@ func (b *bizSetRelation) parseEvent(e *types.Event, id uint64, rid string) (*wat if err != nil { blog.Errorf("get %s event cursor failed, name: %s, err: %v, oid: %s, rid: %s", b.key.Collection(), name, err, e.ID(), rid) - return nil, nil, false, err + return "", nil, nil, false, err } chainNode := &watch.ChainNode{ @@ -119,7 +123,6 @@ func (b *bizSetRelation) parseEvent(e *types.Event, id uint64, rid string) (*wat EventType: watch.ConvertOperateType(types.Update), Token: e.Token.Data, Cursor: cursor, - TenantID: b.key.SupplierAccount(e.DocBytes), } if instanceID := b.mixKey.InstanceID(e.DocBytes); instanceID > 0 { @@ -128,7 +131,7 @@ func (b *bizSetRelation) parseEvent(e *types.Event, id uint64, rid string) (*wat relationDetail, err := b.getBizSetRelationDetail(e, rid) if err != nil { - return nil, nil, true, err + return "", nil, nil, true, err } detail := types.EventDetail{ @@ -138,52 +141,51 @@ func (b *bizSetRelation) parseEvent(e *types.Event, id uint64, rid string) (*wat if err != nil { blog.Errorf("run %s flow, %s, marshal detail failed, name: %s, detail: %+v, err: %v, oid: %s, rid: %s", b.mixKey.Collection(), b.key.Collection(), name, detail, err, e.ID(), rid) - return nil, nil, false, err + return "", nil, nil, false, err } - return chainNode, detailBytes, false, nil + return e.TenantID, chainNode, detailBytes, false, nil } // allBizIDStr struct to cache all biz ids in string form to generate detail, refreshed when rearranging events type allBizIDStr struct { - data string + data map[string]string lock sync.RWMutex } // Get get all biz ids in string form -func (a *allBizIDStr) Get() string { +func (a *allBizIDStr) Get(tenantID string) string { a.lock.RLock() defer a.lock.RUnlock() - return a.data + return a.data[tenantID] } // Set set all biz ids in string form -func (a *allBizIDStr) 
Set(data string) { +func (a *allBizIDStr) Set(tenantID string, data string) { a.lock.Lock() defer a.lock.Unlock() - a.data = data + a.data[tenantID] = data } // refreshAllBizIDStr refresh all biz ids in string form -func (b *bizSetRelation) refreshAllBizIDStr(rid string) error { +func (b *bizSetRelation) refreshAllBizIDStr(tenantID string, rid string) error { // do not include resource pool and disabled biz in biz set allBizIDCond := map[string]interface{}{ common.BKDefaultField: mapstr.MapStr{common.BKDBNE: common.DefaultAppFlag}, common.BKDataStatusField: map[string]interface{}{common.BKDBNE: common.DataStatusDisabled}, } - allBizID, err := b.getBizIDArrStrByCond(allBizIDCond, rid) + allBizID, err := b.getBizIDArrStrByCond(tenantID, allBizIDCond, rid) if err != nil { return err } - b.allBizIDStr.Set(allBizID) + b.allBizIDStr.Set(tenantID, allBizID) return nil } // getBizSetRelationDetail get biz set relation detail by biz set event func (b *bizSetRelation) getBizSetRelationDetail(e *types.Event, rid string) (string, error) { - // get biz set relation detail by the scope of biz set event doc bytes bizSet := new(metadata.BizSetInst) if err := json.Unmarshal(e.DocBytes, bizSet); err != nil { @@ -196,7 +198,7 @@ func (b *bizSetRelation) getBizSetRelationDetail(e *types.Event, rid string) (st return event.GenBizSetRelationDetail(bizSet.BizSetID, ""), nil } - allBizID := b.allBizIDStr.Get() + allBizID := b.allBizIDStr.Get(e.TenantID) // biz set that matches all biz uses the same all biz ids from cache if bizSet.Scope.MatchAll { @@ -208,11 +210,11 @@ func (b *bizSetRelation) getBizSetRelationDetail(e *types.Event, rid string) (st } var err error - allBizID, err = b.getBizIDArrStrByCond(allBizIDCond, rid) + allBizID, err = b.getBizIDArrStrByCond(e.TenantID, allBizIDCond, rid) if err != nil { return "", err } - b.allBizIDStr.Set(allBizID) + b.allBizIDStr.Set(e.TenantID, allBizID) } return event.GenBizSetRelationDetail(bizSet.BizSetID, allBizID), nil } @@ -233,7 +235,7 @@ func (b *bizSetRelation) getBizSetRelationDetail(e *types.Event, rid string) (st bizSetBizCond[common.BKDefaultField] = mapstr.MapStr{common.BKDBNE: common.DefaultAppFlag} bizSetBizCond[common.BKDataStatusField] = map[string]interface{}{common.BKDBNE: common.DataStatusDisabled} - bizIDStr, err := b.getBizIDArrStrByCond(bizSetBizCond, rid) + bizIDStr, err := b.getBizIDArrStrByCond(e.TenantID, bizSetBizCond, rid) if err != nil { return "", err } @@ -241,7 +243,9 @@ func (b *bizSetRelation) getBizSetRelationDetail(e *types.Event, rid string) (st return event.GenBizSetRelationDetail(bizSet.BizSetID, bizIDStr), nil } -func (b *bizSetRelation) getBizIDArrStrByCond(cond map[string]interface{}, rid string) (string, error) { +func (b *bizSetRelation) getBizIDArrStrByCond(tenantID string, cond map[string]interface{}, rid string) (string, + error) { + const step = 500 bizIDJson := bytes.Buffer{} @@ -249,8 +253,9 @@ func (b *bizSetRelation) getBizIDArrStrByCond(cond map[string]interface{}, rid s for start := uint64(0); ; start += step { oneStep := make([]metadata.BizInst, 0) - err := b.ccDB.Table(common.BKTableNameBaseApp).Find(cond).Fields(common.BKAppIDField).Start(start). - Limit(step).Sort(common.BKAppIDField).All(context.Background(), &oneStep) + err := mongodb.Shard(sharding.NewShardOpts().WithTenant(tenantID)).Table(common.BKTableNameBaseApp).Find(cond). + Fields(common.BKAppIDField).Start(start).Limit(step).Sort(common.BKAppIDField). 
+ All(context.Background(), &oneStep) if err != nil { blog.Errorf("get biz by cond(%+v) failed, err: %v, rid: %s", cond, err, rid) return "", err diff --git a/src/source_controller/cacheservice/event/flow/details.go b/src/source_controller/cacheservice/event/flow/details.go deleted file mode 100644 index 1fd7fd4315..0000000000 --- a/src/source_controller/cacheservice/event/flow/details.go +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making - * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. - * Copyright (C) 2017 THL A29 Limited, - * a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * We undertake not to change the open source license (MIT license) applicable - * to the current version of the project delivered to anyone in the future. - */ - -package flow - -import ( - "context" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/json" - "configcenter/src/common/util" - "configcenter/src/common/util/table" - "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" - "configcenter/src/storage/stream/types" -) - -// getDeleteEventDetailsFunc function type for getting delete events' oid+collection to related detail map -type getDeleteEventDetailsFunc func(es []*types.Event, db dal.DB, metrics *event.EventMetrics) (map[oidCollKey][]byte, - bool, error) - -// getDeleteEventDetails get delete events' oid+collection to related detail map from cmdb -func getDeleteEventDetails(es []*types.Event, db dal.DB, metrics *event.EventMetrics) (map[oidCollKey][]byte, bool, - error) { - - oidDetailMap := make(map[oidCollKey][]byte) - if len(es) == 0 { - return oidDetailMap, false, nil - } - - deletedEventOidMap := make(map[string][]string, 0) - for _, e := range es { - if e.OperationType == types.Delete { - deletedEventOidMap[e.Collection] = append(deletedEventOidMap[e.Collection], e.Oid) - } - } - - if len(deletedEventOidMap) == 0 { - return oidDetailMap, false, nil - } - - for collection, deletedEventOids := range deletedEventOidMap { - delArchiveTable, exists := table.GetDelArchiveTable(collection) - if !exists { - blog.Errorf("collection %s related del archive table not exists", collection) - continue - } - - filter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: deletedEventOids}, - "coll": collection, - } - - docs := make([]map[string]interface{}, 0) - err := db.Table(delArchiveTable).Find(filter).All(context.Background(), &docs) - if err != nil { - metrics.CollectMongoError() - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oids: %+v, err: %v", - collection, deletedEventOids, err) - return nil, true, err - } - - for _, doc := range docs { - oid := util.GetStrByInterface(doc["oid"]) - byt, err := json.Marshal(doc["detail"]) - if err != nil { - blog.Errorf("received delete %s event, but marshal detail to bytes failed, oid: %s, err: %v", - collection, oid, err) - return nil, false, err - } - 
oidDetailMap[oidCollKey{oid: oid, coll: collection}] = byt - } - } - - return oidDetailMap, false, nil -} - -// getDeleteEventDetails get delete events' oid+collection to related detail map from cmdb -func getHostDeleteEventDetails(es []*types.Event, db dal.DB, metrics *event.EventMetrics) (map[oidCollKey][]byte, bool, - error) { - - oidDetailMap := make(map[oidCollKey][]byte) - if len(es) == 0 { - return oidDetailMap, false, nil - } - - deletedEventOids := make([]string, 0) - for _, e := range es { - if e.OperationType == types.Delete { - deletedEventOids = append(deletedEventOids, e.Oid) - } - } - - if len(deletedEventOids) == 0 { - return oidDetailMap, false, nil - } - - filter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: deletedEventOids}, - "coll": common.BKTableNameBaseHost, - } - - docs := make([]event.HostArchive, 0) - err := db.Table(common.BKTableNameDelArchive).Find(filter).All(context.Background(), &docs) - if err != nil { - metrics.CollectMongoError() - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oids: %+v, err: %v", - common.BKTableNameBaseHost, deletedEventOids, err) - return nil, true, err - } - - for _, doc := range docs { - byt, err := json.Marshal(doc.Detail) - if err != nil { - blog.Errorf("received delete %s event, but marshal detail to bytes failed, oid: %s, err: %v", - common.BKTableNameBaseHost, doc.Oid, err) - return nil, false, err - } - oidDetailMap[oidCollKey{oid: doc.Oid, coll: common.BKTableNameBaseHost}] = byt - } - - return oidDetailMap, false, nil -} diff --git a/src/source_controller/cacheservice/event/flow/event.go b/src/source_controller/cacheservice/event/flow/event.go index 4913cbd834..2efe2aed8c 100644 --- a/src/source_controller/cacheservice/event/flow/event.go +++ b/src/source_controller/cacheservice/event/flow/event.go @@ -14,31 +14,17 @@ package flow import ( "context" - "fmt" - "configcenter/src/apimachinery/discovery" "configcenter/src/common/blog" "configcenter/src/common/metadata" "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) -// NewEvent TODO -func NewEvent(watch stream.LoopInterface, isMaster discovery.ServiceManageInterface, watchDB dal.DB, - ccDB dal.DB) error { - watchMongoDB, ok := watchDB.(*local.Mongo) - if !ok { - blog.Errorf("watch event, but watch db is not an instance of local mongo to start transaction") - return fmt.Errorf("watch db is not an instance of local mongo") - } - +// NewEvent new event flow +func NewEvent(watchTask *task.Task) error { e := Event{ - watch: watch, - isMaster: isMaster, - watchDB: watchMongoDB, - ccDB: ccDB, + task: watchTask, } if err := e.runHost(context.Background()); err != nil { @@ -102,166 +88,127 @@ func NewEvent(watch stream.LoopInterface, isMaster discovery.ServiceManageInterf return nil } -// Event TODO +// Event is the event flow struct type Event struct { - watch stream.LoopInterface - watchDB *local.Mongo - ccDB dal.DB - isMaster discovery.ServiceManageInterface + task *task.Task } func (e *Event) runHost(ctx context.Context) error { opts := flowOptions{ key: event.HostKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(metadata.HostMapStr), } - return newFlow(ctx, opts, getHostDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runModuleHostRelation(ctx 
context.Context) error { opts := flowOptions{ key: event.ModuleHostRelationKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runBiz(ctx context.Context) error { opts := flowOptions{ key: event.BizKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runSet(ctx context.Context) error { opts := flowOptions{ key: event.SetKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runModule(ctx context.Context) error { opts := flowOptions{ key: event.ModuleKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runObjectBase(ctx context.Context) error { opts := flowOptions{ key: event.ObjectBaseKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newInstanceFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newInstanceFlow(ctx, opts, parseEvent) } func (e *Event) runProcess(ctx context.Context) error { opts := flowOptions{ key: event.ProcessKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runProcessInstanceRelation(ctx context.Context) error { opts := flowOptions{ key: event.ProcessInstanceRelationKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runInstAsst(ctx context.Context) error { opts := flowOptions{ key: event.InstAsstKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newInstAsstFlow(ctx, opts, getDeleteEventDetails, parseInstAsstEvent) + return newInstAsstFlow(ctx, opts, parseInstAsstEvent) } func (e *Event) runBizSet(ctx context.Context) error { opts := flowOptions{ key: event.BizSetKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runPlat(ctx context.Context) error { opts := flowOptions{ key: event.PlatKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } func (e *Event) runProject(ctx context.Context) error { opts 
:= flowOptions{ key: event.ProjectKey, - watch: e.watch, - watchDB: e.watchDB, - ccDB: e.ccDB, - isMaster: e.isMaster, + task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, getDeleteEventDetails, parseEvent) + return newFlow(ctx, opts, parseEvent) } diff --git a/src/source_controller/cacheservice/event/flow/flow.go b/src/source_controller/cacheservice/event/flow/flow.go index 7c3247e751..11f497dae3 100644 --- a/src/source_controller/cacheservice/event/flow/flow.go +++ b/src/source_controller/cacheservice/event/flow/flow.go @@ -16,46 +16,31 @@ package flow import ( "context" "fmt" - "strings" "sync" "time" - "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" - types2 "configcenter/src/common/types" "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/dal/mongo/sharding" + ccredis "configcenter/src/storage/dal/redis" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" - "configcenter/src/thirdparty/monitor" - "configcenter/src/thirdparty/monitor/meta" "go.mongodb.org/mongo-driver/mongo" ) type flowOptions struct { key event.Key - watch stream.LoopInterface - isMaster discovery.ServiceManageInterface - watchDB *local.Mongo - ccDB dal.DB + task *task.Task EventStruct interface{} } -// oidCollKey key for oid to detail map. Since oid can duplicate in different collections, we need oid & coll for unique -type oidCollKey struct { - oid string - coll string -} - -func newFlow(ctx context.Context, opts flowOptions, getDeleteEventDetails getDeleteEventDetailsFunc, - parseEvent parseEventFunc) error { - - flow, err := NewFlow(opts, getDeleteEventDetails, parseEvent) +func newFlow(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { + flow, err := NewFlow(opts, parseEvent) if err != nil { return err } @@ -64,20 +49,15 @@ func newFlow(ctx context.Context, opts flowOptions, getDeleteEventDetails getDel } // NewFlow create a new event watch flow -func NewFlow(opts flowOptions, getDelEventDetails getDeleteEventDetailsFunc, parseEvent parseEventFunc) (Flow, error) { - if getDelEventDetails == nil { - return Flow{}, fmt.Errorf("getDeleteEventDetailsFunc is not set, key: %s", opts.key.Namespace()) - } - +func NewFlow(opts flowOptions, parseEvent parseEventFunc) (Flow, error) { if parseEvent == nil { return Flow{}, fmt.Errorf("parseEventFunc is not set, key: %s", opts.key.Namespace()) } return Flow{ - flowOptions: opts, - metrics: event.InitialMetrics(opts.key.Collection(), "watch"), - getDeleteEventDetails: getDelEventDetails, - parseEvent: parseEvent, + flowOptions: opts, + metrics: event.InitialMetrics(opts.key.Collection(), "watch"), + parseEvent: parseEvent, cursorQueue: &cursorQueue{ cursorQueue: make(map[string]string), }, @@ -87,11 +67,10 @@ func NewFlow(opts flowOptions, getDelEventDetails getDeleteEventDetailsFunc, par // Flow TODO type Flow struct { flowOptions - metrics *event.EventMetrics - tokenHandler *flowTokenHandler - getDeleteEventDetails getDeleteEventDetailsFunc - parseEvent parseEventFunc - cursorQueue *cursorQueue + metrics *event.EventMetrics + tokenHandler *flowTokenHandler + parseEvent parseEventFunc + cursorQueue *cursorQueue } // cursorQueue saves the specific amount of previous cursors to check 
if event is duplicated with previous batch's event @@ -104,25 +83,26 @@ type cursorQueue struct { } // checkIfConflict check if the cursor is conflict with previous cursors, maintain the length of the queue -func (c *cursorQueue) checkIfConflict(cursor string) bool { +func (c *cursorQueue) checkIfConflict(uuid, cursor string) bool { + dbCursor := uuid + "-" + cursor c.lock.Lock() defer c.lock.Unlock() - if _, exists := c.cursorQueue[cursor]; exists { + if _, exists := c.cursorQueue[dbCursor]; exists { return true } if c.length <= 0 { - c.head = cursor - c.tail = cursor - c.cursorQueue[cursor] = "" + c.head = dbCursor + c.tail = dbCursor + c.cursorQueue[dbCursor] = "" c.length++ return false } // append cursor to the tail of the cursor queue - c.cursorQueue[c.tail] = cursor - c.cursorQueue[cursor] = "" - c.tail = cursor + c.cursorQueue[c.tail] = dbCursor + c.cursorQueue[dbCursor] = "" + c.tail = dbCursor if c.length < cursorQueueSize { c.length++ @@ -142,53 +122,44 @@ const ( cursorQueueSize = 50000 ) -// RunFlow TODO +// RunFlow run event flow func (f *Flow) RunFlow(ctx context.Context) error { blog.Infof("start run flow for key: %s.", f.key.Namespace()) - f.tokenHandler = NewFlowTokenHandler(f.key, f.watchDB, f.metrics) - - startAtTime, err := f.tokenHandler.getStartWatchTime(ctx) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", f.key.Collection(), err) - return err - } - - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: f.EventStruct, - Collection: f.key.Collection(), - StartAfterToken: nil, - StartAtTime: startAtTime, - WatchFatalErrorCallback: f.tokenHandler.resetWatchToken, - }, - } - - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: f.key.Namespace(), - WatchOpt: watchOpts, + f.tokenHandler = NewFlowTokenHandler(f.key, f.metrics) + + opts := &types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ + Name: f.key.Namespace(), + CollOpts: &types.WatchCollOptions{ + CollectionOptions: types.CollectionOptions{ + CollectionFilter: &types.CollectionFilter{ + Regex: fmt.Sprintf("_%s$", f.key.Collection()), + }, + EventStruct: f.EventStruct, + }, + }, TokenHandler: f.tokenHandler, RetryOptions: &types.RetryOptions{ MaxRetryCount: 10, RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ + EventHandler: &types.TaskBatchHandler{ DoBatch: f.doBatch, }, BatchSize: batchSize, } - if err := f.watch.WithBatch(opts); err != nil { - blog.Errorf("run flow, but watch batch failed, err: %v", err) + err := f.task.AddLoopBatchTask(opts) + if err != nil { + blog.Errorf("run %s flow, but add loop batch task failed, err: %v", f.key.Namespace(), err) return err } - return nil } -func (f *Flow) doBatch(es []*types.Event) (retry bool) { +func (f *Flow) doBatch(dbInfo *types.DBInfo, es []*types.Event) (retry bool) { eventLen := len(es) if eventLen == 0 { return false @@ -209,32 +180,78 @@ func (f *Flow) doBatch(es []*types.Event) (retry bool) { f.metrics.CollectCycleDuration(time.Since(start)) }() - oidDetailMap, retry, err := f.getDeleteEventDetails(es, f.ccDB, f.metrics) + chainNodes, oids, pipe, hitConflict, err := f.parseEvents(dbInfo, es, rid) + if err != nil { + return true + } + + lastTokenData := map[string]interface{}{ + common.BKTokenField: es[eventLen-1].Token.Data, + common.BKStartAtTimeField: es[eventLen-1].ClusterTime, + } + + // if all events are invalid, set last token to the last events' token, do not need to retry for the invalid ones + if len(chainNodes) == 0 { + err = 
f.tokenHandler.setLastWatchToken(context.Background(), dbInfo.UUID, dbInfo.WatchDB, lastTokenData) + if err != nil { + f.metrics.CollectMongoError() + return false + } + return false + } + + // store details at first, in case those watching cmdb events read chain when details are not inserted yet + if _, err := pipe.Exec(); err != nil { + f.metrics.CollectRedisError() + blog.Errorf("run flow, but insert details for %s failed, oids: %+v, err: %v, rid: %s,", f.key.Collection(), + oids, err, rid) + return true + } + + if hitConflict { + // update the chain nodes with picked chain nodes, so that we can handle them later. + chainNodes = f.rearrangeEvents(chainNodes, rid) + } + + retry, err = f.doInsertEvents(dbInfo, chainNodes, lastTokenData, rid) if err != nil { - blog.Errorf("get deleted event details failed, err: %v, rid: %s", err, rid) return retry } - ids, err := f.watchDB.NextSequences(context.Background(), f.key.ChainCollection(), eventLen) + blog.Infof("insert watch event for %s success, oids: %v, rid: %s", f.key.Collection(), oids, rid) + hasError = false + return false +} + +func (f *Flow) parseEvents(dbInfo *types.DBInfo, es []*types.Event, rid string) (map[string][]*watch.ChainNode, + []string, ccredis.Pipeliner, bool, error) { + + eventLen := len(es) + ids, err := dbInfo.WatchDB.NextSequences(context.Background(), f.key.ChainCollection(), eventLen) if err != nil { blog.Errorf("get %s event ids failed, err: %v, rid: %s", f.key.ChainCollection(), err, rid) - return true + return nil, nil, nil, false, err } - chainNodes := make([]*watch.ChainNode, 0) + chainNodes := make(map[string][]*watch.ChainNode, 0) oids := make([]string, eventLen) // process events into db chain nodes to store in db and details to store in redis pipe := redis.Client().Pipeline() cursorMap := make(map[string]struct{}) hitConflict := false + for index, e := range es { // collect event's basic metrics f.metrics.CollectBasic(e) - chainNode, detail, retry, err := f.parseEvent(f.ccDB, f.key, e, oidDetailMap, ids[index], rid) + tenant, chainNode, detail, retry, err := f.parseEvent(dbInfo.CcDB, f.key, e, ids[index], rid) if err != nil { - return retry + if retry { + return nil, nil, nil, false, err + } + continue } + if chainNode == nil { continue } @@ -242,8 +259,8 @@ func (f *Flow) doBatch(es []*types.Event) (retry bool) { // if hit cursor conflict, the former cursor node's detail will be overwrite by the later one, so it // is not needed to remove the overlapped cursor node's detail again. ttl := time.Duration(f.key.TTLSeconds()) * time.Second - pipe.Set(f.key.DetailKey(chainNode.Cursor), string(detail.eventInfo), ttl) - pipe.Set(f.key.GeneralResDetailKey(chainNode), string(detail.resDetail), ttl) + pipe.Set(f.key.DetailKey(tenant, chainNode.Cursor), string(detail.eventInfo), ttl) + pipe.Set(f.key.GeneralResDetailKey(tenant, chainNode), string(detail.resDetail), ttl) // validate if the cursor already exists in the batch, this happens when the concurrency is very high. 
// which will generate the same operation event with same cluster time, and generate with the same cursor @@ -255,94 +272,64 @@ func (f *Flow) doBatch(es []*types.Event) (retry bool) { } // if the cursor is conflict with another cursor in the former batches, skip it - if f.cursorQueue.checkIfConflict(chainNode.Cursor) && !exists { + if f.cursorQueue.checkIfConflict(dbInfo.UUID, chainNode.Cursor) && !exists { f.metrics.CollectConflict() continue } cursorMap[chainNode.Cursor] = struct{}{} oids[index] = e.ID() - chainNodes = append(chainNodes, chainNode) - } - lastTokenData := map[string]interface{}{ - common.BKTokenField: es[eventLen-1].Token.Data, - common.BKStartAtTimeField: es[eventLen-1].ClusterTime, + chainNodes[tenant] = append(chainNodes[tenant], chainNode) } - // if all events are invalid, set last token to the last events' token, do not need to retry for the invalid ones - if len(chainNodes) == 0 { - if err := f.tokenHandler.setLastWatchToken(context.Background(), lastTokenData); err != nil { - f.metrics.CollectMongoError() - return false - } - return false - } - - // store details at first, in case those watching cmdb events read chain when details are not inserted yet - if _, err := pipe.Exec(); err != nil { - f.metrics.CollectRedisError() - blog.Errorf("run flow, but insert details for %s failed, oids: %+v, err: %v, rid: %s,", f.key.Collection(), - oids, err, rid) - return true - } - - if hitConflict { - // update the chain nodes with picked chain nodes, so that we can handle them later. - chainNodes = f.rearrangeEvents(chainNodes, rid) - } - - retry, err = f.doInsertEvents(chainNodes, lastTokenData, rid) - if err != nil { - return retry - } - - blog.Infof("insert watch event for %s success, oids: %v, rid: %s", f.key.Collection(), oids, rid) - hasError = false - return false + return chainNodes, oids, pipe, hitConflict, nil } // rearrangeEvents remove the earlier chain nodes with the same cursor with a later one -func (f *Flow) rearrangeEvents(chainNodes []*watch.ChainNode, rid string) []*watch.ChainNode { - pickedChainNodes := make([]*watch.ChainNode, 0) - conflictNodes := make([]*watch.ChainNode, 0) - reminder := make(map[string]struct{}) - for i := len(chainNodes) - 1; i >= 0; i-- { - chainNode := chainNodes[i] - if _, exists := reminder[chainNode.Cursor]; exists { - conflictNodes = append(conflictNodes, chainNode) - // skip this event, because it has been replaced the the one later. - continue +func (f *Flow) rearrangeEvents(chainNodeMap map[string][]*watch.ChainNode, rid string) map[string][]*watch.ChainNode { + pickedChainNodeMap := make(map[string][]*watch.ChainNode) + + for tenantID, chainNodes := range chainNodeMap { + pickedChainNodes := make([]*watch.ChainNode, 0) + conflictNodes := make([]*watch.ChainNode, 0) + reminder := make(map[string]struct{}) + for i := len(chainNodes) - 1; i >= 0; i-- { + chainNode := chainNodes[i] + if _, exists := reminder[chainNode.Cursor]; exists { + conflictNodes = append(conflictNodes, chainNode) + // skip this event, because it has been replaced the one later. 
+ continue + } + + reminder[chainNode.Cursor] = struct{}{} + pickedChainNodes = append(pickedChainNodes, chainNode) } - reminder[chainNode.Cursor] = struct{}{} - pickedChainNodes = append(pickedChainNodes, chainNode) - } + // reverse the picked chain nodes to their origin order + for i, j := 0, len(pickedChainNodes)-1; i < j; i, j = i+1, j-1 { + pickedChainNodes[i], pickedChainNodes[j] = pickedChainNodes[j], pickedChainNodes[i] + } - // reverse the picked chain nodes to their origin order - for i, j := 0, len(pickedChainNodes)-1; i < j; i, j = i+1, j-1 { - pickedChainNodes[i], pickedChainNodes[j] = pickedChainNodes[j], pickedChainNodes[i] - } + blog.WarnJSON("got tenant %s conflict cursor with chain nodes: %s, replaced with nodes: %s, rid: %s", + tenantID, conflictNodes, pickedChainNodes, rid) - blog.WarnJSON("got conflict got conflict cursor with chain nodes: %s, replaced with nodes: %s, rid: %s", - conflictNodes, pickedChainNodes, rid) + pickedChainNodeMap[tenantID] = pickedChainNodes + } - return pickedChainNodes + return pickedChainNodeMap } -func (f *Flow) doInsertEvents(chainNodes []*watch.ChainNode, lastTokenData map[string]interface{}, rid string) ( - bool, error) { +func (f *Flow) doInsertEvents(dbInfo *types.DBInfo, chainNodeMap map[string][]*watch.ChainNode, + lastTokenData map[string]interface{}, rid string) (bool, error) { - count := len(chainNodes) - - if count == 0 { + if len(chainNodeMap) == 0 { return false, nil } + coll := f.key.Collection() - watchDBClient := f.watchDB.GetDBClient() - - session, err := watchDBClient.StartSession() + session, err := dbInfo.WatchDB.GetDBClient().StartSession() if err != nil { - blog.Errorf("run flow, but start session failed, coll: %s, err: %v, rid: %s", f.key.Collection(), err, rid) + blog.Errorf("run flow, but start session failed, coll: %s, err: %v, rid: %s", coll, err, rid) return true, err } defer session.EndSession(context.Background()) @@ -350,32 +337,20 @@ func (f *Flow) doInsertEvents(chainNodes []*watch.ChainNode, lastTokenData map[s // retry insert the event node with remove the first event node, // which means the first one's cursor is conflicted with the former's batch operation inserted nodes. 
retryWithReduce := false + var conflictTenantID string txnErr := mongo.WithSession(context.Background(), session, func(sc mongo.SessionContext) error { if err = session.StartTransaction(); err != nil { - blog.Errorf("run flow, but start transaction failed, coll: %s, err: %v, rid: %s", f.key.Collection(), - err, rid) + blog.Errorf("run flow, but start transaction failed, coll: %s, err: %v, rid: %s", coll, err, rid) return err } - if err := f.watchDB.Table(f.key.ChainCollection()).Insert(sc, chainNodes); err != nil { - blog.ErrorJSON("run flow, but insert chain nodes for %s failed, nodes: %s, err: %v, rid: %s", - f.key.Collection(), chainNodes, err, rid) - f.metrics.CollectMongoError() - _ = session.AbortTransaction(context.Background()) - - if event.IsConflictError(err) { - // set retry with reduce flag and retry later - retryWithReduce = true - } + err, retryWithReduce, conflictTenantID = f.insertChainNodes(sc, session, f.key, chainNodeMap, rid) + if err != nil { return err } - lastNode := chainNodes[len(chainNodes)-1] - lastTokenData[common.BKFieldID] = lastNode.ID - lastTokenData[common.BKCursorField] = lastNode.Cursor - lastTokenData[common.BKStartAtTimeField] = lastNode.ClusterTime - if err := f.tokenHandler.setLastWatchToken(sc, lastTokenData); err != nil { + if err = f.tokenHandler.setLastWatchToken(sc, dbInfo.UUID, dbInfo.WatchDB, lastTokenData); err != nil { f.metrics.CollectMongoError() _ = session.AbortTransaction(context.Background()) return err @@ -393,49 +368,61 @@ func (f *Flow) doInsertEvents(chainNodes []*watch.ChainNode, lastTokenData map[s if txnErr != nil { blog.Errorf("do insert flow events failed, err: %v, rid: %s", txnErr, rid) - - rid = rid + ":" + chainNodes[0].Oid if retryWithReduce { - monitor.Collect(&meta.Alarm{ - RequestID: rid, - Type: meta.EventFatalError, - Detail: fmt.Sprintf("run event flow, but got conflict %s cursor with chain nodes", - f.key.Collection()), - Module: types2.CC_MODULE_CACHESERVICE, - Dimension: map[string]string{"retry_conflict_nodes": "yes"}, - }) - - if len(chainNodes) <= 1 { + chainNodeMap = event.ReduceChainNode(chainNodeMap, conflictTenantID, coll, txnErr, f.metrics, rid) + if len(chainNodeMap) == 0 { return false, nil } + return f.doInsertEvents(dbInfo, chainNodeMap, lastTokenData, rid) + } + // if an error occurred, roll back and re-watch again + return true, err + } + return false, nil +} - for index, reducedChainNode := range chainNodes { - if isConflictChainNode(reducedChainNode, txnErr) { - f.metrics.CollectConflict() - chainNodes = append(chainNodes[:index], chainNodes[index+1:]...) 
+// insertChainNodes insert chain nodes and last event info into db +func (f *Flow) insertChainNodes(ctx context.Context, session mongo.Session, key event.Key, + chainNodeMap map[string][]*watch.ChainNode, rid string) (error, bool, string) { - // need do with retry with reduce - blog.ErrorJSON("run flow, insert %s event with reduce node %s, remain nodes: %s, rid: %s", - f.key.Collection(), reducedChainNode, chainNodes, rid) + for tenantID, chainNodes := range chainNodeMap { + if len(chainNodes) == 0 { + continue + } - return f.doInsertEvents(chainNodes, lastTokenData, rid) - } - } + shardingDB := mongodb.Dal("watch").Shard(sharding.NewShardOpts().WithTenant(tenantID)) - // when no cursor conflict node is found, discard the first node and try to insert the others - blog.ErrorJSON("run flow, insert %s event with reduce node %s, remain nodes: %s, rid: %s", - f.key.Collection(), chainNodes[0], chainNodes[1:], rid) + // insert chain nodes into db + if err := shardingDB.Table(key.ChainCollection()).Insert(ctx, chainNodes); err != nil { + blog.ErrorJSON("run flow, but insert tenant %s chain nodes for %s failed, nodes: %s, err: %s, rid: %s", + tenantID, key.Collection(), chainNodes, err, rid) + f.metrics.CollectMongoError() + _ = session.AbortTransaction(context.Background()) - return f.doInsertEvents(chainNodes[1:], lastTokenData, rid) + if event.IsConflictError(err) { + return err, true, tenantID + } + return err, false, "" } - // if an error occurred, roll back and re-watch again - return true, err - } + // set last watch event info for tenant + lastNode := chainNodes[len(chainNodes)-1] + lastNodeInfo := map[string]interface{}{ + common.BKFieldID: lastNode.ID, + common.BKCursorField: lastNode.Cursor, + } - return false, nil -} + filter := map[string]interface{}{ + "_id": key.Collection(), + } + if err := shardingDB.Table(common.BKTableNameLastWatchEvent).Update(ctx, filter, lastNodeInfo); err != nil { + blog.Errorf("insert %s last event info(%+v) for coll %s failed, err: %v, rid: %s", tenantID, lastNodeInfo, + key.Collection(), err, rid) + f.metrics.CollectMongoError() + _ = session.AbortTransaction(context.Background()) + return err, false, "" + } + } -func isConflictChainNode(chainNode *watch.ChainNode, err error) bool { - return strings.Contains(err.Error(), chainNode.Cursor) && strings.Contains(err.Error(), "index_cursor") + return nil, false, "" } diff --git a/src/source_controller/cacheservice/event/flow/handler.go b/src/source_controller/cacheservice/event/flow/handler.go index bca7e98c17..d1374ab219 100644 --- a/src/source_controller/cacheservice/event/flow/handler.go +++ b/src/source_controller/cacheservice/event/flow/handler.go @@ -19,48 +19,49 @@ package flow import ( "context" + "time" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) -var _ = types.TokenHandler(&flowTokenHandler{}) - type flowTokenHandler struct { key event.Key - watchDB dal.DB metrics *event.EventMetrics } // NewFlowTokenHandler new flow token handler -func NewFlowTokenHandler(key event.Key, watchDB dal.DB, metrics *event.EventMetrics) *flowTokenHandler { +func NewFlowTokenHandler(key event.Key, metrics *event.EventMetrics) *flowTokenHandler { return &flowTokenHandler{ key: key, - watchDB: watchDB, metrics: metrics, } } -/* SetLastWatchToken do not set last 
watch token in the do batch action(set it after events are successfully inserted) - when there are several masters watching db event, we use db transaction to avoid inserting duplicate data by setting - the last token after the insertion of db chain nodes in one transaction, since we have a unique index on the cursor - field, the later one will encounters an error when inserting nodes and roll back without setting the token and watch - another round from the last token of the last inserted node, thus ensures the sequence of db chain nodes. +/* +SetLastWatchToken do not set last watch token in the do batch action(set it after events are successfully inserted) +when there are several masters watching db event, we use db transaction to avoid inserting duplicate data by setting +the last token after the insertion of db chain nodes in one transaction, since we have a unique index on the cursor +field, the later one will encounter an error when inserting nodes and roll back without setting the token and watch +another round from the last token of the last inserted node, thus ensures the sequence of db chain nodes. */ -func (f *flowTokenHandler) SetLastWatchToken(ctx context.Context, token string) error { +func (f *flowTokenHandler) SetLastWatchToken(_ context.Context, _ string, _ local.DB, _ *types.TokenInfo) error { return nil } // setLastWatchToken set last watch token(used after events are successfully inserted) -func (f *flowTokenHandler) setLastWatchToken(ctx context.Context, data map[string]interface{}) error { +func (f *flowTokenHandler) setLastWatchToken(ctx context.Context, uuid string, watchDB local.DB, + data map[string]any) error { + filter := map[string]interface{}{ - "_id": f.key.Collection(), + "_id": watch.GenDBWatchTokenID(uuid, f.key.Collection()), } - if err := f.watchDB.Table(common.BKTableNameWatchToken).Update(ctx, filter, data); err != nil { + if err := watchDB.Table(common.BKTableNameWatchToken).Update(ctx, filter, data); err != nil { blog.Errorf("set last watch token failed, err: %v, data: %+v", err, data) return err } @@ -68,69 +69,24 @@ func (f *flowTokenHandler) setLastWatchToken(ctx context.Context, data map[strin } // GetStartWatchToken get start watch token from watch token db first, if an error occurred, get from chain db -func (f *flowTokenHandler) GetStartWatchToken(ctx context.Context) (token string, err error) { - filter := map[string]interface{}{ - "_id": f.key.Collection(), - } - - data := new(watch.LastChainNodeData) - err = f.watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(common.BKTokenField).One(ctx, data) - if err != nil { - if !f.watchDB.IsNotFoundError(err) { - f.metrics.CollectMongoError() - blog.ErrorJSON("run flow, but get start watch token failed, err: %v, filter: %+v", err, filter) - } - - tailNode := new(watch.ChainNode) - if err := f.watchDB.Table(f.key.ChainCollection()).Find(map[string]interface{}{}).Fields(common.BKTokenField). - Sort(common.BKFieldID+":-1").One(context.Background(), tailNode); err != nil { - - if !f.watchDB.IsNotFoundError(err) { - f.metrics.CollectMongoError() - blog.Errorf("get last watch token from mongo failed, err: %v", err) - return "", err - } - // the tail node is not exist. 
- return "", nil - } - return tailNode.Token, nil - } - - return data.Token, nil -} - -// resetWatchToken set watch token to empty and set the start watch time to the given one for next watch -func (f *flowTokenHandler) resetWatchToken(startAtTime types.TimeStamp) error { - data := map[string]interface{}{ - common.BKTokenField: "", - common.BKStartAtTimeField: startAtTime, - } +func (f *flowTokenHandler) GetStartWatchToken(ctx context.Context, uuid string, watchDB local.DB) (*types.TokenInfo, + error) { filter := map[string]interface{}{ - "_id": f.key.Collection(), + "_id": watch.GenDBWatchTokenID(uuid, f.key.Collection()), } - if err := f.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { - blog.ErrorJSON("clear watch token failed, err: %s, collection: %s, data: %s", err, f.key.Collection(), data) - return err - } - return nil -} - -func (f *flowTokenHandler) getStartWatchTime(ctx context.Context) (*types.TimeStamp, error) { - filter := map[string]interface{}{ - "_id": f.key.Collection(), - } - - data := new(watch.LastChainNodeData) - err := f.watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(common.BKStartAtTimeField).One(ctx, data) + data := new(types.TokenInfo) + err := watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(common.BKTokenField, + common.BKStartAtTimeField).One(ctx, data) if err != nil { - if !f.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { f.metrics.CollectMongoError() - blog.ErrorJSON("run flow, but get start watch time failed, err: %v, filter: %+v", err, filter) - return nil, err + blog.Errorf("run flow, but get start watch token failed, err: %v, filter: %+v", err, filter) } - return new(types.TimeStamp), nil + // the tail node is not exist. 
+ return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil } - return &data.StartAtTime, nil + + return data, nil } diff --git a/src/source_controller/cacheservice/event/flow/inst_asst_flow.go b/src/source_controller/cacheservice/event/flow/inst_asst_flow.go index 7efd1d297e..f886d02178 100644 --- a/src/source_controller/cacheservice/event/flow/inst_asst_flow.go +++ b/src/source_controller/cacheservice/event/flow/inst_asst_flow.go @@ -14,18 +14,16 @@ package flow import ( "context" + "fmt" "time" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/source_controller/cacheservice/event" "configcenter/src/storage/stream/types" ) -func newInstAsstFlow(ctx context.Context, opts flowOptions, getDeleteEventDetails getDeleteEventDetailsFunc, - parseEvent parseEventFunc) error { - - flow, err := NewFlow(opts, getDeleteEventDetails, parseEvent) +func newInstAsstFlow(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { + flow, err := NewFlow(opts, parseEvent) if err != nil { return err } @@ -45,44 +43,34 @@ type InstAsstFlow struct { func (f *InstAsstFlow) RunFlow(ctx context.Context) error { blog.Infof("start run flow for key: %s.", f.key.Namespace()) - f.tokenHandler = NewFlowTokenHandler(f.key, f.watchDB, f.metrics) - - startAtTime, err := f.tokenHandler.getStartWatchTime(ctx) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", f.key.Collection(), err) - return err - } + f.tokenHandler = NewFlowTokenHandler(f.key, f.metrics) - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: f.EventStruct, - // watch all tables with the prefix of instance association table - CollectionFilter: map[string]interface{}{ - common.BKDBLIKE: event.InstAsstTablePrefixRegex, + opts := &types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ + Name: f.key.Namespace(), + CollOpts: &types.WatchCollOptions{ + CollectionOptions: types.CollectionOptions{ + CollectionFilter: &types.CollectionFilter{ + Regex: fmt.Sprintf("_%s", common.BKObjectInstAsstShardingTablePrefix), + }, + EventStruct: f.EventStruct, + }, }, - StartAtTime: startAtTime, - WatchFatalErrorCallback: f.tokenHandler.resetWatchToken, - }, - } - - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: f.key.Namespace(), - WatchOpt: watchOpts, TokenHandler: f.tokenHandler, RetryOptions: &types.RetryOptions{ MaxRetryCount: 10, RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ + EventHandler: &types.TaskBatchHandler{ DoBatch: f.doBatch, }, BatchSize: batchSize, } - if err := f.watch.WithBatch(opts); err != nil { - blog.Errorf("run flow, but watch batch failed, err: %v", err) + err := f.task.AddLoopBatchTask(opts) + if err != nil { + blog.Errorf("run %s flow, but add loop batch task failed, err: %v", f.key.Namespace(), err) return err } diff --git a/src/source_controller/cacheservice/event/flow/instance_flow.go b/src/source_controller/cacheservice/event/flow/instance_flow.go index 0e34b65f4f..c8aba7a364 100644 --- a/src/source_controller/cacheservice/event/flow/instance_flow.go +++ b/src/source_controller/cacheservice/event/flow/instance_flow.go @@ -20,61 +20,69 @@ import ( "sync" "time" + "configcenter/pkg/tenant" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" - types2 "configcenter/src/common/types" "configcenter/src/common/util" "configcenter/src/common/watch" 
"configcenter/src/source_controller/cacheservice/event" + "configcenter/src/storage/dal/mongo/sharding" + ccRedis "configcenter/src/storage/dal/redis" dbtypes "configcenter/src/storage/dal/types" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/redis" "configcenter/src/storage/stream/types" - "configcenter/src/thirdparty/monitor" - "configcenter/src/thirdparty/monitor/meta" "github.com/tidwall/gjson" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" ) -func newInstanceFlow(ctx context.Context, opts flowOptions, getDeleteEventDetails getDeleteEventDetailsFunc, - parseEvent parseEventFunc) error { - - flow, err := NewFlow(opts, getDeleteEventDetails, parseEvent) +func newInstanceFlow(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { + flow, err := NewFlow(opts, parseEvent) if err != nil { return err } instFlow := InstanceFlow{ - Flow: flow, - mainlineObjectMap: new(mainlineObjectMap), + Flow: flow, + mainlineObjectMap: &mainlineObjectMap{ + data: make(map[string]map[string]struct{}), + }, } - mainlineObjectMap, err := instFlow.getMainlineObjectMap(ctx) + err = tenant.ExecForAllTenants(func(tenantID string) error { + mainlineObjMap, err := instFlow.getMainlineObjectMap(ctx, tenantID) + if err != nil { + blog.Errorf("run object instance watch, but get tenant %s mainline objects failed, err: %v", tenantID, err) + return err + } + instFlow.mainlineObjectMap.Set(tenantID, mainlineObjMap) + + go instFlow.syncMainlineObjectMap(tenantID) + return nil + }) if err != nil { - blog.Errorf("run object instance watch, but get mainline objects failed, err: %v", err) return err } - instFlow.mainlineObjectMap.Set(mainlineObjectMap) - - go instFlow.syncMainlineObjectMap() return instFlow.RunFlow(ctx) } // syncMainlineObjectMap refresh mainline object ID map every 5 minutes -func (f *InstanceFlow) syncMainlineObjectMap() { +func (f *InstanceFlow) syncMainlineObjectMap(tenantID string) { for { time.Sleep(time.Minute * 5) - mainlineObjectMap, err := f.getMainlineObjectMap(context.Background()) + mainlineObjMap, err := f.getMainlineObjectMap(context.Background(), tenantID) if err != nil { - blog.Errorf("run object instance watch, but get mainline objects failed, err: %v", err) + blog.Errorf("run object instance watch, but get tenant %s mainline objects failed, err: %v", tenantID, err) continue } - f.mainlineObjectMap.Set(mainlineObjectMap) - blog.V(5).Infof("run object instance watch, sync mainline object map done, map: %+v", f.mainlineObjectMap.Get()) + f.mainlineObjectMap.Set(tenantID, mainlineObjMap) + + blog.V(5).Infof("sync tenant %s mainline obj map done, map: %+v", tenantID, mainlineObjMap) } } @@ -84,26 +92,26 @@ type mapStrWithOid struct { } type mainlineObjectMap struct { - data map[string]struct{} + data map[string]map[string]struct{} lock sync.RWMutex } -// Get TODO -func (m *mainlineObjectMap) Get() map[string]struct{} { +// Get mainline object ID map for db +func (m *mainlineObjectMap) Get(tenantID string) map[string]struct{} { m.lock.RLock() defer m.lock.RUnlock() - data := make(map[string]struct{}) - for key, value := range m.data { - data[key] = value + mainlineMap, exists := m.data[tenantID] + if !exists { + return make(map[string]struct{}) } - return data + return mainlineMap } -// Set TODO -func (m *mainlineObjectMap) Set(data map[string]struct{}) { +// Set mainline object ID map for db +func (m *mainlineObjectMap) Set(tenantID string, data map[string]struct{}) { m.lock.Lock() defer m.lock.Unlock() - m.data = 
data + m.data[tenantID] = data } // InstanceFlow TODO @@ -116,52 +124,41 @@ type InstanceFlow struct { func (f *InstanceFlow) RunFlow(ctx context.Context) error { blog.Infof("start run flow for key: %s.", f.key.Namespace()) - f.tokenHandler = NewFlowTokenHandler(f.key, f.watchDB, f.metrics) - - startAtTime, err := f.tokenHandler.getStartWatchTime(ctx) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", f.key.Collection(), err) - return err - } - - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: f.EventStruct, - // watch all tables with the prefix of instance table - CollectionFilter: map[string]interface{}{ - common.BKDBLIKE: event.ObjInstTablePrefixRegex, + f.tokenHandler = NewFlowTokenHandler(f.key, f.metrics) + + opts := &types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ + Name: f.key.Namespace(), + CollOpts: &types.WatchCollOptions{ + CollectionOptions: types.CollectionOptions{ + CollectionFilter: &types.CollectionFilter{ + Regex: fmt.Sprintf("_%s", common.BKObjectInstShardingTablePrefix), + }, + EventStruct: f.EventStruct, + }, }, - StartAfterToken: nil, - StartAtTime: startAtTime, - WatchFatalErrorCallback: f.tokenHandler.resetWatchToken, - }, - } - - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: f.key.Namespace(), - WatchOpt: watchOpts, TokenHandler: f.tokenHandler, RetryOptions: &types.RetryOptions{ MaxRetryCount: 10, RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ + EventHandler: &types.TaskBatchHandler{ DoBatch: f.doBatch, }, BatchSize: batchSize, } - if err := f.watch.WithBatch(opts); err != nil { - blog.Errorf("run flow, but watch batch failed, err: %v", err) + err := f.task.AddLoopBatchTask(opts) + if err != nil { + blog.Errorf("run %s flow, but add loop batch task failed, err: %v", f.key.Namespace(), err) return err } return nil } -func (f *InstanceFlow) doBatch(es []*types.Event) (retry bool) { +func (f *InstanceFlow) doBatch(dbInfo *types.DBInfo, es []*types.Event) (retry bool) { if len(es) == 0 { return false } @@ -181,13 +178,7 @@ func (f *InstanceFlow) doBatch(es []*types.Event) (retry bool) { f.metrics.CollectCycleDuration(time.Since(start)) }() - oidDetailMap, retry, err := f.getDeleteEventDetails(es, f.ccDB, f.metrics) - if err != nil { - blog.Errorf("get deleted event details failed, err: %v, rid: %s", err, rid) - return retry - } - - eventMap, oidIndexMap, aggregationEvents, err := f.classifyEvents(es, oidDetailMap, rid) + eventMap, oidIndexMap, aggregationEvents, err := f.classifyEvents(es, rid) if err != nil { blog.Errorf("get aggregation inst events failed, err: %v, rid: %s", err, rid) return false @@ -198,16 +189,57 @@ func (f *InstanceFlow) doBatch(es []*types.Event) (retry bool) { return false } - ids, err := f.watchDB.NextSequences(context.Background(), f.key.Collection(), eventLen) + ids, err := dbInfo.WatchDB.NextSequences(context.Background(), f.key.Collection(), eventLen) if err != nil { blog.Errorf("get %s event ids failed, err: %v, rid: %s", f.key.Collection(), err, rid) return true } + pipe, oids, chainNodesMap, err := f.parseEvents(dbInfo, ids, eventMap, oidIndexMap, rid) + if err != nil { + return true + } + + lastTokenData := map[string]interface{}{ + common.BKTokenField: aggregationEvents[eventLen-1].Token.Data, + common.BKStartAtTimeField: aggregationEvents[eventLen-1].ClusterTime, + } + + // if all events are invalid, set last token to the last events' token, do not need to retry for the invalid ones + if 
len(chainNodesMap) == 0 { + err = f.tokenHandler.setLastWatchToken(context.Background(), dbInfo.UUID, dbInfo.WatchDB, lastTokenData) + if err != nil { + f.metrics.CollectMongoError() + return false + } + return false + } + + // store details at first, in case those watching cmdb events read chain when details are not inserted yet + if _, err := pipe.Exec(); err != nil { + f.metrics.CollectRedisError() + blog.Errorf("run flow, but insert details for %s failed, oids: %+v, err: %v, rid: %s,", f.key.Collection(), + oids, err, rid) + return true + } + + retry, err = f.doInsertEvents(dbInfo, chainNodesMap, lastTokenData, rid) + if err != nil { + return retry + } + + hasError = false + return false +} + +func (f *InstanceFlow) parseEvents(dbInfo *types.DBInfo, ids []uint64, eventMap map[string][]*types.Event, + oidIndexMap map[string]int, rid string) (ccRedis.Pipeliner, []string, map[string]map[string][]*watch.ChainNode, + error) { + pipe := redis.Client().Pipeline() oids := make([]string, 0) - chainNodesMap := make(map[string][]*watch.ChainNode) - lastChainNode := new(watch.ChainNode) + chainNodesMap := make(map[string]map[string][]*watch.ChainNode) + for coll, events := range eventMap { key := f.getKeyByCollection(coll) cursorMap := make(map[string]struct{}) @@ -216,19 +248,20 @@ func (f *InstanceFlow) doBatch(es []*types.Event) (retry bool) { // collect event's basic metrics f.metrics.CollectBasic(e) - idIndex := oidIndexMap[e.Oid+e.Collection] - chainNode, detail, retry, err := f.parseEvent(f.ccDB, key, e, oidDetailMap, ids[idIndex], rid) + idIdx := oidIndexMap[e.Oid+e.Collection] + tenantID, chainNode, detail, retry, err := f.parseEvent(dbInfo.CcDB, key, e, ids[idIdx], rid) if err != nil { - return retry + if retry { + return nil, nil, nil, err + } + continue } + if chainNode == nil { continue } - chainNode.SubResource = []string{gjson.GetBytes(e.DocBytes, common.BKObjIDField).String()} - if idIndex == eventLen-1 { - lastChainNode = chainNode - } + chainNode.SubResource = []string{gjson.GetBytes(e.DocBytes, common.BKObjIDField).String()} // validate if the cursor is already exists, this is happens when the concurrent operation is very high. // which will generate the same operation event with same cluster time, and generate with the same cursor @@ -240,13 +273,17 @@ func (f *InstanceFlow) doBatch(es []*types.Event) (retry bool) { cursorMap[chainNode.Cursor] = struct{}{} oids = append(oids, e.ID()) - chainNodesMap[coll] = append(chainNodesMap[coll], chainNode) + _, exists := chainNodesMap[coll] + if !exists { + chainNodesMap[coll] = make(map[string][]*watch.ChainNode) + } + chainNodesMap[coll][tenantID] = append(chainNodesMap[coll][tenantID], chainNode) // if hit cursor conflict, the former cursor node's detail will be overwrite by the later one, so it // is not needed to remove the overlapped cursor node's detail again. 
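The overwrite behaviour described just above is plain last-write-wins on the tenant-scoped redis detail key, so no cleanup pass is needed for the overlapped node. A minimal sketch using the same pipeline wrapper as this function, with firstDetail and laterDetail as placeholder payloads:

// Sketch: two nodes of one batch that share a cursor target the same detail key, so
// the later SET simply replaces the earlier one.
pipe := redis.Client().Pipeline()
pipe.Set(key.DetailKey(tenantID, cursor), string(firstDetail), ttl)
pipe.Set(key.DetailKey(tenantID, cursor), string(laterDetail), ttl) // overwrites the first
if _, err := pipe.Exec(); err != nil {
	// a failed pipeline aborts the batch, which is retried from the last token
}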
ttl := time.Duration(key.TTLSeconds()) * time.Second - pipe.Set(key.DetailKey(chainNode.Cursor), string(detail.eventInfo), ttl) - pipe.Set(key.GeneralResDetailKey(chainNode), string(detail.resDetail), ttl) + pipe.Set(key.DetailKey(tenantID, chainNode.Cursor), string(detail.eventInfo), ttl) + pipe.Set(key.GeneralResDetailKey(tenantID, chainNode), string(detail.resDetail), ttl) } if hitConflict { @@ -254,47 +291,16 @@ func (f *InstanceFlow) doBatch(es []*types.Event) (retry bool) { } } - lastTokenData := map[string]interface{}{ - common.BKTokenField: aggregationEvents[eventLen-1].Token.Data, - common.BKStartAtTimeField: aggregationEvents[eventLen-1].ClusterTime, - } - - // if all events are invalid, set last token to the last events' token, do not need to retry for the invalid ones - if len(chainNodesMap) == 0 { - if err := f.tokenHandler.setLastWatchToken(context.Background(), lastTokenData); err != nil { - f.metrics.CollectMongoError() - return false - } - return false - } - - lastTokenData[common.BKFieldID] = lastChainNode.ID - lastTokenData[common.BKCursorField] = lastChainNode.Cursor - lastTokenData[common.BKStartAtTimeField] = lastChainNode.ClusterTime - - // store details at first, in case those watching cmdb events read chain when details are not inserted yet - if _, err := pipe.Exec(); err != nil { - f.metrics.CollectRedisError() - blog.Errorf("run flow, but insert details for %s failed, oids: %+v, err: %v, rid: %s,", f.key.Collection(), - oids, err, rid) - return true - } - - retry, err = f.doInsertEvents(chainNodesMap, lastTokenData, rid) - if err != nil { - return retry - } - - hasError = false - return false + return pipe, oids, chainNodesMap, nil } -func (f *InstanceFlow) getMainlineObjectMap(ctx context.Context) (map[string]struct{}, error) { +func (f *InstanceFlow) getMainlineObjectMap(ctx context.Context, tenantID string) (map[string]struct{}, error) { relations := make([]metadata.Association, 0) filter := map[string]interface{}{ common.AssociationKindIDField: common.AssociationKindMainline, } - err := f.ccDB.Table(common.BKTableNameObjAsst).Find(filter).Fields(common.BKObjIDField).All(ctx, &relations) + err := mongodb.Shard(sharding.NewShardOpts().WithTenant(tenantID)).Table(common.BKTableNameObjAsst).Find(filter). 
+ Fields(common.BKObjIDField).All(ctx, &relations) if err != nil { blog.Errorf("get mainline topology association failed, err: %v", err) return nil, err @@ -311,8 +317,8 @@ func (f *InstanceFlow) getMainlineObjectMap(ctx context.Context) (map[string]str } // classifyEvents classify events by their related key's collection -func (f *InstanceFlow) classifyEvents(es []*types.Event, oidDetailMap map[oidCollKey][]byte, rid string) ( - map[string][]*types.Event, map[string]int, []*types.Event, error) { +func (f *InstanceFlow) classifyEvents(es []*types.Event, rid string) (map[string][]*types.Event, map[string]int, + []*types.Event, error) { mainlineColl := event.MainlineInstanceKey.Collection() commonColl := f.key.Collection() @@ -331,16 +337,6 @@ func (f *InstanceFlow) classifyEvents(es []*types.Event, oidDetailMap map[oidCol for index, e := range aggregationInstEvents { oidIndexMap[e.Oid+e.Collection] = index - if e.OperationType == types.Delete { - doc, exist := oidDetailMap[oidCollKey{oid: e.Oid, coll: e.Collection}] - if !exist { - blog.Errorf("run flow, received %s %s event, but delete doc[oid: %s] detail not exists, rid: %s", - f.key.Collection(), e.OperationType, e.Oid, rid) - continue - } - e.DocBytes = doc - } - objID := gjson.GetBytes(e.DocBytes, common.BKObjIDField).String() if len(objID) == 0 { blog.Errorf("run flow, %s event[oid: %s] object id not exists, doc: %s, rid: %s", @@ -348,7 +344,7 @@ func (f *InstanceFlow) classifyEvents(es []*types.Event, oidDetailMap map[oidCol continue } - if _, exists := f.mainlineObjectMap.Get()[objID]; exists { + if _, exists := f.mainlineObjectMap.Get(e.TenantID)[objID]; exists { eventMap[mainlineColl] = append(eventMap[mainlineColl], e) continue } @@ -367,107 +363,131 @@ func (f *InstanceFlow) convertTableInstEvent(es []*types.Event, rid string) ([]* return es, nil } - notContainTableInstEventsMap := make(map[int]*types.Event, 0) - srcObjIDInstIDsMap := make(map[string][]int64, 0) + tenantObjIDInstIDsMap := make(map[string]map[string][]int64) + tenantObjIDsMap := make(map[string][]string) instIDEventMap := make(map[int64]*types.Event) - instIDIndexMap := make(map[int64]int, 0) + instIDIndexMap := make(map[int64]int) for index, e := range es { - objID, err := common.GetInstObjIDByTableName(e.Collection, gjson.Get(string(e.DocBytes), - common.TenantID).Str) + objID, err := common.GetInstObjIDByTableName(e.ParsedColl, e.TenantID) if err != nil { blog.Errorf("collection name is illegal, err: %v, rid: %s", err, rid) return nil, err } + tenantObjIDsMap[e.TenantID] = append(tenantObjIDsMap[e.TenantID], objID) + + instID := gjson.Get(string(e.DocBytes), common.BKInstIDField).Int() + + _, exists := tenantObjIDInstIDsMap[e.TenantID] + if !exists { + tenantObjIDInstIDsMap[e.TenantID] = make(map[string][]int64) + } + tenantObjIDInstIDsMap[e.TenantID][objID] = append(tenantObjIDInstIDsMap[e.TenantID][objID], instID) + instIDIndexMap[instID] = index + instIDEventMap[instID] = e + } + + notContainTableInstEventsMap := make(map[int]*types.Event) + srcObjIDInstIDsMap := make(map[string]map[string][]int64) + for tenantID, objIDs := range tenantObjIDsMap { modelQuoteRel := make([]metadata.ModelQuoteRelation, 0) queryCond := mapstr.MapStr{ - common.BKDestModelField: objID, + common.BKDestModelField: mapstr.MapStr{common.BKDBIN: objIDs}, } - err = f.ccDB.Table(common.BKTableNameModelQuoteRelation).Find(queryCond).All(context.TODO(), &modelQuoteRel) + err := mongodb.Shard(sharding.NewShardOpts().WithTenant(tenantID)).Table(common.BKTableNameModelQuoteRelation). 
+ Find(queryCond).All(context.TODO(), &modelQuoteRel) if err != nil { blog.Errorf("get model quote relation failed, err: %v, rid: %s", err, rid) return nil, err } - if len(modelQuoteRel) == 0 { - notContainTableInstEventsMap[index] = e - continue - } - if len(modelQuoteRel) != 1 { - return nil, fmt.Errorf("model quote relation not unique, rel: %v", modelQuoteRel) - } - if modelQuoteRel[0].SrcModel == "" { - return nil, fmt.Errorf("src model objID is illegal, rel: %v", modelQuoteRel) + objSrcObjIDMap := make(map[string]string) + for _, rel := range modelQuoteRel { + if rel.SrcModel == "" { + return nil, fmt.Errorf("src model objID is illegal, rel: %v", modelQuoteRel) + } + if rel.PropertyID == "" { + return nil, fmt.Errorf("table field property id is illegal, rel: %v", modelQuoteRel) + } + objSrcObjIDMap[rel.DestModel] = rel.SrcModel } - if modelQuoteRel[0].PropertyID == "" { - return nil, fmt.Errorf("table field property id is illegal, rel: %v", modelQuoteRel) + for _, objID := range objIDs { + srcObjID, exists := objSrcObjIDMap[objID] + if !exists { + for _, instID := range tenantObjIDInstIDsMap[tenantID][objID] { + notContainTableInstEventsMap[instIDIndexMap[instID]] = instIDEventMap[instID] + } + continue + } + srcObjIDInstIDsMap[tenantID][srcObjID] = append(srcObjIDInstIDsMap[tenantID][srcObjID], + tenantObjIDInstIDsMap[tenantID][objID]...) } - - srcObjID := modelQuoteRel[0].SrcModel - instID := gjson.Get(string(e.DocBytes), common.BKInstIDField).Int() - srcObjIDInstIDsMap[srcObjID] = append(srcObjIDInstIDsMap[srcObjID], instID) - - instIDIndexMap[instID] = index - instIDEventMap[instID] = e } return f.convertToInstEvents(notContainTableInstEventsMap, srcObjIDInstIDsMap, instIDEventMap, instIDIndexMap, rid) } -func (f *InstanceFlow) convertToInstEvents(es map[int]*types.Event, srcObjIDInstIDsMap map[string][]int64, +func (f *InstanceFlow) convertToInstEvents(es map[int]*types.Event, srcObjIDInstIDsMap map[string]map[string][]int64, instIDEventMap map[int64]*types.Event, instIDIndexMap map[int64]int, rid string) ([]*types.Event, error) { - for objID, instIDs := range srcObjIDInstIDsMap { - if len(instIDs) == 0 { - continue - } - tableName := common.GetInstTableName(objID, gjson.Get(string(instIDEventMap[instIDs[0]].DocBytes), - common.TenantID).Str) - filter := mapstr.MapStr{ - common.GetInstIDField(objID): mapstr.MapStr{ - common.BKDBIN: util.IntArrayUnique(instIDs), - }, - } - findOpts := dbtypes.NewFindOpts().SetWithObjectID(true) - insts := make([]mapStrWithOid, 0) - err := f.ccDB.Table(tableName).Find(filter, findOpts).All(context.TODO(), &insts) - if err != nil { - blog.Errorf("get src model inst failed, err: %v, rid: %s", err, rid) - return nil, err - } - - for _, inst := range insts { - doc, err := json.Marshal(inst.MapStr) - if err != nil { - blog.Errorf("marshal inst to byte failed, err: %v, rid: %s", err, rid) + for tenantID, objInstIDMap := range srcObjIDInstIDsMap { + for objID, instIDs := range objInstIDMap { + if len(instIDs) == 0 { continue } - instID, err := util.GetInt64ByInterface(inst.MapStr[common.GetInstIDField(objID)]) + tableName := common.GetInstTableName(objID, tenantID) + filter := mapstr.MapStr{ + common.GetInstIDField(objID): mapstr.MapStr{ + common.BKDBIN: util.IntArrayUnique(instIDs), + }, + } + findOpts := dbtypes.NewFindOpts().SetWithObjectID(true) + insts := make([]mapStrWithOid, 0) + err := mongodb.Shard(sharding.NewShardOpts().WithTenant(tenantID)).Table(tableName).Find(filter, findOpts). 
+ All(context.TODO(), &insts) if err != nil { - blog.Errorf("get inst id failed, err: %v, rid: %s", err, rid) + blog.Errorf("get src model inst failed, err: %v, rid: %s", err, rid) return nil, err } - instEvent := &types.Event{ - Oid: inst.Oid.Hex(), - Document: inst.MapStr, - DocBytes: doc, - OperationType: "update", - Collection: tableName, - ClusterTime: types.TimeStamp{ - Sec: instIDEventMap[instID].ClusterTime.Sec, - Nano: instIDEventMap[instID].ClusterTime.Nano, - }, - Token: instIDEventMap[instID].Token, - ChangeDesc: &types.ChangeDescription{ - UpdatedFields: make(map[string]interface{}, 0), - RemovedFields: make([]string, 0), - }, - } + for _, inst := range insts { + doc, err := json.Marshal(inst.MapStr) + if err != nil { + blog.Errorf("marshal inst to byte failed, err: %v, rid: %s", err, rid) + continue + } + + instID, err := util.GetInt64ByInterface(inst.MapStr[common.GetInstIDField(objID)]) + if err != nil { + blog.Errorf("get inst id failed, err: %v, rid: %s", err, rid) + return nil, err + } + + instEvent := &types.Event{ + Oid: inst.Oid.Hex(), + Document: inst.MapStr, + DocBytes: doc, + OperationType: "update", + CollectionInfo: types.CollectionInfo{ + Collection: common.GenTenantTableName(tenantID, tableName), + ParsedColl: tableName, + TenantID: tenantID, + }, + ClusterTime: types.TimeStamp{ + Sec: instIDEventMap[instID].ClusterTime.Sec, + Nano: instIDEventMap[instID].ClusterTime.Nano, + }, + Token: instIDEventMap[instID].Token, + ChangeDesc: &types.ChangeDescription{ + UpdatedFields: make(map[string]interface{}, 0), + RemovedFields: make([]string, 0), + }, + } - es[instIDIndexMap[instID]] = instEvent + es[instIDIndexMap[instID]] = instEvent + } } } @@ -484,16 +504,14 @@ func (f *InstanceFlow) convertToInstEvents(es map[int]*types.Event, srcObjIDInst return aggregationInstEvents, nil } -func (f *InstanceFlow) doInsertEvents(chainNodesMap map[string][]*watch.ChainNode, lastTokenData map[string]interface{}, - rid string) (bool, error) { +func (f *InstanceFlow) doInsertEvents(dbInfo *types.DBInfo, chainNodesMap map[string]map[string][]*watch.ChainNode, + lastTokenData map[string]interface{}, rid string) (bool, error) { if len(chainNodesMap) == 0 { return false, nil } - watchDBClient := f.watchDB.GetDBClient() - - session, err := watchDBClient.StartSession() + session, err := dbInfo.WatchDB.GetDBClient().StartSession() if err != nil { blog.Errorf("run flow, but start session failed, coll: %s, err: %v, rid: %s", f.key.Collection(), err, rid) return true, err @@ -503,7 +521,7 @@ func (f *InstanceFlow) doInsertEvents(chainNodesMap map[string][]*watch.ChainNod // retry insert the event node with remove the first event node, // which means the first one's cursor is conflicted with the former's batch operation inserted nodes. 
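The reduce-and-retry described in this comment can be read as the loop sketched below, written out only for orientation. insertChainNodesInTxn is a hypothetical stand-in for the session block that follows, and the real code retries by calling doInsertEvents recursively after event.ReduceChainNode trims the conflicting node:

// Sketch: drop the chain node whose cursor already exists and retry with the rest,
// abandoning a collection (or the whole batch) once nothing is left to insert.
func (f *InstanceFlow) insertWithConflictRetry(dbInfo *types.DBInfo,
	chainNodesMap map[string]map[string][]*watch.ChainNode, rid string) (bool, error) {

	for {
		// hypothetical helper wrapping the transactional insert and token update
		conflictColl, conflictTenant, txnErr := insertChainNodesInTxn(dbInfo, chainNodesMap)
		if txnErr == nil {
			return false, nil // nodes inserted and token set inside the transaction
		}
		if conflictColl == "" {
			return true, txnErr // not a cursor conflict, roll back and re-watch
		}

		key := f.getKeyByCollection(conflictColl)
		chainNodesMap[conflictColl] = event.ReduceChainNode(chainNodesMap[conflictColl], conflictTenant,
			key.Collection(), txnErr, f.metrics, rid)
		if len(chainNodesMap[conflictColl]) == 0 {
			delete(chainNodesMap, conflictColl)
		}
		if len(chainNodesMap) == 0 {
			return false, nil
		}
	}
}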
retryWithReduce := false - var conflictColl string + var conflictColl, conflictTenantID string txnErr := mongo.WithSession(context.Background(), session, func(sc mongo.SessionContext) error { if err = session.StartTransaction(); err != nil { @@ -512,28 +530,18 @@ func (f *InstanceFlow) doInsertEvents(chainNodesMap map[string][]*watch.ChainNod return err } - for coll, chainNodes := range chainNodesMap { - if len(chainNodes) == 0 { - continue - } + for coll, chainNodeInfo := range chainNodesMap { key := f.getKeyByCollection(coll) - - if err := f.watchDB.Table(key.ChainCollection()).Insert(sc, chainNodes); err != nil { - blog.ErrorJSON("run flow, but insert chain nodes for %s failed, nodes: %s, err: %v, rid: %s", - key.Collection(), chainNodes, err, rid) - f.metrics.CollectMongoError() - _ = session.AbortTransaction(context.Background()) - - if event.IsConflictError(err) { - // set retry with reduce flag and retry later - retryWithReduce = true + err, retryWithReduce, conflictTenantID = f.insertChainNodes(sc, session, key, chainNodeInfo, rid) + if err != nil { + if retryWithReduce { conflictColl = coll } return err } } - if err := f.tokenHandler.setLastWatchToken(sc, lastTokenData); err != nil { + if err := f.tokenHandler.setLastWatchToken(sc, dbInfo.UUID, dbInfo.WatchDB, lastTokenData); err != nil { f.metrics.CollectMongoError() _ = session.AbortTransaction(context.Background()) return err @@ -553,46 +561,15 @@ func (f *InstanceFlow) doInsertEvents(chainNodesMap map[string][]*watch.ChainNod blog.Errorf("do insert flow events failed, err: %v, rid: %s", txnErr, rid) if retryWithReduce { - chainNodes := chainNodesMap[conflictColl] - if len(chainNodes) == 0 { - return false, nil + chainNodesMap[conflictColl] = event.ReduceChainNode(chainNodesMap[conflictColl], conflictTenantID, + f.getKeyByCollection(conflictColl).Collection(), txnErr, f.metrics, rid) + if len(chainNodesMap[conflictColl]) == 0 { + delete(chainNodesMap, conflictColl) } - key := f.getKeyByCollection(conflictColl) - - rid = rid + ":" + chainNodes[0].Oid - monitor.Collect(&meta.Alarm{ - RequestID: rid, - Type: meta.EventFatalError, - Detail: fmt.Sprintf("run event flow, but got conflict %s cursor with chain nodes", key.Collection()), - Module: types2.CC_MODULE_CACHESERVICE, - Dimension: map[string]string{"retry_conflict_nodes": "yes"}, - }) - - // no need to retry because the only one chain node conflicts with the nodes in db - if len(chainNodes) <= 1 { + if len(chainNodesMap) == 0 { return false, nil } - - for index, reducedChainNode := range chainNodes { - if isConflictChainNode(reducedChainNode, txnErr) { - chainNodes = append(chainNodes[:index], chainNodes[index+1:]...) 
- - chainNodesMap[conflictColl] = chainNodes - - // need do with retry with reduce - blog.ErrorJSON("run flow, insert %s event with reduce node %s, remain nodes: %s, rid: %s", - key.Collection(), reducedChainNode, chainNodes, rid) - - return f.doInsertEvents(chainNodesMap, lastTokenData, rid) - } - } - - // when no cursor conflict node is found, discard the first node and try to insert the others - blog.ErrorJSON("run flow, insert %s event with reduce node %s, remain nodes: %s, rid: %s", - key.Collection(), chainNodes[0], chainNodes[1:], rid) - - chainNodesMap[conflictColl] = chainNodes[1:] - return f.doInsertEvents(chainNodesMap, lastTokenData, rid) + return f.doInsertEvents(dbInfo, chainNodesMap, lastTokenData, rid) } // if an error occurred, roll back and re-watch again diff --git a/src/source_controller/cacheservice/event/flow/parser.go b/src/source_controller/cacheservice/event/flow/parser.go index 3a25ec5734..f0217ed58b 100644 --- a/src/source_controller/cacheservice/event/flow/parser.go +++ b/src/source_controller/cacheservice/event/flow/parser.go @@ -36,8 +36,8 @@ import ( ) // parseEventFunc function type for parsing db event into chain node and detail -type parseEventFunc func(db dal.DB, key event.Key, e *types.Event, oidDetailMap map[oidCollKey][]byte, id uint64, - rid string) (*watch.ChainNode, *eventDetail, bool, error) +type parseEventFunc func(db dal.DB, key event.Key, e *types.Event, id uint64, rid string) (string, *watch.ChainNode, + *eventDetail, bool, error) // eventDetail is the parsed event detail type eventDetail struct { @@ -48,8 +48,8 @@ type eventDetail struct { } // parseEvent parse event into db chain nodes to store in db and details to store in redis -func parseEvent(db dal.DB, key event.Key, e *types.Event, oidDetailMap map[oidCollKey][]byte, id uint64, rid string) ( - *watch.ChainNode, *eventDetail, bool, error) { +func parseEvent(db dal.DB, key event.Key, e *types.Event, id uint64, rid string) (string, *watch.ChainNode, + *eventDetail, bool, error) { switch e.OperationType { case types.Insert, types.Update, types.Replace: @@ -58,86 +58,126 @@ func parseEvent(db dal.DB, key event.Key, e *types.Event, oidDetailMap map[oidCo if err := key.Validate(e.DocBytes); err != nil { blog.Errorf("run flow, received %s event, but got invalid event, doc: %s, oid: %s, err: %v, rid: %s", key.Collection(), e.DocBytes, e.Oid, err, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } case types.Delete: - doc, exist := oidDetailMap[oidCollKey{oid: e.Oid, coll: e.Collection}] - if !exist { - blog.Errorf("run flow, received %s event, but delete doc[oid: %s] detail not exists, rid: %s", - key.Collection(), e.Oid, rid) - return nil, nil, false, nil - } - // update delete event detail doc bytes. - e.DocBytes = doc - // validate the event is valid or not. // the invalid event will be dropped. 
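Delete events in this parser are now validated against e.DocBytes directly instead of a del-archive lookup, which presumes the change stream itself delivers the removed document. With the official mongo-driver that relies on document pre-images, roughly as sketched here; the function name and surrounding wiring are assumptions, not cmdb's actual watch setup:

import (
	"context"

	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)

// watchWithPreImages sketches a change stream that still carries the full document
// for delete events via fullDocumentBeforeChange. The watched collection must have
// changeStreamPreAndPostImages enabled for WhenAvailable to return anything.
func watchWithPreImages(ctx context.Context, coll *mongo.Collection) (*mongo.ChangeStream, error) {
	opts := options.ChangeStream().
		SetFullDocument(options.UpdateLookup).
		SetFullDocumentBeforeChange(options.WhenAvailable)
	return coll.Watch(ctx, mongo.Pipeline{}, opts)
}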
- if err := key.Validate(doc); err != nil { + if err := key.Validate(e.DocBytes); err != nil { blog.Errorf("run flow, received %s event, but got invalid event, doc: %s, oid: %s, err: %v, rid: %s", key.Collection(), e.DocBytes, e.Oid, err, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } // since following event cannot be parsed, skip them and do not retry case types.Invalidate: blog.Errorf("loop flow, received invalid event operation type, doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil case types.Drop: blog.Errorf("loop flow, received drop collection event operation type, **delete object will send a drop "+ "instance collection event, ignore it**. doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil default: blog.Errorf("loop flow, received unsupported event operation type: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } return parseEventToNodeAndDetail(key, e, id, rid) } +// parseEventToNodeAndDetail parse validated event into db chain nodes to store in db and details to store in redis +func parseEventToNodeAndDetail(key event.Key, e *types.Event, id uint64, rid string) (string, *watch.ChainNode, + *eventDetail, bool, error) { + + name := key.Name(e.DocBytes) + instID := key.InstanceID(e.DocBytes) + currentCursor, err := watch.GetEventCursor(key.Collection(), e, instID) + if err != nil { + blog.Errorf("get %s event cursor failed, name: %s, err: %v, oid: %s, rid: %s", key.Collection(), name, + err, e.ID(), rid) + + monitor.Collect(&meta.Alarm{ + RequestID: rid, + Type: meta.FlowFatalError, + Detail: fmt.Sprintf("run event flow, but get invalid %s cursor, inst id: %d, name: %s", + key.Collection(), instID, name), + Module: types2.CC_MODULE_CACHESERVICE, + Dimension: map[string]string{"hit_invalid_cursor": "yes"}, + }) + + return "", nil, nil, false, err + } + + chainNode := &watch.ChainNode{ + ID: id, + ClusterTime: e.ClusterTime, + Oid: e.Oid, + EventType: watch.ConvertOperateType(e.OperationType), + Token: e.Token.Data, + Cursor: currentCursor, + } + + if instID > 0 { + chainNode.InstanceID = instID + } + + detail := types.EventInfo{ + UpdatedFields: e.ChangeDesc.UpdatedFields, + RemovedFields: e.ChangeDesc.RemovedFields, + } + detailBytes, err := json.Marshal(detail) + if err != nil { + blog.Errorf("run flow, %s, marshal detail failed, name: %s, detail: %+v, err: %v, oid: %s, rid: %s", + key.Collection(), name, detail, err, e.ID(), rid) + return "", nil, nil, false, err + } + + return e.TenantID, chainNode, &eventDetail{eventInfo: detailBytes, resDetail: e.DocBytes}, false, nil +} + // parseInstAsstEvent parse instance association event into db chain nodes to store in db and details to store in redis -func parseInstAsstEvent(db dal.DB, key event.Key, e *types.Event, oidDetailMap map[oidCollKey][]byte, id uint64, - rid string) (*watch.ChainNode, *eventDetail, bool, error) { +func parseInstAsstEvent(db dal.DB, key event.Key, e *types.Event, id uint64, rid string) (string, *watch.ChainNode, + *eventDetail, bool, error) { switch e.OperationType { case types.Insert: if err := key.Validate(e.DocBytes); err != nil { blog.Errorf("run flow, received %s event, but got invalid event, doc: %s, oid: %s, err: %v, rid: %s", key.Collection(), e.DocBytes, e.Oid, err, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } case types.Delete: - doc, exist := oidDetailMap[oidCollKey{oid: e.Oid, 
coll: e.Collection}] - if !exist { - blog.Errorf("%s event delete doc[oid: %s] detail not exists, rid: %s", key.Collection(), e.Oid, rid) - return nil, nil, false, nil - } - // update delete event detail doc bytes from del archive - e.DocBytes = doc - - if err := key.Validate(doc); err != nil { + if err := key.Validate(e.DocBytes); err != nil { blog.Errorf("run flow, received %s event, but got invalid event, doc: %s, oid: %s, err: %v, rid: %s", key.Collection(), e.DocBytes, e.Oid, err, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } // since following event cannot be parsed, skip them and do not retry case types.Invalidate: blog.Errorf("loop flow, received invalid event operation type, doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil case types.Drop: blog.Errorf("loop flow, received drop collection event operation type, **delete object will send a drop "+ "instance association collection event, ignore it**. doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil default: blog.Errorf("loop flow, received invalid event op type: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } + return parseInstAsstEventToNodeAndDetail(key, e, id, rid) +} + +// parseInstAsstEventToNodeAndDetail parse inst asst event into db chain nodes and details +func parseInstAsstEventToNodeAndDetail(key event.Key, e *types.Event, id uint64, rid string) (string, *watch.ChainNode, + *eventDetail, bool, error) { + instAsstID := key.InstanceID(e.DocBytes) if instAsstID == 0 { blog.Errorf("loop flow, received invalid event id, doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } // since instance association is saved in both source and target object inst asst table, one change will generate 2 @@ -156,7 +196,7 @@ func parseInstAsstEvent(db dal.DB, key event.Key, e *types.Event, oidDetailMap m Dimension: map[string]string{"hit_invalid_cursor": "yes"}, }) - return nil, nil, false, err + return "", nil, nil, false, err } chainNode := &watch.ChainNode{ @@ -167,7 +207,6 @@ func parseInstAsstEvent(db dal.DB, key event.Key, e *types.Event, oidDetailMap m Token: e.Token.Data, Cursor: currentCursor, InstanceID: instAsstID, - TenantID: key.SupplierAccount(e.DocBytes), } chainNode.SubResource = []string{gjson.GetBytes(e.DocBytes, common.BKObjIDField).String(), @@ -181,59 +220,8 @@ func parseInstAsstEvent(db dal.DB, key event.Key, e *types.Event, oidDetailMap m if err != nil { blog.Errorf("run flow, %s, marshal detail failed, detail: %+v, err: %v, oid: %s, rid: %s", key.Collection(), detail, err, e.ID(), rid) - return nil, nil, false, err - } - - return chainNode, &eventDetail{eventInfo: detailBytes, resDetail: e.DocBytes}, false, nil -} - -// parseEventToNodeAndDetail parse validated event into db chain nodes to store in db and details to store in redis -func parseEventToNodeAndDetail(key event.Key, e *types.Event, id uint64, rid string) (*watch.ChainNode, *eventDetail, - bool, error) { - - name := key.Name(e.DocBytes) - instID := key.InstanceID(e.DocBytes) - currentCursor, err := watch.GetEventCursor(key.Collection(), e, instID) - if err != nil { - blog.Errorf("get %s event cursor failed, name: %s, err: %v, oid: %s, rid: %s", key.Collection(), name, - err, e.ID(), rid) - - monitor.Collect(&meta.Alarm{ - RequestID: rid, - Type: meta.FlowFatalError, - Detail: fmt.Sprintf("run event flow, but get invalid 
%s cursor, inst id: %d, name: %s", - key.Collection(), instID, name), - Module: types2.CC_MODULE_CACHESERVICE, - Dimension: map[string]string{"hit_invalid_cursor": "yes"}, - }) - - return nil, nil, false, err - } - - chainNode := &watch.ChainNode{ - ID: id, - ClusterTime: e.ClusterTime, - Oid: e.Oid, - EventType: watch.ConvertOperateType(e.OperationType), - Token: e.Token.Data, - Cursor: currentCursor, - TenantID: key.SupplierAccount(e.DocBytes), - } - - if instID > 0 { - chainNode.InstanceID = instID - } - - detail := types.EventInfo{ - UpdatedFields: e.ChangeDesc.UpdatedFields, - RemovedFields: e.ChangeDesc.RemovedFields, - } - detailBytes, err := json.Marshal(detail) - if err != nil { - blog.Errorf("run flow, %s, marshal detail failed, name: %s, detail: %+v, err: %v, oid: %s, rid: %s", - key.Collection(), name, detail, err, e.ID(), rid) - return nil, nil, false, err + return "", nil, nil, false, err } - return chainNode, &eventDetail{eventInfo: detailBytes, resDetail: e.DocBytes}, false, nil + return e.TenantID, chainNode, &eventDetail{eventInfo: detailBytes, resDetail: e.DocBytes}, false, nil } diff --git a/src/source_controller/cacheservice/event/flow/workload_flow.go b/src/source_controller/cacheservice/event/flow/workload_flow.go deleted file mode 100644 index 0d8003ba86..0000000000 --- a/src/source_controller/cacheservice/event/flow/workload_flow.go +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making - * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. - * Copyright (C) 2017 THL A29 Limited, - * a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - * We undertake not to change the open source license (MIT license) applicable - * to the current version of the project delivered to anyone in the future. 
- */ - -package flow - -import ( - "context" - "time" - - "configcenter/src/common" - "configcenter/src/common/blog" - kubetypes "configcenter/src/kube/types" - "configcenter/src/storage/stream/types" -) - -func newWorkloadFlow(ctx context.Context, opts flowOptions, getDeleteEventDetails getDeleteEventDetailsFunc, - parseEvent parseEventFunc) error { - - flow, err := NewFlow(opts, getDeleteEventDetails, parseEvent) - if err != nil { - return err - } - workloadFlow := WorkloadFlow{ - Flow: flow, - } - - return workloadFlow.RunFlow(ctx) -} - -// WorkloadFlow instance association event watch flow -type WorkloadFlow struct { - Flow -} - -// RunFlow run instance association event watch flow -func (f *WorkloadFlow) RunFlow(ctx context.Context) error { - blog.Infof("start run flow for key: %s.", f.key.Namespace()) - - f.tokenHandler = NewFlowTokenHandler(f.key, f.watchDB, f.metrics) - - startAtTime, err := f.tokenHandler.getStartWatchTime(ctx) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", f.key.Collection(), err) - return err - } - - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: f.EventStruct, - // watch all kube workload tables - CollectionFilter: map[string]interface{}{ - common.BKDBIN: []string{kubetypes.BKTableNameBaseDeployment, kubetypes.BKTableNameBaseStatefulSet, - kubetypes.BKTableNameBaseDaemonSet, kubetypes.BKTableNameGameDeployment, - kubetypes.BKTableNameGameStatefulSet, kubetypes.BKTableNameBaseCronJob, - kubetypes.BKTableNameBaseJob, kubetypes.BKTableNameBasePodWorkload}, - }, - StartAtTime: startAtTime, - WatchFatalErrorCallback: f.tokenHandler.resetWatchToken, - }, - } - - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: f.key.Namespace(), - WatchOpt: watchOpts, - TokenHandler: f.tokenHandler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 10, - RetryDuration: 1 * time.Second, - }, - }, - EventHandler: &types.BatchHandler{ - DoBatch: f.doBatch, - }, - BatchSize: batchSize, - } - - if err := f.watch.WithBatch(opts); err != nil { - blog.Errorf("run flow, but watch batch failed, err: %v", err) - return err - } - - return nil -} diff --git a/src/source_controller/cacheservice/event/identifier/converter.go b/src/source_controller/cacheservice/event/identifier/converter.go index a3a0f0c26f..8d532ba700 100644 --- a/src/source_controller/cacheservice/event/identifier/converter.go +++ b/src/source_controller/cacheservice/event/identifier/converter.go @@ -15,17 +15,16 @@ package identifier import ( "context" "fmt" - "time" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/mapstr" "configcenter/src/common/util" + "configcenter/src/storage/dal/mongo/sharding" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" "github.com/tidwall/gjson" - "go.mongodb.org/mongo-driver/bson/primitive" - "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" ) var needCareHostFields = []string{ @@ -33,7 +32,6 @@ var needCareHostFields = []string{ common.BKOSTypeField, common.BKCloudIDField, common.BKHostIDField, - common.TenantID, common.BKAgentIDField, common.BKAddressingField, } @@ -63,7 +61,7 @@ loop: continue } - if _, yes := reminder[one.Oid]; yes { + if _, yes := reminder[genUniqueKey(one)]; yes { // this host event has already hit, then we aggregate these events with former one to only one // this is useful to decrease host identify events. 
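The aggregation in this file now keys its reminder map by a collection-qualified id rather than the bare oid; genUniqueKey, added at the end of this file, is simply Collection + "-" + Oid, so repeated events for one document collapse into a single identity event while equal oids from different tenant tables stay separate. A simplified sketch of that dedup, ignoring the changed-field checks the real loop also applies:

// Sketch: collapse repeated events for the same document into one host identity event.
reminder := make(map[string]struct{})
hitEvents := make([]*types.Event, 0)
for _, one := range es {
	k := genUniqueKey(one) // one.Collection + "-" + one.Oid in this patch
	if _, hit := reminder[k]; hit {
		continue // already aggregated with an earlier event in this batch
	}
	reminder[k] = struct{}{}
	hitEvents = append(hitEvents, one)
}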
blog.Infof("host identify event, host event: %s is aggregated, rid: %s", one.ID(), rid) @@ -74,7 +72,7 @@ loop: if len(one.ChangeDesc.UpdatedFields) == 0 && len(one.ChangeDesc.RemovedFields) == 0 { // we do not know what's info is changed, so we add this event directly. hitEvents = append(hitEvents, one) - reminder[one.Oid] = struct{}{} + reminder[genUniqueKey(one)] = struct{}{} continue } @@ -83,7 +81,7 @@ loop: for _, care := range needCareHostFields { if _, yes := one.ChangeDesc.UpdatedFields[care]; yes { hitEvents = append(hitEvents, one) - reminder[one.Oid] = struct{}{} + reminder[genUniqueKey(one)] = struct{}{} continue loop } } @@ -100,7 +98,7 @@ loop: if _, yes := check[care]; yes { // one of the cared fields is removed, we do need to care. hitEvents = append(hitEvents, one) - reminder[one.Oid] = struct{}{} + reminder[genUniqueKey(one)] = struct{}{} continue loop } } @@ -122,18 +120,11 @@ var hostIDJson = `{"bk_host_id":%d}` // 2. care about all kinds of event types. // 3. do not care the event's order, cause we all convert to host events type. func (f *hostIdentity) rearrangeHostRelationEvents(es []*types.Event, rid string) ([]*types.Event, error) { - deleteEventsMap := make(map[string]*types.Event, 0) - deleteOids := make([]string, 0) hitEvents := make([]*types.Event, 0) // remind if related host's events has already been hit, if yes, then skip this event. reminder := make(map[int64]struct{}) for idx := range es { one := es[idx] - if one.OperationType == types.Delete { - deleteEventsMap[one.Oid] = one - deleteOids = append(deleteOids, one.Oid) - continue - } hostID := gjson.GetBytes(one.DocBytes, common.BKHostIDField).Int() if hostID <= 0 { @@ -155,61 +146,10 @@ func (f *hostIdentity) rearrangeHostRelationEvents(es []*types.Event, rid string reminder[hostID] = struct{}{} } - if len(deleteEventsMap) == 0 { - // no delete type events, then return directly - return hitEvents, nil - } - - filter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: deleteOids}, - "coll": common.BKTableNameModuleHostConfig, - } - - docs := make([]bsoncore.Document, 0) - err := f.ccDB.Table(common.BKTableNameDelArchive).Find(filter).All(context.Background(), &docs) - if err != nil { - f.metrics.CollectMongoError() - blog.Errorf("host identify event, get archive host relation from mongodb failed, oid: %+v, err: %v, rid: %v", - deleteOids, err, rid) - return nil, err - } - - for _, doc := range docs { - hostID, ok := doc.Lookup("detail", common.BKHostIDField).Int64OK() - if !ok { - blog.Errorf("host id type is illegal, skip, relation: %s, rid: %s", doc.Lookup("detail").String(), rid) - continue - } - if hostID <= 0 { - blog.Errorf("host identify event, get host id from relation: %s failed, skip, rid: %s", - doc.Lookup("detail").String(), rid) - continue - } - - if _, exist := reminder[hostID]; exist { - // this host has already been hit, skip now. 
- blog.Infof("host identify event, relation deleted host id: %d is aggregated, oid: %s, rid: %s", - hostID, doc.Lookup("oid").String(), rid) - continue - } - reminder[hostID] = struct{}{} - - event, exist := deleteEventsMap[doc.Lookup("oid").String()] - if !exist { - blog.Errorf("host identify event, get archived event's instance with oid :%s failed, skip, rid: %s", - doc.Lookup("oid").String(), rid) - continue - } - event.DocBytes = []byte(fmt.Sprintf(hostIDJson, hostID)) - event.Document = nil - hitEvents = append(hitEvents, event) - } - return hitEvents, nil } -// rearrangeProcessEvents TODO -// process events arrange policy: +// rearrangeProcessEvents process events arrange policy: // 1. redirect process event to host change event with process host relation info. // 2. care about all kinds of event types. // 3. do not care the event's order, cause we all convert to host events type. @@ -218,25 +158,21 @@ func (f *hostIdentity) rearrangeProcessEvents(es []*types.Event, rid string) ([] return es, nil } - processIDs := make([]int64, 0) - deleteOids := make([]string, 0) - idToOid := make(map[int64]string) - oidToEvent := make(map[string]*types.Event) + processIDsMap := make(map[string][]int64) reminder := make(map[string]struct{}) for idx := range es { one := es[idx] - if _, exist := reminder[one.Oid]; exist { + if _, exist := reminder[genUniqueKey(one)]; exist { // skip event's with the same oid, which means it's the same process event. // cause we convert a process id to host id finally. blog.Infof("host identify event, process: %s is aggregated, rid: %s", one.ID(), rid) continue } - oidToEvent[one.Oid] = one - if one.OperationType == types.Delete { - deleteOids = append(deleteOids, one.Oid) - reminder[one.Oid] = struct{}{} + // process delete and insert event are handled by related process relation event. + if one.OperationType == types.Delete || one.OperationType == types.Insert { + reminder[genUniqueKey(one)] = struct{}{} continue } @@ -246,64 +182,44 @@ func (f *hostIdentity) rearrangeProcessEvents(es []*types.Event, rid string) ([] continue } - processIDs = append(processIDs, processID) - idToOid[processID] = one.Oid - reminder[one.Oid] = struct{}{} + processIDsMap[one.TenantID] = append(processIDsMap[one.TenantID], processID) + reminder[genUniqueKey(one)] = struct{}{} } // got 0 valid event - if len(processIDs) == 0 && len(deleteOids) == 0 { + if len(processIDsMap) == 0 { return es[:0], nil } - // now we need to convert these process ids and delete oids to host ids. - // convert process ids to host ids. - notHitProcess, hostList, err := f.convertProcessToHost(processIDs, rid) - if err != nil { - return nil, err - } - - // get these process's host from cc_DelArchive - if len(notHitProcess) != 0 { - start := int64(es[0].ClusterTime.Sec) - hostIDs, err := f.getHostWithProcessRelationFromDelArchive(start, notHitProcess, rid) + // now we need to convert these process ids to host ids. + hostListMap := make(map[string][]int64) + for tenant, processIDs := range processIDsMap { + hostList, err := f.convertProcessToHost(tenant, processIDs, rid) if err != nil { return nil, err } - hostList = append(hostList, hostIDs...) - } - if len(deleteOids) != 0 { - start := int64(es[0].ClusterTime.Sec) - hostIDs, err := f.getDeletedProcessHosts(start, deleteOids, rid) - if err != nil { - return nil, err - } - hostList = append(hostList, hostIDs...) 
+ hostListMap[tenant] = hostList } - // now we get all the host's ids list - // it should be much more less than the process's count - hostList = util.IntArrayUnique(hostList) - - cnt := len(hostList) - if cnt > len(es) { - // host count is always less or equal than the count of events. - // when this happens, somethings must be wrong. - blog.ErrorJSON("got more host count than it's process, use host count instead, es: %s, host: %s, rid: %s", - es, hostList, rid) - // continue handle this, but redirect count to event's count - cnt = len(es) + for tenantID, hostList := range hostListMap { + // now we get all the host's ids list + // it should be much more less than the process's count + hostListMap[tenantID] = util.IntArrayUnique(hostList) } - // reset the event's document info to host id field. - for i := 0; i < cnt; i++ { - es[i].DocBytes = []byte(fmt.Sprintf(hostIDJson, hostList[i])) - es[i].Document = nil + events := make([]*types.Event, 0) + for _, e := range es { + hostList := hostListMap[e.TenantID] + if len(hostList) == 0 { + continue + } + // reset the event's document info to host id field. + e.DocBytes = []byte(fmt.Sprintf(hostIDJson, hostList[0])) + hostListMap[e.TenantID] = hostList[1:] + e.Document = nil + events = append(events, e) } - - // remove the unused events - es = es[:cnt] return es, nil } @@ -312,13 +228,10 @@ type processRelation struct { HostID int64 `bson:"bk_host_id"` } -// convertProcessToHost TODO -// convert process ids to host ids. -// we may can not find process's relations info, cause it may already been deleted. so we need -// to find it in cc_DelArchive collection. -func (f *hostIdentity) convertProcessToHost(pIDs []int64, rid string) ([]int64, []int64, error) { +// convertProcessToHost convert process ids to host ids. +func (f *hostIdentity) convertProcessToHost(tenantID string, pIDs []int64, rid string) ([]int64, error) { if len(pIDs) == 0 { - return make([]int64, 0), make([]int64, 0), nil + return make([]int64, 0), nil } filter := mapstr.MapStr{ @@ -326,110 +239,26 @@ func (f *hostIdentity) convertProcessToHost(pIDs []int64, rid string) ([]int64, } relations := make([]*processRelation, 0) - err := f.ccDB.Table(common.BKTableNameProcessInstanceRelation).Find(filter).Fields(common.BKHostIDField, - common.BKProcessIDField).All(context.Background(), &relations) + err := mongodb.Shard(sharding.NewShardOpts().WithTenant(tenantID)).Table(common.BKTableNameProcessInstanceRelation). + Find(filter).Fields(common.BKHostIDField, common.BKProcessIDField).All(context.Background(), &relations) if err != nil { blog.Errorf("host identify event, get process instance relation failed, err: %v, rid: %s", err, rid) - return nil, nil, err + return nil, err } - idMap := make(map[int64]struct{}) hostIDMap := make(map[int64]struct{}) for idx := range relations { - idMap[relations[idx].ProcessID] = struct{}{} hostIDMap[relations[idx].HostID] = struct{}{} } - notHitProcess := make([]int64, 0) - for _, id := range pIDs { - if _, exist := idMap[id]; !exist { - // this process's relations has already been deleted, so we can not find it. 
- // it will be try to search in cc_DelArchive later - notHitProcess = append(notHitProcess, id) - } - } - hostList := make([]int64, 0) for id := range hostIDMap { hostList = append(hostList, id) } - return notHitProcess, hostList, nil - + return hostList, nil } -// getHostWithProcessRelationFromDelArchive TODO -// get host ids from cc_DelArchive with process's ids -// a process has only one relation, so we can use process ids find it's unique relations. -func (f *hostIdentity) getHostWithProcessRelationFromDelArchive(startUnix int64, pIDs []int64, rid string) ( - []int64, error) { - filter := mapstr.MapStr{ - "coll": common.BKTableNameProcessInstanceRelation, - // this archive doc's created time must be greater than start unix time. - "_id": mapstr.MapStr{ - common.BKDBGTE: primitive.NewObjectIDFromTimestamp(time.Unix(startUnix-60, 0)), - }, - "detail.bk_process_id": mapstr.MapStr{common.BKDBIN: pIDs}, - } - - relations := make([]map[string]*processRelation, 0) - err := f.ccDB.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail"). - All(context.Background(), &relations) - if err != nil { - f.metrics.CollectMongoError() - blog.Errorf("host identify event, get archive deleted instance process relations failed, "+ - "process ids: %v, err: %v, rid: %v", f.key.Collection(), pIDs, err, rid) - return nil, err - } - - if len(pIDs) != len(relations) { - blog.ErrorJSON("host identify event, can not find all process ids relations, ids: %s, relations: %s, rid: %s", - pIDs, relations) - } - - hostIDs := make([]int64, 0) - for _, doc := range relations { - relation := doc["detail"] - hostIDs = append(hostIDs, relation.HostID) - } - return hostIDs, nil -} - -func (f *hostIdentity) getDeletedProcessHosts(startUnix int64, oids []string, rid string) ([]int64, error) { - filter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: oids}, - "coll": common.BKTableNameBaseProcess, - } - - docs := make([]bsoncore.Document, 0) - err := f.ccDB.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail").All(context.Background(), &docs) - if err != nil { - f.metrics.CollectMongoError() - blog.Errorf("host identify event, get archive deleted process instances, oids: %+v, err: %v, rid: %v", - oids, err, rid) - return nil, err - } - - pList := make([]int64, 0) - for _, doc := range docs { - pID, ok := doc.Lookup("detail", common.BKProcessIDField).Int64OK() - if !ok { - blog.Errorf("process id type is illegal, skip, instance: %s, rid: %s", doc.Lookup("detail").String(), rid) - continue - } - if pID <= 0 { - blog.Errorf("host identify event, get process id from instance: %s failed, skip, rid: %s", - doc.Lookup("detail").String(), rid) - continue - } - pList = append(pList, pID) - } - - if len(pList) == 0 { - blog.Warnf("got 0 valid process from archived collection with oids: %v, rid: %s", oids, rid) - return pList, nil - } - - // then get hosts list with these process ids. 
- return f.getHostWithProcessRelationFromDelArchive(startUnix, pList, rid) +func genUniqueKey(e *types.Event) string { + return e.Collection + "-" + e.Oid } diff --git a/src/source_controller/cacheservice/event/identifier/event.go b/src/source_controller/cacheservice/event/identifier/event.go index 091f6988ec..8c1949341b 100644 --- a/src/source_controller/cacheservice/event/identifier/event.go +++ b/src/source_controller/cacheservice/event/identifier/event.go @@ -17,15 +17,12 @@ import ( "fmt" "time" - "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" mixevent "configcenter/src/source_controller/cacheservice/event/mix-event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" ) @@ -37,10 +34,7 @@ const ( type identityOptions struct { key event.Key watchFields []string - watch stream.LoopInterface - isMaster discovery.ServiceManageInterface - watchDB *local.Mongo - ccDB dal.DB + task *task.Task } func newIdentity(ctx context.Context, opts identityOptions) error { @@ -53,9 +47,7 @@ func newIdentity(ctx context.Context, opts identityOptions) error { MixKey: event.HostIdentityKey, Key: opts.key, WatchFields: opts.watchFields, - Watch: opts.watch, - WatchDB: opts.watchDB, - CcDB: opts.ccDB, + Task: opts.task, EventLockTTL: hostIdentityLockTTL, EventLockKey: hostIdentityLockKey, } @@ -81,6 +73,8 @@ func (f *hostIdentity) rearrangeEvents(rid string, es []*types.Event) ([]*types. return f.rearrangeHostRelationEvents(es, rid) case event.ProcessKey.Collection(): return f.rearrangeProcessEvents(es, rid) + case event.ProcessInstanceRelationKey.Collection(): + return f.rearrangeHostRelationEvents(es, rid) default: blog.ErrorJSON("received unsupported host identity event, skip, es: %s, rid :%s", es, rid) return es[:0], nil @@ -88,17 +82,18 @@ func (f *hostIdentity) rearrangeEvents(rid string, es []*types.Event) ([]*types. 
} // parseEvent parse event into chain nodes, host identifier detail is formed when watched, do not store in redis -func (f *hostIdentity) parseEvent(e *types.Event, id uint64, rid string) (*watch.ChainNode, []byte, bool, error) { +func (f *hostIdentity) parseEvent(e *types.Event, id uint64, rid string) (string, *watch.ChainNode, []byte, bool, + error) { switch e.OperationType { case types.Insert, types.Update, types.Replace, types.Delete: case types.Invalidate: blog.Errorf("host identify event, received invalid event operation type, doc: %s, rid: %s", e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil default: blog.Errorf("host identify event, received unsupported event operation type: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, rid) - return nil, nil, false, nil + return "", nil, nil, false, nil } name := f.key.Name(e.DocBytes) @@ -106,7 +101,7 @@ func (f *hostIdentity) parseEvent(e *types.Event, id uint64, rid string) (*watch if err != nil { blog.Errorf("get %s event cursor failed, name: %s, err: %v, oid: %s, rid: %s", f.key.Collection(), name, err, e.ID(), rid) - return nil, nil, false, err + return "", nil, nil, false, err } chainNode := &watch.ChainNode{ @@ -117,14 +112,13 @@ func (f *hostIdentity) parseEvent(e *types.Event, id uint64, rid string) (*watch EventType: watch.ConvertOperateType(types.Update), Token: e.Token.Data, Cursor: cursor, - TenantID: f.key.SupplierAccount(e.DocBytes), } if instanceID := event.HostIdentityKey.InstanceID(e.DocBytes); instanceID > 0 { chainNode.InstanceID = instanceID } - return chainNode, nil, false, nil + return e.TenantID, chainNode, nil, false, nil } func genHostIdentifyCursor(coll string, e *types.Event, rid string) (string, error) { @@ -136,6 +130,8 @@ func genHostIdentifyCursor(coll string, e *types.Event, rid string) (string, err curType = watch.ModuleHostRelation case common.BKTableNameBaseProcess: curType = watch.Process + case common.BKTableNameProcessInstanceRelation: + curType = watch.ProcessInstanceRelation default: blog.ErrorJSON("unsupported host identity cursor type collection: %s, event: %s, oid: %s", coll, e, rid) return "", fmt.Errorf("unsupported host identity cursor type collection: %s", coll) diff --git a/src/source_controller/cacheservice/event/identifier/identifier.go b/src/source_controller/cacheservice/event/identifier/identifier.go index 0b5139a93a..79bd1035ad 100644 --- a/src/source_controller/cacheservice/event/identifier/identifier.go +++ b/src/source_controller/cacheservice/event/identifier/identifier.go @@ -15,35 +15,17 @@ package identifier import ( "context" - "fmt" - "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) -// NewIdentity TODO -func NewIdentity( - watch stream.LoopInterface, - isMaster discovery.ServiceManageInterface, - watchDB dal.DB, - ccDB dal.DB) error { - - watchMongoDB, ok := watchDB.(*local.Mongo) - if !ok { - blog.Errorf("watch event, but watch db is not an instance of local mongo to start transaction") - return fmt.Errorf("watch db is not an instance of local mongo") - } - +// NewIdentity new host identifier event watch +func NewIdentity(task *task.Task) error { base := identityOptions{ - watch: watch, - isMaster: isMaster, - watchDB: watchMongoDB, - ccDB: ccDB, + task: task, } host := base @@ -73,5 
+55,14 @@ func NewIdentity( } blog.Info("host identity events, watch process success.") + procRel := base + procRel.key = event.ProcessInstanceRelationKey + procRel.watchFields = []string{common.BKHostIDField} + if err := newIdentity(context.Background(), procRel); err != nil { + blog.Errorf("new host identify process relation event failed, err: %v", err) + return err + } + blog.Info("host identity events, watch process relation success.") + return nil } diff --git a/src/source_controller/cacheservice/event/key.go b/src/source_controller/cacheservice/event/key.go index 5a39ffe7cb..34c301a5c3 100644 --- a/src/source_controller/cacheservice/event/key.go +++ b/src/source_controller/cacheservice/event/key.go @@ -18,7 +18,6 @@ import ( "configcenter/pkg/cache/general" "configcenter/src/common" "configcenter/src/common/watch" - kubetypes "configcenter/src/kube/types" "github.com/tidwall/gjson" ) @@ -364,128 +363,6 @@ var PlatKey = Key{ }, } -// kubeFields kube related resource id and name fields, used for validation -var kubeFields = []string{common.BKFieldID, common.BKFieldName} - -// KubeClusterKey kube cluster event watch key -var KubeClusterKey = Key{ - namespace: watchCacheNamespace + kubetypes.KubeCluster, - collection: kubetypes.BKTableNameBaseCluster, - ttlSeconds: 6 * 60 * 60, - generalResCacheKey: general.KubeClusterKey, - validator: func(doc []byte) error { - fields := gjson.GetManyBytes(doc, kubeFields...) - for idx := range kubeFields { - if !fields[idx].Exists() { - return fmt.Errorf("field %s not exist", kubeFields[idx]) - } - } - return nil - }, - instName: func(doc []byte) string { - return gjson.GetBytes(doc, common.BKFieldName).String() - }, - instID: func(doc []byte) int64 { - return gjson.GetBytes(doc, common.BKFieldID).Int() - }, -} - -// KubeNodeKey kube node event watch key -var KubeNodeKey = Key{ - namespace: watchCacheNamespace + kubetypes.KubeNode, - collection: kubetypes.BKTableNameBaseNode, - ttlSeconds: 6 * 60 * 60, - generalResCacheKey: general.KubeNodeKey, - validator: func(doc []byte) error { - fields := gjson.GetManyBytes(doc, kubeFields...) - for idx := range kubeFields { - if !fields[idx].Exists() { - return fmt.Errorf("field %s not exist", kubeFields[idx]) - } - } - return nil - }, - instName: func(doc []byte) string { - return gjson.GetBytes(doc, common.BKFieldName).String() - }, - instID: func(doc []byte) int64 { - return gjson.GetBytes(doc, common.BKFieldID).Int() - }, -} - -// KubeNamespaceKey kube namespace event watch key -var KubeNamespaceKey = Key{ - namespace: watchCacheNamespace + kubetypes.KubeNamespace, - collection: kubetypes.BKTableNameBaseNamespace, - ttlSeconds: 6 * 60 * 60, - generalResCacheKey: general.KubeNamespaceKey, - validator: func(doc []byte) error { - fields := gjson.GetManyBytes(doc, kubeFields...) - for idx := range kubeFields { - if !fields[idx].Exists() { - return fmt.Errorf("field %s not exist", kubeFields[idx]) - } - } - return nil - }, - instName: func(doc []byte) string { - return gjson.GetBytes(doc, common.BKFieldName).String() - }, - instID: func(doc []byte) int64 { - return gjson.GetBytes(doc, common.BKFieldID).Int() - }, -} - -// KubeWorkloadKey kube workload event watch key -var KubeWorkloadKey = Key{ - namespace: watchCacheNamespace + kubetypes.KubeWorkload, - collection: kubetypes.BKTableNameBaseWorkload, - ttlSeconds: 6 * 60 * 60, - generalResCacheKey: general.KubeWorkloadKey, - validator: func(doc []byte) error { - fields := gjson.GetManyBytes(doc, kubeFields...) 
- for idx := range kubeFields { - if !fields[idx].Exists() { - return fmt.Errorf("field %s not exist", kubeFields[idx]) - } - } - - if fields[0].Int() <= 0 { - return fmt.Errorf("invalid workload id: %s, should be integer type and > 0", fields[0].Raw) - } - return nil - }, - instName: func(doc []byte) string { - return gjson.GetBytes(doc, common.BKFieldName).String() - }, - instID: func(doc []byte) int64 { - return gjson.GetBytes(doc, common.BKFieldID).Int() - }, -} - -// KubePodKey kube Pod event watch key -// NOTE: pod event detail has container info, can not be treated as general resource cache detail -var KubePodKey = Key{ - namespace: watchCacheNamespace + kubetypes.KubePod, - collection: kubetypes.BKTableNameBasePod, - ttlSeconds: 6 * 60 * 60, - validator: func(doc []byte) error { - fields := gjson.GetManyBytes(doc, kubeFields...) - for idx := range kubeFields { - if !fields[idx].Exists() { - return fmt.Errorf("field %s not exist", kubeFields[idx]) - } - } - return nil - }, - instName: func(doc []byte) string { - return gjson.GetBytes(doc, common.BKFieldName).String() - }, - instID: func(doc []byte) int64 { - return gjson.GetBytes(doc, common.BKFieldID).Int() - }, -} - var projectFields = []string{common.BKFieldID, common.BKProjectNameField} // ProjectKey project event watch key @@ -539,20 +416,20 @@ type Key struct { // general resource detail will be stored by ResDetailKey while event related info will be stored by this key // Note: do not change the format, it will affect the way in event server to // get the details with lua scripts. -func (k Key) DetailKey(cursor string) string { - return k.namespace + ":detail:" + cursor +func (k Key) DetailKey(tenantID, cursor string) string { + return k.namespace + ":detail:" + tenantID + ":" + cursor } // GeneralResDetailKey generates the general resource detail key by chain node, in the order of instance id then oid // NOTE: only general resource detail will be stored by this key and reused by general resource detail cache, // mix-event or special event detail will all be stored by DetailKey -func (k Key) GeneralResDetailKey(node *watch.ChainNode) string { +func (k Key) GeneralResDetailKey(tenantID string, node *watch.ChainNode) string { if k.generalResCacheKey == nil || node == nil { return "" } uniqueKey, _ := k.generalResCacheKey.IDKey(node.InstanceID, node.Oid) - return k.generalResCacheKey.DetailKey(uniqueKey, node.SubResource...) + return k.generalResCacheKey.DetailKey(tenantID, uniqueKey, node.SubResource...) 
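For reference, the tenant-aware detail key introduced above is just the old cursor key with the tenant id spliced between the namespace and the cursor. A minimal, self-contained sketch of that layout follows; the namespace, tenant and cursor values are made up for illustration and are not taken from the repo.

package main

import "fmt"

// detailKey mirrors the layout used by Key.DetailKey above:
// <namespace>:detail:<tenantID>:<cursor>.
func detailKey(namespace, tenantID, cursor string) string {
	return namespace + ":detail:" + tenantID + ":" + cursor
}

func main() {
	// prints "cc:event:host:detail:tenant1:MQ==" (all values are examples)
	fmt.Println(detailKey("cc:event:host", "tenant1", "MQ=="))
}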
} // IsGeneralRes returns if the event is general resource whose detail is stored separately @@ -613,8 +490,3 @@ func (k Key) ShardingCollection(objID, supplierAccount string) string { return common.GetObjectInstTableName(objID, supplierAccount) } - -// SupplierAccount get event supplier account -func (k Key) SupplierAccount(doc []byte) string { - return gjson.GetBytes(doc, common.TenantID).String() -} diff --git a/src/source_controller/cacheservice/event/mix-event/flow.go b/src/source_controller/cacheservice/event/mix-event/flow.go index f92b0de84a..9efaca0aa2 100644 --- a/src/source_controller/cacheservice/event/mix-event/flow.go +++ b/src/source_controller/cacheservice/event/mix-event/flow.go @@ -15,23 +15,19 @@ package mixevent import ( "context" "fmt" - "strings" "time" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" - types2 "configcenter/src/common/types" "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/dal/mongo/sharding" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" - "configcenter/src/thirdparty/monitor" - "configcenter/src/thirdparty/monitor/meta" "go.mongodb.org/mongo-driver/mongo" ) @@ -41,9 +37,7 @@ type MixEventFlowOptions struct { MixKey event.Key Key event.Key WatchFields []string - Watch stream.LoopInterface - WatchDB *local.Mongo - CcDB dal.DB + Task *task.Task EventLockTTL time.Duration EventLockKey string } @@ -61,7 +55,7 @@ type MixEventFlow struct { type rearrangeEventsFunc func(rid string, es []*types.Event) ([]*types.Event, error) // parseEventFunc function type for parsing mix event into chain node and detail -type parseEventFunc func(e *types.Event, id uint64, rid string) (*watch.ChainNode, []byte, bool, error) +type parseEventFunc func(e *types.Event, id uint64, rid string) (string, *watch.ChainNode, []byte, bool, error) // NewMixEventFlow create a new mix event watch flow func NewMixEventFlow(opts MixEventFlowOptions, rearrangeEvents rearrangeEventsFunc, parseEvent parseEventFunc) ( @@ -90,55 +84,49 @@ const batchSize = 500 // RunFlow run mix event flow func (f *MixEventFlow) RunFlow(ctx context.Context) error { blog.Infof("start run %s event flow for key: %s.", f.MixKey.Namespace(), f.Key.Namespace()) - es := make(map[string]interface{}) - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: &es, - Collection: f.Key.Collection(), - StartAfterToken: nil, - }, - } - if f.Key.Collection() == common.BKTableNameBaseHost { - watchOpts.EventStruct = new(metadata.HostMapStr) - } - f.tokenHandler = newMixEventTokenHandler(f.MixKey, f.Key, f.WatchDB, f.metrics) + es := make(map[string]interface{}) - startAtTime, err := f.tokenHandler.getStartWatchTime(ctx) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", f.Key.Collection(), err) - return err - } - watchOpts.StartAtTime = startAtTime - watchOpts.WatchFatalErrorCallback = f.tokenHandler.resetWatchToken - watchOpts.Fields = f.WatchFields - - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: fmt.Sprintf("%s_%s", f.MixKey.Namespace(), f.Key.Namespace()), - WatchOpt: watchOpts, + f.tokenHandler = newMixEventTokenHandler(f.MixKey, f.Key, f.metrics) + + opts := 
&types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ + Name: fmt.Sprintf("%s_%s", f.MixKey.Namespace(), f.Key.Namespace()), + CollOpts: &types.WatchCollOptions{ + CollectionOptions: types.CollectionOptions{ + CollectionFilter: &types.CollectionFilter{ + Regex: fmt.Sprintf("_%s$", f.Key.Collection()), + }, + EventStruct: &es, + Fields: f.WatchFields, + }, + }, TokenHandler: f.tokenHandler, RetryOptions: &types.RetryOptions{ MaxRetryCount: 10, RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ + EventHandler: &types.TaskBatchHandler{ DoBatch: f.doBatch, }, BatchSize: batchSize, } - if err := f.Watch.WithBatch(opts); err != nil { - blog.Errorf("watch %s events, but watch batch failed, err: %v", f.MixKey.Namespace(), err) - return err + if f.Key.Collection() == common.BKTableNameBaseHost { + opts.CollOpts.EventStruct = new(metadata.HostMapStr) } + err := f.Task.AddLoopBatchTask(opts) + if err != nil { + blog.Errorf("watch %s events, but add watch batch task failed, err: %v", f.MixKey.Namespace(), err) + return err + } return nil } // doBatch batch handle events -func (f *MixEventFlow) doBatch(es []*types.Event) (retry bool) { +func (f *MixEventFlow) doBatch(dbInfo *types.DBInfo, es []*types.Event) (retry bool) { if len(es) == 0 { return false } @@ -165,16 +153,20 @@ func (f *MixEventFlow) doBatch(es []*types.Event) (retry bool) { return true } + if len(events) == 0 { + return false + } + // get the lock to get sequences ids. // otherwise, we can not guarantee the multiple event's id is in the right order/sequences // it should be a natural increase order. - if err = f.getLock(rid); err != nil { + if err = f.getLock(dbInfo.UUID, rid); err != nil { blog.Errorf("get %s lock failed, err: %v, rid: %s", f.MixKey.Namespace(), err, rid) return true } // release the lock when the job is done or failed. - defer f.releaseLock(rid) + defer f.releaseLock(dbInfo.UUID, rid) // last event in original events is used to generate lastEvent := es[len(es)-1] @@ -184,7 +176,7 @@ func (f *MixEventFlow) doBatch(es []*types.Event) (retry bool) { } // handle the rearranged events - retry, err = f.handleEvents(events, lastTokenData, rid) + retry, err = f.handleEvents(dbInfo, events, lastTokenData, rid) if err != nil { return retry } @@ -194,8 +186,10 @@ func (f *MixEventFlow) doBatch(es []*types.Event) (retry bool) { } // handleEvents handle the rearranged events, parse them into chain nodes and details, then insert into db and redis -func (f *MixEventFlow) handleEvents(events []*types.Event, lastTokenData mapstr.MapStr, rid string) (bool, error) { - eventIDs, err := f.WatchDB.NextSequences(context.Background(), f.MixKey.Collection(), len(events)) +func (f *MixEventFlow) handleEvents(dbInfo *types.DBInfo, events []*types.Event, lastTokenData mapstr.MapStr, + rid string) (bool, error) { + + eventIDs, err := dbInfo.WatchDB.NextSequences(context.Background(), f.MixKey.Collection(), len(events)) if err != nil { blog.Errorf("get %s event ids failed, err: %v, rid: %s", f.Key.ChainCollection(), err, rid) return true, err @@ -205,7 +199,7 @@ func (f *MixEventFlow) handleEvents(events []*types.Event, lastTokenData mapstr. pipe := redis.Client().Pipeline() needSaveDetails := false - chainNodes := make([]*watch.ChainNode, 0) + chainNodes := make(map[string][]*watch.ChainNode, 0) oids := make([]string, 0) cursorMap := make(map[string]struct{}) for index, e := range events { @@ -213,7 +207,7 @@ func (f *MixEventFlow) handleEvents(events []*types.Event, lastTokenData mapstr. 
f.metrics.CollectBasic(e) oids = append(oids, e.ID()) - chainNode, detailBytes, retry, err := f.parseEvent(e, eventIDs[index], rid) + tenantID, chainNode, detailBytes, retry, err := f.parseEvent(e, eventIDs[index], rid) if err != nil { return retry, err } @@ -225,7 +219,7 @@ func (f *MixEventFlow) handleEvents(events []*types.Event, lastTokenData mapstr. if len(detailBytes) > 0 { // if hit cursor conflict, the former cursor node's detail will be overwritten by the later one, so it // is not needed to remove the overlapped cursor node's detail again. - pipe.Set(f.MixKey.DetailKey(chainNode.Cursor), string(detailBytes), + pipe.Set(f.MixKey.DetailKey(tenantID, chainNode.Cursor), string(detailBytes), time.Duration(f.MixKey.TTLSeconds())*time.Second) needSaveDetails = true } @@ -240,12 +234,13 @@ func (f *MixEventFlow) handleEvents(events []*types.Event, lastTokenData mapstr. } cursorMap[chainNode.Cursor] = struct{}{} - chainNodes = append(chainNodes, chainNode) + chainNodes[tenantID] = append(chainNodes[tenantID], chainNode) } // if all events are invalid, set last token to the last events' token, do not need to retry for the invalid ones if len(chainNodes) == 0 { - if err := f.tokenHandler.setLastWatchToken(context.Background(), lastTokenData); err != nil { + err = f.tokenHandler.setLastWatchToken(context.Background(), dbInfo.UUID, dbInfo.WatchDB, lastTokenData) + if err != nil { f.metrics.CollectMongoError() return false, err } @@ -262,28 +257,23 @@ func (f *MixEventFlow) handleEvents(events []*types.Event, lastTokenData mapstr. } } - retry, err := f.doInsertEvents(chainNodes, lastTokenData, rid) + retry, err := f.doInsertEvents(dbInfo, chainNodes, lastTokenData, rid) if err != nil { return retry, err } blog.Infof("insert %s event for %s success, oids: %v, rid: %s", f.MixKey.Namespace(), f.Key.Collection(), oids, rid) return false, nil - } -func (f *MixEventFlow) doInsertEvents(chainNodes []*watch.ChainNode, lastTokenData map[string]interface{}, rid string) ( - bool, error) { - - count := len(chainNodes) +func (f *MixEventFlow) doInsertEvents(dbInfo *types.DBInfo, chainNodeMap map[string][]*watch.ChainNode, + lastTokenData map[string]interface{}, rid string) (bool, error) { - if count == 0 { + if len(chainNodeMap) == 0 { return false, nil } - watchDBClient := f.WatchDB.GetDBClient() - - session, err := watchDBClient.StartSession() + session, err := dbInfo.WatchDB.GetDBClient().StartSession() if err != nil { blog.Errorf("watch %s events, but start session failed, coll: %s, err: %v, rid: %s", f.MixKey.Namespace(), f.Key.Collection(), err, rid) @@ -292,49 +282,14 @@ func (f *MixEventFlow) doInsertEvents(chainNodes []*watch.ChainNode, lastTokenDa defer session.EndSession(context.Background()) // insert events into db in an transaction - txnErr, conflictError := f.insertEvents(session, chainNodes, lastTokenData, rid) - + txnErr, conflictError, conflictTenantID := f.insertEvents(dbInfo, session, chainNodeMap, lastTokenData, rid) if txnErr != nil { blog.Errorf("do insert %s events failed, err: %v, rid: %s", f.MixKey.Namespace(), txnErr, rid) - rid = rid + ":" + chainNodes[0].Oid - if conflictError != nil && len(chainNodes) >= 1 { - monitor.Collect(&meta.Alarm{ - RequestID: rid, - Type: meta.EventFatalError, - Detail: fmt.Sprintf("host identifier, but got conflict %s cursor with chain nodes", - f.Key.Collection()), - Module: types2.CC_MODULE_CACHESERVICE, - Dimension: map[string]string{"retry_conflict_nodes": "yes"}, - }) - - var conflictNode *watch.ChainNode - // get the conflict cursor - for idx 
:= range chainNodes { - if strings.Contains(conflictError.Error(), chainNodes[idx].Cursor) { - // record conflict node - conflictNode = chainNodes[idx] - // remove the conflict cursor - chainNodes = append(chainNodes[0:idx], chainNodes[idx+1:]...) - break - } - } - - if conflictNode == nil { - // this should not happen - // reduce event's one by one, then retry again. - blog.ErrorJSON("watch %s events, insert %s event with reduce node %s, remain nodes: %s, rid: %s", - f.MixKey.Namespace(), f.Key.Collection(), chainNodes[0], chainNodes[1:], rid) - - // retry insert events - return f.doInsertEvents(chainNodes[1:], lastTokenData, rid) - } - - blog.ErrorJSON("watch %s events, insert %s event with reduce node %s, remain nodes: %s, rid: %s", - f.MixKey.Namespace(), f.Key.Collection(), conflictNode, chainNodes, rid) - - // retry insert events - return f.doInsertEvents(chainNodes, lastTokenData, rid) + if conflictError != nil && len(chainNodeMap[conflictTenantID]) >= 1 { + chainNodeMap = event.ReduceChainNode(chainNodeMap, conflictTenantID, + f.MixKey.Namespace()+":"+f.Key.Collection(), txnErr, f.metrics, rid) + return f.doInsertEvents(dbInfo, chainNodeMap, lastTokenData, rid) } // if an error occurred, roll back and re-watch again @@ -346,11 +301,12 @@ func (f *MixEventFlow) doInsertEvents(chainNodes []*watch.ChainNode, lastTokenDa } // insertEvents insert events and last watch token -func (f *MixEventFlow) insertEvents(session mongo.Session, chainNodes []*watch.ChainNode, - lastTokenData map[string]interface{}, rid string) (error, error) { +func (f *MixEventFlow) insertEvents(dbInfo *types.DBInfo, session mongo.Session, + chainNodeMap map[string][]*watch.ChainNode, lastToken map[string]interface{}, rid string) (error, error, string) { // conflictError record the conflict cursor error var conflictError error + var conflictTenantID string txnErr := mongo.WithSession(context.Background(), session, func(sc mongo.SessionContext) error { if err := session.StartTransaction(); err != nil { @@ -359,23 +315,48 @@ func (f *MixEventFlow) insertEvents(session mongo.Session, chainNodes []*watch.C return err } - if err := f.WatchDB.Table(f.MixKey.ChainCollection()).Insert(sc, chainNodes); err != nil { - blog.ErrorJSON("watch %s events, but insert chain nodes for %s failed, nodes: %s, err: %v, rid: %s", - f.Key.Collection(), f.MixKey.Namespace(), chainNodes, err, rid) - f.metrics.CollectMongoError() - _ = session.AbortTransaction(context.Background()) + for tenantID, chainNodes := range chainNodeMap { + if len(chainNodes) == 0 { + continue + } + + shardingDB := mongodb.Dal("watch").Shard(sharding.NewShardOpts().WithTenant(tenantID)) + + // insert chain nodes into db + if err := shardingDB.Table(f.MixKey.ChainCollection()).Insert(sc, chainNodes); err != nil { + blog.ErrorJSON("watch %s events, but insert chain nodes for %s failed, nodes: %s, err: %v, rid: %s", + f.Key.Collection(), f.MixKey.Namespace(), chainNodes, err, rid) + f.metrics.CollectMongoError() + _ = session.AbortTransaction(context.Background()) - if event.IsConflictError(err) { - conflictError = err + if event.IsConflictError(err) { + conflictError = err + } + return err + } + + // set last watch event info + lastNode := chainNodes[len(chainNodes)-1] + lastNodeInfo := map[string]interface{}{ + common.BKFieldID: lastNode.ID, + common.BKCursorField: lastNode.Cursor, + } + + filter := map[string]interface{}{ + "_id": f.MixKey.Collection(), + common.BKFieldID: mapstr.MapStr{common.BKDBLT: lastNode.ID}, + } + + if err := 
shardingDB.Table(common.BKTableNameLastWatchEvent).Update(sc, filter, lastNodeInfo); err != nil { + blog.Errorf("insert tenant %s mix event %s coll %s last event info(%+v) failed, err: %v, rid: %s", + tenantID, f.MixKey.Namespace(), f.Key.Collection(), lastNodeInfo, err, rid) + f.metrics.CollectMongoError() + _ = session.AbortTransaction(context.Background()) + return err } - return err } - lastNode := chainNodes[len(chainNodes)-1] - lastTokenData[common.BKFieldID] = lastNode.ID - lastTokenData[common.BKCursorField] = lastNode.Cursor - lastTokenData[common.BKStartAtTimeField] = lastNode.ClusterTime - if err := f.tokenHandler.setLastWatchToken(sc, lastTokenData); err != nil { + if err := f.tokenHandler.setLastWatchToken(sc, dbInfo.UUID, dbInfo.WatchDB, lastToken); err != nil { f.metrics.CollectMongoError() _ = session.AbortTransaction(context.Background()) return err @@ -391,26 +372,27 @@ func (f *MixEventFlow) insertEvents(session mongo.Session, chainNodes []*watch.C return nil }) - return txnErr, conflictError + return txnErr, conflictError, conflictTenantID } -func (f *MixEventFlow) getLock(rid string) error { +func (f *MixEventFlow) getLock(uuid, rid string) error { + lockKey := f.EventLockKey + ":" + uuid timeout := time.After(f.EventLockTTL) for { select { case <-timeout: - return fmt.Errorf("get %s: %s lock timeout", f.MixKey.Namespace(), f.EventLockKey) + return fmt.Errorf("get %s: %s lock timeout", f.MixKey.Namespace(), lockKey) default: } - success, err := redis.Client().SetNX(context.Background(), f.EventLockKey, 1, f.EventLockTTL).Result() + success, err := redis.Client().SetNX(context.Background(), lockKey, 1, f.EventLockTTL).Result() if err != nil { - blog.Errorf("get %s: %s lock, err: %v, rid: %s", f.MixKey.Namespace(), f.EventLockKey, err, rid) + blog.Errorf("get %s: %s lock, err: %v, rid: %s", f.MixKey.Namespace(), lockKey, err, rid) return err } if !success { - blog.V(3).Infof("get %s: %s lock failed, retry later, rid: %s", f.MixKey.Namespace(), f.EventLockKey, rid) + blog.V(3).Infof("get %s: %s lock failed, retry later, rid: %s", f.MixKey.Namespace(), lockKey, rid) time.Sleep(300 * time.Millisecond) continue } @@ -420,10 +402,11 @@ func (f *MixEventFlow) getLock(rid string) error { } -func (f *MixEventFlow) releaseLock(rid string) { - _, err := redis.Client().Del(context.Background(), f.EventLockKey).Result() +func (f *MixEventFlow) releaseLock(uuid, rid string) { + lockKey := f.EventLockKey + ":" + uuid + _, err := redis.Client().Del(context.Background(), lockKey).Result() if err != nil { - blog.Errorf("delete %s lock key: %s failed, err: %v, rid: %s", f.MixKey.Namespace(), f.EventLockKey, err, rid) + blog.Errorf("delete %s lock key: %s failed, err: %v, rid: %s", f.MixKey.Namespace(), lockKey, err, rid) return } return diff --git a/src/source_controller/cacheservice/event/mix-event/handler.go b/src/source_controller/cacheservice/event/mix-event/handler.go index 6ba75c38ec..d326fa28ab 100644 --- a/src/source_controller/cacheservice/event/mix-event/handler.go +++ b/src/source_controller/cacheservice/event/mix-event/handler.go @@ -14,54 +14,52 @@ package mixevent import ( "context" + "time" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/mapstr" "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" - "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) -var _ = types.TokenHandler(&mixEventHandler{}) 
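The lock taken in getLock above is now scoped per watch DB by appending the shard uuid to the Redis key, so flows watching different shards no longer serialize on a single lock. A minimal, self-contained sketch of that pattern using go-redis follows; the client setup, key prefix and uuid are placeholders and not the repo's own redis wrapper.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/redis/go-redis/v9"
)

// tryLock keeps retrying SetNX on a per-shard key until it either acquires the
// lock or the overall TTL elapses, mirroring the retry/timeout shape above.
func tryLock(ctx context.Context, cli *redis.Client, key, uuid string, ttl time.Duration) error {
	lockKey := key + ":" + uuid
	timeout := time.After(ttl)
	for {
		select {
		case <-timeout:
			return fmt.Errorf("get %s lock timeout", lockKey)
		default:
		}
		ok, err := cli.SetNX(ctx, lockKey, 1, ttl).Result()
		if err != nil {
			return err
		}
		if ok {
			return nil
		}
		time.Sleep(300 * time.Millisecond)
	}
}

func main() {
	// assumes a locally reachable redis; address and key names are examples
	cli := redis.NewClient(&redis.Options{Addr: "127.0.0.1:6379"})
	if err := tryLock(context.Background(), cli, "host_identity:event:lock", "db-uuid-1", time.Minute); err != nil {
		fmt.Println(err)
	}
}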
- // mixEventHandler a token handler for mix event consisting of multiple types of events, stores the events' token in the // form of {"_id": $mix_key_collection, {$event_key_collection: {"token": $token, "start_at_time": $start_at_time}}... } type mixEventHandler struct { mixKey event.Key key event.Key - watchDB dal.DB metrics *event.EventMetrics } // newMixEventTokenHandler generate a new mix event token handler -func newMixEventTokenHandler(mixKey event.Key, key event.Key, watchDB dal.DB, - metrics *event.EventMetrics) *mixEventHandler { - +func newMixEventTokenHandler(mixKey event.Key, key event.Key, metrics *event.EventMetrics) *mixEventHandler { return &mixEventHandler{ mixKey: mixKey, key: key, - watchDB: watchDB, metrics: metrics, } } -// SetLastWatchToken TODO -/* SetLastWatchToken do not use this function in the mix events(set after events are successfully inserted) - when there are several masters watching db event, we use db transaction to avoid inserting duplicate data by setting - the last token after the insertion of db chain nodes in one transaction, since we have a unique index on the cursor - field, the later one will encounters an error when inserting nodes and roll back without setting the token and watch - another round from the last token of the last inserted node, thus ensures the sequence of db chain nodes. +/* +SetLastWatchToken do not use this function in the mix events(set after events are successfully inserted) +when there are several masters watching db event, we use db transaction to avoid inserting duplicate data by setting +the last token after the insertion of db chain nodes in one transaction, since we have a unique index on the cursor +field, the later one will encounter an error when inserting nodes and roll back without setting the token and watch +another round from the last token of the last inserted node, thus ensures the sequence of db chain nodes. 
*/ -func (m *mixEventHandler) SetLastWatchToken(ctx context.Context, token string) error { +func (m *mixEventHandler) SetLastWatchToken(_ context.Context, _ string, _ local.DB, _ *types.TokenInfo) error { return nil } // setLastWatchToken set last watch token(used after events are successfully inserted) -func (m *mixEventHandler) setLastWatchToken(ctx context.Context, data map[string]interface{}) error { +func (m *mixEventHandler) setLastWatchToken(ctx context.Context, uuid string, watchDB local.DB, + data map[string]any) error { + filter := map[string]interface{}{ - "_id": m.mixKey.Collection(), + "_id": watch.GenDBWatchTokenID(uuid, m.mixKey.Collection()), } // only update the needed fields to avoid erasing the previous exist fields @@ -70,112 +68,41 @@ func (m *mixEventHandler) setLastWatchToken(ctx context.Context, data map[string tokenInfo[m.key.Collection()+"."+key] = value } - // update id and cursor field if set, to compensate for the scenario of searching with an outdated but latest cursor - if id, exists := data[common.BKFieldID]; exists { - tokenInfo[common.BKFieldID] = id - } - - if cursor, exists := data[common.BKCursorField]; exists { - tokenInfo[common.BKCursorField] = cursor - } - - if err := m.watchDB.Table(common.BKTableNameWatchToken).Update(ctx, filter, tokenInfo); err != nil { - blog.Errorf("set mix event %s last watch token failed, data: %+v, err: %v", m.key.Collection(), tokenInfo, err) + if err := watchDB.Table(common.BKTableNameWatchToken).Update(ctx, filter, tokenInfo); err != nil { + blog.Errorf("set mix event %s coll %s last watch token failed, data: %+v, err: %v", m.mixKey.Namespace(), + m.key.Collection(), tokenInfo, err) return err } return nil } // GetStartWatchToken get start watch token from watch token db first, if an error occurred, get from chain db -func (m *mixEventHandler) GetStartWatchToken(ctx context.Context) (token string, err error) { +func (m *mixEventHandler) GetStartWatchToken(ctx context.Context, uuid string, watchDB local.DB) (*types.TokenInfo, + error) { + filter := map[string]interface{}{ - "_id": m.mixKey.Collection(), + "_id": watch.GenDBWatchTokenID(uuid, m.mixKey.Collection()), } - data := make(map[string]watch.LastChainNodeData) - if err := m.watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(m.key.Collection()). + data := make(map[string]types.TokenInfo) + if err := watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(m.key.Collection()). One(ctx, &data); err != nil { - if !m.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { m.metrics.CollectMongoError() blog.Errorf("get mix event start watch token, will get the last event's time and start watch, "+ "filter: %+v, err: %v", filter, err) } - tailNode := new(watch.ChainNode) - if err := m.watchDB.Table(m.mixKey.ChainCollection()).Find(nil).Fields(common.BKTokenField). - Sort(common.BKFieldID+":-1").One(context.Background(), tailNode); err != nil { - - if !m.watchDB.IsNotFoundError(err) { - m.metrics.CollectMongoError() - blog.Errorf("get mix event last watch token from mongo failed, err: %v", err) - return "", err - } - // the tail node is not exist. - return "", nil - } - return tailNode.Token, nil + // the tail node is not exist. + return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil } // check whether this field is exists or not node, exists := data[m.key.Collection()] if !exists { // watch from now on. 
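setLastWatchToken above prefixes every stored field with the watched collection name, so all event keys of one mix event share a single token document (keyed by the watch DB uuid plus the mix key collection) without overwriting each other's token or start time. A small self-contained sketch of how that update document is assembled; the collection name and values are examples only.

package main

import "fmt"

// buildTokenUpdate namespaces each field under "<collection>.<field>", matching
// the tokenInfo map built in setLastWatchToken above.
func buildTokenUpdate(coll string, data map[string]interface{}) map[string]interface{} {
	update := make(map[string]interface{})
	for k, v := range data {
		update[coll+"."+k] = v
	}
	return update
}

func main() {
	fmt.Println(buildTokenUpdate("cc_Process", map[string]interface{}{
		"token":         "resume-token",
		"start_at_time": 1700000000,
	}))
	// -> map[cc_Process.start_at_time:1700000000 cc_Process.token:resume-token]
}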
- return "", nil - } - - return node.Token, nil -} - -// resetWatchToken set watch token to empty and set the start watch time to the given one for next watch -func (m *mixEventHandler) resetWatchToken(startAtTime types.TimeStamp) error { - data := mapstr.MapStr{ - m.key.Collection(): mapstr.MapStr{ - common.BKTokenField: "", - common.BKStartAtTimeField: startAtTime, - }, - } - - filter := map[string]interface{}{ - "_id": m.mixKey.Collection(), - } - - if err := m.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { - blog.Errorf("clear watch token failed, collection: %s, data: %+v, err: %v", m.key.Collection(), data, err) - return err - } - return nil -} - -// getStartWatchTime get start watch tim of the event key in mix event token -func (m *mixEventHandler) getStartWatchTime(ctx context.Context) (*types.TimeStamp, error) { - filter := map[string]interface{}{ - "_id": m.mixKey.Collection(), - } - - data := make(map[string]watch.LastChainNodeData) - err := m.watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(m.key.Collection()).One(ctx, &data) - if err != nil { - blog.Errorf("get mix event %s start watch time, but find in watch token failed, err: %v", m.key.Collection(), - err) - - if !m.watchDB.IsNotFoundError(err) { - m.metrics.CollectMongoError() - blog.Errorf("run flow, but get start watch time failed, filter: %+v, err: %v", filter, err) - return nil, err - } - - blog.Infof("get mix event %s start watch time, but not find in watch token, start watch from a minute ago", - m.key.Collection()) - return new(types.TimeStamp), nil - } - - node, exist := data[m.key.Collection()] - if !exist { - // can not find, start watch from one minute ago. - blog.Infof("get mix event %s start watch time, but not find in watch token, start watch from a minute ago", - m.key.Collection()) - return new(types.TimeStamp), nil + return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil } - return &node.StartAtTime, nil + return &node, nil } diff --git a/src/source_controller/cacheservice/event/util.go b/src/source_controller/cacheservice/event/util.go index da44ef5445..5e8832ff4f 100644 --- a/src/source_controller/cacheservice/event/util.go +++ b/src/source_controller/cacheservice/event/util.go @@ -16,11 +16,11 @@ import ( "fmt" "strings" - "configcenter/pkg/conv" - "configcenter/src/common" - "configcenter/src/common/metadata" + "configcenter/src/common/blog" + types2 "configcenter/src/common/types" "configcenter/src/common/watch" - kubetypes "configcenter/src/kube/types" + "configcenter/src/thirdparty/monitor" + "configcenter/src/thirdparty/monitor/meta" ) var resourceKeyMap = map[watch.CursorType]Key{ @@ -38,11 +38,6 @@ var resourceKeyMap = map[watch.CursorType]Key{ watch.BizSet: BizSetKey, watch.BizSetRelation: BizSetRelationKey, watch.Plat: PlatKey, - watch.KubeCluster: KubeClusterKey, - watch.KubeNode: KubeNodeKey, - watch.KubeNamespace: KubeNamespaceKey, - watch.KubeWorkload: KubeWorkloadKey, - watch.KubePod: KubePodKey, watch.Project: ProjectKey, } @@ -69,35 +64,47 @@ func IsConflictError(err error) bool { return false } -// HostArchive TODO -type HostArchive struct { - Oid string `bson:"oid"` - Detail metadata.HostMapStr `bson:"detail"` -} - -// ObjInstTablePrefixRegex TODO -const ObjInstTablePrefixRegex = "^" + common.BKObjectInstShardingTablePrefix - -// InstAsstTablePrefixRegex TODO -const InstAsstTablePrefixRegex = "^" + common.BKObjectInstAsstShardingTablePrefix - -// ConvertLabel 
由于目前使用版本的mongodb不支持key中包含的.的查询,存入db的时候是将.以编码的方式存入,这里需要进行解码 -func ConvertLabel(podDetail map[string]interface{}) map[string]interface{} { - labels, ok := podDetail[kubetypes.LabelsField] - if !ok { - return podDetail +// ReduceChainNode remove conflict chain node, returns reduced chain nodes +func ReduceChainNode(chainNodeMap map[string][]*watch.ChainNode, tenantID, flowKey string, conflictErr error, + metrics *EventMetrics, rid string) map[string][]*watch.ChainNode { + + chainNodes := chainNodeMap[tenantID] + + rid = rid + ":" + chainNodes[0].Oid + monitor.Collect(&meta.Alarm{ + RequestID: rid, + Type: meta.EventFatalError, + Detail: fmt.Sprintf("run event flow, but got conflict %s tenant %s cursor with chain nodes", + flowKey, tenantID), + Module: types2.CC_MODULE_CACHESERVICE, + Dimension: map[string]string{"retry_conflict_nodes": "yes"}, + }) + + if len(chainNodes) <= 1 { + delete(chainNodeMap, tenantID) + return chainNodeMap } - labelMap, ok := labels.(map[string]string) - if !ok { - return podDetail + for index, reducedChainNode := range chainNodes { + if isConflictChainNode(reducedChainNode, conflictErr) { + metrics.CollectConflict() + chainNodes = append(chainNodes[:index], chainNodes[index+1:]...) + + // need do with retry with reduce + blog.ErrorJSON("run flow, insert %s tenant %s event with reduce node %s, remain nodes: %s, rid: %s", + flowKey, tenantID, reducedChainNode, chainNodes, rid) + chainNodeMap[tenantID] = chainNodes + return chainNodeMap + } } - newLabels := make(map[string]string) - for key, val := range labelMap { - newLabels[conv.DecodeDot(key)] = val - } - podDetail[kubetypes.LabelsField] = newLabels + // when no cursor conflict node is found, discard the first node and try to insert the others + blog.ErrorJSON("run flow, insert %s tenant %s event with reduce node %s, remain nodes: %s, rid: %s", + flowKey, tenantID, chainNodes[0], chainNodes[1:], rid) + chainNodeMap[tenantID] = chainNodes[1:] + return chainNodeMap +} - return podDetail +func isConflictChainNode(chainNode *watch.ChainNode, err error) bool { + return strings.Contains(err.Error(), chainNode.Cursor) && strings.Contains(err.Error(), "index_cursor") } diff --git a/src/source_controller/cacheservice/event/watch/client.go b/src/source_controller/cacheservice/event/watch/client.go index 4e846ddd36..1cc59716c5 100644 --- a/src/source_controller/cacheservice/event/watch/client.go +++ b/src/source_controller/cacheservice/event/watch/client.go @@ -21,34 +21,32 @@ import ( "configcenter/src/common/http/rest" "configcenter/src/common/json" "configcenter/src/common/metadata" - "configcenter/src/common/util" - "configcenter/src/common/util/table" "configcenter/src/common/watch" - kubetypes "configcenter/src/kube/types" "configcenter/src/source_controller/cacheservice/event" "configcenter/src/storage/dal" "configcenter/src/storage/dal/redis" daltypes "configcenter/src/storage/dal/types" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/mongodb/instancemapping" "configcenter/src/storage/stream/types" "go.mongodb.org/mongo-driver/bson/primitive" ) -// Client TODO +// Client is watch client type Client struct { // cache is cc redis client. cache redis.Client // watchDB is cc event watch database. - watchDB dal.DB + watchDB dal.Dal // db is cc main database. 
- db dal.DB + db dal.Dal } -// NewClient TODO -func NewClient(watchDB dal.DB, db dal.DB, cache redis.Client) *Client { +// NewClient new watch client +func NewClient(watchDB dal.Dal, db dal.Dal, cache redis.Client) *Client { return &Client{watchDB: watchDB, db: db, cache: cache} } @@ -61,9 +59,10 @@ func (c *Client) getLatestEvent(kit *rest.Kit, key event.Key) (*watch.ChainNode, } node := new(watch.ChainNode) - err := c.watchDB.Table(key.ChainCollection()).Find(filter).Sort(common.BKFieldID+":-1").One(kit.Ctx, node) + err := c.watchDB.Shard(kit.ShardOpts()).Table(key.ChainCollection()).Find(filter).Sort(common.BKFieldID+":-1"). + One(kit.Ctx, node) if err != nil { - if !c.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.ErrorJSON("get chain node from mongo failed, err: %s, filter: %s, rid: %s", err, filter, kit.Rid) return nil, false, fmt.Errorf("get last chain node from mongo failed, err: %v", err) } @@ -81,9 +80,10 @@ func (c *Client) getEarliestEvent(kit *rest.Kit, key event.Key) (*watch.ChainNod } node := new(watch.ChainNode) - err := c.watchDB.Table(key.ChainCollection()).Find(filter).Sort(common.BKFieldID).One(kit.Ctx, node) + err := c.watchDB.Shard(kit.ShardOpts()).Table(key.ChainCollection()).Find(filter).Sort(common.BKFieldID). + One(kit.Ctx, node) if err != nil { - if !c.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.ErrorJSON("get chain node from mongo failed, err: %s, collection: %s, filter: %s, rid: %s", err, key.ChainCollection(), filter, kit.Rid) return nil, false, fmt.Errorf("get first chain node from mongo failed, err: %v", err) @@ -146,9 +146,9 @@ func (c *Client) getEventDetailFromRedis(kit *rest.Kit, node *watch.ChainNode, f var detailKey string if key.IsGeneralRes() { - detailKey = key.GeneralResDetailKey(node) + detailKey = key.GeneralResDetailKey(kit.TenantID, node) } else { - detailKey = key.DetailKey(node.Cursor) + detailKey = key.DetailKey(kit.TenantID, node.Cursor) } detail, err := c.cache.Get(kit.Ctx, detailKey).Result() @@ -176,76 +176,6 @@ func (c *Client) getEventDetailFromRedis(kit *rest.Kit, node *watch.ChainNode, f func (c *Client) getEventDetailFromMongo(kit *rest.Kit, node *watch.ChainNode, fields []string, key event.Key) ( *string, bool, error) { - // get delete events' detail with oid from cmdb - if node.EventType == watch.Delete { - filter := map[string]interface{}{ - "oid": node.Oid, - } - - if key.Collection() == common.BKTableNameBaseInst || key.Collection() == common.BKTableNameMainlineInstance { - if len(node.SubResource) == 0 { - blog.Errorf("%s delete event chain node has no sub resource, oid: %s", key.Collection(), node.Oid) - return nil, false, nil - } - filter["coll"] = key.ShardingCollection(node.SubResource[0], node.TenantID) - } else { - filter["coll"] = key.Collection() - } - - detailFields := make([]string, len(fields)) - for index, field := range fields { - detailFields[index] = "detail." 
+ field - } - - if key.Collection() == common.BKTableNameBaseHost { - doc := new(event.HostArchive) - err := c.db.Table(common.BKTableNameDelArchive).Find(filter).Fields(detailFields...).One(kit.Ctx, doc) - if err != nil { - if c.db.IsNotFoundError(err) { - return nil, false, nil - } - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oid: %, err: %v", - key.Collection(), node.Oid, err) - return nil, false, fmt.Errorf("get archive deleted doc from mongo failed, err: %v", err) - } - - byt, err := json.Marshal(doc.Detail) - if err != nil { - blog.Errorf("received delete %s event, but marshal detail to bytes failed, oid: %s, err: %v", - key.Collection(), node.Oid, err) - return nil, false, fmt.Errorf("marshal detail failed, err: %v", err) - } - detail := string(byt) - return &detail, true, nil - } else { - delArchiveTable, exists := table.GetDelArchiveTable(key.Collection()) - if !exists { - blog.Errorf("collection %s related del archive table not exists", key.Collection()) - return nil, false, fmt.Errorf("collection %s related del archive table not exists", key.Collection()) - } - - doc := make(map[string]interface{}) - err := c.db.Table(delArchiveTable).Find(filter).Fields(detailFields...).One(kit.Ctx, &doc) - if err != nil { - if c.db.IsNotFoundError(err) { - return nil, false, nil - } - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oid: %, err: %v", - key.Collection(), node.Oid, err) - return nil, false, fmt.Errorf("get archive deleted doc from mongo failed, err: %v", err) - } - - byt, err := json.Marshal(doc["detail"]) - if err != nil { - blog.Errorf("received delete %s event, but marshal detail to bytes failed, oid: %s, err: %v", - key.Collection(), node.Oid, err) - return nil, false, fmt.Errorf("marshal detail failed, err: %v", err) - } - detail := string(byt) - return &detail, true, nil - } - } - // get current detail from mongodb with oid objectId, err := primitive.ObjectIDFromHex(node.Oid) if err != nil { @@ -269,11 +199,12 @@ func (c *Client) getEventDetailFromMongo(kit *rest.Kit, node *watch.ChainNode, f blog.Errorf("%s event chain node has no sub resource, oid: %s", key.Collection(), node.Oid) return nil, false, nil } - collection = key.ShardingCollection(node.SubResource[0], node.TenantID) + collection = key.ShardingCollection(node.SubResource[0], kit.TenantID) } - if err := c.db.Table(collection).Find(filter).Fields(fields...).One(kit.Ctx, detailMap); err != nil { - if c.db.IsNotFoundError(err) { + err = c.db.Shard(kit.ShardOpts()).Table(collection).Find(filter).Fields(fields...).One(kit.Ctx, detailMap) + if err != nil { + if mongodb.IsNotFoundError(err) { return nil, false, nil } blog.ErrorJSON("get detail from db failed, err: %s, filter: %s, rid: %s", err, filter, kit.Rid) @@ -335,9 +266,10 @@ func (c *Client) searchFollowingEventChainNodes(kit *rest.Kit, opts *searchFollo } node := new(watch.ChainNode) - err := c.watchDB.Table(opts.key.ChainCollection()).Find(filter).Fields(common.BKFieldID).One(kit.Ctx, node) + err := c.watchDB.Shard(kit.ShardOpts()).Table(opts.key.ChainCollection()).Find(filter).Fields(common.BKFieldID). 
+ One(kit.Ctx, node) if err != nil { - if !c.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.ErrorJSON("get chain node from mongo failed, err: %s, filter: %s, rid: %s", err, filter, kit.Rid) return false, nil, 0, err } @@ -348,9 +280,10 @@ func (c *Client) searchFollowingEventChainNodes(kit *rest.Kit, opts *searchFollo } data := new(watch.LastChainNodeData) - err := c.watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(common.BKFieldID).One(kit.Ctx, data) + err := c.watchDB.Shard(kit.ShardOpts()).Table(common.BKTableNameLastWatchEvent).Find(filter). + Fields(common.BKFieldID).One(kit.Ctx, data) if err != nil { - if !c.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.ErrorJSON("get last watch id failed, err: %s, filter: %s, rid: %s", err, filter, kit.Rid) return false, nil, 0, err } @@ -380,9 +313,10 @@ func (c *Client) getLastEventID(kit *rest.Kit, key event.Key) (uint64, error) { // host identifier event can use this logic too, since we've added an extra field of last id and cursor in it data := new(watch.LastChainNodeData) - err := c.watchDB.Table(common.BKTableNameWatchToken).Find(filter).Fields(common.BKFieldID).One(kit.Ctx, data) + err := c.watchDB.Shard(kit.ShardOpts()).Table(common.BKTableNameLastWatchEvent).Find(filter). + Fields(common.BKFieldID).One(kit.Ctx, data) if err != nil { - if !c.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.ErrorJSON("get last watch id failed, err: %s, filter: %s, rid: %s", err, filter, kit.Rid) return 0, err } @@ -412,8 +346,8 @@ func (c *Client) searchFollowingEventChainNodesByID(kit *rest.Kit, opt *searchFo } nodes := make([]*watch.ChainNode, 0) - if err := c.watchDB.Table(opt.key.ChainCollection()).Find(filter).Sort(common.BKFieldID).Limit(opt.limit). - All(kit.Ctx, &nodes); err != nil { + if err := c.watchDB.Shard(kit.ShardOpts()).Table(opt.key.ChainCollection()).Find(filter).Sort(common.BKFieldID). 
+ Limit(opt.limit).All(kit.Ctx, &nodes); err != nil { blog.Errorf("get chain nodes from mongo failed, err: %v, start id: %d, rid: %s", err, opt.id, kit.Rid) return nil, fmt.Errorf("get chain nodes from mongo failed, err: %v, start id: %d", err, opt.id) } @@ -434,10 +368,10 @@ func (c *Client) searchEventDetailsFromRedis(kit *rest.Kit, nodes []*watch.Chain detailKeys := make([]string, len(nodes)) for idx, node := range nodes { if key.IsGeneralRes() { - detailKeys[idx] = key.GeneralResDetailKey(node) + detailKeys[idx] = key.GeneralResDetailKey(kit.TenantID, node) continue } - detailKeys[idx] = key.DetailKey(node.Cursor) + detailKeys[idx] = key.DetailKey(kit.TenantID, node.Cursor) } results, err := c.cache.MGet(kit.Ctx, detailKeys...).Result() @@ -485,31 +419,26 @@ func (c *Client) searchEventDetailsFromMongo(kit *rest.Kit, nodes []*watch.Chain // get oids and its mapping with the detail array indexes oids := make([]primitive.ObjectID, 0) - deletedOids := make([]string, 0) - deleteInstIDs := make([]int64, 0) oidIndexMap := make(map[string][]int) coll := key.Collection() instIDs := make([]int64, 0) + delOidDetailMap := make(map[int]string) for _, node := range nodes { if node.EventType == watch.Delete { - deletedOids = append(deletedOids, node.Oid) - - if coll == common.BKTableNameBaseInst || coll == common.BKTableNameMainlineInstance { - deleteInstIDs = append(deleteInstIDs, node.InstanceID) - } - } else { - objectId, err := primitive.ObjectIDFromHex(node.Oid) - if err != nil { - blog.Errorf("get mongodb _id from oid(%s) failed, err: %v, rid: %s", node.Oid, err, kit.Rid) - return nil, fmt.Errorf("get mongodb _id from oid(%s) failed, err: %v", node.Oid, err) - } - oids = append(oids, objectId) - - if coll == common.BKTableNameBaseInst || coll == common.BKTableNameMainlineInstance { - instIDs = append(instIDs, node.InstanceID) - } + // delete event detail can not be retrieved from mongo + delOidDetailMap[errCursorIndexMap[node.Cursor]] = "{}" + continue + } + objectId, err := primitive.ObjectIDFromHex(node.Oid) + if err != nil { + blog.Errorf("get mongodb _id from oid(%s) failed, err: %v, rid: %s", node.Oid, err, kit.Rid) + return nil, fmt.Errorf("get mongodb _id from oid(%s) failed, err: %v", node.Oid, err) } + oids = append(oids, objectId) + if coll == common.BKTableNameBaseInst || coll == common.BKTableNameMainlineInstance { + instIDs = append(instIDs, node.InstanceID) + } oidIndexMap[node.Oid] = append(oidIndexMap[node.Oid], errCursorIndexMap[node.Cursor]) } @@ -523,15 +452,8 @@ func (c *Client) searchEventDetailsFromMongo(kit *rest.Kit, nodes []*watch.Chain } } - if len(deletedOids) == 0 { - return oidDetailMap, nil - } - - oidDetailMap, err := c.searchDeletedEventDetailsFromMongo(kit, coll, deletedOids, fields, deleteInstIDs, - oidIndexMap, oidDetailMap) - if err != nil { - blog.Errorf("get delete details from db failed, err: %v, oids: %+v, rid: %s", err, deletedOids, kit.Rid) - return nil, err + for idx, detail := range delOidDetailMap { + oidDetailMap[idx] = detail } return oidDetailMap, nil @@ -552,27 +474,8 @@ func (c *Client) getDetailsByOids(kit *rest.Kit, oids []primitive.ObjectID, fiel switch coll { case common.BKTableNameBaseHost: detailArr := make([]metadata.HostMapStr, 0) - if err := c.db.Table(coll).Find(filter, findOpts).Fields(fields...).All(kit.Ctx, &detailArr); err != nil { - blog.Errorf("get details from db failed, err: %v, oids: %+v, rid: %s", err, oids, kit.Rid) - return nil, fmt.Errorf("get details from mongo failed, err: %v, oids: %+v", err, oids) - } - - for _, 
detailMap := range detailArr { - objectID, ok := detailMap["_id"].(primitive.ObjectID) - if !ok { - return nil, fmt.Errorf("parse detail oid failed, oid: %+v", detailMap["_id"]) - } - delete(detailMap, "_id") - detailJson, _ := json.Marshal(detailMap) - for _, index := range oidIndexMap[objectID.Hex()] { - oidDetailMap[index] = string(detailJson) - } - } - return oidDetailMap, nil - - case kubetypes.BKTableNameBasePod: - detailArr := make([]map[string]interface{}, 0) - if err := c.db.Table(coll).Find(filter, findOpts).Fields(fields...).All(kit.Ctx, &detailArr); err != nil { + err := c.db.Shard(kit.ShardOpts()).Table(coll).Find(filter, findOpts).Fields(fields...).All(kit.Ctx, &detailArr) + if err != nil { blog.Errorf("get details from db failed, err: %v, oids: %+v, rid: %s", err, oids, kit.Rid) return nil, fmt.Errorf("get details from mongo failed, err: %v, oids: %+v", err, oids) } @@ -583,9 +486,6 @@ func (c *Client) getDetailsByOids(kit *rest.Kit, oids []primitive.ObjectID, fiel return nil, fmt.Errorf("parse detail oid failed, oid: %+v", detailMap["_id"]) } delete(detailMap, "_id") - - detailMap = event.ConvertLabel(detailMap) - detailJson, _ := json.Marshal(detailMap) for _, index := range oidIndexMap[objectID.Hex()] { oidDetailMap[index] = string(detailJson) @@ -617,8 +517,8 @@ func (c *Client) getDetailsByOids(kit *rest.Kit, oids []primitive.ObjectID, fiel } objColl := common.GetInstTableName(objID, ownerID) - if err := c.db.Table(objColl).Find(filter, findOpts).Fields(fields...).All(kit.Ctx, - &detailArr); err != nil { + if err := c.db.Shard(kit.ShardOpts()).Table(objColl).Find(filter, findOpts).Fields(fields...). + All(kit.Ctx, &detailArr); err != nil { blog.Errorf("get details from db failed, err: %v, inst ids: %+v, rid: %s", err, instIDs, kit.Rid) return nil, fmt.Errorf("get details from mongo failed, err: %v, oids: %+v", err, oids) } @@ -637,7 +537,8 @@ func (c *Client) getDetailsByOids(kit *rest.Kit, oids []primitive.ObjectID, fiel } detailArr := make([]mapStrWithOid, 0) - if err := c.db.Table(coll).Find(filter, findOpts).Fields(fields...).All(kit.Ctx, &detailArr); err != nil { + err := c.db.Shard(kit.ShardOpts()).Table(coll).Find(filter, findOpts).Fields(fields...).All(kit.Ctx, &detailArr) + if err != nil { blog.Errorf("get details from db failed, err: %v, oids: %+v, rid: %s", err, oids, kit.Rid) return nil, fmt.Errorf("get details from mongo failed, err: %v, oids: %+v", err, oids) } @@ -652,74 +553,6 @@ func (c *Client) getDetailsByOids(kit *rest.Kit, oids []primitive.ObjectID, fiel return oidDetailMap, nil } -// searchDeletedEventDetailsFromMongo search delete events' details from the cc_DelArchive table by oids -func (c *Client) searchDeletedEventDetailsFromMongo(kit *rest.Kit, coll string, deletedOids []string, fields []string, - deleteInstIDs []int64, oidIndexMap map[string][]int, oidDetailMap map[int]string) (map[int]string, error) { - - detailFields := make([]string, 0) - if len(fields) > 0 { - for _, field := range fields { - detailFields = append(detailFields, "detail."+field) - } - detailFields = append(detailFields, "oid") - } - - deleteFilter := map[string]interface{}{ - "oid": map[string]interface{}{common.BKDBIN: deletedOids}, - } - - if coll == common.BKTableNameBaseInst || coll == common.BKTableNameMainlineInstance { - deleteFilter["detail.bk_inst_id"] = map[string]interface{}{common.BKDBIN: deleteInstIDs} - } else { - deleteFilter["coll"] = coll - } - - if coll == common.BKTableNameBaseHost { - docs := make([]event.HostArchive, 0) - err := 
c.db.Table(common.BKTableNameDelArchive).Find(deleteFilter).Fields(detailFields...).All(kit.Ctx, &docs) - if err != nil { - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oids: %+v, err: %v, "+ - "rid: %s", coll, deletedOids, err, kit.Rid) - return nil, fmt.Errorf("get archive deleted docs from mongo failed, err: %v, oids: %+v", err, deletedOids) - } - - for _, doc := range docs { - detailJson, _ := json.Marshal(doc.Detail) - for _, index := range oidIndexMap[doc.Oid] { - oidDetailMap[index] = string(detailJson) - } - } - } else { - delArchiveTable, exists := table.GetDelArchiveTable(coll) - if !exists { - blog.Errorf("collection %s related del archive table not exists, rid: %s", coll, kit.Rid) - return nil, fmt.Errorf("collection %s related del archive table not exists", coll) - } - - docs := make([]map[string]interface{}, 0) - err := c.db.Table(delArchiveTable).Find(deleteFilter).Fields(detailFields...).All(kit.Ctx, &docs) - if err != nil { - blog.Errorf("get archive deleted doc for collection %s from mongodb failed, oids: %+v, err: %v, "+ - "rid: %s", coll, deletedOids, err, kit.Rid) - return nil, fmt.Errorf("get archive deleted docs from mongo failed, err: %v, oids: %+v", err, deletedOids) - } - - for _, doc := range docs { - oid := util.GetStrByInterface(doc["oid"]) - detailJson, err := json.Marshal(doc["detail"]) - if err != nil { - blog.Errorf("marshal detail failed, oid: %s, err: %v, rid: %s", oid, err, kit.Rid) - return nil, fmt.Errorf("marshal detail failed, oid: %s, err: %v", oid, err) - } - for _, index := range oidIndexMap[oid] { - oidDetailMap[index] = string(detailJson) - } - } - } - - return oidDetailMap, nil -} - type mapStrWithOid struct { Oid primitive.ObjectID `bson:"_id"` MapStr map[string]interface{} `bson:",inline"` diff --git a/src/source_controller/cacheservice/event/watch/watch.go b/src/source_controller/cacheservice/event/watch/watch.go index ef4d3b25d1..bd0c1c948d 100644 --- a/src/source_controller/cacheservice/event/watch/watch.go +++ b/src/source_controller/cacheservice/event/watch/watch.go @@ -134,10 +134,11 @@ func (c *Client) WatchWithStartFrom(kit *rest.Kit, key event.Key, opts *watch.Wa } node := new(watch.ChainNode) - err = c.watchDB.Table(key.ChainCollection()).Find(filter).Sort(common.BKFieldID).One(kit.Ctx, node) + err = c.watchDB.Shard(kit.ShardOpts()).Table(key.ChainCollection()).Find(filter).Sort(common.BKFieldID). + One(kit.Ctx, node) if err != nil { blog.ErrorJSON("get chain node from mongo failed, err: %s, filter: %s, rid: %s", err, filter, kit.Rid) - if !c.watchDB.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { return nil, kit.CCError.CCError(common.CCErrCommDBSelectFailed) } @@ -415,7 +416,7 @@ func (c *Client) getBizSetRelationEventDetailFromMongo(kit *rest.Kit, bizSetIDs } bizSets := make([]metadata.BizSetInst, 0) - err := mongodb.Client().Table(common.BKTableNameBaseBizSet).Find(bizSetCond). + err := c.db.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseBizSet).Find(bizSetCond). Fields(common.BKBizSetIDField, common.BKBizSetScopeField).All(kit.Ctx, &bizSets) if err != nil { blog.Errorf("get biz sets by cond(%+v) failed, err: %v, rid: %s", bizSetCond, err, kit.Rid) @@ -477,8 +478,8 @@ func (c *Client) getBizIDArrStrByCond(kit *rest.Kit, cond map[string]interface{} for start := uint64(0); ; start += step { oneStep := make([]metadata.BizInst, 0) - err := c.db.Table(common.BKTableNameBaseApp).Find(cond).Fields(common.BKAppIDField).Start(start). 
- Limit(step).Sort(common.BKAppIDField).All(kit.Ctx, &oneStep) + err := c.db.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseApp).Find(cond).Fields(common.BKAppIDField). + Start(start).Limit(step).Sort(common.BKAppIDField).All(kit.Ctx, &oneStep) if err != nil { blog.Errorf("get biz by cond(%+v) failed, err: %v, rid: %s", cond, err, kit.Rid) return "", err diff --git a/src/source_controller/cacheservice/service/health.go b/src/source_controller/cacheservice/service/health.go index 8905d47837..2e5dc264ec 100644 --- a/src/source_controller/cacheservice/service/health.go +++ b/src/source_controller/cacheservice/service/health.go @@ -38,15 +38,7 @@ func (s *cacheService) Healthz(req *restful.Request, resp *restful.Response) { meta.Items = append(meta.Items, zkItem) // mongodb status - mongoItem := metric.HealthItem{IsHealthy: true, Name: types.CCFunctionalityMongo} - if mongodb.Client() == nil { - mongoItem.IsHealthy = false - mongoItem.Message = "not connected" - } else if err := mongodb.Client().Ping(); err != nil { - mongoItem.IsHealthy = false - mongoItem.Message = err.Error() - } - meta.Items = append(meta.Items, mongoItem) + meta.Items = append(meta.Items, mongodb.Healthz()...) // redis status redisItem := metric.HealthItem{IsHealthy: true, Name: types.CCFunctionalityRedis} diff --git a/src/source_controller/cacheservice/service/service.go b/src/source_controller/cacheservice/service/service.go index 56fc8ef860..1a3a92f2ff 100644 --- a/src/source_controller/cacheservice/service/service.go +++ b/src/source_controller/cacheservice/service/service.go @@ -38,7 +38,8 @@ import ( "configcenter/src/source_controller/coreservice/core" "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/reflector" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" + "configcenter/src/storage/stream/types" "configcenter/src/thirdparty/logplatform/opentelemetry" "github.com/emicklei/go-restful/v3" @@ -94,10 +95,8 @@ func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, er } s.authManager = extensions.NewAuthManager(engine.CoreAPI, iamCli) - loopW, loopErr := stream.NewLoopStream(s.cfg.Mongo.GetMongoConf(), engine.ServiceManageInterface) - if loopErr != nil { - blog.Errorf("new loop stream failed, err: %v", loopErr) - return loopErr + watchTaskOpt := &types.NewTaskOptions{ + StopNotifier: make(<-chan struct{}), } event, eventErr := reflector.NewReflector(s.cfg.Mongo.GetMongoConf()) @@ -119,34 +118,27 @@ func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, er } s.cacheSet = c - watcher, watchErr := stream.NewLoopStream(s.cfg.Mongo.GetMongoConf(), engine.ServiceManageInterface) - if watchErr != nil { - blog.Errorf("new loop watch stream failed, err: %v", watchErr) - return watchErr - } - - ccDB, dbErr := local.NewMgo(s.cfg.Mongo.GetMongoConf(), time.Minute) - if dbErr != nil { - blog.Errorf("new cc mongo client failed, err: %v", dbErr) - return dbErr - } - - flowErr := flow.NewEvent(watcher, engine.ServiceManageInterface, watchDB, ccDB) + flowErr := flow.NewEvent(watchTask) if flowErr != nil { blog.Errorf("new watch event failed, err: %v", flowErr) return flowErr } - if err := identifier.NewIdentity(watcher, engine.ServiceManageInterface, watchDB, ccDB); err != nil { + if err := identifier.NewIdentity(watchTask); err != nil { blog.Errorf("new host identity event failed, err: %v", err) return err } - if err := bsrelation.NewBizSetRelation(watcher, watchDB, ccDB); err != nil { + if err := 
bsrelation.NewBizSetRelation(watchTask); err != nil { blog.Errorf("new biz set relation event failed, err: %v", err) return err } + taskErr = watchTask.Start() + if taskErr != nil { + return taskErr + } + return nil } From 301adc56b5128e9a8650a0fc665c42a2a053fd0e Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:37:30 +0800 Subject: [PATCH 06/10] feat: cache support multi-tenant --story=121320113 --- pkg/cache/full-sync-cond/types.go | 4 +- pkg/cache/general/key.go | 14 +- pkg/cache/general/keys.go | 15 - pkg/cache/general/mapping/event.go | 5 - pkg/cache/general/resource.go | 11 - pkg/tenant/event.go | 134 +++++ pkg/tenant/tenant.go | 3 +- src/common/metadata/inst.go | 1 + src/common/tablenames.go | 9 + .../cacheservice/cache/biz-topo/client.go | 10 +- .../cacheservice/cache/biz-topo/key/brief.go | 59 +++ .../cacheservice/cache/biz-topo/key/key.go | 16 +- .../cache/biz-topo/level/brief.go | 204 ++++++++ .../cache/biz-topo/level/common.go | 9 +- .../cacheservice/cache/biz-topo/level/kube.go | 104 ++-- .../cache/biz-topo/level/level.go | 7 +- .../cache/biz-topo/logics/node/node.go | 83 ++-- .../cache/biz-topo/logics/topo/queue.go | 37 +- .../cache/biz-topo/logics/topo/topo.go | 34 +- .../cacheservice/cache/biz-topo/topo.go | 59 ++- .../cacheservice/cache/biz-topo/topo/kube.go | 13 +- .../cacheservice/cache/biz-topo/topo/topo.go | 41 +- .../cacheservice/cache/biz-topo/tree/brief.go | 98 ++++ .../cacheservice/cache/biz-topo/tree/count.go | 18 +- .../cacheservice/cache/biz-topo/tree/tree.go | 6 +- .../cache/biz-topo/types/brief.go | 57 +++ .../cache/biz-topo/types/types.go | 5 + .../cache/biz-topo/watch/brief.go | 136 +++++ .../cacheservice/cache/biz-topo/watch/kube.go | 385 +++++++-------- .../cache/biz-topo/watch/watch.go | 20 +- .../cacheservice/cache/cache.go | 34 +- .../cacheservice/cache/custom/cache.go | 6 +- .../cacheservice/cache/custom/cache/count.go | 32 +- .../cacheservice/cache/custom/cache/key.go | 2 +- .../cacheservice/cache/custom/cache/label.go | 126 ++--- .../cache/custom/cache/shared_ns_rel.go | 68 +-- .../cacheservice/cache/custom/cache/string.go | 40 +- .../cacheservice/cache/custom/client.go | 8 +- .../cacheservice/cache/custom/watch/label.go | 124 ++--- .../cache/custom/watch/shared_ns_rel.go | 100 ++-- .../cacheservice/cache/custom/watch/watch.go | 42 +- .../cacheservice/cache/general/cache.go | 8 +- .../cacheservice/cache/general/cache/cache.go | 26 +- .../cache/general/cache/cache_with_id.go | 41 +- .../cache/cache_with_id_and_sub_res.go | 54 +- .../cache/general/cache/cache_with_oid.go | 33 +- .../cache/general/cache/detail.go | 103 ++-- .../cache/general/cache/full_sync_cond.go | 55 ++- .../cacheservice/cache/general/cache/host.go | 27 +- .../cache/general/cache/id_list.go | 220 ++++----- .../cache/general/cache/obj_inst.go | 24 +- .../cacheservice/cache/general/cache/util.go | 11 +- .../cacheservice/cache/general/client.go | 33 +- .../cache/general/full-sync-cond/client.go | 16 +- .../general/full-sync-cond/full_sync_cond.go | 10 +- .../cache/general/full-sync-cond/watch.go | 113 ++--- .../cacheservice/cache/general/types/types.go | 22 +- .../cacheservice/cache/general/watch/watch.go | 200 +++----- .../cacheservice/cache/mainline/biz.go | 131 ----- .../cacheservice/cache/mainline/client.go | 463 +++--------------- .../cacheservice/cache/mainline/custom.go | 336 ------------- .../cacheservice/cache/mainline/handler.go | 118 ----- .../cacheservice/cache/mainline/key.go | 89 ---- .../cacheservice/cache/mainline/key_test.go 
| 51 -- .../cacheservice/cache/mainline/logic.go | 361 -------------- .../cacheservice/cache/mainline/mainline.go | 144 +++++- .../cacheservice/cache/mainline/module.go | 133 ----- .../cacheservice/cache/mainline/set.go | 133 ----- .../cacheservice/cache/mainline/types.go | 99 +--- .../cacheservice/cache/mainline/types_test.go | 45 -- .../cache/token-handler/memory.go | 21 +- .../cacheservice/cache/token-handler/mix.go | 96 +--- .../cache/token-handler/single.go | 110 ++--- .../cacheservice/cache/tools/kube.go | 13 +- .../cacheservice/cache/topology/client.go | 61 --- .../cacheservice/cache/topology/key.go | 157 ------ .../cacheservice/cache/topology/logic.go | 421 ---------------- .../cacheservice/cache/topology/readme.md | 17 - .../cacheservice/cache/topology/topology.go | 189 ------- .../cacheservice/cache/topology/types.go | 114 ----- .../cacheservice/cache/topology/watch.go | 336 ------------- .../cacheservice/cache/topotree/path.go | 199 ++++---- .../cacheservice/event/loop/loop_watch.go | 84 ++++ .../cacheservice/event/loop/task.go | 207 ++++++++ .../cacheservice/service/cache.go | 32 +- .../cacheservice/service/service.go | 21 +- 86 files changed, 2534 insertions(+), 4822 deletions(-) create mode 100644 pkg/tenant/event.go create mode 100644 src/source_controller/cacheservice/cache/biz-topo/key/brief.go create mode 100644 src/source_controller/cacheservice/cache/biz-topo/level/brief.go create mode 100644 src/source_controller/cacheservice/cache/biz-topo/tree/brief.go create mode 100644 src/source_controller/cacheservice/cache/biz-topo/types/brief.go create mode 100644 src/source_controller/cacheservice/cache/biz-topo/watch/brief.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/biz.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/custom.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/handler.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/key.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/key_test.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/logic.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/module.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/set.go delete mode 100644 src/source_controller/cacheservice/cache/mainline/types_test.go delete mode 100644 src/source_controller/cacheservice/cache/topology/client.go delete mode 100644 src/source_controller/cacheservice/cache/topology/key.go delete mode 100644 src/source_controller/cacheservice/cache/topology/logic.go delete mode 100644 src/source_controller/cacheservice/cache/topology/readme.md delete mode 100644 src/source_controller/cacheservice/cache/topology/topology.go delete mode 100644 src/source_controller/cacheservice/cache/topology/types.go delete mode 100644 src/source_controller/cacheservice/cache/topology/watch.go create mode 100644 src/source_controller/cacheservice/event/loop/loop_watch.go create mode 100644 src/source_controller/cacheservice/event/loop/task.go diff --git a/pkg/cache/full-sync-cond/types.go b/pkg/cache/full-sync-cond/types.go index 133cba774b..7efbdbb336 100644 --- a/pkg/cache/full-sync-cond/types.go +++ b/pkg/cache/full-sync-cond/types.go @@ -31,7 +31,7 @@ import ( const ( // BKTableNameFullSyncCond is the full synchronization cache condition table - BKTableNameFullSyncCond = "FullSyncCond" + BKTableNameFullSyncCond = common.BKTableNameFullSyncCond ) // FullSyncCond is the full synchronization cache condition @@ 
-42,7 +42,7 @@ type FullSyncCond struct { IsAll bool `json:"is_all" bson:"is_all"` Interval int `json:"interval" bson:"interval"` Condition *filter.Expression `json:"condition,omitempty" bson:"condition,omitempty"` - TenantID string `json:"tenant_id" bson:"tenant_id"` + TenantID string `json:"-" bson:"tenant_id"` } // full sync cond field names diff --git a/pkg/cache/general/key.go b/pkg/cache/general/key.go index d590ccc1f0..06ce6ec7e7 100644 --- a/pkg/cache/general/key.go +++ b/pkg/cache/general/key.go @@ -76,24 +76,24 @@ func (k *Key) Resource() ResType { } // DetailKey returns the general resource detail redis key generated by id key and extra keys -func (k *Key) DetailKey(idKey string, key ...string) string { - return k.namespace + "detail:" + k.genDetailKey(idKey, key...) +func (k *Key) DetailKey(tenantID string, idKey string, key ...string) string { + return k.namespace + "detail:" + tenantID + ":" + k.genDetailKey(idKey, key...) } // UniqueKey generates a unique key of the specified type, this cache key stores the unique identifier of the resource -func (k *Key) UniqueKey(typ, key string) string { - return k.namespace + typ + ":" + key +func (k *Key) UniqueKey(typ, tenantID, key string) string { + return k.namespace + typ + ":" + tenantID + ":" + key } // IDListKey is a redis zset(sorted set) key to store all the related data ids, which is used to page id quickly, // without use mongodb's sort method, which is much more expensive. // This key's ttl is defined where it is used, it might not be the same with the detail cache's ttl. // NOTE: if the resource has sub resource, the id list key must contain the sub resource -func (k *Key) IDListKey(key ...string) string { +func (k *Key) IDListKey(tenantID string, key ...string) string { if len(key) == 0 { - return k.namespace + "id_list" + return k.namespace + "id_list:" + tenantID } - return k.namespace + "id_list:" + strings.Join(key, ":") + return k.namespace + "id_list:" + tenantID + ":" + strings.Join(key, ":") } // IDListTempKey is used to store the id list during refresh, diff --git a/pkg/cache/general/keys.go b/pkg/cache/general/keys.go index d73fe9f833..a7e567500b 100644 --- a/pkg/cache/general/keys.go +++ b/pkg/cache/general/keys.go @@ -49,16 +49,6 @@ var ( MainlineInstKey = NewKey(MainlineInstance, 6*time.Hour, [2]int{0, 30 * 60}, genIDKeyByID, genDetailKeyWithoutSubRes) // InstAsstKey is the instance association detail cache key InstAsstKey = NewKey(InstAsst, 6*time.Hour, [2]int{0, 30 * 60}, genIDKeyByID, genDetailKeyWithoutSubRes) - // KubeClusterKey is the detail cache key - KubeClusterKey = newGeneralKey(KubeCluster, 6*time.Hour, [2]int{0, 30 * 60}) - // KubeNodeKey is the detail cache key - KubeNodeKey = newGeneralKey(KubeNode, 6*time.Hour, [2]int{0, 30 * 60}) - // KubeNamespaceKey is the detail cache key - KubeNamespaceKey = newGeneralKey(KubeNamespace, 6*time.Hour, [2]int{0, 30 * 60}) - // KubeWorkloadKey is the detail cache key - KubeWorkloadKey = newGeneralKey(KubeWorkload, 6*time.Hour, [2]int{0, 30 * 60}) - // KubePodKey is the detail cache key - KubePodKey = newGeneralKey(KubePod, 6*time.Hour, [2]int{0, 30 * 60}) ) // newGeneralKey new general Key @@ -93,11 +83,6 @@ var cacheKeyMap = map[ResType]*Key{ ObjectInstance: ObjInstKey, MainlineInstance: MainlineInstKey, InstAsst: InstAsstKey, - KubeCluster: KubeClusterKey, - KubeNode: KubeNodeKey, - KubeNamespace: KubeNamespaceKey, - KubeWorkload: KubeWorkloadKey, - KubePod: KubePodKey, } // GetCacheKeyByResType get general resource detail cache key by resource type diff 
--git a/pkg/cache/general/mapping/event.go b/pkg/cache/general/mapping/event.go index 55564feffa..0cb848456f 100644 --- a/pkg/cache/general/mapping/event.go +++ b/pkg/cache/general/mapping/event.go @@ -38,11 +38,6 @@ var cursorTypeMap = map[general.ResType]watch.CursorType{ general.ObjectInstance: watch.ObjectBase, general.MainlineInstance: watch.MainlineInstance, general.InstAsst: watch.InstAsst, - general.KubeCluster: watch.KubeCluster, - general.KubeNode: watch.KubeNode, - general.KubeNamespace: watch.KubeNamespace, - general.KubeWorkload: watch.KubeWorkload, - general.KubePod: watch.KubePod, } // GetCursorTypeByResType get event watch cursor type by resource type diff --git a/pkg/cache/general/resource.go b/pkg/cache/general/resource.go index 9dbc91ec07..0bf137c1dc 100644 --- a/pkg/cache/general/resource.go +++ b/pkg/cache/general/resource.go @@ -52,16 +52,6 @@ const ( MainlineInstance ResType = "mainline_instance" // InstAsst is the resource type for instance association cache, its sub resource specifies the associated object id InstAsst ResType = "inst_asst" - // KubeCluster is the resource type for kube cluster cache - KubeCluster ResType = "kube_cluster" - // KubeNode is the resource type for kube node cache - KubeNode ResType = "kube_node" - // KubeNamespace is the resource type for kube namespace cache - KubeNamespace ResType = "kube_namespace" - // KubeWorkload is the resource type for kube workload cache, its sub resource specifies the workload type - KubeWorkload ResType = "kube_workload" - // KubePod is the resource type for kube pod cache, its event detail is pod info with containers in it - KubePod ResType = "kube_pod" ) // SupportedResTypeMap is a map whose key is resource type that is supported by general resource cache @@ -82,7 +72,6 @@ var ResTypeHasSubResMap = map[ResType]struct{}{ ObjectInstance: {}, MainlineInstance: {}, InstAsst: {}, - KubeWorkload: {}, } // ValidateWithSubRes validate ResType with sub resource diff --git a/pkg/tenant/event.go b/pkg/tenant/event.go new file mode 100644 index 0000000000..828794b0c6 --- /dev/null +++ b/pkg/tenant/event.go @@ -0,0 +1,134 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package tenant + +import ( + "sync" + + "configcenter/pkg/tenant/types" +) + +var ( + prevTenantInfo = make(map[string]types.Tenant) + tenantEventChannels = make(map[string]chan TenantEvent) + tenantEventChLock sync.RWMutex +) + +// TenantEvent is the tenant event info +type TenantEvent struct { + EventType EventType + TenantID string +} + +// EventType is the tenant event type +type EventType string + +const ( + // Create is the create or enable tenant event type + Create EventType = "create" + // Delete is the delete or disable tenant event type + Delete EventType = "delete" +) + +// NewTenantEventChan generate a new tenant event chan +func NewTenantEventChan(name string) <-chan TenantEvent { + tenantEventChLock.Lock() + defer tenantEventChLock.Unlock() + + if ch, exists := tenantEventChannels[name]; exists { + return ch + } + + eventChan := make(chan TenantEvent) + tenantEventChannels[name] = eventChan + go func() { + for _, tenant := range allTenants { + if tenant.Status == types.EnabledStatus { + eventChan <- TenantEvent{ + EventType: Create, + TenantID: tenant.TenantID, + } + } + } + }() + return eventChan +} + +// RemoveTenantEventChan remove tenant event chan +func RemoveTenantEventChan(name string) { + tenantEventChLock.Lock() + defer tenantEventChLock.Unlock() + + ch, exists := tenantEventChannels[name] + if !exists { + return + } + + close(ch) + delete(tenantEventChannels, name) +} + +// generateAndPushTenantEvent compare the tenant with the previous tenant info to generate and push event +func generateAndPushTenantEvent(tenants []types.Tenant) { + tenantEventChLock.RLock() + defer tenantEventChLock.RUnlock() + + prevTenantMap := make(map[string]types.Tenant) + + for _, tenant := range tenants { + tenantID := tenant.TenantID + prevTenantMap[tenantID] = tenant + + prevTenant, exists := prevTenantInfo[tenantID] + if !exists && tenant.Status == types.EnabledStatus { + for _, eventChan := range tenantEventChannels { + eventChan <- TenantEvent{ + EventType: Create, + TenantID: tenantID, + } + } + continue + } + + if prevTenant.Status != tenant.Status { + eventType := Create + if tenant.Status == types.DisabledStatus { + eventType = Delete + } + for _, eventChan := range tenantEventChannels { + eventChan <- TenantEvent{ + EventType: eventType, + TenantID: tenantID, + } + } + } + + delete(prevTenantInfo, tenantID) + } + + for tenantID := range prevTenantInfo { + for _, eventChan := range tenantEventChannels { + eventChan <- TenantEvent{ + EventType: Delete, + TenantID: tenantID, + } + } + } + + prevTenantInfo = prevTenantMap +} diff --git a/pkg/tenant/tenant.go b/pkg/tenant/tenant.go index a40439421b..13e75ae79b 100644 --- a/pkg/tenant/tenant.go +++ b/pkg/tenant/tenant.go @@ -99,7 +99,7 @@ func SetTenant(tenant []types.Tenant) { for _, t := range allTenants { tenantMap[t.TenantID] = &t } - + generateAndPushTenantEvent(allTenants) lock.Unlock() } @@ -110,6 +110,7 @@ func refreshTenantInfo() error { if db != nil { tenants, err = GetAllTenantsFromDB(context.Background(), db) if err != nil { + blog.Errorf("get all tenants from db failed, err: %v", err) return err } } diff --git a/src/common/metadata/inst.go b/src/common/metadata/inst.go index af68dd0ede..dc01b7c208 100644 --- a/src/common/metadata/inst.go +++ b/src/common/metadata/inst.go @@ -58,6 +58,7 @@ type ModuleInst struct { type BizInst struct { BizID int64 `bson:"bk_biz_id" mapstructure:"bk_biz_id"` BizName string `bson:"bk_biz_name" mapstructure:"bk_biz_name"` + Default int `bson:"default,omitempty"` } // BizBasicInfo TODO 
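
[Reviewer note, not part of the patch] The new tenant event channel in pkg/tenant/event.go above is the hook that lets per-tenant cache tasks start and stop as tenants are enabled or disabled. A minimal consumer sketch is shown below; it only uses the exported API added in this patch (NewTenantEventChan, RemoveTenantEventChan, TenantEvent, Create, Delete), while startTenantTask/stopTenantTask are hypothetical placeholders for the caller's own per-tenant work (for example, the per-tenant loop tasks added elsewhere in this series).

```go
// Sketch of a tenant event consumer. Assumes the configcenter module is
// available; the start/stop helpers are hypothetical illustrations only.
package main

import (
	"fmt"

	"configcenter/pkg/tenant"
)

func main() {
	// Register a named channel. Per NewTenantEventChan, tenants that are
	// already enabled are replayed to the new channel as Create events.
	events := tenant.NewTenantEventChan("example-consumer")

	// RemoveTenantEventChan("example-consumer") closes the channel and ends
	// this loop when the consumer shuts down.
	for ev := range events {
		switch ev.EventType {
		case tenant.Create:
			startTenantTask(ev.TenantID)
		case tenant.Delete:
			stopTenantTask(ev.TenantID)
		}
	}
}

// Hypothetical per-tenant start/stop hooks standing in for real cache tasks.
func startTenantTask(tenantID string) { fmt.Println("start task for tenant", tenantID) }

func stopTenantTask(tenantID string) { fmt.Println("stop task for tenant", tenantID) }
```

Note that generateAndPushTenantEvent sends on these channels while holding the read lock, so a consumer that blocks for long periods would delay tenant refresh; the sketch therefore keeps the channel loop thin and delegates real work to the per-tenant task.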
diff --git a/src/common/tablenames.go b/src/common/tablenames.go index 05218b1413..f642f0cbcc 100644 --- a/src/common/tablenames.go +++ b/src/common/tablenames.go @@ -123,6 +123,12 @@ const ( // BKTableNameWatchDBRelation is the db and watch db relation table BKTableNameWatchDBRelation = "WatchDBRelation" + + // BKTableNameFullSyncCond is the full synchronization cache condition table + BKTableNameFullSyncCond = "FullSyncCond" + + // BKTableNameCacheWatchToken is the cache event watch token table + BKTableNameCacheWatchToken = "CacheWatchToken" ) // AllTables is all table names, not include the sharding tables which is created dynamically, @@ -287,6 +293,7 @@ var platformTableMap = map[string]struct{}{ BKTableNameAPITask: {}, BKTableNameAPITaskSyncHistory: {}, BKTableNameWatchDBRelation: {}, + BKTableNameFullSyncCond: {}, } // IsPlatformTable returns if the target table is a platform table @@ -298,6 +305,8 @@ func IsPlatformTable(tableName string) bool { var platformTableWithTenantMap = map[string]struct{}{ BKTableNameAPITask: {}, BKTableNameAPITaskSyncHistory: {}, + BKTableNameFullSyncCond: {}, + BKTableNameCacheWatchToken: {}, } // IsPlatformTableWithTenant returns if the target table is a platform table with tenant id field diff --git a/src/source_controller/cacheservice/cache/biz-topo/client.go b/src/source_controller/cacheservice/cache/biz-topo/client.go index 227e3c9dba..4c0a89b259 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/client.go +++ b/src/source_controller/cacheservice/cache/biz-topo/client.go @@ -44,7 +44,7 @@ func (t *Topo) GetBizTopo(kit *rest.Kit, typ string, opt *types.GetBizTopoOption return nil, kit.CCError.Errorf(common.CCErrCommParamsIsInvalid, "opt") } - topology, err := topoKey.GetTopology(kit.Ctx, opt.BizID) + topology, err := topoKey.GetTopology(kit, opt.BizID) if err == nil { if len(*topology) != 0 { // get data from cache succeed @@ -57,14 +57,14 @@ func (t *Topo) GetBizTopo(kit *rest.Kit, typ string, opt *types.GetBizTopoOption err, kit.Rid) // do not get biz topology from cache, get it from db directly. - bizTopo, err := topo.GenBizTopo(kit.Ctx, opt.BizID, topoType, false, kit.Rid) + bizTopo, err := topo.GenBizTopo(kit, opt.BizID, topoType, false) if err != nil { blog.Errorf("generate biz: %d %s topology from db failed, err: %v, rid: %s", opt.BizID, topoType, err, kit.Rid) return nil, err } // update it to cache directly. 
- topology, err = topoKey.UpdateTopology(kit.Ctx, bizTopo) + topology, err = topoKey.UpdateTopology(kit, opt.BizID, bizTopo) if err != nil { blog.Errorf("update biz: %d %s topology cache failed, err: %v, rid: %s", opt.BizID, topoType, err, kit.Rid) // do not return error @@ -90,13 +90,13 @@ func (t *Topo) RefreshBizTopo(kit *rest.Kit, typ string, opt *types.RefreshBizTo return kit.CCError.Errorf(common.CCErrCommParamsIsInvalid, "opt") } - bizTopo, err := topo.GenBizTopo(kit.Ctx, opt.BizID, topoType, false, kit.Rid) + bizTopo, err := topo.GenBizTopo(kit, opt.BizID, topoType, false) if err != nil { blog.Errorf("generate biz: %d %s topology from db failed, err: %v, rid: %s", opt.BizID, topoType, err, kit.Rid) return err } - _, err = topoKey.UpdateTopology(kit.Ctx, bizTopo) + _, err = topoKey.UpdateTopology(kit, opt.BizID, bizTopo) if err != nil { blog.Errorf("update biz: %d %s topology cache failed, err: %v, rid: %s", opt.BizID, topoType, err, kit.Rid) return err diff --git a/src/source_controller/cacheservice/cache/biz-topo/key/brief.go b/src/source_controller/cacheservice/cache/biz-topo/key/brief.go new file mode 100644 index 0000000000..900ec31d48 --- /dev/null +++ b/src/source_controller/cacheservice/cache/biz-topo/key/brief.go @@ -0,0 +1,59 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package key + +import ( + "fmt" + "time" + + "configcenter/src/common" + "configcenter/src/common/backbone/configcenter" + "configcenter/src/common/blog" + "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" +) + +var ( + defaultBriefRefreshInterval = 15 * time.Minute + briefRefreshIntervalConfig = "cacheService.briefTopologySyncIntervalMinutes" +) + +func init() { + TopoKeyMap[types.BriefType] = Key{ + topoType: types.BriefType, + namespace: fmt.Sprintf("%stopology:%s", common.BKCacheKeyV3Prefix, types.BriefType), + ttl: 24 * time.Hour, + GetRefreshInterval: func() time.Duration { + if !configcenter.IsExist(briefRefreshIntervalConfig) { + return defaultBriefRefreshInterval + } + + duration, err := configcenter.Int(briefRefreshIntervalConfig) + if err != nil { + blog.Errorf("get brief biz topology cache refresh interval failed, err: %v, use default value", err) + return defaultBriefRefreshInterval + } + + if duration < 2 { + blog.Warnf("brief biz topology cache refresh interval %d is invalid, use default value", duration) + return defaultBriefRefreshInterval + } + + return time.Duration(duration) * time.Minute + }, + } +} diff --git a/src/source_controller/cacheservice/cache/biz-topo/key/key.go b/src/source_controller/cacheservice/cache/biz-topo/key/key.go index 54fd2ff5f7..f6d79f93d7 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/key/key.go +++ b/src/source_controller/cacheservice/cache/biz-topo/key/key.go @@ -19,10 +19,10 @@ package key import ( - "context" "fmt" "time" + "configcenter/src/common/http/rest" "configcenter/src/common/json" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" "configcenter/src/storage/driver/redis" @@ -59,24 +59,24 @@ func (k Key) TTL() time.Duration { } // BizTopoKey is the redis key to store the biz topology tree -func (k Key) BizTopoKey(biz int64) string { - return fmt.Sprintf("%s:%d", k.namespace, biz) +func (k Key) BizTopoKey(tenantID string, biz int64) string { + return fmt.Sprintf("%s:%s:%d", k.namespace, tenantID, biz) } // UpdateTopology update biz topology cache -func (k Key) UpdateTopology(ctx context.Context, topo *types.BizTopo) (*string, error) { +func (k Key) UpdateTopology(kit *rest.Kit, bizID int64, topo any) (*string, error) { js, err := json.Marshal(topo) if err != nil { - return nil, fmt.Errorf("marshal %s biz %d topology failed, err: %v", k.topoType, topo.Biz.ID, err) + return nil, fmt.Errorf("marshal %s biz %d topology failed, err: %v", k.topoType, bizID, err) } value := string(js) - return &value, redis.Client().Set(ctx, k.BizTopoKey(topo.Biz.ID), value, k.ttl).Err() + return &value, redis.Client().Set(kit.Ctx, k.BizTopoKey(kit.TenantID, bizID), value, k.ttl).Err() } // GetTopology get biz Topology from cache -func (k Key) GetTopology(ctx context.Context, biz int64) (*string, error) { - dat, err := redis.Client().Get(ctx, k.BizTopoKey(biz)).Result() +func (k Key) GetTopology(kit *rest.Kit, biz int64) (*string, error) { + dat, err := redis.Client().Get(kit.Ctx, k.BizTopoKey(kit.TenantID, biz)).Result() if err != nil { if redis.IsNilErr(err) { empty := "" diff --git a/src/source_controller/cacheservice/cache/biz-topo/level/brief.go b/src/source_controller/cacheservice/cache/biz-topo/level/brief.go new file mode 100644 index 0000000000..680b6c31c1 --- /dev/null +++ b/src/source_controller/cacheservice/cache/biz-topo/level/brief.go @@ -0,0 +1,204 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) 
available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. + */ + +package level + +import ( + "sort" + + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/http/rest" + "configcenter/src/common/mapstr" + "configcenter/src/common/metadata" + "configcenter/src/common/util" + nlgc "configcenter/src/source_controller/cacheservice/cache/biz-topo/logics/node" + "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" + "configcenter/src/storage/driver/mongodb" +) + +var briefTopLevel = newBriefTopoLevel() + +// GetBriefTopLevel get the top level of brief biz topology +func GetBriefTopLevel() LevelI { + return briefTopLevel +} + +type briefTopoLevel struct{} + +func newBriefTopoLevel() *briefTopoLevel { + return &briefTopoLevel{} +} + +// GetNodesByDB get all nodes that belongs to the topology level +func (l *briefTopoLevel) GetNodesByDB(kit *rest.Kit, bizID int64, _ []mapstr.MapStr) ([]types.Node, error) { + parentObjMap, err := l.getMainlineObjMap(kit) + if err != nil { + return nil, err + } + + topNodes := make([]types.Node, 0) + prevNodeMap := make(map[int64][]types.Node) + + for objID := common.BKInnerObjIDModule; objID != common.BKInnerObjIDApp; objID = parentObjMap[objID] { + nodes, err := l.getBriefTopoNodesByObj(kit, bizID, objID) + if err != nil { + return nil, err + } + + if len(nodes) == 0 { + prevNodeMap = make(map[int64][]types.Node) + continue + } + + for i, node := range nodes { + l.sortNodes(prevNodeMap[node.ID]) + nodes[i].SubNodes = prevNodeMap[node.ID] + } + + if objID == common.BKInnerObjIDSet { + normalNodes := make([]types.Node, 0) + for i, node := range nodes { + defaultVal, err := util.GetIntByInterface(node.Extra) + if err != nil { + blog.Errorf("parse brief set node(%+v) failed, err: %v, rid: %s", node, err, kit.Rid) + return nil, err + } + if defaultVal == common.DefaultResSetFlag { + topNodes = append(topNodes, nodes[i]) + continue + } + normalNodes = append(normalNodes, nodes[i]) + } + nodes = normalNodes + } + + if parentObjMap[objID] == common.BKInnerObjIDApp { + topNodes = append(topNodes, nodes...) 
+ break + } + + prevNodeMap = make(map[int64][]types.Node) + for _, node := range nodes { + prevNodeMap[node.ParentID] = append(prevNodeMap[node.ParentID], node) + } + } + + l.sortNodes(topNodes) + return topNodes, nil +} + +// GetNodesByCache get all nodes that belongs to the brief biz topo level +func (l *briefTopoLevel) GetNodesByCache(kit *rest.Kit, bizID int64) ([]types.Node, error) { + return l.GetNodesByDB(kit, bizID, nil) +} + +// getMainlineObjMap get mainline object to parent object map +func (l *briefTopoLevel) getMainlineObjMap(kit *rest.Kit) (map[string]string, error) { + relations := make([]metadata.Association, 0) + filter := mapstr.MapStr{ + common.AssociationKindIDField: common.AssociationKindMainline, + } + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameObjAsst).Find(filter).Fields(common.BKObjIDField, + common.BKAsstObjIDField).All(kit.Ctx, &relations) + if err != nil { + blog.Errorf("get mainline topology association failed, err: %v, rid: %s", err, kit.Rid) + return nil, err + } + + mainlineMap := make(map[string]string) + for _, relation := range relations { + if relation.ObjectID == common.BKInnerObjIDHost { + continue + } + mainlineMap[relation.ObjectID] = relation.AsstObjID + } + + return mainlineMap, nil +} + +func (l *briefTopoLevel) getBriefTopoNodesByObj(kit *rest.Kit, bizID int64, objID string) ([]types.Node, error) { + tableName := common.GetInstTableName(objID, kit.TenantID) + cond := mapstr.MapStr{common.BKAppIDField: bizID} + + idField := common.GetInstIDField(objID) + nameField := common.GetInstNameField(objID) + parentField := common.BKParentIDField + fields := []string{idField, nameField} + switch objID { + case common.BKInnerObjIDSet: + fields = append(fields, common.BKDefaultField) + case common.BKInnerObjIDModule: + fields = append(fields, common.BKDefaultField) + parentField = common.BKSetIDField + } + fields = append(fields, parentField) + + nodes, err := nlgc.PagedGetNodes(kit, tableName, cond, fields, l.nodeParser(objID, idField, nameField, parentField)) + if err != nil { + return nil, err + } + + return nodes, nil +} + +func (l *briefTopoLevel) nodeParser(objID, idField, nameField, parentIDField string) nlgc.NodeParser { + return func(kit *rest.Kit, data []mapstr.MapStr) ([]types.Node, error) { + nodes := make([]types.Node, len(data)) + for i, item := range data { + id, err := util.GetInt64ByInterface(item[idField]) + if err != nil { + blog.Errorf("parse %s brief node id failed, err: %v, item: %+v, rid: %s", objID, err, item, kit.Rid) + return nil, err + } + + parentID, err := util.GetInt64ByInterface(item[parentIDField]) + if err != nil { + blog.Errorf("parse %s brief node parent id failed, err: %v, item: %+v, rid: %s", objID, err, item, + kit.Rid) + return nil, err + } + + nodes[i] = types.Node{ + Kind: objID, + ID: id, + Name: util.GetStrByInterface(item[nameField]), + ParentID: parentID, + } + + switch objID { + case common.BKInnerObjIDSet, common.BKInnerObjIDModule: + defaultVal, err := util.GetInt64ByInterface(item[common.BKDefaultField]) + if err != nil { + blog.Errorf("parse %s brief node default value failed, err: %v, item: %+v, rid: %s", objID, err, + item, kit.Rid) + return nil, err + } + nodes[i].Extra = defaultVal + } + } + + return nodes, nil + } +} + +// sortNodes sort nodes by name +func (l *briefTopoLevel) sortNodes(nodes []types.Node) { + sort.Slice(nodes, func(i, j int) bool { + return nodes[i].Name < nodes[j].Name + }) +} diff --git a/src/source_controller/cacheservice/cache/biz-topo/level/common.go 
b/src/source_controller/cacheservice/cache/biz-topo/level/common.go index f7a3e7b96b..27b212633f 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/level/common.go +++ b/src/source_controller/cacheservice/cache/biz-topo/level/common.go @@ -19,8 +19,7 @@ package level import ( - "context" - + "configcenter/src/common/http/rest" "configcenter/src/source_controller/cacheservice/cache/biz-topo/key" nodelgc "configcenter/src/source_controller/cacheservice/cache/biz-topo/logics/node" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" @@ -42,10 +41,10 @@ func newCommonCacheLevel(topoType types.TopoType, nextLevel LevelI, kinds ...str } // GetNodesByCache get topo nodes info by cache -func (l *commonCacheLevel) GetNodesByCache(ctx context.Context, bizID int64, rid string) ([]types.Node, error) { +func (l *commonCacheLevel) GetNodesByCache(kit *rest.Kit, bizID int64) ([]types.Node, error) { allNodes := make([]types.Node, 0) for _, kind := range l.kinds { - nodes, err := nodelgc.GetNodeInfoCache(l.topoKey, bizID, kind, rid) + nodes, err := nodelgc.GetNodeInfoCache(kit, l.topoKey, bizID, kind) if err != nil { return nil, err } @@ -56,7 +55,7 @@ func (l *commonCacheLevel) GetNodesByCache(ctx context.Context, bizID int64, rid return allNodes, nil } - childNodes, err := l.nextLevel.GetNodesByCache(ctx, bizID, rid) + childNodes, err := l.nextLevel.GetNodesByCache(kit, bizID) if err != nil { return nil, err } diff --git a/src/source_controller/cacheservice/cache/biz-topo/level/kube.go b/src/source_controller/cacheservice/cache/biz-topo/level/kube.go index c15ab6182f..4761a16215 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/level/kube.go +++ b/src/source_controller/cacheservice/cache/biz-topo/level/kube.go @@ -18,11 +18,11 @@ package level import ( - "context" "strconv" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/util" "configcenter/src/kube/types" @@ -52,16 +52,14 @@ func newKubeClusterLevel() *kubeClusterLevel { } // GetNodesByDB get all nodes that belongs to the topology level -func (l *kubeClusterLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []mapstr.MapStr, rid string) ( - []topotypes.Node, error) { - +func (l *kubeClusterLevel) GetNodesByDB(kit *rest.Kit, bizID int64, _ []mapstr.MapStr) ([]topotypes.Node, error) { sharedCond := mapstr.MapStr{types.BKBizIDField: bizID} relations := make([]types.NsSharedClusterRel, 0) - err := mongodb.Client().Table(types.BKTableNameNsSharedClusterRel).Find(sharedCond). - Fields(types.BKClusterIDFiled).All(ctx, &relations) + err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameNsSharedClusterRel).Find(sharedCond). 
+ Fields(types.BKClusterIDFiled).All(kit.Ctx, &relations) if err != nil { - blog.Errorf("list kube shared cluster rel failed, err: %v, cond: %+v, rid: %v", err, sharedCond, rid) + blog.Errorf("list kube shared cluster rel failed, err: %v, cond: %+v, rid: %v", err, sharedCond, kit.Rid) return nil, err } @@ -78,12 +76,12 @@ func (l *kubeClusterLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []ma } fields := []string{types.BKIDField, types.KubeNameField} - nodes, err := nlgc.PagedGetNodes(ctx, types.BKTableNameBaseCluster, cond, fields, l.clusterNodeParser(), rid) + nodes, err := nlgc.PagedGetNodes(kit, types.BKTableNameBaseCluster, cond, fields, l.clusterNodeParser()) if err != nil { return nil, err } - err = nlgc.CrossCompareNodeInfoCache(l.topoKey, bizID, types.KubeCluster, nodes, rid) + err = nlgc.CrossCompareNodeInfoCache(kit, l.topoKey, bizID, types.KubeCluster, nodes) if err != nil { return nil, err } @@ -92,7 +90,7 @@ func (l *kubeClusterLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []ma return nodes, nil } - childNodes, err := l.nextLevel.GetNodesByDB(ctx, bizID, nil, rid) + childNodes, err := l.nextLevel.GetNodesByDB(kit, bizID, nil) if err != nil { return nil, err } @@ -102,12 +100,12 @@ func (l *kubeClusterLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []ma } func (l *kubeClusterLevel) clusterNodeParser() nlgc.NodeParser { - return func(ctx context.Context, data []mapstr.MapStr, rid string) ([]topotypes.Node, error) { + return func(kit *rest.Kit, data []mapstr.MapStr) ([]topotypes.Node, error) { nodes := make([]topotypes.Node, len(data)) for i, item := range data { id, err := util.GetInt64ByInterface(item[types.BKIDField]) if err != nil { - blog.Errorf("parse kube cluster id failed, err: %v, item: %+v, rid: %s", err, item, rid) + blog.Errorf("parse kube cluster id failed, err: %v, item: %+v, rid: %s", err, item, kit.Rid) return nil, err } @@ -133,21 +131,19 @@ func newKubeNsLevel() *kubeNsLevel { } // GetNodesByDB get all nodes that belongs to the topology level -func (l *kubeNsLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []mapstr.MapStr, rid string) ([]topotypes.Node, - error) { - - cond, err := tools.GenKubeSharedNsCond(ctx, bizID, types.BKIDField, rid) +func (l *kubeNsLevel) GetNodesByDB(kit *rest.Kit, bizID int64, _ []mapstr.MapStr) ([]topotypes.Node, error) { + cond, err := tools.GenKubeSharedNsCond(kit, bizID, types.BKIDField) if err != nil { return nil, err } fields := []string{types.BKIDField, types.KubeNameField, types.BKClusterIDFiled} - nodes, err := nlgc.PagedGetNodes(ctx, types.BKTableNameBaseNamespace, cond, fields, l.nsNodeParser, rid) + nodes, err := nlgc.PagedGetNodes(kit, types.BKTableNameBaseNamespace, cond, fields, l.nsNodeParser) if err != nil { return nil, err } - err = nlgc.CrossCompareNodeInfoCache(l.topoKey, bizID, types.KubeNamespace, nodes, rid) + err = nlgc.CrossCompareNodeInfoCache(kit, l.topoKey, bizID, types.KubeNamespace, nodes) if err != nil { return nil, err } @@ -170,7 +166,7 @@ func (l *kubeNsLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []mapstr. nextConds = append(nextConds, mapstr.MapStr{types.BKNamespaceIDField: mapstr.MapStr{common.BKDBIN: nsIDs}}) } - childNodes, err := l.nextLevel.GetNodesByDB(ctx, bizID, nextConds, rid) + childNodes, err := l.nextLevel.GetNodesByDB(kit, bizID, nextConds) if err != nil { return nil, err } @@ -180,19 +176,19 @@ func (l *kubeNsLevel) GetNodesByDB(ctx context.Context, bizID int64, _ []mapstr. 
return nodes, nil } -func (l *kubeNsLevel) nsNodeParser(ctx context.Context, data []mapstr.MapStr, rid string) ([]topotypes.Node, error) { +func (l *kubeNsLevel) nsNodeParser(kit *rest.Kit, data []mapstr.MapStr) ([]topotypes.Node, error) { // parse nodes nodes := make([]topotypes.Node, len(data)) for i, item := range data { id, err := util.GetInt64ByInterface(item[types.BKIDField]) if err != nil { - blog.Errorf("parse kube ns id failed, err: %v, item: %+v, rid: %s", err, item, rid) + blog.Errorf("parse kube ns id failed, err: %v, item: %+v, rid: %s", err, item, kit.Rid) return nil, err } parentID, err := util.GetInt64ByInterface(item[types.BKClusterIDFiled]) if err != nil { - blog.Errorf("parse kube ns cluster id failed, err: %v, item: %+v, rid: %s", err, item, rid) + blog.Errorf("parse kube ns cluster id failed, err: %v, item: %+v, rid: %s", err, item, kit.Rid) return nil, err } @@ -217,11 +213,9 @@ func newKubeWlLevel() *kubeWlLevel { } // GetNodesByDB get all nodes that belongs to the topology level -func (l *kubeWlLevel) GetNodesByDB(ctx context.Context, bizID int64, conds []mapstr.MapStr, rid string) ( - []topotypes.Node, error) { - +func (l *kubeWlLevel) GetNodesByDB(kit *rest.Kit, bizID int64, conds []mapstr.MapStr) ([]topotypes.Node, error) { if len(conds) == 0 { - cond, err := tools.GenKubeSharedNsCond(ctx, bizID, types.BKNamespaceIDField, rid) + cond, err := tools.GenKubeSharedNsCond(kit, bizID, types.BKNamespaceIDField) if err != nil { return nil, err } @@ -238,7 +232,7 @@ func (l *kubeWlLevel) GetNodesByDB(ctx context.Context, bizID int64, conds []map return nil, err } - nodes, err := nlgc.PagedGetNodes(ctx, table, cond, fields, wlNodeParser(wlType, bizID), rid) + nodes, err := nlgc.PagedGetNodes(kit, table, cond, fields, wlNodeParser(wlType, bizID)) if err != nil { return nil, err } @@ -247,7 +241,7 @@ func (l *kubeWlLevel) GetNodesByDB(ctx context.Context, bizID int64, conds []map allNodes = append(allNodes, nodes...) 
} - err := nlgc.CrossCompareNodeInfoCache(l.topoKey, bizID, string(wlType), wlNodes, rid) + err := nlgc.CrossCompareNodeInfoCache(kit, l.topoKey, bizID, string(wlType), wlNodes) if err != nil { return nil, err } @@ -257,7 +251,7 @@ func (l *kubeWlLevel) GetNodesByDB(ctx context.Context, bizID int64, conds []map } func wlNodeParser(wlType types.WorkloadType, bizID int64) nlgc.NodeParser { - return func(ctx context.Context, data []mapstr.MapStr, rid string) ([]topotypes.Node, error) { + return func(kit *rest.Kit, data []mapstr.MapStr) ([]topotypes.Node, error) { zeroCount := int64(0) nodes := make([]topotypes.Node, len(data)) @@ -265,13 +259,13 @@ func wlNodeParser(wlType types.WorkloadType, bizID int64) nlgc.NodeParser { for i, item := range data { id, err := util.GetInt64ByInterface(item[types.BKIDField]) if err != nil { - blog.Errorf("parse kube wl id failed, err: %v, item: %+v, rid: %s", err, item, rid) + blog.Errorf("parse kube wl id failed, err: %v, item: %+v, rid: %s", err, item, kit.Rid) return nil, err } parentID, err := util.GetInt64ByInterface(item[types.BKNamespaceIDField]) if err != nil { - blog.Errorf("parse kube wl cluster id failed, err: %v, item: %+v, rid: %s", err, item, rid) + blog.Errorf("parse kube wl cluster id failed, err: %v, item: %+v, rid: %s", err, item, kit.Rid) return nil, err } @@ -285,13 +279,13 @@ func wlNodeParser(wlType types.WorkloadType, bizID int64) nlgc.NodeParser { wlIDs[i] = id } - return fillKubeWlNodeCount(ctx, bizID, string(wlType), wlIDs, nodes, rid) + return fillKubeWlNodeCount(kit, bizID, string(wlType), wlIDs, nodes) } } // fillKubeWlNodeCount fill workload nodes with container count -func fillKubeWlNodeCount(ctx context.Context, bizID int64, kind string, wlIDs []int64, nodes []topotypes.Node, - rid string) ([]topotypes.Node, error) { +func fillKubeWlNodeCount(kit *rest.Kit, bizID int64, kind string, wlIDs []int64, nodes []topotypes.Node) ( + []topotypes.Node, error) { if len(wlIDs) == 0 { return nodes, nil @@ -304,14 +298,14 @@ func fillKubeWlNodeCount(ctx context.Context, bizID int64, kind string, wlIDs [] } pods := make([]types.Pod, 0) - if err := mongodb.Client().Table(types.BKTableNameBasePod).Find(podCond).Fields(types.BKIDField, - types.RefIDField).All(ctx, &pods); err != nil { - blog.Errorf("get pod ids failed, cond: %+v, err: %v, rid: %s", podCond, err, rid) + if err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameBasePod).Find(podCond).Fields(types.BKIDField, + types.RefIDField).All(kit.Ctx, &pods); err != nil { + blog.Errorf("get pod ids failed, cond: %+v, err: %v, rid: %s", podCond, err, kit.Rid) return nil, err } if len(pods) == 0 { - if err := nlgc.DeleteNodeCountCache(key.TopoKeyMap[topotypes.KubeType], bizID, kind, wlIDs, rid); err != nil { + if err := nlgc.DeleteNodeCountCache(kit, key.TopoKeyMap[topotypes.KubeType], bizID, kind, wlIDs); err != nil { return nil, err } return nodes, nil @@ -330,14 +324,14 @@ func fillKubeWlNodeCount(ctx context.Context, bizID int64, kind string, wlIDs [] } containers := make([]types.Container, 0) - if err := mongodb.Client().Table(types.BKTableNameBaseContainer).Find(containerCond).Fields(types.BKPodIDField). - All(ctx, &containers); err != nil { - blog.Errorf("get containers failed, cond: %+v, err: %v, rid: %s", containerCond, err, rid) + if err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameBaseContainer).Find(containerCond). 
+ Fields(types.BKPodIDField).All(kit.Ctx, &containers); err != nil { + blog.Errorf("get containers failed, cond: %+v, err: %v, rid: %s", containerCond, err, kit.Rid) return nil, err } if len(containers) == 0 { - if err := nlgc.DeleteNodeCountCache(key.TopoKeyMap[topotypes.KubeType], bizID, kind, wlIDs, rid); err != nil { + if err := nlgc.DeleteNodeCountCache(kit, key.TopoKeyMap[topotypes.KubeType], bizID, kind, wlIDs); err != nil { return nil, err } return nodes, nil @@ -355,20 +349,20 @@ func fillKubeWlNodeCount(ctx context.Context, bizID int64, kind string, wlIDs [] } // add workload topo nodes count cache - if err := nlgc.AddNodeCountCache(key.TopoKeyMap[topotypes.KubeType], bizID, kind, wlCountMap, rid); err != nil { + if err := nlgc.AddNodeCountCache(kit, key.TopoKeyMap[topotypes.KubeType], bizID, kind, wlCountMap); err != nil { return nil, err } return nodes, nil } -func (l *kubeWlLevel) GetNodesByCache(ctx context.Context, bizID int64, rid string) ([]topotypes.Node, error) { +func (l *kubeWlLevel) GetNodesByCache(kit *rest.Kit, bizID int64) ([]topotypes.Node, error) { allNodes := make([]topotypes.Node, 0) for _, wlType := range types.GetWorkLoadObjects() { kind := string(wlType) // get nodes from cache - nodes, err := nlgc.GetNodeInfoCache(l.topoKey, bizID, kind, rid) + nodes, err := nlgc.GetNodeInfoCache(kit, l.topoKey, bizID, kind) if err != nil { return nil, err } @@ -378,7 +372,7 @@ func (l *kubeWlLevel) GetNodesByCache(ctx context.Context, bizID int64, rid stri for _, node := range nodes { pagedNodes = append(pagedNodes, node) if len(pagedNodes) == common.BKMaxPageSize { - if err = l.fillWlNodesCountFromCache(ctx, bizID, kind, pagedNodes, rid); err != nil { + if err = l.fillWlNodesCountFromCache(kit, bizID, kind, pagedNodes); err != nil { return nil, err } allNodes = append(allNodes, pagedNodes...) @@ -387,7 +381,7 @@ func (l *kubeWlLevel) GetNodesByCache(ctx context.Context, bizID int64, rid stri } if len(pagedNodes) > 0 { - if err = l.fillWlNodesCountFromCache(ctx, bizID, kind, pagedNodes, rid); err != nil { + if err = l.fillWlNodesCountFromCache(kit, bizID, kind, pagedNodes); err != nil { return nil, err } allNodes = append(allNodes, pagedNodes...) 
@@ -397,17 +391,15 @@ func (l *kubeWlLevel) GetNodesByCache(ctx context.Context, bizID int64, rid stri return allNodes, nil } -func (l *kubeWlLevel) fillWlNodesCountFromCache(ctx context.Context, bizID int64, kind string, - nodes []topotypes.Node, rid string) error { - +func (l *kubeWlLevel) fillWlNodesCountFromCache(kit *rest.Kit, bizID int64, kind string, nodes []topotypes.Node) error { cntKeys := make([]string, len(nodes)) for i, node := range nodes { - cntKeys[i] = nlgc.GenNodeInfoCntKey(l.topoKey, bizID, kind, node.ID) + cntKeys[i] = nlgc.GenNodeInfoCntKey(l.topoKey, kit.TenantID, bizID, kind, node.ID) } - cntRes, err := redis.Client().MGet(ctx, cntKeys...).Result() + cntRes, err := redis.Client().MGet(kit.Ctx, cntKeys...).Result() if err != nil { - blog.Errorf("get node count cache by keys: %+v failed, err: %v, rid: %s", cntKeys, err, rid) + blog.Errorf("get node count cache by keys: %+v failed, err: %v, rid: %s", cntKeys, err, kit.Rid) return err } @@ -420,14 +412,16 @@ func (l *kubeWlLevel) fillWlNodesCountFromCache(ctx context.Context, bizID int64 cntStr, ok := cnt.(string) if !ok { - blog.Errorf("%s workload %d count cache %v type %T is invalid, rid: %s", kind, nodes[i].ID, cnt, cnt, rid) + blog.Errorf("%s workload %d count cache %v type %T is invalid, rid: %s", kind, nodes[i].ID, cnt, cnt, + kit.Rid) nodes[i].Count = &zero continue } count, err := strconv.ParseInt(cntStr, 10, 64) if err != nil { - blog.Errorf("parse %s wl %d count cache %s failed, err: %v, rid: %s", kind, nodes[i].ID, cntStr, err, rid) + blog.Errorf("parse %s wl %d count cache %s failed, err: %v, rid: %s", kind, nodes[i].ID, cntStr, err, + kit.Rid) return err } diff --git a/src/source_controller/cacheservice/cache/biz-topo/level/level.go b/src/source_controller/cacheservice/cache/biz-topo/level/level.go index 1aef50143f..fdf00cfaa8 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/level/level.go +++ b/src/source_controller/cacheservice/cache/biz-topo/level/level.go @@ -19,14 +19,13 @@ package level import ( - "context" - + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" ) // LevelI is the interface for topology tree level type LevelI interface { - GetNodesByDB(ctx context.Context, bizID int64, cond []mapstr.MapStr, rid string) ([]types.Node, error) - GetNodesByCache(ctx context.Context, bizID int64, rid string) ([]types.Node, error) + GetNodesByDB(kit *rest.Kit, bizID int64, cond []mapstr.MapStr) ([]types.Node, error) + GetNodesByCache(kit *rest.Kit, bizID int64) ([]types.Node, error) } diff --git a/src/source_controller/cacheservice/cache/biz-topo/logics/node/node.go b/src/source_controller/cacheservice/cache/biz-topo/logics/node/node.go index 09adb09ef2..5a93e1b86c 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/logics/node/node.go +++ b/src/source_controller/cacheservice/cache/biz-topo/logics/node/node.go @@ -25,6 +25,7 @@ import ( "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/source_controller/cacheservice/cache/biz-topo/key" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" @@ -38,8 +39,8 @@ import ( const step = 500 // PagedGetNodes loop getting paged nodes from db -func PagedGetNodes(ctx context.Context, table string, nodeCond mapstr.MapStr, fields []string, parser NodeParser, - rid string) ([]types.Node, error) { +func PagedGetNodes(kit *rest.Kit, table string, 
nodeCond mapstr.MapStr, fields []string, parser NodeParser) ( + []types.Node, error) { nodes := make([]types.Node, 0) @@ -47,10 +48,10 @@ func PagedGetNodes(ctx context.Context, table string, nodeCond mapstr.MapStr, fi findOpt := dbtypes.NewFindOpts().SetWithObjectID(true) for { data := make([]mapstr.MapStr, 0) - err := mongodb.Client().Table(table).Find(cond, findOpt).Fields(fields...).Sort("_id").Limit(step). - All(ctx, &data) + err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond, findOpt).Fields(fields...).Sort("_id"). + Limit(step).All(kit.Ctx, &data) if err != nil { - blog.Errorf("get node data failed, table: %s, cond: %+v, err: %v, rid: %s", table, cond, err, rid) + blog.Errorf("get node data failed, table: %s, cond: %+v, err: %v, rid: %s", table, cond, err, kit.Rid) return nil, err } @@ -58,7 +59,7 @@ func PagedGetNodes(ctx context.Context, table string, nodeCond mapstr.MapStr, fi break } - parsed, err := parser(ctx, data, rid) + parsed, err := parser(kit, data) if err != nil { return nil, err } @@ -75,7 +76,7 @@ func PagedGetNodes(ctx context.Context, table string, nodeCond mapstr.MapStr, fi } // NodeParser parse db node data to topo nodes -type NodeParser func(ctx context.Context, data []mapstr.MapStr, rid string) ([]types.Node, error) +type NodeParser func(kit *rest.Kit, data []mapstr.MapStr) ([]types.Node, error) // CombineChildNodes combine parent nodes with child nodes info func CombineChildNodes(nodes, childNodes []types.Node) []types.Node { @@ -103,25 +104,25 @@ func CombineChildNodes(nodes, childNodes []types.Node) []types.Node { } // GenBizNodeListKey generate biz to topo node ids list cache key -func GenBizNodeListKey(topoKey key.Key, bizID int64, kind string) string { - return fmt.Sprintf("%s:%s:list", topoKey.BizTopoKey(bizID), kind) +func GenBizNodeListKey(topoKey key.Key, tenantID string, bizID int64, kind string) string { + return fmt.Sprintf("%s:%s:list", topoKey.BizTopoKey(tenantID, bizID), kind) } // GenNodeInfoKey generate biz topo node info separate cache key -func GenNodeInfoKey(topoKey key.Key, bizID int64, kind string, id int64) string { - return fmt.Sprintf("%s:%s:%d", topoKey.BizTopoKey(bizID), kind, id) +func GenNodeInfoKey(topoKey key.Key, tenantID string, bizID int64, kind string, id int64) string { + return fmt.Sprintf("%s:%s:%d", topoKey.BizTopoKey(tenantID, bizID), kind, id) } // AddNodeInfoCache add biz topo nodes info cache by kind -func AddNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes []types.Node, rid string) error { +func AddNodeInfoCache(kit *rest.Kit, topoKey key.Key, bizID int64, kind string, nodes []types.Node) error { pip := redis.Client().Pipeline() defer pip.Close() - listKey := GenBizNodeListKey(topoKey, bizID, kind) + listKey := GenBizNodeListKey(topoKey, kit.TenantID, bizID, kind) ids := make([]interface{}, len(nodes)) for i, node := range nodes { - nodeKey := GenNodeInfoKey(topoKey, bizID, kind, node.ID) + nodeKey := GenNodeInfoKey(topoKey, kit.TenantID, bizID, kind, node.ID) pip.Set(nodeKey, fmt.Sprintf(`{"id":%d,"nm":"%s","par":%d}`, node.ID, node.Name, node.ParentID), topoKey.TTL()) ids[i] = node.ID } @@ -130,23 +131,23 @@ func AddNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes []types.N _, err := pip.Exec() if err != nil { - blog.Errorf("cache biz %d topo nodes info failed, err: %v, nodes: %+v, rid: %s", bizID, err, nodes, rid) + blog.Errorf("cache biz %d topo nodes info failed, err: %v, nodes: %+v, rid: %s", bizID, err, nodes, kit.Rid) return err } return nil } // DeleteNodeInfoCache delete 
biz topo nodes info cache by kind -func DeleteNodeInfoCache(topoKey key.Key, bizID int64, kind string, ids []int64, rid string) error { +func DeleteNodeInfoCache(kit *rest.Kit, topoKey key.Key, bizID int64, kind string, ids []int64) error { pip := redis.Client().Pipeline() defer pip.Close() - listKey := GenBizNodeListKey(topoKey, bizID, kind) + listKey := GenBizNodeListKey(topoKey, kit.TenantID, bizID, kind) pip.Expire(listKey, topoKey.TTL()) idList := make([]interface{}, len(ids)) for i, id := range ids { - nodeKey := GenNodeInfoKey(topoKey, bizID, kind, id) + nodeKey := GenNodeInfoKey(topoKey, kit.TenantID, bizID, kind, id) pip.Del(nodeKey) idList[i] = id } @@ -154,17 +155,17 @@ func DeleteNodeInfoCache(topoKey key.Key, bizID int64, kind string, ids []int64, _, err := pip.Exec() if err != nil { - blog.Errorf("delete biz %d topo nodes info cache failed, err: %v, ids: %+v, rid: %s", bizID, err, ids, rid) + blog.Errorf("delete biz %d topo nodes info cache failed, err: %v, ids: %+v, rid: %s", bizID, err, ids, kit.Rid) return err } return nil } // GetNodeInfoCache get biz topo nodes info cache by kind -func GetNodeInfoCache(topoKey key.Key, bizID int64, kind string, rid string) ([]types.Node, error) { +func GetNodeInfoCache(kit *rest.Kit, topoKey key.Key, bizID int64, kind string) ([]types.Node, error) { ctx := context.Background() - listKey := GenBizNodeListKey(topoKey, bizID, kind) + listKey := GenBizNodeListKey(topoKey, kit.TenantID, bizID, kind) cursor := uint64(0) @@ -172,7 +173,7 @@ func GetNodeInfoCache(topoKey key.Key, bizID int64, kind string, rid string) ([] for { ids, nextCursor, err := redis.Client().SScan(listKey, cursor, "", step).Result() if err != nil { - blog.Errorf("scan topo node cache list %s %d failed, err: %v, rid: %s", listKey, cursor, err, rid) + blog.Errorf("scan topo node cache list %s %d failed, err: %v, rid: %s", listKey, cursor, err, kit.Rid) return nil, err } cursor = nextCursor @@ -188,15 +189,15 @@ func GetNodeInfoCache(topoKey key.Key, bizID int64, kind string, rid string) ([] for i, idStr := range ids { id, err := strconv.ParseInt(idStr, 10, 64) if err != nil { - blog.Errorf("parse node id %s failed, err: %v, rid: %s", idStr, err, rid) + blog.Errorf("parse node id %s failed, err: %v, rid: %s", idStr, err, kit.Rid) continue } - detailKeys[i] = GenNodeInfoKey(topoKey, bizID, kind, id) + detailKeys[i] = GenNodeInfoKey(topoKey, kit.TenantID, bizID, kind, id) } details, err := redis.Client().MGet(ctx, detailKeys...).Result() if err != nil { - blog.Errorf("get topo node cache details by keys: %+v failed, err: %v, rid: %s", detailKeys, err, rid) + blog.Errorf("get topo node cache details by keys: %+v failed, err: %v, rid: %s", detailKeys, err, kit.Rid) return nil, err } @@ -207,7 +208,7 @@ func GetNodeInfoCache(topoKey key.Key, bizID int64, kind string, rid string) ([] strVal, ok := detail.(string) if !ok { - blog.Errorf("node info cache detail type %T is invalid, detail: %v, rid: %s", detail, detail, rid) + blog.Errorf("node info cache detail type %T is invalid, detail: %v, rid: %s", detail, detail, kit.Rid) continue } @@ -226,7 +227,7 @@ func GetNodeInfoCache(topoKey key.Key, bizID int64, kind string, rid string) ([] } // CrossCompareNodeInfoCache cross compare biz topo nodes info cache by kind -func CrossCompareNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes []types.Node, rid string) error { +func CrossCompareNodeInfoCache(kit *rest.Kit, topoKey key.Key, bizID int64, kind string, nodes []types.Node) error { nodeMap := make(map[int64]struct{}, 
len(nodes)) // paged add biz topo node info cache @@ -236,7 +237,7 @@ func CrossCompareNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes pagedNodes = append(pagedNodes, node) if len(pagedNodes) == step { - if err := AddNodeInfoCache(topoKey, bizID, kind, pagedNodes, rid); err != nil { + if err := AddNodeInfoCache(kit, topoKey, bizID, kind, pagedNodes); err != nil { return err } pagedNodes = make([]types.Node, 0) @@ -244,19 +245,19 @@ func CrossCompareNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes } if len(pagedNodes) > 0 { - if err := AddNodeInfoCache(topoKey, bizID, kind, pagedNodes, rid); err != nil { + if err := AddNodeInfoCache(kit, topoKey, bizID, kind, pagedNodes); err != nil { return err } } - listKey := GenBizNodeListKey(topoKey, bizID, kind) + listKey := GenBizNodeListKey(topoKey, kit.TenantID, bizID, kind) cursor := uint64(0) // paged delete redundant biz topo node info cache for { ids, nextCursor, err := redis.Client().SScan(listKey, cursor, "", step).Result() if err != nil { - blog.Errorf("scan topo node cache list %s %d failed, err: %v, rid: %s", listKey, cursor, err, rid) + blog.Errorf("scan topo node cache list %s %d failed, err: %v, rid: %s", listKey, cursor, err, kit.Rid) return err } cursor = nextCursor @@ -272,7 +273,7 @@ func CrossCompareNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes for _, idStr := range ids { id, err := strconv.ParseInt(idStr, 10, 64) if err != nil { - blog.Errorf("parse node id %s failed, err: %v, rid: %s", idStr, err, rid) + blog.Errorf("parse node id %s failed, err: %v, rid: %s", idStr, err, kit.Rid) continue } @@ -283,7 +284,7 @@ func CrossCompareNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes } if len(delIDs) > 0 { - if err = DeleteNodeInfoCache(topoKey, bizID, kind, delIDs, rid); err != nil { + if err = DeleteNodeInfoCache(kit, topoKey, bizID, kind, delIDs); err != nil { return err } } @@ -295,41 +296,41 @@ func CrossCompareNodeInfoCache(topoKey key.Key, bizID int64, kind string, nodes } // GenNodeInfoCntKey generate biz topo node info count cache key -func GenNodeInfoCntKey(topoKey key.Key, bizID int64, kind string, id int64) string { - return GenNodeInfoKey(topoKey, bizID, kind, id) + ":count" +func GenNodeInfoCntKey(topoKey key.Key, tenantID string, bizID int64, kind string, id int64) string { + return GenNodeInfoKey(topoKey, tenantID, bizID, kind, id) + ":count" } // AddNodeCountCache add biz topo nodes count cache by kind -func AddNodeCountCache(topoKey key.Key, bizID int64, kind string, cntMap map[int64]int64, rid string) error { +func AddNodeCountCache(kit *rest.Kit, topoKey key.Key, bizID int64, kind string, cntMap map[int64]int64) error { pip := redis.Client().Pipeline() defer pip.Close() for id, cnt := range cntMap { - cntKey := GenNodeInfoCntKey(topoKey, bizID, kind, id) + cntKey := GenNodeInfoCntKey(topoKey, kit.TenantID, bizID, kind, id) pip.Set(cntKey, cnt, topoKey.TTL()) } _, err := pip.Exec() if err != nil { - blog.Errorf("cache biz %d topo node count info %+v failed, err: %v, rid: %s", bizID, cntMap, err, rid) + blog.Errorf("cache biz %d topo node count info %+v failed, err: %v, rid: %s", bizID, cntMap, err, kit.Rid) return err } return nil } // DeleteNodeCountCache delete biz topo node count cache by kind -func DeleteNodeCountCache(topoKey key.Key, bizID int64, kind string, ids []int64, rid string) error { +func DeleteNodeCountCache(kit *rest.Kit, topoKey key.Key, bizID int64, kind string, ids []int64) error { pip := redis.Client().Pipeline() defer pip.Close() for _, 
id := range ids { - nodeKey := GenNodeInfoCntKey(topoKey, bizID, kind, id) + nodeKey := GenNodeInfoCntKey(topoKey, kit.TenantID, bizID, kind, id) pip.Del(nodeKey) } _, err := pip.Exec() if err != nil { - blog.Errorf("delete biz %d topo nodes count cache failed, err: %v, ids: %+v, rid: %s", bizID, err, ids, rid) + blog.Errorf("delete biz %d topo nodes count cache failed, err: %v, ids: %+v, rid: %s", bizID, err, ids, kit.Rid) return err } return nil diff --git a/src/source_controller/cacheservice/cache/biz-topo/logics/topo/queue.go b/src/source_controller/cacheservice/cache/biz-topo/logics/topo/queue.go index 3898e37ae0..5e751f37aa 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/logics/topo/queue.go +++ b/src/source_controller/cacheservice/cache/biz-topo/logics/topo/queue.go @@ -23,6 +23,7 @@ import ( "time" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/biz-topo/key" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" @@ -32,14 +33,14 @@ type bizRefreshQueue struct { sync.Mutex topoKey key.Key bizIDs []int64 - bizIDMap map[int64]struct{} + bizIDMap map[int64]string } func newBizRefreshQueue(topoType types.TopoType) *bizRefreshQueue { queue := &bizRefreshQueue{ topoKey: key.TopoKeyMap[topoType], bizIDs: make([]int64, 0), - bizIDMap: make(map[int64]struct{}), + bizIDMap: make(map[int64]string), } return queue @@ -48,14 +49,15 @@ func newBizRefreshQueue(topoType types.TopoType) *bizRefreshQueue { // Run refreshing biz topo cache task func (q *bizRefreshQueue) Run() { for { - bizID, exists := q.Pop() + tenantID, bizID, exists := q.Pop() if !exists { time.Sleep(time.Millisecond * 50) continue } rid := util.GenerateRID() - err := TryRefreshBizTopoByCache(q.topoKey, bizID, rid) + kit := rest.NewKit().WithRid(rid).WithTenant(tenantID) + err := TryRefreshBizTopoByCache(kit, q.topoKey, bizID) if err != nil { blog.Errorf("try refresh biz %d %s topo failed, err: %v, rid: %s", bizID, q.topoKey.Type(), err, rid) time.Sleep(time.Millisecond * 100) @@ -65,7 +67,7 @@ func (q *bizRefreshQueue) Run() { } // Push some need refresh bizs -func (q *bizRefreshQueue) Push(bizIDs ...int64) { +func (q *bizRefreshQueue) Push(tenantID string, bizIDs ...int64) { q.Lock() defer q.Unlock() @@ -73,34 +75,35 @@ func (q *bizRefreshQueue) Push(bizIDs ...int64) { _, exists := q.bizIDMap[bizID] if !exists { q.bizIDs = append(q.bizIDs, bizID) - q.bizIDMap[bizID] = struct{}{} + q.bizIDMap[bizID] = tenantID } } } // Pop one need refresh biz -func (q *bizRefreshQueue) Pop() (int64, bool) { +func (q *bizRefreshQueue) Pop() (string, int64, bool) { q.Lock() defer q.Unlock() if len(q.bizIDs) == 0 { - return 0, false + return "", 0, false } bizID := q.bizIDs[0] q.bizIDs = q.bizIDs[1:] + tenantID := q.bizIDMap[bizID] delete(q.bizIDMap, bizID) - return bizID, true + return tenantID, bizID, true } // Remove one need refresh biz -func (q *bizRefreshQueue) Remove(bizID int64) { +func (q *bizRefreshQueue) Remove(tenantID string, bizID int64) { q.Lock() defer q.Unlock() - _, exists := q.bizIDMap[bizID] - if !exists { + tenant, exists := q.bizIDMap[bizID] + if !exists || tenant != tenantID { return } @@ -117,7 +120,7 @@ func (q *bizRefreshQueue) Remove(bizID int64) { var bizRefreshQueuePool = make(map[types.TopoType]*bizRefreshQueue) func init() { - refreshQueueTypes := []types.TopoType{types.KubeType} + refreshQueueTypes := []types.TopoType{types.KubeType, types.BriefType} for _, queueType := range 
refreshQueueTypes { queue := newBizRefreshQueue(queueType) bizRefreshQueuePool[queueType] = queue @@ -126,13 +129,15 @@ func init() { } // AddRefreshBizTopoTask add refresh biz topo cache task -func AddRefreshBizTopoTask(topoType types.TopoType, bizIDs []int64, rid string) { +func AddRefreshBizTopoTask(topoType types.TopoType, bizListMap map[string][]int64, rid string) { queue, exists := bizRefreshQueuePool[topoType] if !exists { blog.Errorf("topo type %s has no biz refresh queue, rid: %s", topoType, rid) return } - bizIDs = util.IntArrayUnique(bizIDs) - queue.Push(bizIDs...) + for tenantID, bizIDs := range bizListMap { + bizIDs = util.IntArrayUnique(bizIDs) + queue.Push(tenantID, bizIDs...) + } } diff --git a/src/source_controller/cacheservice/cache/biz-topo/logics/topo/topo.go b/src/source_controller/cacheservice/cache/biz-topo/logics/topo/topo.go index 25d8753b9d..dd670babde 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/logics/topo/topo.go +++ b/src/source_controller/cacheservice/cache/biz-topo/logics/topo/topo.go @@ -23,6 +23,7 @@ import ( "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/biz-topo/key" "configcenter/src/source_controller/cacheservice/cache/biz-topo/topo" @@ -30,52 +31,55 @@ import ( ) // RefreshBizTopo get biz topo info from db and update it to cache -func RefreshBizTopo(topoKey key.Key, bizID int64, byCache bool, rid string) error { - ctx := context.WithValue(context.Background(), common.ContextRequestIDField, rid) +func RefreshBizTopo(kit *rest.Kit, topoKey key.Key, bizID int64, byCache bool) error { + ctx := context.WithValue(context.Background(), common.ContextRequestIDField, kit.Rid) ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + kit = kit.WithCtx(ctx) topoType := topoKey.Type() - blog.Infof("start refreshing biz %d %s topology, by cache: %v, rid: %s", bizID, topoType, byCache, rid) + blog.Infof("start refreshing biz %d %s topology, by cache: %v, rid: %s", bizID, topoType, byCache, kit.Rid) - bizTopo, err := topo.GenBizTopo(ctx, bizID, topoType, byCache, rid) + bizTopo, err := topo.GenBizTopo(kit, bizID, topoType, byCache) if err != nil { - blog.Errorf("get biz %d %s topology to refresh failed, err: %v, rid: %s", bizID, topoType, err, rid) + blog.Errorf("get biz %d %s topology to refresh failed, err: %v, rid: %s", bizID, topoType, err, kit.Rid) return err } // update it to cache directly. 
- _, err = topoKey.UpdateTopology(ctx, bizTopo) + _, err = topoKey.UpdateTopology(kit, bizID, bizTopo) if err != nil { - blog.Errorf("refresh biz %d %s topology cache failed, err: %v, rid: %s", bizID, topoType, err, rid) + blog.Errorf("refresh biz %d %s topology cache failed, err: %v, rid: %s", bizID, topoType, err, kit.Rid) return err } queue, exists := bizRefreshQueuePool[topoType] if exists { - queue.Remove(bizID) + queue.Remove(kit.TenantID, bizID) } - blog.Infof("refresh biz %d %s topology success, by cache: %v, rid: %s", bizID, topoType, byCache, rid) + blog.Infof("refresh biz %d %s topology success, by cache: %v, rid: %s", bizID, topoType, byCache, kit.Rid) return nil } // TryRefreshBizTopoByCache try refresh biz topo cache by separate node cache, refresh from db data for the first time -func TryRefreshBizTopoByCache(topoKey key.Key, bizID int64, rid string) error { - ctx := context.WithValue(context.Background(), common.ContextRequestIDField, rid) +func TryRefreshBizTopoByCache(kit *rest.Kit, topoKey key.Key, bizID int64) error { + ctx := context.WithValue(context.Background(), common.ContextRequestIDField, kit.Rid) ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + kit = kit.WithCtx(ctx) // check if biz topo cache exists, if not, refresh from db data - bizTopoKey := topoKey.BizTopoKey(bizID) + bizTopoKey := topoKey.BizTopoKey(kit.TenantID, bizID) existRes, err := redis.Client().Exists(ctx, bizTopoKey).Result() if err != nil { - blog.Errorf("check if biz %d topo cache exists failed, key: %s, err: %v, rid: %s", bizID, bizTopoKey, err, rid) + blog.Errorf("check if biz %d topo cache exists failed, key: %s, err: %v, rid: %s", bizID, bizTopoKey, err, + kit.Rid) return err } if existRes != 1 { - return RefreshBizTopo(topoKey, bizID, false, rid) + return RefreshBizTopo(kit, topoKey, bizID, false) } // refresh biz topo from cache - return RefreshBizTopo(topoKey, bizID, true, rid) + return RefreshBizTopo(kit, topoKey, bizID, true) } diff --git a/src/source_controller/cacheservice/cache/biz-topo/topo.go b/src/source_controller/cacheservice/cache/biz-topo/topo.go index d19ecd09e7..eb82933a8b 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/topo.go +++ b/src/source_controller/cacheservice/cache/biz-topo/topo.go @@ -23,9 +23,11 @@ import ( "fmt" "time" + "configcenter/pkg/tenant" "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" "configcenter/src/common/util" @@ -33,8 +35,9 @@ import ( topolgc "configcenter/src/source_controller/cacheservice/cache/biz-topo/logics/topo" "configcenter/src/source_controller/cacheservice/cache/biz-topo/watch" "configcenter/src/source_controller/cacheservice/cache/custom/cache" + watchcli "configcenter/src/source_controller/cacheservice/event/watch" "configcenter/src/storage/driver/mongodb" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) // Topo defines the business topology caching logics @@ -44,14 +47,14 @@ type Topo struct { } // New Topo -func New(isMaster discovery.ServiceManageInterface, loopW stream.LoopInterface, cacheSet *cache.CacheSet) (*Topo, - error) { +func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, cacheSet *cache.CacheSet, + watchCli *watchcli.Client) (*Topo, error) { t := &Topo{ isMaster: isMaster, } - watcher, err := watch.New(loopW, cacheSet) + watcher, err := watch.New(isMaster, watchTask, cacheSet, 
watchCli) if err != nil { return nil, fmt.Errorf("new watcher failed, err: %v", err) } @@ -85,42 +88,48 @@ func (t *Topo) loopBizTopoCache(topoKey key.Key) { } func (t *Topo) doLoopBizTopoToCache(topoKey key.Key, rid string) { - // read from secondary in mongodb cluster. - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + _ = tenant.ExecForAllTenants(func(tenantID string) error { + // read from secondary in mongodb cluster. + ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + kit := rest.NewKit().WithCtx(ctx).WithRid(rid).WithTenant(tenantID) - all, err := t.listAllBusiness(ctx) - if err != nil { - blog.Errorf("loop %s biz topology, but list all business failed, err: %v, rid: %s", topoKey.Type(), err, rid) - return - } + all, err := t.listAllBusiness(kit) + if err != nil { + blog.Errorf("loop %s biz topology, but list all business failed, err: %v, rid: %s", topoKey.Type(), err, + rid) + return err + } - for _, biz := range all { - time.Sleep(50 * time.Millisecond) + for _, biz := range all { + time.Sleep(50 * time.Millisecond) - rid := fmt.Sprintf("%s:%d", rid, biz.BizID) + kit = kit.WithRid(fmt.Sprintf("%s:%d", rid, biz.BizID)) - err = topolgc.RefreshBizTopo(topoKey, biz.BizID, false, rid) - if err != nil { - blog.Errorf("loop refresh biz %d/%s %s topology failed, err: %v, rid: %s", biz.BizID, biz.BizName, - topoKey.Type(), err, rid) - continue - } + err = topolgc.RefreshBizTopo(kit, topoKey, biz.BizID, false) + if err != nil { + blog.Errorf("loop refresh biz %d/%s %s topology failed, err: %v, rid: %s", biz.BizID, biz.BizName, + topoKey.Type(), err, kit.Rid) + continue + } - blog.Infof("loop refresh biz %d/%s %s topology success, rid: %s", biz.BizID, biz.BizName, topoKey.Type(), rid) - } + blog.Infof("loop refresh biz %d/%s %s topology success, rid: %s", biz.BizID, biz.BizName, topoKey.Type(), + kit.Rid) + } + return nil + }) } const bizStep = 100 // listAllBusiness list all business brief info -func (t *Topo) listAllBusiness(ctx context.Context) ([]metadata.BizInst, error) { +func (t *Topo) listAllBusiness(kit *rest.Kit) ([]metadata.BizInst, error) { filter := mapstr.MapStr{} all := make([]metadata.BizInst, 0) for { oneStep := make([]metadata.BizInst, 0) - err := mongodb.Client().Table(common.BKTableNameBaseApp).Find(filter).Fields(common.BKAppIDField, - common.BKAppNameField).Limit(bizStep).Sort(common.BKAppIDField).All(ctx, &oneStep) + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseApp).Find(filter).Fields(common.BKAppIDField, + common.BKAppNameField).Limit(bizStep).Sort(common.BKAppIDField).All(kit.Ctx, &oneStep) if err != nil { return nil, err } diff --git a/src/source_controller/cacheservice/cache/biz-topo/topo/kube.go b/src/source_controller/cacheservice/cache/biz-topo/topo/kube.go index aff523890f..0dbc3098d8 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/topo/kube.go +++ b/src/source_controller/cacheservice/cache/biz-topo/topo/kube.go @@ -18,8 +18,7 @@ package topo import ( - "context" - + "configcenter/src/common/http/rest" "configcenter/src/source_controller/cacheservice/cache/biz-topo/level" "configcenter/src/source_controller/cacheservice/cache/biz-topo/tree" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" @@ -30,8 +29,16 @@ var kubeTopo = &Topology{ TopLevel: level.GetKubeTopLevel(), } +var briefTopo = &Topology{ + Tree: new(tree.BriefTopoTree), + TopLevel: level.GetBriefTopLevel(), +} + func init() { - topoGetterMap[types.KubeType] = func(ctx 
context.Context, rid string) (*Topology, error) { + topoGetterMap[types.KubeType] = func(kit *rest.Kit) (*Topology, error) { return kubeTopo, nil } + topoGetterMap[types.BriefType] = func(kit *rest.Kit) (*Topology, error) { + return briefTopo, nil + } } diff --git a/src/source_controller/cacheservice/cache/biz-topo/topo/topo.go b/src/source_controller/cacheservice/cache/biz-topo/topo/topo.go index a320317786..1009c0e9dc 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/topo/topo.go +++ b/src/source_controller/cacheservice/cache/biz-topo/topo/topo.go @@ -19,11 +19,11 @@ package topo import ( - "context" "fmt" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" "configcenter/src/common/util" @@ -34,40 +34,38 @@ import ( ) // GenBizTopo generate business topology tree -func GenBizTopo(ctx context.Context, bizID int64, topoType types.TopoType, byCache bool, rid string) (*types.BizTopo, - error) { - +func GenBizTopo(kit *rest.Kit, bizID int64, topoType types.TopoType, byCache bool) (any, error) { // read from secondary node in mongodb cluster - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) // get biz info filter := mapstr.MapStr{ common.BKAppIDField: bizID, } biz := new(metadata.BizInst) - if err := mongodb.Client().Table(common.BKTableNameBaseApp).Find(filter).Fields(common.BKAppIDField, - common.BKAppNameField).One(ctx, biz); err != nil { - blog.Errorf("get biz %d failed, err: %v, rid: %s", biz, err, rid) + if err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseApp).Find(filter).Fields(common.BKAppIDField, + common.BKAppNameField, common.BKDefaultField).One(kit.Ctx, biz); err != nil { + blog.Errorf("get biz %d failed, err: %v, rid: %s", bizID, err, kit.Rid) return nil, err } // get topology generator - topology, err := GetTopology(ctx, topoType, rid) + topology, err := GetTopology(kit, topoType) if err != nil { - blog.Errorf("get %s topology generator failed, err: %v, rid: %v", topoType, err, rid) + blog.Errorf("get %s topology generator failed, err: %v, rid: %v", topoType, err, kit.Rid) return nil, err } // get topology nodes and generate biz topology tree var nodes []types.Node if byCache { - nodes, err = topology.TopLevel.GetNodesByCache(ctx, bizID, rid) + nodes, err = topology.TopLevel.GetNodesByCache(kit, bizID) } else { - nodes, err = topology.TopLevel.GetNodesByDB(ctx, bizID, nil, rid) + nodes, err = topology.TopLevel.GetNodesByDB(kit, bizID, nil) } if err != nil { blog.Errorf("get biz %d %s topo nodes failed, by cache: %v, err: %v, rid: %s", biz.BizID, topoType, byCache, - err, rid) + err, kit.Rid) return nil, err } @@ -79,13 +77,14 @@ func GenBizTopo(ctx context.Context, bizID int64, topoType types.TopoType, byCac Nodes: nodes, } - bizTopo, err = topology.Tree.RearrangeBizTopo(ctx, bizTopo, rid) + topo, err := topology.Tree.RearrangeBizTopo(kit, biz, nodes) if err != nil { - blog.Errorf("rearrange biz %d %s topo failed, err: %v, topo: %+v, rid: %s", biz, topoType, err, bizTopo, rid) + blog.Errorf("rearrange biz %d %s topo failed, err: %v, topo: %+v, rid: %s", biz, topoType, err, bizTopo, + kit.Rid) return nil, err } - return bizTopo, nil + return topo, nil } // Topology defines the topology generator @@ -97,22 +96,22 @@ type Topology struct { } // TopologyGetter defines the function to get topology generator -type TopologyGetter func(ctx 
context.Context, rid string) (*Topology, error) +type TopologyGetter func(kit *rest.Kit) (*Topology, error) // topoGetterMap is the mapping of topology type to TopologyGetter var topoGetterMap = map[types.TopoType]TopologyGetter{} // GetTopology get topology generator -func GetTopology(ctx context.Context, topoType types.TopoType, rid string) (*Topology, error) { +func GetTopology(kit *rest.Kit, topoType types.TopoType) (*Topology, error) { getter, exists := topoGetterMap[topoType] if !exists { - blog.Errorf("%s topology getter not exists, rid: %v", topoType, rid) + blog.Errorf("%s topology getter not exists, rid: %v", topoType, kit.Rid) return nil, fmt.Errorf("topology type %s is invalid", topoType) } - topology, err := getter(ctx, rid) + topology, err := getter(kit) if err != nil { - blog.Errorf("get %s topology generator failed, rid: %v", topoType, rid) + blog.Errorf("get %s topology generator failed, rid: %v", topoType, kit.Rid) return nil, err } diff --git a/src/source_controller/cacheservice/cache/biz-topo/tree/brief.go b/src/source_controller/cacheservice/cache/biz-topo/tree/brief.go new file mode 100644 index 0000000000..f7fbf3dcc2 --- /dev/null +++ b/src/source_controller/cacheservice/cache/biz-topo/tree/brief.go @@ -0,0 +1,98 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package tree + +import ( + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/http/rest" + "configcenter/src/common/metadata" + "configcenter/src/common/util" + "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" +) + +// BriefTopoTree defines brief biz topology tree type +type BriefTopoTree struct{} + +// RearrangeBizTopo rearrange brief business topology tree +func (t *BriefTopoTree) RearrangeBizTopo(kit *rest.Kit, biz *metadata.BizInst, nodes []types.Node) (any, error) { + parsedNodes, idleSets := make([]*types.BriefNode, 0), make([]*types.BriefNode, 0) + for _, node := range nodes { + parsedNode, isIdle, err := parseBriefNode(kit, &node) + if err != nil { + return nil, err + } + + if isIdle { + idleSets = append(idleSets, parsedNode) + continue + } + parsedNodes = append(parsedNodes, parsedNode) + } + + return &types.BizBriefTopology{ + Biz: &types.BriefBizInfo{ + ID: biz.BizID, + Name: biz.BizName, + Default: biz.Default, + }, + Idle: idleSets, + Nodes: parsedNodes, + }, nil +} + +func parseBriefNode(kit *rest.Kit, node *types.Node) (*types.BriefNode, bool, error) { + parsedNode := &types.BriefNode{ + Object: node.Kind, + ID: node.ID, + Name: node.Name, + } + + isIdle := false + + switch node.Kind { + case common.BKInnerObjIDSet: + defaultVal, err := util.GetIntByInterface(node.Extra) + if err != nil { + blog.Errorf("parse brief set node(%+v) failed, err: %v, rid: %s", node, err, kit.Rid) + return nil, false, err + } + if defaultVal == common.DefaultResSetFlag { + isIdle = true + } + parsedNode.Default = &defaultVal + + case common.BKInnerObjIDModule: + defaultVal, err := util.GetIntByInterface(node.Extra) + if err != nil { + blog.Errorf("parse brief set node(%+v) failed, err: %v, rid: %s", node, err, kit.Rid) + return nil, false, err + } + parsedNode.Default = &defaultVal + } + + for _, subNode := range node.SubNodes { + parsedSubNode, _, err := parseBriefNode(kit, &subNode) + if err != nil { + return nil, false, err + } + parsedNode.SubNodes = append(parsedNode.SubNodes, parsedSubNode) + } + + return parsedNode, isIdle, nil +} diff --git a/src/source_controller/cacheservice/cache/biz-topo/tree/count.go b/src/source_controller/cacheservice/cache/biz-topo/tree/count.go index 0b819ac4b2..d2c1c6a872 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/tree/count.go +++ b/src/source_controller/cacheservice/cache/biz-topo/tree/count.go @@ -18,8 +18,8 @@ package tree import ( - "context" - + "configcenter/src/common/http/rest" + "configcenter/src/common/metadata" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" ) @@ -27,13 +27,19 @@ import ( type TreeWithCount struct{} // RearrangeBizTopo rearrange business topology tree -func (t *TreeWithCount) RearrangeBizTopo(_ context.Context, topo *types.BizTopo, _ string) (*types.BizTopo, error) { +func (t *TreeWithCount) RearrangeBizTopo(_ *rest.Kit, biz *metadata.BizInst, nodes []types.Node) (any, error) { cnt := int64(0) - for _, node := range topo.Nodes { + for _, node := range nodes { if node.Count != nil { cnt += *node.Count } } - topo.Biz.Count = &cnt - return topo, nil + return &types.BizTopo{ + Biz: &types.BizInfo{ + ID: biz.BizID, + Name: biz.BizName, + Count: &cnt, + }, + Nodes: nodes, + }, nil } diff --git a/src/source_controller/cacheservice/cache/biz-topo/tree/tree.go b/src/source_controller/cacheservice/cache/biz-topo/tree/tree.go index 92761d0722..4af4838b24 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/tree/tree.go +++ 
b/src/source_controller/cacheservice/cache/biz-topo/tree/tree.go @@ -19,12 +19,12 @@ package tree import ( - "context" - + "configcenter/src/common/http/rest" + "configcenter/src/common/metadata" "configcenter/src/source_controller/cacheservice/cache/biz-topo/types" ) // TreeI is the interface for topology tree type TreeI interface { - RearrangeBizTopo(ctx context.Context, topo *types.BizTopo, rid string) (*types.BizTopo, error) + RearrangeBizTopo(kit *rest.Kit, biz *metadata.BizInst, nodes []types.Node) (any, error) } diff --git a/src/source_controller/cacheservice/cache/biz-topo/types/brief.go b/src/source_controller/cacheservice/cache/biz-topo/types/brief.go new file mode 100644 index 0000000000..b3fab46a47 --- /dev/null +++ b/src/source_controller/cacheservice/cache/biz-topo/types/brief.go @@ -0,0 +1,57 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. + */ + +package types + +// BizBriefTopology is the brief topology of business +type BizBriefTopology struct { + // basic business info + Biz *BriefBizInfo `json:"biz"` + // the idle set nodes info + Idle []*BriefNode `json:"idle"` + // the other common nodes + Nodes []*BriefNode `json:"nds"` +} + +// BriefBizInfo is the brief info of business +type BriefBizInfo struct { + // business id + ID int64 `json:"id" bson:"bk_biz_id"` + // business name + Name string `json:"nm" bson:"bk_biz_name"` + // describe it's a resource pool business or normal business. + // 0: normal business + // >0: special business, like resource pool business. + Default int `json:"dft" bson:"default"` +} + +// BriefNode is the brief biz topo node +type BriefNode struct { + // the object of this node, like set or module + Object string `json:"obj"` + // the node's instance id, like set id or module id + ID int64 `json:"id"` + // the node's name, like set name or module name + Name string `json:"nm"` + // only set, module has this field. + // describe what kind of set or module this node is. + // 0: normal module or set. 
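+	// (a normal set or module is one created by users themselves, not a built-in one)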
+	// >0: special set or module
+	Default *int `json:"dft,omitempty"`
+	// the sub-nodes of current node
+	SubNodes []*BriefNode `json:"nds"`
+}
diff --git a/src/source_controller/cacheservice/cache/biz-topo/types/types.go b/src/source_controller/cacheservice/cache/biz-topo/types/types.go
index 83c6f0eb0e..49b6c5ad60 100644
--- a/src/source_controller/cacheservice/cache/biz-topo/types/types.go
+++ b/src/source_controller/cacheservice/cache/biz-topo/types/types.go
@@ -56,6 +56,9 @@ type Node struct {
 
 	// ParentID topology node parent id, is an intermediate value only used to rearrange topology tree
 	ParentID int64 `json:"-"`
+
+	// Extra is the extra info for topology node
+	Extra any `json:"-"`
 }
 
 // TopoType is the topology tree type
@@ -64,4 +67,6 @@ type TopoType string
 const (
 	// KubeType is the kube topology tree type
 	KubeType TopoType = "kube"
+	// BriefType is the brief mainline topology tree type
+	BriefType TopoType = "brief"
 )
diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go b/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go
new file mode 100644
index 0000000000..7c7672a8e7
--- /dev/null
+++ b/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go
@@ -0,0 +1,136 @@
+/*
+ * Tencent is pleased to support the open source community by making
+ * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available.
+ * Copyright (C) 2017 THL A29 Limited,
+ * a Tencent company. All rights reserved.
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at http://opensource.org/licenses/MIT
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ * We undertake not to change the open source license (MIT license) applicable
+ * to the current version of the project delivered to anyone in the future.
+ */
+
+package watch
+
+import (
+	"fmt"
+
+	"configcenter/pkg/tenant"
+	"configcenter/src/common"
+	"configcenter/src/common/blog"
+	"configcenter/src/common/http/rest"
+	"configcenter/src/common/util"
+	"configcenter/src/common/watch"
+	"configcenter/src/source_controller/cacheservice/cache/biz-topo/key"
+	topolgc "configcenter/src/source_controller/cacheservice/cache/biz-topo/logics/topo"
+	"configcenter/src/source_controller/cacheservice/cache/biz-topo/types"
+	tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler"
+	"configcenter/src/source_controller/cacheservice/event/loop"
+
+	"github.com/tidwall/gjson"
+)
+
+type briefWatcher struct {
+	cacheKey    key.Key
+	loopWatcher *loop.LoopWatcher
+}
+
+func (w *Watcher) watchBrief() error {
+	cacheKey, exists := key.TopoKeyMap[types.BriefType]
+	if !exists {
+		return fmt.Errorf("watch brief but topo type %s is invalid", types.BriefType)
+	}
+
+	watcher := &briefWatcher{
+		loopWatcher: loop.NewLoopWatcher(w.isMaster, w.watchCli),
+		cacheKey:    cacheKey,
+	}
+
+	cursorTypes := []watch.CursorType{watch.Set, watch.Module, watch.MainlineInstance}
+	for _, cursorType := range cursorTypes {
+		err := watcher.watchEvents(cursorType)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (w *briefWatcher) watchEvents(cursorType watch.CursorType) error {
+	loopEventChan := make(chan loop.TenantEvent)
+
+	name := fmt.Sprintf("biz topo %s %s", w.cacheKey.Type(), cursorType)
+	tenantChan := tenant.NewTenantEventChan(name)
+	go func() {
+		for e := range tenantChan {
+			switch e.EventType {
+			case tenant.Create:
+				loopEventChan <- loop.TenantEvent{
+					EventType: watch.Create,
+					TenantID:  e.TenantID,
+					WatchOpts: &watch.WatchEventOptions{
+						EventTypes: []watch.EventType{watch.Create, watch.Delete},
+						Fields:     []string{common.BKAppIDField},
+						Resource:   cursorType,
+					},
+				}
+			case tenant.Delete:
+				loopEventChan <- loop.TenantEvent{
+					EventType: watch.Delete,
+					TenantID:  e.TenantID,
+				}
+			}
+		}
+	}()
+
+	opts := &loop.LoopWatchTaskOptions{
+		Name:         name,
+		CursorType:   cursorType,
+		TokenHandler: tokenhandler.NewMixTokenHandler(w.cacheKey.Namespace(), string(cursorType)),
+		EventHandler: w.handleEvents,
+		TenantChan:   loopEventChan,
+	}
+
+	err := w.loopWatcher.AddLoopWatchTask(opts)
+	if err != nil {
+		blog.Errorf("watch %s brief biz topo cache failed, err: %v", cursorType, err)
+		return err
+	}
+
+	return nil
+}
+
+func (w *briefWatcher) handleEvents(kit *rest.Kit, events []*watch.WatchEventDetail) error {
+	bizList := make([]int64, 0)
+
+	for _, event := range events {
+		if event.Detail == nil {
+			continue
+		}
+
+		var bizID int64
+		switch t := event.Detail.(type) {
+		case watch.JsonString:
+			bizID = gjson.Get(string(t), common.BKAppIDField).Int()
+		}
+
+		if bizID == 0 {
+			continue
+		}
+
+		blog.V(4).Infof("watch brief biz topo cache, received resource: %s, tenant: %s, biz: %d, cursor: %s, rid: %s",
+			event.Resource, kit.TenantID, bizID, event.Cursor, kit.Rid)
+
+		bizList = append(bizList, bizID)
+	}
+
+	bizList = util.IntArrayUnique(bizList)
+	topolgc.AddRefreshBizTopoTask(types.BriefType, map[string][]int64{kit.TenantID: bizList}, kit.Rid)
+	return nil
+}
diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go b/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go
index 14ab63f9ed..c85a9c3176 100644
--- a/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go
+++ b/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go
@@ -24,6 +24,7 @@ import (
 
 	"configcenter/src/common"
"configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/util" kubetypes "configcenter/src/kube/types" @@ -75,7 +76,7 @@ func (w *Watcher) watchKube() error { } // watchTopoLevel watch kube topo event -func (w *kubeWatcher) watchTopo(obj string, doBatch func(es []*streamtypes.Event) bool) error { +func (w *kubeWatcher) watchTopo(obj string, doBatch func(*streamtypes.DBInfo, []*streamtypes.Event) bool) error { collections, err := kubetypes.GetCollectionWithObject(obj) if err != nil { blog.Errorf("get collections to watch for kube biz topo obj %s failed, err: %v", obj, err) @@ -83,40 +84,37 @@ func (w *kubeWatcher) watchTopo(obj string, doBatch func(es []*streamtypes.Event } for _, collection := range collections { - watchOpts := &streamtypes.WatchOptions{ - Options: streamtypes.Options{ - EventStruct: kubeEventStructMap[obj], - Collection: collection, - Filter: mapstr.MapStr{}, - }, - } - - tokenHandler := tokenhandler.NewMixTokenHandler(w.cacheKey.Namespace(), collection, mongodb.Client()) - startAtTime, err := tokenHandler.GetStartWatchTime(context.Background()) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", watchOpts.Collection, err) - return err - } - watchOpts.StartAtTime = startAtTime - watchOpts.WatchFatalErrorCallback = tokenHandler.ResetWatchToken - - loopOptions := &streamtypes.LoopBatchOptions{ - LoopOptions: streamtypes.LoopOptions{ - Name: fmt.Sprintf("%s kube biz topo cache", obj), - WatchOpt: watchOpts, + tokenHandler := tokenhandler.NewMixTokenHandler(w.cacheKey.Namespace(), collection) + + opts := &streamtypes.LoopBatchTaskOptions{ + WatchTaskOptions: &streamtypes.WatchTaskOptions{ + Name: fmt.Sprintf("%s kube biz topo cache", collection), + CollOpts: &streamtypes.WatchCollOptions{ + CollectionOptions: streamtypes.CollectionOptions{ + CollectionFilter: &streamtypes.CollectionFilter{ + Regex: fmt.Sprintf("_%s$", collection), + }, + EventStruct: new(commonResBaseInfo), + }, + }, TokenHandler: tokenHandler, RetryOptions: &streamtypes.RetryOptions{ MaxRetryCount: 10, RetryDuration: 1 * time.Second, }, }, - EventHandler: &streamtypes.BatchHandler{ + EventHandler: &streamtypes.TaskBatchHandler{ DoBatch: doBatch, }, BatchSize: 200, } - if err = w.watcher.loopW.WithBatch(loopOptions); err != nil { + if obj == kubetypes.KubePod { + opts.WatchTaskOptions.CollOpts.EventStruct = new(kubetypes.Pod) + } + + err = w.watcher.task.AddLoopBatchTask(opts) + if err != nil { blog.Errorf("watch kube biz topo collection %s failed, err: %v", collection, err) return err } @@ -125,16 +123,9 @@ func (w *kubeWatcher) watchTopo(obj string, doBatch func(es []*streamtypes.Event return nil } -var kubeEventStructMap = map[string]interface{}{ - kubetypes.KubeCluster: new(kubetypes.Cluster), - kubetypes.KubeNamespace: new(kubetypes.Namespace), - kubetypes.KubeWorkload: new(kubetypes.WorkloadBase), - kubetypes.KubePod: new(kubetypes.Pod), -} - // onTopoLevelChange handle kube topo level event -func (w *kubeWatcher) onTopoLevelChange(obj string) func(es []*streamtypes.Event) (retry bool) { - return func(es []*streamtypes.Event) (retry bool) { +func (w *kubeWatcher) onTopoLevelChange(obj string) func(*streamtypes.DBInfo, []*streamtypes.Event) bool { + return func(dbInfo *streamtypes.DBInfo, es []*streamtypes.Event) bool { if len(es) == 0 { return false } @@ -143,7 +134,7 @@ func (w *kubeWatcher) onTopoLevelChange(obj string) func(es []*streamtypes.Event rid := es[0].ID() upsertCollOidMap := 
make(map[string][]primitive.ObjectID) - delCollOidMap := make(map[string][]string) + delCollOidMap := make(map[string][]commonResBaseInfo) for idx := range es { one := es[idx] @@ -163,7 +154,9 @@ func (w *kubeWatcher) onTopoLevelChange(obj string) func(es []*streamtypes.Event } upsertCollOidMap[one.Collection] = append(upsertCollOidMap[one.Collection], oid) case streamtypes.Delete: - delCollOidMap[one.Collection] = append(delCollOidMap[one.Collection], one.Oid) + baseInfo := *one.Document.(*commonResBaseInfo) + baseInfo.Oid = one.Oid + delCollOidMap[one.Collection] = append(delCollOidMap[one.Collection], baseInfo) default: continue } @@ -182,7 +175,7 @@ func (w *kubeWatcher) onTopoLevelChange(obj string) func(es []*streamtypes.Event return true } - bizList := make([]int64, 0) + bizListMap := make(map[string][]int64, 0) for _, one := range es { collOidKey := genCollOidKey(one.Collection, one.Oid) switch one.OperationType { @@ -194,19 +187,19 @@ func (w *kubeWatcher) onTopoLevelChange(obj string) func(es []*streamtypes.Event case streamtypes.Insert: bizID, exists := upsertBizIDMap[collOidKey] if exists { - bizList = append(bizList, bizID...) + bizListMap[one.TenantID] = append(bizListMap[one.TenantID], bizID...) } case streamtypes.Delete: bizID, exists := delBizIDMap[collOidKey] if exists { - bizList = append(bizList, bizID...) + bizListMap[one.TenantID] = append(bizListMap[one.TenantID], bizID...) } default: continue } } - topolgc.AddRefreshBizTopoTask(types.KubeType, util.IntArrayUnique(bizList), rid) + topolgc.AddRefreshBizTopoTask(types.KubeType, bizListMap, rid) return false } } @@ -235,8 +228,24 @@ var kubeFieldsMap = map[string][]string{ func (w *kubeWatcher) handleUpsertTopoLevelEvent(ctx context.Context, obj string, collOidMap map[string][]primitive.ObjectID, rid string) (map[string][]int64, error) { + kit := rest.NewKit().WithCtx(ctx).WithRid(rid) + collOidBizMap := make(map[string][]int64) for coll, oids := range collOidMap { + tenantID, table, err := common.SplitTenantTableName(coll) + if err != nil { + blog.Errorf("received invalid kube topology events, collection %s, oids: %+v, rid: %s", coll, oids, rid) + continue + } + + kind, err := getKubeNodeKind(obj, table) + if err != nil { + blog.Errorf("get %s kube node kind by coll %s failed, err: %v, rid: %s", obj, coll, err, rid) + continue + } + + kit = kit.WithTenant(tenantID) + // get upsert data from db cond := mapstr.MapStr{ "_id": mapstr.MapStr{common.BKDBIN: oids}, @@ -245,108 +254,105 @@ func (w *kubeWatcher) handleUpsertTopoLevelEvent(ctx context.Context, obj string docs := make([]mapStrWithOid, 0) findOpt := dbtypes.NewFindOpts().SetWithObjectID(true) - err := mongodb.Client().Table(coll).Find(cond, findOpt).Fields(kubeFieldsMap[obj]...).All(ctx, &docs) + err = mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond, findOpt).Fields(kubeFieldsMap[obj]...).All(ctx, + &docs) if err != nil { blog.Errorf("get %s data by cond: %+v failed, err: %v, rid: %s", coll, cond, err, rid) return nil, err } - kind, err := getKubeNodeKind(obj, coll) + collOidBizMap, err = w.handleUpsertTopoNodes(kit, docs, obj, coll, kind, collOidBizMap) if err != nil { - blog.Errorf("get %s kube node kind by coll %s failed, err: %v, rid: %s", obj, coll, err, rid) - continue + return nil, err } + } - bizNodeMap := make(map[int64][]types.Node) - nsIDs := make([]int64, 0) - nsNodeMap := make(map[int64][]types.Node) - nsKeyMap := make(map[int64][]string) - for _, doc := range docs { - // parse event to biz id and topo level node - bizID, node, err := 
kubeEventDocParserMap[obj](doc) - if err != nil { - blog.Errorf("parse %s doc %+v failed, err: %v, rid: %s", coll, doc, err, rid) - continue - } - - var nsID int64 - switch obj { - case kubetypes.KubeNamespace: - nsID = node.ID - case kubetypes.KubeWorkload: - nsID = node.ParentID - } - nsIDs = append(nsIDs, nsID) + return collOidBizMap, nil +} - node.Kind = kind - bizNodeMap[bizID] = append(bizNodeMap[bizID], node) - nsNodeMap[nsID] = append(nsNodeMap[nsID], node) +func (w *kubeWatcher) handleUpsertTopoNodes(kit *rest.Kit, docs []mapStrWithOid, obj, coll, kind string, + collOidBizMap map[string][]int64) (map[string][]int64, error) { - collOidKey := genCollOidKey(coll, doc.Oid.Hex()) - collOidBizMap[collOidKey] = []int64{bizID} - nsKeyMap[nsID] = append(nsKeyMap[nsID], collOidKey) - } - - // add shared namespace nodes to asst biz's cache - asstBizInfo, err := w.sharedNsCache.GetAsstBiz(ctx, nsIDs, rid) + bizNodeMap := make(map[int64][]types.Node) + nsIDs := make([]int64, 0) + nsNodeMap := make(map[int64][]types.Node) + nsKeyMap := make(map[int64][]string) + for _, doc := range docs { + // parse event to biz id and topo level node + bizID, node, err := kubeEventDocParserMap[obj](doc) if err != nil { - return nil, err + blog.Errorf("parse %s doc %+v failed, err: %v, rid: %s", coll, doc, err, kit.Rid) + continue } - for nsID, nodes := range nsNodeMap { - asstBizID, exists := asstBizInfo[nsID] - if !exists { - continue - } - bizNodeMap[asstBizID] = append(bizNodeMap[asstBizID], nodes...) - for _, collOidKey := range nsKeyMap[nsID] { - collOidBizMap[collOidKey] = append(collOidBizMap[collOidKey], asstBizID) - } + var nsID int64 + switch obj { + case kubetypes.KubeNamespace: + nsID = node.ID + case kubetypes.KubeWorkload: + nsID = node.ParentID } + nsIDs = append(nsIDs, nsID) - for bizID, nodes := range bizNodeMap { - // save kube topo level node info to redis - err = nodelgc.AddNodeInfoCache(w.cacheKey, bizID, kind, nodes, rid) - if err != nil { - return nil, err - } + node.Kind = kind + bizNodeMap[bizID] = append(bizNodeMap[bizID], node) + nsNodeMap[nsID] = append(nsNodeMap[nsID], node) + + collOidKey := genCollOidKey(coll, doc.Oid.Hex()) + collOidBizMap[collOidKey] = []int64{bizID} + nsKeyMap[nsID] = append(nsKeyMap[nsID], collOidKey) + } + + // add shared namespace nodes to asst biz's cache + asstBizInfo, err := w.sharedNsCache.GetAsstBiz(kit, nsIDs) + if err != nil { + return nil, err + } + + for nsID, nodes := range nsNodeMap { + asstBizID, exists := asstBizInfo[nsID] + if !exists { + continue + } + bizNodeMap[asstBizID] = append(bizNodeMap[asstBizID], nodes...) 
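+		// also record the asst biz for these events' coll+oid keys, so that shared
+		// namespace events trigger a refresh of the associated biz topo cache as well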
+ for _, collOidKey := range nsKeyMap[nsID] { + collOidBizMap[collOidKey] = append(collOidBizMap[collOidKey], asstBizID) } } + for bizID, nodes := range bizNodeMap { + // save kube topo level node info to redis + err = nodelgc.AddNodeInfoCache(kit, w.cacheKey, bizID, kind, nodes) + if err != nil { + return nil, err + } + } return collOidBizMap, nil } -type commonDelArchive struct { - Oid string `bson:"oid"` - Detail commonResBaseInfo `bson:"detail"` -} - type commonResBaseInfo struct { - BizID int64 `bson:"bk_biz_id"` - ID int64 `bson:"id"` - NsID int64 `bson:"bk_namespace_id"` + Oid string `bson:"-"` + BizID int64 `bson:"bk_biz_id"` + ID int64 `bson:"id"` + NsID int64 `bson:"bk_namespace_id"` } // handleDeleteTopoLevelEvent handle delete event for kube topo level -func (w *kubeWatcher) handleDeleteTopoLevelEvent(ctx context.Context, obj string, collOidMap map[string][]string, - rid string) (map[string][]int64, error) { +func (w *kubeWatcher) handleDeleteTopoLevelEvent(ctx context.Context, obj string, + collOidMap map[string][]commonResBaseInfo, rid string) (map[string][]int64, error) { - collOidBizMap := make(map[string][]int64) - for coll, oids := range collOidMap { - // get del archive data - cond := mapstr.MapStr{ - "oid": mapstr.MapStr{common.BKDBIN: oids}, - "coll": coll, - } + kit := rest.NewKit().WithCtx(ctx).WithRid(rid) - docs := make([]commonDelArchive, 0) - err := mongodb.Client().Table(common.BKTableNameKubeDelArchive).Find(cond).All(ctx, &docs) + collOidBizMap := make(map[string][]int64) + for coll, docs := range collOidMap { + tenantID, table, err := common.SplitTenantTableName(coll) if err != nil { - blog.Errorf("get del archive by cond: %+v failed, err: %v, rid: %s", cond, err, rid) - return nil, err + blog.Errorf("received invalid delete kube topology events, coll %s, docs: %+v, rid: %s", coll, docs, rid) + continue } + kit = kit.WithTenant(tenantID) - kind, err := getKubeNodeKind(obj, coll) + kind, err := getKubeNodeKind(obj, table) if err != nil { blog.Errorf("get %s kube node kind by coll %s failed, err: %v, rid: %s", obj, coll, err, rid) continue @@ -360,22 +366,22 @@ func (w *kubeWatcher) handleDeleteTopoLevelEvent(ctx context.Context, obj string var nsID int64 switch obj { case kubetypes.KubeNamespace: - nsID = doc.Detail.ID + nsID = doc.ID case kubetypes.KubeWorkload: - nsID = doc.Detail.NsID + nsID = doc.NsID } nsIDs = append(nsIDs, nsID) - bizIDMap[doc.Detail.BizID] = append(bizIDMap[doc.Detail.BizID], doc.Detail.ID) - nsDocIDsMap[nsID] = append(nsDocIDsMap[nsID], doc.Detail.ID) + bizIDMap[doc.BizID] = append(bizIDMap[doc.BizID], doc.ID) + nsDocIDsMap[nsID] = append(nsDocIDsMap[nsID], doc.ID) collOidKey := genCollOidKey(coll, doc.Oid) - collOidBizMap[collOidKey] = []int64{doc.Detail.BizID} + collOidBizMap[collOidKey] = []int64{doc.BizID} nsKeyMap[nsID] = append(nsKeyMap[nsID], collOidKey) } // delete shared namespace node info in asst biz's cache - asstBizInfo, err := w.sharedNsCache.GetAsstBiz(ctx, nsIDs, rid) + asstBizInfo, err := w.sharedNsCache.GetAsstBiz(kit, nsIDs) if err != nil { return nil, err } @@ -393,12 +399,12 @@ func (w *kubeWatcher) handleDeleteTopoLevelEvent(ctx context.Context, obj string // delete kube topo level node info in redis for bizID, ids := range bizIDMap { - err = nodelgc.DeleteNodeInfoCache(w.cacheKey, bizID, kind, ids, rid) + err = nodelgc.DeleteNodeInfoCache(kit, w.cacheKey, bizID, kind, ids) if err != nil { return nil, err } - err = nodelgc.DeleteNodeCountCache(w.cacheKey, bizID, kind, ids, rid) + err = 
nodelgc.DeleteNodeCountCache(kit, w.cacheKey, bizID, kind, ids) if err != nil { return nil, err } @@ -460,151 +466,117 @@ var kubeEventDocParserMap = map[string]func(doc mapStrWithOid) (int64, types.Nod } // onContainerCountChange handle container count change event -func (w *kubeWatcher) onContainerCountChange(es []*streamtypes.Event) (retry bool) { +func (w *kubeWatcher) onContainerCountChange(dbInfo *streamtypes.DBInfo, es []*streamtypes.Event) bool { if len(es) == 0 { return false } - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - rid := es[0].ID() + kit := rest.NewKit().WithCtx(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)). + WithRid(es[0].ID()) - insertPodNsIDs := make([]int64, 0) - delOids := make([]string, 0) + tenantNsIDsMap := make(map[string][]int64) for idx := range es { one := es[idx] switch one.OperationType { - case streamtypes.Insert: + case streamtypes.Insert, streamtypes.Delete: pod, ok := one.Document.(*kubetypes.Pod) if !ok { - blog.Errorf("kube pod event %s doc type %T is invalid, rid: %s", one.Oid, one.Document, rid) + blog.Errorf("kube pod event %s doc type %T is invalid, rid: %s", one.Oid, one.Document, kit.Rid) continue } - insertPodNsIDs = append(insertPodNsIDs, pod.NamespaceID) - case streamtypes.Delete: - delOids = append(delOids, one.Oid) + tenantNsIDsMap[one.TenantID] = append(tenantNsIDsMap[one.TenantID], pod.NamespaceID) default: // only handle insert and delete pod event. continue } blog.V(5).Infof("watch kube topo container count cache, received oid: %s, op-time: %s, %s event, rid: %s", - one.Oid, one.ClusterTime.String(), one.OperationType, rid) + one.Oid, one.ClusterTime.String(), one.OperationType, kit.Rid) } - delOidPodMap, delPodNsIDs, err := w.getDeletePodInfo(ctx, delOids, rid) - if err != nil { - return true - } - - nsIDs := util.IntArrayUnique(append(insertPodNsIDs, delPodNsIDs...)) - asstBizInfo, err := w.sharedNsCache.GetAsstBiz(ctx, nsIDs, rid) - if err != nil { - return true + asstBizInfo := make(map[string]map[int64]int64) + for tenantID, nsIDs := range tenantNsIDsMap { + kit = kit.WithTenant(tenantID) + asstBiz, err := w.sharedNsCache.GetAsstBiz(kit, util.IntArrayUnique(nsIDs)) + if err != nil { + return true + } + asstBizInfo[tenantID] = asstBiz } - bizList, err := w.handlePodEvents(ctx, es, delOidPodMap, asstBizInfo, rid) + bizList, err := w.handlePodEvents(kit, es, asstBizInfo) if err != nil { return true } - topolgc.AddRefreshBizTopoTask(types.KubeType, bizList, rid) + topolgc.AddRefreshBizTopoTask(types.KubeType, bizList, kit.Rid) return false } -type podDelArchive struct { - Oid string `bson:"oid"` - Detail *kubetypes.Pod `bson:"detail"` -} - -// getDeletePodInfo get delete pod info from del archive -func (w *kubeWatcher) getDeletePodInfo(ctx context.Context, oids []string, rid string) (map[string]*kubetypes.Pod, - []int64, error) { - - cond := mapstr.MapStr{ - "oid": mapstr.MapStr{common.BKDBIN: oids}, - "coll": kubetypes.BKTableNameBasePod, - } - - archives := make([]podDelArchive, 0) - err := mongodb.Client().Table(common.BKTableNameKubeDelArchive).Find(cond).Fields("oid", "detail").All(ctx, - &archives) - if err != nil { - blog.Errorf("get pod del archive by cond: %+v failed, err: %v, rid: %s", cond, err, rid) - return nil, nil, err - } - - podMap := make(map[string]*kubetypes.Pod) - nsIDs := make([]int64, 0) - for _, archive := range archives { - podMap[archive.Oid] = archive.Detail - nsIDs = append(nsIDs, archive.Detail.NamespaceID) - } - - return podMap, 
nsIDs, nil -} - // handlePodEvents refresh pod events related workload container count, returns biz ids whose topo tree needs refreshing -func (w *kubeWatcher) handlePodEvents(ctx context.Context, es []*streamtypes.Event, - delOidPodMap map[string]*kubetypes.Pod, asstBizInfo map[int64]int64, rid string) ([]int64, error) { +func (w *kubeWatcher) handlePodEvents(kit *rest.Kit, es []*streamtypes.Event, asstBizInfo map[string]map[int64]int64) ( + map[string][]int64, error) { - // wlKindIDMap is map[workload_kind][]workload_id, stores the pod events related workload info - wlKindIDMap := make(map[kubetypes.WorkloadType][]int64) + // tenantWlKindIDMap is map[tenant_id][workload_kind][]workload_id, stores the pod events related workload info + tenantWlKindIDMap := make(map[string]map[kubetypes.WorkloadType][]int64) // wlBizIDMap is map[workload_kind]map[workload_id][]bk_biz_id, stores the workload to its related biz ids wlBizIDMap := make(map[kubetypes.WorkloadType]map[int64][]int64) - // bizList is biz ids whose topo tree needs refreshing, in the order of pod events - bizList := make([]int64, 0) + // bizListMap is tenant to biz ids map whose topo tree needs refreshing, in the order of pod events + bizListMap := make(map[string][]int64, 0) for idx := range es { one := es[idx] + tenantID := one.TenantID var pod *kubetypes.Pod switch one.OperationType { - case streamtypes.Insert: + case streamtypes.Insert, streamtypes.Delete: pod = one.Document.(*kubetypes.Pod) - case streamtypes.Delete: - var exists bool - pod, exists = delOidPodMap[one.Oid] - if !exists { - continue - } default: continue } // record the workloads that needs to refresh container count - wlKindIDMap[pod.Ref.Kind] = append(wlKindIDMap[pod.Ref.Kind], pod.Ref.ID) + if _, exists := tenantWlKindIDMap[tenantID]; !exists { + tenantWlKindIDMap[tenantID] = make(map[kubetypes.WorkloadType][]int64) + } + tenantWlKindIDMap[tenantID][pod.Ref.Kind] = append(tenantWlKindIDMap[tenantID][pod.Ref.Kind], pod.Ref.ID) // record workload related biz info, including the pod's biz id and shared namespace asst biz id - _, exists := wlBizIDMap[pod.Ref.Kind] - if !exists { + if _, exists := wlBizIDMap[pod.Ref.Kind]; !exists { wlBizIDMap[pod.Ref.Kind] = make(map[int64][]int64) } wlBizIDMap[pod.Ref.Kind][pod.Ref.ID] = []int64{pod.BizID} - bizList = append(bizList, pod.BizID) + bizListMap[tenantID] = append(bizListMap[tenantID], pod.BizID) - asstBizID, exists := asstBizInfo[pod.NamespaceID] + asstBizMap, exists := asstBizInfo[tenantID] + if !exists { + continue + } + asstBizID, exists := asstBizMap[pod.NamespaceID] if exists { wlBizIDMap[pod.Ref.Kind][pod.Ref.ID] = append(wlBizIDMap[pod.Ref.Kind][pod.Ref.ID], asstBizID) - bizList = append(bizList, asstBizID) + bizListMap[tenantID] = append(bizListMap[tenantID], asstBizID) } } // refresh workload topo node container count cache by workload kind - for wlType, wlIDs := range wlKindIDMap { - wlIDs = util.IntArrayUnique(wlIDs) - if err := w.refreshWlCountCache(ctx, string(wlType), wlIDs, wlBizIDMap[wlType], rid); err != nil { - return nil, err + for tenantID, wlKindIDMap := range tenantWlKindIDMap { + kit = kit.WithTenant(tenantID) + for wlType, wlIDs := range wlKindIDMap { + wlIDs = util.IntArrayUnique(wlIDs) + if err := w.refreshWlCountCache(kit, string(wlType), wlIDs, wlBizIDMap[wlType]); err != nil { + return nil, err + } } } - return bizList, nil + return bizListMap, nil } // refreshWlCountCache refresh workload topo node container count cache -func (w *kubeWatcher) refreshWlCountCache(ctx context.Context, kind 
string, ids []int64, bizMap map[int64][]int64, - rid string) error { - +func (w *kubeWatcher) refreshWlCountCache(kit *rest.Kit, kind string, ids []int64, bizMap map[int64][]int64) error { // get pods by pod workloads podCond := mapstr.MapStr{ kubetypes.RefIDField: mapstr.MapStr{common.BKDBIN: ids}, @@ -612,9 +584,9 @@ func (w *kubeWatcher) refreshWlCountCache(ctx context.Context, kind string, ids } pods := make([]kubetypes.Pod, 0) - if err := mongodb.Client().Table(kubetypes.BKTableNameBasePod).Find(podCond).Fields(kubetypes.BKIDField, - kubetypes.RefIDField).All(ctx, &pods); err != nil { - blog.Errorf("get pod ids failed, cond: %+v, err: %v, rid: %s", podCond, err, rid) + if err := mongodb.Shard(kit.ShardOpts()).Table(kubetypes.BKTableNameBasePod).Find(podCond). + Fields(kubetypes.BKIDField, kubetypes.RefIDField).All(kit.Ctx, &pods); err != nil { + blog.Errorf("get pod ids failed, cond: %+v, err: %v, rid: %s", podCond, err, kit.Rid) return err } @@ -631,9 +603,10 @@ func (w *kubeWatcher) refreshWlCountCache(ctx context.Context, kind string, ids kubetypes.BKPodIDField: mapstr.MapStr{common.BKDBIN: podIDs}, } - cnt, err := mongodb.Client().Table(kubetypes.BKTableNameBaseContainer).Find(containerCond).Count(ctx) + cnt, err := mongodb.Shard(kit.ShardOpts()).Table(kubetypes.BKTableNameBaseContainer).Find(containerCond). + Count(kit.Ctx) if err != nil { - blog.Errorf("count containers failed, cond: %+v, err: %v, rid: %s", containerCond, err, rid) + blog.Errorf("count containers failed, cond: %+v, err: %v, rid: %s", containerCond, err, kit.Rid) return err } containerCnt = int64(cnt) @@ -641,7 +614,7 @@ func (w *kubeWatcher) refreshWlCountCache(ctx context.Context, kind string, ids // refresh workload topo node count cache in related bizs for _, bizID := range bizMap[wlID] { - err := nodelgc.AddNodeCountCache(w.cacheKey, bizID, kind, map[int64]int64{wlID: containerCnt}, rid) + err := nodelgc.AddNodeCountCache(kit, w.cacheKey, bizID, kind, map[int64]int64{wlID: containerCnt}) if err != nil { return err } diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go b/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go index 74cbfccf8a..31b7107f6d 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go +++ b/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go @@ -19,26 +19,38 @@ package watch import ( + "configcenter/src/apimachinery/discovery" "configcenter/src/source_controller/cacheservice/cache/custom/cache" - "configcenter/src/storage/stream" + watchcli "configcenter/src/source_controller/cacheservice/event/watch" + "configcenter/src/storage/stream/task" ) // Watcher defines mongodb event watcher for biz topology type Watcher struct { - loopW stream.LoopInterface + isMaster discovery.ServiceManageInterface + task *task.Task cacheSet *cache.CacheSet + watchCli *watchcli.Client } // New biz topology mongodb event watcher -func New(loopW stream.LoopInterface, cacheSet *cache.CacheSet) (*Watcher, error) { +func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, cacheSet *cache.CacheSet, + watchCli *watchcli.Client) (*Watcher, error) { + watcher := &Watcher{ - loopW: loopW, + isMaster: isMaster, + task: watchTask, cacheSet: cacheSet, + watchCli: watchCli, } if err := watcher.watchKube(); err != nil { return nil, err } + if err := watcher.watchBrief(); err != nil { + return nil, err + } + return watcher, nil } diff --git a/src/source_controller/cacheservice/cache/cache.go 
b/src/source_controller/cacheservice/cache/cache.go index fcc740df1d..0edd9c9e39 100644 --- a/src/source_controller/cacheservice/cache/cache.go +++ b/src/source_controller/cacheservice/cache/cache.go @@ -21,52 +21,41 @@ import ( "configcenter/src/source_controller/cacheservice/cache/custom" "configcenter/src/source_controller/cacheservice/cache/general" "configcenter/src/source_controller/cacheservice/cache/mainline" - "configcenter/src/source_controller/cacheservice/cache/topology" "configcenter/src/source_controller/cacheservice/cache/topotree" "configcenter/src/source_controller/cacheservice/event/watch" - "configcenter/src/storage/dal" "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/redis" - "configcenter/src/storage/reflector" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) // NewCache new cache service -func NewCache(reflector reflector.Interface, loopW stream.LoopInterface, isMaster discovery.ServiceManageInterface, - watchDB dal.DB) (*ClientSet, error) { - - if err := mainline.NewMainlineCache(loopW); err != nil { - return nil, fmt.Errorf("new business cache failed, err: %v", err) +func NewCache(watchTask *task.Task, isMaster discovery.ServiceManageInterface) (*ClientSet, error) { + if err := mainline.NewMainlineCache(isMaster); err != nil { + return nil, fmt.Errorf("new mainline cache failed, err: %v", err) } - bizBriefTopoClient, err := topology.NewTopology(isMaster, loopW) + customCache, err := custom.New(isMaster, watchTask) if err != nil { - return nil, err + return nil, fmt.Errorf("new custom resource cache failed, err: %v", err) } - mainlineClient := mainline.NewMainlineClient() + watchCli := watch.NewClient(mongodb.Dal("watch"), mongodb.Dal(), redis.Client()) - customCache, err := custom.New(isMaster, loopW) + generalCache, err := general.New(isMaster, watchTask, watchCli) if err != nil { - return nil, fmt.Errorf("new custom resource cache failed, err: %v", err) + return nil, fmt.Errorf("new general resource cache failed, err: %v", err) } - topoTreeClient, err := biztopo.New(isMaster, loopW, customCache.CacheSet()) + topoTreeClient, err := biztopo.New(isMaster, watchTask, customCache.CacheSet(), watchCli) if err != nil { return nil, fmt.Errorf("new common topo cache failed, err: %v", err) } - watchCli := watch.NewClient(watchDB, mongodb.Client(), redis.Client()) - - generalCache, err := general.New(isMaster, loopW, watchCli) - if err != nil { - return nil, fmt.Errorf("new general resource cache failed, err: %v", err) - } + mainlineClient := mainline.NewMainlineClient(generalCache) cache := &ClientSet{ Tree: topotree.NewTopologyTree(mainlineClient), Business: mainlineClient, - Topology: bizBriefTopoClient, Topo: topoTreeClient, Event: watchCli, Custom: customCache, @@ -78,7 +67,6 @@ func NewCache(reflector reflector.Interface, loopW stream.LoopInterface, isMaste // ClientSet is the cache client set type ClientSet struct { Tree *topotree.TopologyTree - Topology *topology.Topology Topo *biztopo.Topo Business *mainline.Client Event *watch.Client diff --git a/src/source_controller/cacheservice/cache/custom/cache.go b/src/source_controller/cacheservice/cache/custom/cache.go index e30937314b..02cd56fef1 100644 --- a/src/source_controller/cacheservice/cache/custom/cache.go +++ b/src/source_controller/cacheservice/cache/custom/cache.go @@ -24,7 +24,7 @@ import ( "configcenter/src/apimachinery/discovery" "configcenter/src/source_controller/cacheservice/cache/custom/cache" 
"configcenter/src/source_controller/cacheservice/cache/custom/watch" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) // Cache defines the custom resource caching logics @@ -33,12 +33,12 @@ type Cache struct { } // New Cache -func New(isMaster discovery.ServiceManageInterface, loopW stream.LoopInterface) (*Cache, error) { +func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task) (*Cache, error) { t := &Cache{ cacheSet: cache.New(isMaster), } - if err := watch.Init(loopW, t.cacheSet); err != nil { + if err := watch.Init(watchTask, t.cacheSet); err != nil { return nil, fmt.Errorf("initialize custom resource watcher failed, err: %v", err) } diff --git a/src/source_controller/cacheservice/cache/custom/cache/count.go b/src/source_controller/cacheservice/cache/custom/cache/count.go index 4b191859ad..cf89d50f84 100644 --- a/src/source_controller/cacheservice/cache/custom/cache/count.go +++ b/src/source_controller/cacheservice/cache/custom/cache/count.go @@ -18,10 +18,10 @@ package cache import ( - "context" "strconv" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/source_controller/cacheservice/cache/custom/types" "configcenter/src/storage/driver/redis" ) @@ -39,15 +39,15 @@ func NewCountCache(key Key) *CountCache { } // GetDataList get data list by cache key -func (c *CountCache) GetDataList(ctx context.Context, key string, rid string) ([]string, error) { - cacheKey := c.key.Key(key) +func (c *CountCache) GetDataList(kit *rest.Kit, key string) ([]string, error) { + cacheKey := c.key.Key(kit.TenantID, key) cursor := uint64(0) all := make([]string, 0) for { - list, nextCursor, err := redis.Client().HScan(ctx, cacheKey, cursor, "", types.RedisPage).Result() + list, nextCursor, err := redis.Client().HScan(kit.Ctx, cacheKey, cursor, "", types.RedisPage).Result() if err != nil { - blog.Errorf("scan %s data count cache by cursor %d failed, err: %v, rid: %s", cacheKey, cursor, err, rid) + blog.Errorf("scan %s data list cache by cursor %d failed, err: %v, rid: %s", cacheKey, cursor, err, kit.Rid) return nil, err } @@ -63,7 +63,7 @@ func (c *CountCache) GetDataList(ctx context.Context, key string, rid string) ([ } // UpdateCount batch update cache by map[key]map[data]count -func (c *CountCache) UpdateCount(ctx context.Context, cntMap map[string]map[string]int64, rid string) error { +func (c *CountCache) UpdateCount(kit *rest.Kit, cntMap map[string]map[string]int64) error { for key, dataCnt := range cntMap { var args []interface{} for k, v := range dataCnt { @@ -71,10 +71,10 @@ func (c *CountCache) UpdateCount(ctx context.Context, cntMap map[string]map[stri } args = append(args, int64(c.key.TTL().Seconds())) - err := redis.Client().Eval(ctx, updateCountScript, []string{c.key.Key(key)}, args...).Err() + err := redis.Client().Eval(kit.Ctx, updateCountScript, []string{c.key.Key(kit.TenantID, key)}, args...).Err() if err != nil { blog.Errorf("update type: %s key: %s count cache failed, err: %v, data: %+v, rid: %s", c.key.Type(), key, - err, dataCnt, rid) + err, dataCnt, kit.Rid) return err } } @@ -98,10 +98,10 @@ return 1 ` // RefreshCount replace the cache info to map[data]count, returns the deleted data list -func (c *CountCache) RefreshCount(ctx context.Context, key string, cntMap map[string]int64, rid string) ([]string, +func (c *CountCache) RefreshCount(kit *rest.Kit, key string, cntMap map[string]int64) ([]string, error) { - cacheKey := c.key.Key(key) + cacheKey := c.key.Key(kit.TenantID, key) pip := 
redis.Client().Pipeline()
 	defer pip.Close()
 
@@ -119,7 +119,7 @@ func (c *CountCache) RefreshCount(ctx context.Context, key string, cntMap map[st
 		pip.HMSet(cacheKey, cntKeyValues)
 	}
 
-	list, err := c.GetDataList(ctx, key, rid)
+	list, err := c.GetDataList(kit, key)
 	if err != nil {
 		return nil, err
 	}
@@ -146,7 +146,7 @@ func (c *CountCache) RefreshCount(ctx context.Context, key string, cntMap map[st
 	_, err = pip.Exec()
 	if err != nil {
 		blog.Errorf("refresh type: %s key: %s count cache failed, err: %v, data: %+v, rid: %s", c.key.Type(), key, err,
-			cntMap, rid)
+			cntMap, kit.Rid)
 		return nil, err
 	}
 
@@ -154,13 +154,13 @@ func (c *CountCache) RefreshCount(ctx context.Context, key string, cntMap map[st
 }
 
 // Delete delete cache key
-func (c *CountCache) Delete(ctx context.Context, key string, rid string) error {
-	cacheKey := c.key.Key(key)
+func (c *CountCache) Delete(kit *rest.Kit, key string) error {
+	cacheKey := c.key.Key(kit.TenantID, key)
 
 	pip := redis.Client().Pipeline()
 	defer pip.Close()
 
-	list, err := c.GetDataList(ctx, key, rid)
+	list, err := c.GetDataList(kit, key)
 	if err != nil {
 		return err
 	}
@@ -181,7 +181,7 @@ func (c *CountCache) Delete(ctx context.Context, key string, rid string) error {
 
 	_, err = pip.Exec()
 	if err != nil {
-		blog.Errorf("delete %s count cache key failed, err: %v, rid: %s", cacheKey, err, rid)
+		blog.Errorf("delete %s count cache key failed, err: %v, rid: %s", cacheKey, err, kit.Rid)
 		return err
 	}
 
diff --git a/src/source_controller/cacheservice/cache/custom/cache/key.go b/src/source_controller/cacheservice/cache/custom/cache/key.go
index d1266f1e30..df29c5c4cc 100644
--- a/src/source_controller/cacheservice/cache/custom/cache/key.go
+++ b/src/source_controller/cacheservice/cache/custom/cache/key.go
@@ -47,6 +47,6 @@ func (k Key) TTL() time.Duration {
 }
 
 // Key is the redis key to store the custom resource cache data
-func (k Key) Key(key string) string {
-	return fmt.Sprintf("%s:%s:%s", Namespace, k.resType, key)
+func (k Key) Key(tenantID, key string) string {
+	return fmt.Sprintf("%s:%s:%s:%s", Namespace, k.resType, tenantID, key)
 }
diff --git a/src/source_controller/cacheservice/cache/custom/cache/label.go b/src/source_controller/cacheservice/cache/custom/cache/label.go
index 896bffcaaa..10e0d8e751 100644
--- a/src/source_controller/cacheservice/cache/custom/cache/label.go
+++ b/src/source_controller/cacheservice/cache/custom/cache/label.go
@@ -25,9 +25,11 @@ import (
 	"time"
 
 	"configcenter/pkg/conv"
+	"configcenter/pkg/tenant"
 	"configcenter/src/apimachinery/discovery"
 	"configcenter/src/common"
 	"configcenter/src/common/blog"
+	"configcenter/src/common/http/rest"
 	"configcenter/src/common/lock"
 	"configcenter/src/common/mapstr"
 	"configcenter/src/common/metadata"
@@ -66,20 +68,20 @@ func (c *PodLabelCache) genValueRedisKey(bizID int64, key string) string {
 }
 
 // GetKeys get biz pod label keys
-func (c *PodLabelCache) GetKeys(ctx context.Context, bizID int64, rid string) ([]string, error) {
+func (c *PodLabelCache) GetKeys(kit *rest.Kit, bizID int64) ([]string, error) {
 	redisKey := c.genKeyRedisKey(bizID)
 
-	existRes, err := redis.Client().Exists(ctx, c.keyCache.key.Key(redisKey)).Result()
+	existRes, err := redis.Client().Exists(kit.Ctx, c.keyCache.key.Key(kit.TenantID, redisKey)).Result()
 	if err != nil {
-		blog.Errorf("check if biz %d pod label cache exists failed, err: %v, rid: %s", bizID, err, rid)
+		blog.Errorf("check if biz %d pod label cache exists failed, err: %v, rid: %s", bizID, err, kit.Rid)
 		return nil, err
 	}
 
 	// get pod label keys from cache if cache exists
 	if existRes == 1 {
-		keys, err := c.keyCache.GetDataList(ctx, redisKey, rid)
+		
keys, err := c.keyCache.GetDataList(kit, redisKey) if err != nil { - blog.Errorf("get pod label keys from cache %s failed, err: %v, rid: %s", redisKey, err, rid) + blog.Errorf("get pod label keys from cache %s failed, err: %v, rid: %s", redisKey, err, kit.Rid) return nil, err } return keys, nil @@ -90,9 +92,9 @@ func (c *PodLabelCache) GetKeys(ctx context.Context, bizID int64, rid string) ([ BizID: bizID, ReturnType: LabelKeyReturnType, } - keys, err := c.RefreshPodLabel(ctx, refreshOpt, rid) + keys, err := c.RefreshPodLabel(kit, refreshOpt) if err != nil { - blog.Errorf("refresh biz: %d pod label cache failed, err: %v, rid: %s", bizID, err, rid) + blog.Errorf("refresh biz: %d pod label cache failed, err: %v, rid: %s", bizID, err, kit.Rid) if len(keys) > 0 { // do not return error if keys are acquired but cache update failed return keys, nil @@ -104,18 +106,19 @@ func (c *PodLabelCache) GetKeys(ctx context.Context, bizID int64, rid string) ([ } // GetValues get biz pod label values for specified key -func (c *PodLabelCache) GetValues(ctx context.Context, bizID int64, key string, rid string) ([]string, error) { - existRes, err := redis.Client().Exists(ctx, c.keyCache.key.Key(c.genKeyRedisKey(bizID))).Result() +func (c *PodLabelCache) GetValues(kit *rest.Kit, bizID int64, key string) ([]string, error) { + existRes, err := redis.Client().Exists(kit.Ctx, c.keyCache.key.Key(kit.TenantID, c.genKeyRedisKey(bizID))).Result() if err != nil { - blog.Errorf("check if biz %d pod label cache exists failed, err: %v, rid: %s", bizID, err, rid) + blog.Errorf("check if biz %d pod label cache exists failed, err: %v, rid: %s", bizID, err, kit.Rid) return nil, err } // get pod label values from cache if cache exists if existRes == 1 { - values, err := c.valueCache.GetDataList(ctx, c.genValueRedisKey(bizID, key), rid) + values, err := c.valueCache.GetDataList(kit, c.genValueRedisKey(bizID, key)) if err != nil { - blog.Errorf("get biz %d pod label key %s values from cache failed, err: %v, rid: %s", bizID, key, err, rid) + blog.Errorf("get biz %d pod label key %s values from cache failed, err: %v, rid: %s", bizID, key, err, + kit.Rid) return nil, err } return values, nil @@ -127,9 +130,9 @@ func (c *PodLabelCache) GetValues(ctx context.Context, bizID int64, key string, ReturnType: LabelValueReturnType, LabelKey: key, } - values, err := c.RefreshPodLabel(ctx, refreshOpt, rid) + values, err := c.RefreshPodLabel(kit, refreshOpt) if err != nil { - blog.Errorf("refresh biz: %d pod label cache failed, err: %v, rid: %s", bizID, err, rid) + blog.Errorf("refresh biz: %d pod label cache failed, err: %v, rid: %s", bizID, err, kit.Rid) if len(values) > 0 { // do not return error if values are acquired but cache update failed return values, nil @@ -141,7 +144,7 @@ func (c *PodLabelCache) GetValues(ctx context.Context, bizID int64, key string, } // UpdateKeyCount update pod label key count cache by map[bizID]map[labelKey]count -func (c *PodLabelCache) UpdateKeyCount(ctx context.Context, keyCntMap map[int64]map[string]int64, rid string) error { +func (c *PodLabelCache) UpdateKeyCount(kit *rest.Kit, keyCntMap map[int64]map[string]int64) error { cntMap := make(map[string]map[string]int64) for bizID, keyCnt := range keyCntMap { @@ -159,17 +162,15 @@ func (c *PodLabelCache) UpdateKeyCount(ctx context.Context, keyCntMap map[int64] return nil } - if err := c.keyCache.UpdateCount(ctx, cntMap, rid); err != nil { - blog.Errorf("update pod label count failed, err: %v, count info: %+v, rid: %s", err, cntMap, rid) + if err := 
c.keyCache.UpdateCount(kit, cntMap); err != nil {
+		blog.Errorf("update pod label count failed, err: %v, count info: %+v, rid: %s", err, cntMap, kit.Rid)
 		return err
 	}
 	return nil
 }
 
 // UpdateValueCount update pod label value count cache by map[bizID]map[labelKey]map[labelValue]count
-func (c *PodLabelCache) UpdateValueCount(ctx context.Context, valueCntMap map[int64]map[string]map[string]int64,
-	rid string) error {
-
+func (c *PodLabelCache) UpdateValueCount(kit *rest.Kit, valueCntMap map[int64]map[string]map[string]int64) error {
 	cntMap := make(map[string]map[string]int64)
 
 	for bizID, keyValueCnt := range valueCntMap {
@@ -190,8 +191,8 @@ func (c *PodLabelCache) UpdateValueCount(ctx context.Context, valueCntMap map[in
 		return nil
 	}
 
-	if err := c.valueCache.UpdateCount(ctx, cntMap, rid); err != nil {
-		blog.Errorf("update pod label count failed, err: %v, count info: %+v, rid: %s", err, cntMap, rid)
+	if err := c.valueCache.UpdateCount(kit, cntMap); err != nil {
+		blog.Errorf("update pod label count failed, err: %v, count info: %+v, rid: %s", err, cntMap, kit.Rid)
 		return err
 	}
 	return nil
@@ -237,7 +238,7 @@ func (opt *RefreshPodLabelOpt) Validate() error {
 }
 
 // RefreshPodLabel refresh pod label key and value cache
-func (c *PodLabelCache) RefreshPodLabel(ctx context.Context, opt *RefreshPodLabelOpt, rid string) ([]string, error) {
+func (c *PodLabelCache) RefreshPodLabel(kit *rest.Kit, opt *RefreshPodLabelOpt) ([]string, error) {
 	if err := opt.Validate(); err != nil {
 		return nil, err
 	}
@@ -249,18 +250,18 @@ func (c *PodLabelCache) RefreshPodLabel(ctx context.Context, opt *RefreshPodLabe
 	locked, err := locker.Lock(lock.StrFormat(lockKey), 5*time.Minute)
 	defer locker.Unlock()
 	if err != nil {
-		blog.Errorf("get %s lock failed, err: %v, rid: %s", lockKey, err, rid)
+		blog.Errorf("get %s lock failed, err: %v, rid: %s", lockKey, err, kit.Rid)
 		return nil, err
 	}
 
 	if !locked {
-		blog.Errorf("%s task is already lock, rid: %s", lockKey, rid)
+		blog.Errorf("%s task is already locked, rid: %s", lockKey, kit.Rid)
 		return nil, errors.New("there's a same refreshing task running, please retry later")
 	}
 
-	ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode)
+	kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode)
 
-	keyCntMap, keyValueCntMap, err := c.getBizPodLabelCountInfo(ctx, opt.BizID, rid)
+	keyCntMap, keyValueCntMap, err := c.getBizPodLabelCountInfo(kit, opt.BizID)
 	if err != nil {
 		return nil, err
 	}
@@ -283,21 +284,22 @@ func (c *PodLabelCache) RefreshPodLabel(ctx context.Context, opt *RefreshPodLabe
 	}
 
 	// refresh label key and value count cache
-	delLabelKeys, err := c.keyCache.RefreshCount(ctx, c.genKeyRedisKey(opt.BizID), keyCntMap, rid)
+	delLabelKeys, err := c.keyCache.RefreshCount(kit, c.genKeyRedisKey(opt.BizID), keyCntMap)
 	if err != nil {
-		blog.Errorf("refresh pod label key count failed, err: %v, count info: %+v, rid: %s", err, keyCntMap, rid)
+		blog.Errorf("refresh pod label key count failed, err: %v, count info: %+v, rid: %s", err, keyCntMap, kit.Rid)
 		return results, err
 	}
 
 	for key, valueCntMap := range keyValueCntMap {
-		if _, err = c.valueCache.RefreshCount(ctx, c.genValueRedisKey(opt.BizID, key), valueCntMap, rid); err != nil {
-			blog.Errorf("refresh pod label key count failed, err: %v, count info: %+v, rid: %s", err, keyCntMap, rid)
+		if _, err = c.valueCache.RefreshCount(kit, c.genValueRedisKey(opt.BizID, key), valueCntMap); err != nil {
+			blog.Errorf("refresh pod label value count failed, err: %v, count info: %+v, rid: %s", err, valueCntMap,
+				kit.Rid)
 			return 
results, err } } for _, key := range delLabelKeys { - if err = c.valueCache.Delete(ctx, c.genValueRedisKey(opt.BizID, key), rid); err != nil { + if err = c.valueCache.Delete(kit, c.genValueRedisKey(opt.BizID, key)); err != nil { return nil, err } } @@ -306,12 +308,12 @@ func (c *PodLabelCache) RefreshPodLabel(ctx context.Context, opt *RefreshPodLabe } // getBizPodLabelCountInfo generate map[label_key]count & map[label_key]map[label_value]count by biz pods -func (c *PodLabelCache) getBizPodLabelCountInfo(ctx context.Context, bizID int64, rid string) (map[string]int64, +func (c *PodLabelCache) getBizPodLabelCountInfo(kit *rest.Kit, bizID int64) (map[string]int64, map[string]map[string]int64, error) { - cond, err := tools.GenKubeSharedNsCond(ctx, bizID, kubetypes.BKNamespaceIDField, rid) + cond, err := tools.GenKubeSharedNsCond(kit, bizID, kubetypes.BKNamespaceIDField) if err != nil { - blog.Errorf("generate shared namespace condition failed, err: %v, biz: %d, rid: %v", err, bizID, rid) + blog.Errorf("generate shared namespace condition failed, err: %v, biz: %d, rid: %v", err, bizID, kit.Rid) return nil, nil, err } @@ -321,10 +323,10 @@ func (c *PodLabelCache) getBizPodLabelCountInfo(ctx context.Context, bizID int64 for { pods := make([]kubetypes.Pod, 0) - err = mongodb.Client().Table(kubetypes.BKTableNameBasePod).Find(cond).Fields(kubetypes.BKIDField, - kubetypes.LabelsField).Sort(kubetypes.BKIDField).Limit(types.DBPage).All(ctx, &pods) + err = mongodb.Shard(kit.ShardOpts()).Table(kubetypes.BKTableNameBasePod).Find(cond).Fields(kubetypes.BKIDField, + kubetypes.LabelsField).Sort(kubetypes.BKIDField).Limit(types.DBPage).All(kit.Ctx, &pods) if err != nil { - blog.Errorf("get pods to refresh label cache failed, cond: %+v, err: %v, rid: %s", cond, err, rid) + blog.Errorf("get pods to refresh label cache failed, cond: %+v, err: %v, rid: %s", cond, err, kit.Rid) return nil, nil, err } @@ -387,34 +389,40 @@ func (c *PodLabelCache) loopRefreshCache() { // RefreshCache loop refresh pod label cache for all bizs func (c *PodLabelCache) RefreshCache(rid string) { - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + kit := rest.NewKit().WithCtx(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)). 
+ WithRid(rid) - bizIDs, err := c.getAllBizID(ctx) - if err != nil { - blog.Errorf("list all biz id for refresh pod label cache task failed, err: %v, rid: %s", err, rid) - return - } + _ = tenant.ExecForAllTenants(func(tenantID string) error { + kit = kit.WithTenant(tenantID) - for _, bizID := range bizIDs { - time.Sleep(100 * time.Millisecond) + bizIDs, err := c.getAllBizID(kit) + if err != nil { + blog.Errorf("list all biz id for refresh pod label cache task failed, err: %v, rid: %s", err, rid) + return nil + } - bizRid := fmt.Sprintf("%s:%d", rid, bizID) + for _, bizID := range bizIDs { + time.Sleep(100 * time.Millisecond) - blog.Infof("start refresh biz %d pod label cache task, rid: %s", bizID, rid) + kit = kit.WithRid(fmt.Sprintf("%s:%d", rid, bizID)) - refreshOpt := &RefreshPodLabelOpt{BizID: bizID} - _, err = c.RefreshPodLabel(ctx, refreshOpt, bizRid) - if err != nil { - blog.Errorf("refresh biz %d pod label cache task failed, err: %v, rid: %s", bizID, err, rid) - continue - } + blog.Infof("start refresh biz %d pod label cache task, rid: %s", bizID, rid) - blog.Infof("refresh biz %d pod label cache task successfully, rid: %s", bizID, rid) - } + refreshOpt := &RefreshPodLabelOpt{BizID: bizID} + _, err = c.RefreshPodLabel(kit, refreshOpt) + if err != nil { + blog.Errorf("refresh biz %d pod label cache task failed, err: %v, rid: %s", bizID, err, rid) + continue + } + + blog.Infof("refresh biz %d pod label cache task successfully, rid: %s", bizID, rid) + } + return nil + }) } // getAllBizID get all biz id -func (c *PodLabelCache) getAllBizID(ctx context.Context) ([]int64, error) { +func (c *PodLabelCache) getAllBizID(kit *rest.Kit) ([]int64, error) { cond := mapstr.MapStr{ common.BKDefaultField: mapstr.MapStr{common.BKDBNE: common.DefaultAppFlag}, common.BKDataStatusField: mapstr.MapStr{common.BKDBNE: common.DataStatusDisabled}, @@ -424,8 +432,8 @@ func (c *PodLabelCache) getAllBizID(ctx context.Context) ([]int64, error) { for { bizs := make([]metadata.BizInst, 0) - err := mongodb.Client().Table(common.BKTableNameBaseApp).Find(cond).Fields(common.BKAppIDField). - Limit(types.DBPage).Sort(common.BKAppIDField).All(ctx, &bizs) + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseApp).Find(cond).Fields(common.BKAppIDField). 
+ Limit(types.DBPage).Sort(common.BKAppIDField).All(kit.Ctx, &bizs) if err != nil { return nil, err } diff --git a/src/source_controller/cacheservice/cache/custom/cache/shared_ns_rel.go b/src/source_controller/cacheservice/cache/custom/cache/shared_ns_rel.go index f29d81eb81..9865599731 100644 --- a/src/source_controller/cacheservice/cache/custom/cache/shared_ns_rel.go +++ b/src/source_controller/cacheservice/cache/custom/cache/shared_ns_rel.go @@ -24,9 +24,11 @@ import ( "strconv" "time" + "configcenter/pkg/tenant" "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/lock" "configcenter/src/common/mapstr" "configcenter/src/common/util" @@ -56,13 +58,13 @@ func (c *SharedNsRelCache) genNsAsstBizRedisKey(nsID int64) string { } // GetAsstBiz get shared namespace associated biz info -func (c *SharedNsRelCache) GetAsstBiz(ctx context.Context, nsIDs []int64, rid string) (map[int64]int64, error) { +func (c *SharedNsRelCache) GetAsstBiz(kit *rest.Kit, nsIDs []int64) (map[int64]int64, error) { redisKeys := make([]string, len(nsIDs)) for i, nsID := range nsIDs { redisKeys[i] = c.genNsAsstBizRedisKey(nsID) } - redisDataMap, err := c.nsAsstBizCache.List(ctx, redisKeys, rid) + redisDataMap, err := c.nsAsstBizCache.List(kit, redisKeys) if err != nil { return nil, err } @@ -71,13 +73,13 @@ func (c *SharedNsRelCache) GetAsstBiz(ctx context.Context, nsIDs []int64, rid st for redisKey, redisData := range redisDataMap { nsID, err := strconv.ParseInt(redisKey, 10, 64) if err != nil { - blog.Errorf("parse shared ns id from redis key %s failed, err: %v", redisKey, err) + blog.Errorf("parse shared ns id from redis key %s failed, err: %v, rid: %s", redisKey, err, kit.Rid) return nil, err } asstID, err := strconv.ParseInt(redisData, 10, 64) if err != nil { - blog.Errorf("parse asst biz id from redis data %s failed, err: %v", redisData, err) + blog.Errorf("parse asst biz id from redis data %s failed, err: %v, rid: %s", redisData, err, kit.Rid) return nil, err } @@ -88,35 +90,35 @@ func (c *SharedNsRelCache) GetAsstBiz(ctx context.Context, nsIDs []int64, rid st } // UpdateAsstBiz update shared namespace to associated biz cache by map[nsID]asstBizID -func (c *SharedNsRelCache) UpdateAsstBiz(ctx context.Context, nsAsstBizMap map[int64]int64, rid string) error { +func (c *SharedNsRelCache) UpdateAsstBiz(kit *rest.Kit, nsAsstBizMap map[int64]int64) error { redisDataMap := make(map[string]interface{}) for nsID, asstBizID := range nsAsstBizMap { redisDataMap[c.genNsAsstBizRedisKey(nsID)] = asstBizID } - if err := c.nsAsstBizCache.BatchUpdate(ctx, redisDataMap, rid); err != nil { - blog.Errorf("update shared ns asst biz cache failed, err: %v, data: %+v, rid: %s", err, nsAsstBizMap, rid) + if err := c.nsAsstBizCache.BatchUpdate(kit, redisDataMap); err != nil { + blog.Errorf("update shared ns asst biz cache failed, err: %v, data: %+v, rid: %s", err, nsAsstBizMap, kit.Rid) return err } return nil } // DeleteAsstBiz delete shared namespace to associated biz cache by nsIDs -func (c *SharedNsRelCache) DeleteAsstBiz(ctx context.Context, nsIDs []int64, rid string) error { +func (c *SharedNsRelCache) DeleteAsstBiz(kit *rest.Kit, nsIDs []int64) error { redisKeys := make([]string, len(nsIDs)) for i, nsID := range nsIDs { redisKeys[i] = c.genNsAsstBizRedisKey(nsID) } - if err := c.nsAsstBizCache.BatchDelete(ctx, redisKeys, rid); err != nil { - blog.Errorf("delete shared ns asst biz cache failed, err: %v, keys: %+v, rid: %s", 
err, redisKeys, rid)
+	if err := c.nsAsstBizCache.BatchDelete(kit, redisKeys); err != nil {
+		blog.Errorf("delete shared ns asst biz cache failed, err: %v, keys: %+v, rid: %s", err, redisKeys, kit.Rid)
 		return err
 	}
 	return nil
 }
 
 // RefreshSharedNsRel refresh shared namespace relation key and value cache
-func (c *SharedNsRelCache) RefreshSharedNsRel(ctx context.Context, rid string) error {
+func (c *SharedNsRelCache) RefreshSharedNsRel(rid string) error {
 	// lock refresh shared namespace relation cache operation, returns error if it is already locked
 	lockKey := fmt.Sprintf("%s:shared_ns_rel_refresh:lock", Namespace)
@@ -133,22 +135,31 @@ func (c *SharedNsRelCache) RefreshSharedNsRel(ctx context.Context, rid string) e
 		return errors.New("there's a same refreshing task running, please retry later")
 	}
 
-	ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode)
+	kit := rest.NewKit().WithCtx(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)).
+		WithRid(rid)
+	err = tenant.ExecForAllTenants(func(tenantID string) error {
+		kit = kit.WithTenant(tenantID)
 
-	relations, err := c.getAllSharedNsRel(ctx, rid)
-	if err != nil {
-		return err
-	}
+		relations, err := c.getAllSharedNsRel(kit)
+		if err != nil {
+			return err
+		}
 
-	nsAsstBizMap := make(map[string]interface{})
-	for _, rel := range relations {
-		nsAsstBizMap[c.genNsAsstBizRedisKey(rel.NamespaceID)] = rel.AsstBizID
-	}
+		nsAsstBizMap := make(map[string]interface{})
+		for _, rel := range relations {
+			nsAsstBizMap[c.genNsAsstBizRedisKey(rel.NamespaceID)] = rel.AsstBizID
+		}
 
-	// refresh label key and value count cache
-	err = c.nsAsstBizCache.Refresh(ctx, "*", nsAsstBizMap, rid)
+		// refresh shared namespace to associated biz cache
+		err = c.nsAsstBizCache.Refresh(kit, "*", nsAsstBizMap)
+		if err != nil {
+			blog.Errorf("refresh shared ns asst biz cache failed, err: %v, data: %+v, rid: %s", err, nsAsstBizMap,
+				kit.Rid)
+			return err
+		}
+		return nil
+	})
 	if err != nil {
-		blog.Errorf("refresh shared ns asst biz cache failed, err: %v, data: %+v, rid: %s", err, nsAsstBizMap, rid)
 		return err
 	}
 
@@ -156,17 +167,17 @@ func (c *SharedNsRelCache) RefreshSharedNsRel(ctx context.Context, rid string) e
 }
 
 // getAllSharedNsRel get all shared namespace relations
-func (c *SharedNsRelCache) getAllSharedNsRel(ctx context.Context, rid string) ([]kubetypes.NsSharedClusterRel, error) {
+func (c *SharedNsRelCache) getAllSharedNsRel(kit *rest.Kit) ([]kubetypes.NsSharedClusterRel, error) {
 	cond := make(mapstr.MapStr)
 
 	all := make([]kubetypes.NsSharedClusterRel, 0)
 	for {
 		relations := make([]kubetypes.NsSharedClusterRel, 0)
-		err := mongodb.Client().Table(kubetypes.BKTableNameNsSharedClusterRel).Find(cond).
+		err := mongodb.Shard(kit.ShardOpts()).Table(kubetypes.BKTableNameNsSharedClusterRel).Find(cond).
 			Sort(kubetypes.BKNamespaceIDField).Fields(kubetypes.BKNamespaceIDField, kubetypes.BKAsstBizIDField). 
- All(ctx, &relations) + All(kit.Ctx, &relations) if err != nil { - blog.Errorf("list kube shared namespace rel failed, err: %v, cond: %+v, rid: %v", err, cond, rid) + blog.Errorf("list kube shared namespace rel failed, err: %v, cond: %+v, rid: %v", err, cond, kit.Rid) return nil, err } @@ -183,7 +194,6 @@ func (c *SharedNsRelCache) getAllSharedNsRel(ctx context.Context, rid string) ([ // loopRefreshCache loop refresh shared namespace relation key and value cache every day at 3am func (c *SharedNsRelCache) loopRefreshCache() { - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) for { time.Sleep(2 * time.Hour) @@ -196,7 +206,7 @@ func (c *SharedNsRelCache) loopRefreshCache() { rid := util.GenerateRID() blog.Infof("start refresh shared namespace relation cache task, rid: %s", rid) - err := c.RefreshSharedNsRel(ctx, rid) + err := c.RefreshSharedNsRel(rid) if err != nil { blog.Errorf("refresh shared namespace relation cache failed, err: %v, rid: %s", err, rid) continue diff --git a/src/source_controller/cacheservice/cache/custom/cache/string.go b/src/source_controller/cacheservice/cache/custom/cache/string.go index d5d1c657f0..e5f5439f99 100644 --- a/src/source_controller/cacheservice/cache/custom/cache/string.go +++ b/src/source_controller/cacheservice/cache/custom/cache/string.go @@ -18,10 +18,10 @@ package cache import ( - "context" "errors" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/custom/types" "configcenter/src/storage/driver/redis" @@ -40,7 +40,7 @@ func NewStrCache(key Key) *StrCache { } // List get data list by cache keys -func (c *StrCache) List(ctx context.Context, keys []string, rid string) (map[string]string, error) { +func (c *StrCache) List(kit *rest.Kit, keys []string) (map[string]string, error) { if len(keys) == 0 { return make(map[string]string), nil } @@ -48,17 +48,17 @@ func (c *StrCache) List(ctx context.Context, keys []string, rid string) (map[str redisKeys := make([]string, len(keys)) for i, key := range keys { - redisKeys[i] = c.key.Key(key) + redisKeys[i] = c.key.Key(kit.TenantID, key) } - result, err := redis.Client().MGet(ctx, redisKeys...).Result() + result, err := redis.Client().MGet(kit.Ctx, redisKeys...).Result() if err != nil { - blog.Errorf("list %s data from redis failed, err: %v, keys: %+v, rid: %s", c.key.Type(), err, keys, rid) + blog.Errorf("list %s data from redis failed, err: %v, keys: %+v, rid: %s", c.key.Type(), err, keys, kit.Rid) return nil, err } if len(result) != len(keys) { - blog.Errorf("%s redis result(%+v) length is invalid, keys: %+v, rid: %s", c.key.Type(), result, keys, rid) + blog.Errorf("%s redis result(%+v) length is invalid, keys: %+v, rid: %s", c.key.Type(), result, keys, kit.Rid) return nil, errors.New("redis result length is invalid") } @@ -69,7 +69,7 @@ func (c *StrCache) List(ctx context.Context, keys []string, rid string) (map[str } detail, ok := res.(string) if !ok { - blog.Errorf("%s redis result type %T is invalid, result: %+v, rid: %s", keys[idx], res, res, rid) + blog.Errorf("%s redis result type %T is invalid, result: %+v, rid: %s", keys[idx], res, res, kit.Rid) return nil, errors.New("redis result type is invalid") } dataMap[keys[idx]] = detail @@ -79,7 +79,7 @@ func (c *StrCache) List(ctx context.Context, keys []string, rid string) (map[str } // BatchUpdate batch update cache by map[key]data -func (c *StrCache) BatchUpdate(ctx context.Context, dataMap map[string]interface{}, rid 
string) error { +func (c *StrCache) BatchUpdate(kit *rest.Kit, dataMap map[string]interface{}) error { if len(dataMap) == 0 { return nil } @@ -88,12 +88,12 @@ func (c *StrCache) BatchUpdate(ctx context.Context, dataMap map[string]interface defer pip.Close() for key, data := range dataMap { - pip.Set(c.key.Key(key), data, c.key.ttl) + pip.Set(c.key.Key(kit.TenantID, key), data, c.key.ttl) } _, err := pip.Exec() if err != nil { - blog.Errorf("update %s cache failed, err: %v, dataMap: %+v, rid: %s", c.key.Type(), err, dataMap, rid) + blog.Errorf("update %s cache failed, err: %v, dataMap: %+v, rid: %s", c.key.Type(), err, dataMap, kit.Rid) return err } @@ -101,19 +101,19 @@ func (c *StrCache) BatchUpdate(ctx context.Context, dataMap map[string]interface } // BatchDelete batch delete cache keys -func (c *StrCache) BatchDelete(ctx context.Context, keys []string, rid string) error { +func (c *StrCache) BatchDelete(kit *rest.Kit, keys []string) error { if len(keys) == 0 { return nil } keys = util.StrArrayUnique(keys) for i, key := range keys { - keys[i] = c.key.Key(key) + keys[i] = c.key.Key(kit.TenantID, key) } - err := redis.Client().Del(ctx, keys...).Err() + err := redis.Client().Del(kit.Ctx, keys...).Err() if err != nil { - blog.Errorf("delete %s cache failed, err: %v, keys: %+v, rid: %s", c.key.Type(), err, keys, rid) + blog.Errorf("delete %s cache failed, err: %v, keys: %+v, rid: %s", c.key.Type(), err, keys, kit.Rid) return err } @@ -121,25 +121,25 @@ func (c *StrCache) BatchDelete(ctx context.Context, keys []string, rid string) e } // Refresh replace the cache info to map[data]count, returns the deleted data list -func (c *StrCache) Refresh(ctx context.Context, match string, dataMap map[string]interface{}, rid string) error { +func (c *StrCache) Refresh(kit *rest.Kit, match string, dataMap map[string]interface{}) error { pip := redis.Client().Pipeline() defer pip.Close() keyDataMap := make(map[string]interface{}) for key, data := range dataMap { - redisKey := c.key.Key(key) + redisKey := c.key.Key(kit.TenantID, key) keyDataMap[redisKey] = data pip.Set(redisKey, data, c.key.ttl) } - match = c.key.Key(match) + match = c.key.Key(kit.TenantID, match) cursor := uint64(0) for { - list, nextCursor, err := redis.Client().Scan(ctx, cursor, match, types.RedisPage).Result() + list, nextCursor, err := redis.Client().Scan(kit.Ctx, cursor, match, types.RedisPage).Result() if err != nil { blog.Errorf("scan %s cache matching %s by cursor %d failed, err: %v, rid: %s", c.key.Type(), match, cursor, - err, rid) + err, kit.Rid) return err } @@ -159,7 +159,7 @@ func (c *StrCache) Refresh(ctx context.Context, match string, dataMap map[string _, err := pip.Exec() if err != nil { blog.Errorf("refresh %s cache matching %s failed, err: %v, dataMap: %+v, rid: %s", c.key.Type(), match, err, - dataMap, rid) + dataMap, kit.Rid) return err } diff --git a/src/source_controller/cacheservice/cache/custom/client.go b/src/source_controller/cacheservice/cache/custom/client.go index 4026da44f6..65a3988bea 100644 --- a/src/source_controller/cacheservice/cache/custom/client.go +++ b/src/source_controller/cacheservice/cache/custom/client.go @@ -38,7 +38,7 @@ func (c *Cache) ListPodLabelKey(kit *rest.Kit, opt *types.ListPodLabelKeyOption) return nil, kit.CCError.Errorf(common.CCErrCommParamsIsInvalid, "opt") } - return c.cacheSet.Label.GetKeys(kit.Ctx, opt.BizID, kit.Rid) + return c.cacheSet.Label.GetKeys(kit, opt.BizID) } // ListPodLabelValue list pod label values cache info @@ -51,13 +51,13 @@ func (c *Cache) 
ListPodLabelValue(kit *rest.Kit, opt *types.ListPodLabelValueOpt return nil, kit.CCError.Errorf(common.CCErrCommParamsIsInvalid, "opt") } - return c.cacheSet.Label.GetValues(kit.Ctx, opt.BizID, opt.Key, kit.Rid) + return c.cacheSet.Label.GetValues(kit, opt.BizID, opt.Key) } // RefreshPodLabel refresh biz pod label key and value cache func (c *Cache) RefreshPodLabel(kit *rest.Kit, opt *types.RefreshPodLabelOption) error { // read from secondary in mongodb cluster. - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + kit.Ctx = util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) if opt == nil || opt.BizID <= 0 { blog.Errorf("refresh pod label option %+v is invalid, rid: %s", opt, kit.Rid) @@ -68,7 +68,7 @@ func (c *Cache) RefreshPodLabel(kit *rest.Kit, opt *types.RefreshPodLabelOption) go func() { blog.Infof("start refresh biz: %d pod label cache, rid: %s", opt.BizID, kit.Rid) - _, err := c.cacheSet.Label.RefreshPodLabel(ctx, refreshOpt, kit.Rid) + _, err := c.cacheSet.Label.RefreshPodLabel(kit, refreshOpt) if err != nil { blog.Errorf("refresh biz: %d pod label cache failed, err: %v, rid: %s", opt.BizID, err, kit.Rid) return diff --git a/src/source_controller/cacheservice/cache/custom/watch/label.go b/src/source_controller/cacheservice/cache/custom/watch/label.go index 03de1b2742..3625fa12d2 100644 --- a/src/source_controller/cacheservice/cache/custom/watch/label.go +++ b/src/source_controller/cacheservice/cache/custom/watch/label.go @@ -19,15 +19,15 @@ package watch import ( "context" + "fmt" "configcenter/pkg/conv" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/mapstr" + "configcenter/src/common/http/rest" "configcenter/src/common/util" kubetypes "configcenter/src/kube/types" "configcenter/src/source_controller/cacheservice/cache/custom/cache" - "configcenter/src/storage/driver/mongodb" streamtypes "configcenter/src/storage/stream/types" ) @@ -40,11 +40,12 @@ func (w *Watcher) watchPodLabel() error { opt := &watchOptions{ watchType: PodLabelWatchType, - watchOpts: &streamtypes.WatchOptions{ - Options: streamtypes.Options{ - Filter: make(mapstr.MapStr), + watchOpts: &streamtypes.WatchCollOptions{ + CollectionOptions: streamtypes.CollectionOptions{ EventStruct: new(kubetypes.Pod), - Collection: kubetypes.BKTableNameBasePod, + CollectionFilter: &streamtypes.CollectionFilter{ + Regex: fmt.Sprintf("_%s$", kubetypes.BKTableNameBasePod), + }, Fields: []string{kubetypes.BKIDField, kubetypes.BKBizIDField, kubetypes.LabelsField, kubetypes.BKNamespaceIDField}, }, @@ -72,77 +73,82 @@ type podLabelWatcher struct { } // doBatch batch handle pod event for label key and value cache -func (w *podLabelWatcher) doBatch(es []*streamtypes.Event) (retry bool) { +func (w *podLabelWatcher) doBatch(dbInfo *streamtypes.DBInfo, es []*streamtypes.Event) bool { if len(es) == 0 { return false } - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - rid := es[0].ID() + kit := rest.NewKit().WithCtx(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)). 
+ WithRid(es[0].ID()) // group inserted and deleted pod events - insertPodMap := make(map[string]*kubetypes.Pod) - delOids := make([]string, 0) - nsIDs := make([]int64, 0) + insertPodMap := make(map[string]map[string]*kubetypes.Pod) + delPodMap := make(map[string]map[string]*kubetypes.Pod) + nsIDMap := make(map[string][]int64) for idx := range es { one := es[idx] + tenantID := one.TenantID + pod := one.Document.(*kubetypes.Pod) + switch one.OperationType { case streamtypes.Insert: - pod := one.Document.(*kubetypes.Pod) - insertPodMap[one.Oid] = pod - nsIDs = append(nsIDs, pod.NamespaceID) + _, exists := insertPodMap[tenantID] + if !exists { + insertPodMap[tenantID] = make(map[string]*kubetypes.Pod) + } + insertPodMap[tenantID][one.Oid] = pod + nsIDMap[tenantID] = append(nsIDMap[tenantID], pod.NamespaceID) case streamtypes.Delete: _, exists := insertPodMap[one.Oid] if exists { delete(insertPodMap, one.Oid) continue } - delOids = append(delOids, one.Oid) + _, exists = delPodMap[tenantID] + if !exists { + delPodMap[tenantID] = make(map[string]*kubetypes.Pod) + } + delPodMap[tenantID][one.Oid] = pod + nsIDMap[tenantID] = append(nsIDMap[tenantID], pod.NamespaceID) default: // right now, pod can not be updated, so we only need to handle insert and delete event continue } blog.V(5).Infof("watch custom resource cache, received coll: %s, oid: %s, op-time: %s, %s event, rid: %s", - one.Collection, one.Oid, one.ClusterTime.String(), one.OperationType, rid) - } - - delArchives, err := w.getDeletedPodInfo(ctx, delOids, rid) - if err != nil { - return true + one.Collection, one.Oid, one.ClusterTime.String(), one.OperationType, kit.Rid) } - for _, archive := range delArchives { - nsIDs = append(nsIDs, archive.Detail.NamespaceID) - } - - nsIDs = util.IntArrayUnique(nsIDs) - asstBizInfo, err := w.sharedNsCache.GetAsstBiz(ctx, nsIDs, rid) - if err != nil { - return false - } + for tenantID, nsIDs := range nsIDMap { + nsIDs = util.IntArrayUnique(nsIDs) + asstBiz, err := w.sharedNsCache.GetAsstBiz(kit, nsIDs) + if err != nil { + return false + } - // get biz to pod label key and value count map - keyCnt := make(map[int64]map[string]int64) - valueCnt := make(map[int64]map[string]map[string]int64) + // get biz to pod label key and value count map + keyCnt := make(map[int64]map[string]int64) + valueCnt := make(map[int64]map[string]map[string]int64) - for _, pod := range insertPodMap { - w.countPodLabel(pod, asstBizInfo, keyCnt, valueCnt, 1) - } + for _, pod := range insertPodMap[tenantID] { + w.countPodLabel(pod, asstBiz, keyCnt, valueCnt, 1) + } + for _, pod := range delPodMap[tenantID] { + w.countPodLabel(pod, asstBiz, keyCnt, valueCnt, -1) + } - for _, archive := range delArchives { - w.countPodLabel(archive.Detail, asstBizInfo, keyCnt, valueCnt, -1) - } + kit = kit.WithTenant(tenantID) - // update changed pod label key and value cache - if err = w.labelCache.UpdateKeyCount(ctx, keyCnt, rid); err != nil { - return true - } + // update changed pod label key and value cache + if err := w.labelCache.UpdateKeyCount(kit, keyCnt); err != nil { + return true + } - if err = w.labelCache.UpdateValueCount(ctx, valueCnt, rid); err != nil { - return true + if err := w.labelCache.UpdateValueCount(kit, valueCnt); err != nil { + return true + } } return false @@ -180,29 +186,3 @@ func (w *podLabelWatcher) countPodLabel(pod *kubetypes.Pod, asstBiz map[int64]in } } } - -type podDelArchive struct { - Detail *kubetypes.Pod `bson:"detail"` -} - -// getDeletedPodInfo get deleted pod info -func (w *podLabelWatcher) 
getDeletedPodInfo(ctx context.Context, oids []string, rid string) ([]podDelArchive, error) { - delArchives := make([]podDelArchive, 0) - if len(oids) == 0 { - return delArchives, nil - } - - cond := mapstr.MapStr{ - "oid": mapstr.MapStr{common.BKDBIN: oids}, - "coll": kubetypes.BKTableNameBasePod, - } - - err := mongodb.Client().Table(common.BKTableNameKubeDelArchive).Find(cond).Fields("detail.labels", - "detail.bk_biz_id", "detail.bk_namespace_id").All(ctx, &delArchives) - if err != nil { - blog.Errorf("get pod del archive by cond: %+v failed, err: %v, rid: %s", cond, err, rid) - return nil, err - } - - return delArchives, nil -} diff --git a/src/source_controller/cacheservice/cache/custom/watch/shared_ns_rel.go b/src/source_controller/cacheservice/cache/custom/watch/shared_ns_rel.go index 5cc24418e5..66bb244c6b 100644 --- a/src/source_controller/cacheservice/cache/custom/watch/shared_ns_rel.go +++ b/src/source_controller/cacheservice/cache/custom/watch/shared_ns_rel.go @@ -19,14 +19,14 @@ package watch import ( "context" + "fmt" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/mapstr" + "configcenter/src/common/http/rest" "configcenter/src/common/util" kubetypes "configcenter/src/kube/types" "configcenter/src/source_controller/cacheservice/cache/custom/cache" - "configcenter/src/storage/driver/mongodb" streamtypes "configcenter/src/storage/stream/types" ) @@ -38,12 +38,13 @@ func (w *Watcher) watchSharedNsRel() error { opt := &watchOptions{ watchType: SharedNsRelWatchType, - watchOpts: &streamtypes.WatchOptions{ - Options: streamtypes.Options{ - Filter: make(mapstr.MapStr), + watchOpts: &streamtypes.WatchCollOptions{ + CollectionOptions: streamtypes.CollectionOptions{ EventStruct: new(kubetypes.NsSharedClusterRel), - Collection: kubetypes.BKTableNameNsSharedClusterRel, - Fields: []string{kubetypes.BKNamespaceIDField, kubetypes.BKAsstBizIDField}, + CollectionFilter: &streamtypes.CollectionFilter{ + Regex: fmt.Sprintf("_%s$", kubetypes.BKTableNameNsSharedClusterRel), + }, + Fields: []string{kubetypes.BKNamespaceIDField, kubetypes.BKAsstBizIDField}, }, }, doBatch: watcher.doBatch, @@ -55,10 +56,9 @@ func (w *Watcher) watchSharedNsRel() error { } if !tokenExists { - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) rid := util.GenerateRID() blog.Infof("token not exists, start init all shared namespace relation cache task, rid: %s", rid) - go w.cacheSet.SharedNsRel.RefreshSharedNsRel(ctx, rid) + go w.cacheSet.SharedNsRel.RefreshSharedNsRel(rid) } return nil @@ -69,82 +69,58 @@ type sharedNsRelWatcher struct { } // doBatch batch handle shared namespace relation event for cache -func (w *sharedNsRelWatcher) doBatch(es []*streamtypes.Event) (retry bool) { +func (w *sharedNsRelWatcher) doBatch(dbInfo *streamtypes.DBInfo, es []*streamtypes.Event) bool { if len(es) == 0 { return false } - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - rid := es[0].ID() + kit := rest.NewKit().WithCtx(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)). 
+ WithRid(es[0].ID()) - nsAsstBizMap := make(map[int64]int64) - delOids := make([]string, 0) + nsAsstBizMap := make(map[string]map[int64]int64) + delNsIDsMap := make(map[string][]int64) for idx := range es { one := es[idx] + tenantID := one.TenantID + rel := one.Document.(*kubetypes.NsSharedClusterRel) + switch one.OperationType { case streamtypes.Insert: - rel := one.Document.(*kubetypes.NsSharedClusterRel) - nsAsstBizMap[rel.NamespaceID] = rel.AsstBizID + _, exists := nsAsstBizMap[tenantID] + if !exists { + nsAsstBizMap[tenantID] = make(map[int64]int64) + } + nsAsstBizMap[tenantID][rel.NamespaceID] = rel.AsstBizID + case streamtypes.Delete: - delOids = append(delOids, one.Oid) + delNsIDsMap[tenantID] = append(delNsIDsMap[tenantID], rel.NamespaceID) + default: // shared namespace relation can not be updated, so we only need to handle insert and delete event continue } blog.V(5).Infof("watch custom resource cache, received coll: %s, oid: %s, op-time: %s, %s event, rid: %s", - one.Collection, one.Oid, one.ClusterTime.String(), one.OperationType, rid) - } - - err := w.cache.UpdateAsstBiz(ctx, nsAsstBizMap, rid) - if err != nil { - return true + one.Collection, one.Oid, one.ClusterTime.String(), one.OperationType, kit.Rid) } - delArchives, err := w.getDeletedRelInfo(ctx, delOids, rid) - if err != nil { - return true - } - - delNsIDs := make([]int64, len(delArchives)) - for i, archive := range delArchives { - delNsIDs[i] = archive.Detail.NamespaceID + for tenantID, nsAsstBizInfo := range nsAsstBizMap { + kit = kit.WithTenant(tenantID) + err := w.cache.UpdateAsstBiz(kit, nsAsstBizInfo) + if err != nil { + return true + } } - err = w.cache.DeleteAsstBiz(ctx, delNsIDs, rid) - if err != nil { - return true + for tenantID, delNsIDs := range delNsIDsMap { + kit = kit.WithTenant(tenantID) + err := w.cache.DeleteAsstBiz(kit, delNsIDs) + if err != nil { + return true + } } return false } - -type sharedNsRelDelArchive struct { - Detail *kubetypes.NsSharedClusterRel `bson:"detail"` -} - -// getDeletedRelInfo get deleted shared namespace relation info -func (w *sharedNsRelWatcher) getDeletedRelInfo(ctx context.Context, oids []string, rid string) ([]sharedNsRelDelArchive, - error) { - - delArchives := make([]sharedNsRelDelArchive, 0) - if len(oids) == 0 { - return delArchives, nil - } - - cond := mapstr.MapStr{ - "oid": mapstr.MapStr{common.BKDBIN: oids}, - "coll": kubetypes.BKTableNameNsSharedClusterRel, - } - - err := mongodb.Client().Table(common.BKTableNameKubeDelArchive).Find(cond).Fields("detail.bk_namespace_id"). 
- All(ctx, &delArchives) - if err != nil { - blog.Errorf("get shared ns relation del archive by cond: %+v failed, err: %v, rid: %s", cond, err, rid) - return nil, err - } - - return delArchives, nil -} diff --git a/src/source_controller/cacheservice/cache/custom/watch/watch.go b/src/source_controller/cacheservice/cache/custom/watch/watch.go index 1b94937611..fc724b1eb1 100644 --- a/src/source_controller/cacheservice/cache/custom/watch/watch.go +++ b/src/source_controller/cacheservice/cache/custom/watch/watch.go @@ -29,20 +29,20 @@ import ( "configcenter/src/source_controller/cacheservice/cache/custom/cache" tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler" "configcenter/src/storage/driver/mongodb" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" ) // Watcher defines mongodb event watcher for custom resource type Watcher struct { - loopW stream.LoopInterface + task *task.Task cacheSet *cache.CacheSet } // Init custom resource mongodb event watcher -func Init(loopW stream.LoopInterface, cacheSet *cache.CacheSet) error { +func Init(watchTask *task.Task, cacheSet *cache.CacheSet) error { watcher := &Watcher{ - loopW: loopW, + task: watchTask, cacheSet: cacheSet, } @@ -59,8 +59,8 @@ func Init(loopW stream.LoopInterface, cacheSet *cache.CacheSet) error { type watchOptions struct { watchType WatchType - watchOpts *types.WatchOptions - doBatch func(es []*types.Event) (retry bool) + watchOpts *types.WatchCollOptions + doBatch func(dbInfo *types.DBInfo, es []*types.Event) bool } // WatchType is the custom resource watch type @@ -78,45 +78,33 @@ func (w *Watcher) watchCustomResource(opt *watchOptions) (bool, error) { ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) name := fmt.Sprintf("%s:%s", cache.Namespace, opt.watchType) - tokenHandler := tokenhandler.NewSingleTokenHandler(name, mongodb.Client()) + tokenHandler := tokenhandler.NewSingleTokenHandler(name) - exists, err := tokenHandler.IsTokenExists(ctx) + exists, err := tokenHandler.IsTokenExists(ctx, mongodb.Dal("watch")) if err != nil { blog.Errorf("check if custom resource %s watch token exists failed, err: %v", name, err) return false, err } - if exists { - startAtTime, err := tokenHandler.GetStartWatchTime(ctx) - if err != nil { - blog.Errorf("get custom resource %s start watch time failed, err: %v", name, err) - return false, err - } - opt.watchOpts.StartAtTime = startAtTime - } else { - opt.watchOpts.StartAtTime = new(types.TimeStamp) - } - - opt.watchOpts.WatchFatalErrorCallback = tokenHandler.ResetWatchToken - - loopOptions := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ + opts := &types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ Name: name, - WatchOpt: opt.watchOpts, + CollOpts: opt.watchOpts, TokenHandler: tokenHandler, RetryOptions: &types.RetryOptions{ MaxRetryCount: 3, RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ + EventHandler: &types.TaskBatchHandler{ DoBatch: opt.doBatch, }, BatchSize: 200, } - if err = w.loopW.WithBatch(loopOptions); err != nil { - blog.Errorf("watch custom resource %s failed, err: %v", name, err) + err = w.task.AddLoopBatchTask(opts) + if err != nil { + blog.Errorf("watch custom resource %s, but add loop batch task failed, err: %v", name, err) return false, err } diff --git a/src/source_controller/cacheservice/cache/general/cache.go b/src/source_controller/cacheservice/cache/general/cache.go index 
469c03fb90..a6758ad03d 100644 --- a/src/source_controller/cacheservice/cache/general/cache.go +++ b/src/source_controller/cacheservice/cache/general/cache.go @@ -28,7 +28,7 @@ import ( "configcenter/src/source_controller/cacheservice/cache/general/types" "configcenter/src/source_controller/cacheservice/cache/general/watch" watchcli "configcenter/src/source_controller/cacheservice/event/watch" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) // Cache defines the general resource caching logics @@ -38,9 +38,7 @@ type Cache struct { } // New Cache -func New(isMaster discovery.ServiceManageInterface, loopW stream.LoopInterface, watchCli *watchcli.Client) (*Cache, - error) { - +func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, watchCli *watchcli.Client) (*Cache, error) { cacheSet := cache.GetAllCache() fullSyncCondChMap := make(map[general.ResType]chan<- types.FullSyncCondEvent) @@ -51,7 +49,7 @@ func New(isMaster discovery.ServiceManageInterface, loopW stream.LoopInterface, fullSyncCondChMap[resType] = cacheInst.FullSyncCondCh() } - fullSyncCondCli, err := fullsynccond.New(loopW, fullSyncCondChMap) + fullSyncCondCli, err := fullsynccond.New(watchTask, fullSyncCondChMap) if err != nil { return nil, fmt.Errorf("init full sync cond failed, err: %v", err) } diff --git a/src/source_controller/cacheservice/cache/general/cache/cache.go b/src/source_controller/cacheservice/cache/general/cache/cache.go index dc129b2f30..18b5d47f62 100644 --- a/src/source_controller/cacheservice/cache/general/cache/cache.go +++ b/src/source_controller/cacheservice/cache/general/cache/cache.go @@ -19,12 +19,12 @@ package cache import ( - "context" "math/rand" "sync" "time" "configcenter/pkg/cache/general" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/source_controller/cacheservice/cache/general/types" "configcenter/src/source_controller/cacheservice/cache/tools" @@ -94,8 +94,6 @@ type basicInfo struct { id int64 oid string subRes []string - // tenant is tenant account - tenant string } // dataParser parse the general resource data to basic info @@ -108,7 +106,7 @@ type getDataByKeysOpt struct { } // dataGetterByKeys get mongodb data by redis keys -type dataGetterByKeys func(ctx context.Context, opt *getDataByKeysOpt, rid string) ([]any, error) +type dataGetterByKeys func(kit *rest.Kit, opt *getDataByKeysOpt) ([]any, error) // listDataOpt is list general resource data from db option type listDataOpt struct { @@ -127,7 +125,7 @@ type listDataRes struct { } // dataLister list mongodb data -type dataLister func(ctx context.Context, opt *listDataOpt, rid string) (*listDataRes, error) +type dataLister func(kit *rest.Kit, opt *listDataOpt) (*listDataRes, error) type uniqueKeyLogics struct { // genKey generator redis key of the unique key type @@ -174,28 +172,28 @@ func (c *Cache) CacheChangeCh() chan struct{} { } // NeedWatchRes returns whether all resource data needs to be watched, and the specified sub-resources to be watched -func (c *Cache) NeedWatchRes() (bool, []string) { +func (c *Cache) NeedWatchRes() (bool, map[string][]string) { if c.needCacheAll || len(c.uniqueKeyLogics) != 0 { return true, nil } - needWatchAll := false - subRes := make([]string, 0) + needWatchAllTenants := make([]string, 0) + tenantSubResMap := make(map[string][]string) c.fullSyncCondMap.Range(func(idListKey string, cond *types.FullSyncCondInfo) bool { if cond.SubResource == "" { - needWatchAll = true - return true + needWatchAllTenants = 
append(needWatchAllTenants, cond.TenantID) + return false } - subRes = append(subRes, cond.SubResource) + tenantSubResMap[cond.TenantID] = append(tenantSubResMap[cond.TenantID], cond.SubResource) return false }) - if needWatchAll { - return true, nil + for _, tenantID := range needWatchAllTenants { + tenantSubResMap[tenantID] = make([]string, 0) } - return false, subRes + return false, tenantSubResMap } // NeedCache returns if the general resource needs to be cached diff --git a/src/source_controller/cacheservice/cache/general/cache/cache_with_id.go b/src/source_controller/cacheservice/cache/general/cache/cache_with_id.go index 9dc182c3c3..0da7e907c2 100644 --- a/src/source_controller/cacheservice/cache/general/cache/cache_with_id.go +++ b/src/source_controller/cacheservice/cache/general/cache/cache_with_id.go @@ -18,7 +18,6 @@ package cache import ( - "context" "fmt" "strconv" "time" @@ -26,6 +25,7 @@ import ( "configcenter/pkg/cache/general" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/general/types" @@ -49,8 +49,7 @@ func newMapStrCacheWithID(key *general.Key, needCacheAll bool, table, idField st return nil, fmt.Errorf("parse id %+v failed, err: %v", data[idField], err) } return &basicInfo{ - id: id, - tenant: util.GetStrByInterface(data[common.TenantID]), + id: id, }, nil }) } @@ -100,10 +99,10 @@ func parseDataWithID[T any](idField string, parser func(data T, idField string) } func getDataByID[T any](table, idField string) dataGetterByKeys { - return func(ctx context.Context, opt *getDataByKeysOpt, rid string) ([]any, error) { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + return func(kit *rest.Kit, opt *getDataByKeysOpt) ([]any, error) { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) - dataArr, err := getDBDataByID[T](ctx, opt, table, idField, rid) + dataArr, err := getDBDataByID[T](kit, opt, table, idField) if err != nil { return nil, err } @@ -112,7 +111,7 @@ func getDataByID[T any](table, idField string) dataGetterByKeys { } } -func getDBDataByID[T any](ctx context.Context, opt *getDataByKeysOpt, table, idField string, rid string) ([]T, error) { +func getDBDataByID[T any](kit *rest.Kit, opt *getDataByKeysOpt, table, idField string) ([]T, error) { if len(opt.Keys) == 0 { return make([]T, 0), nil } @@ -122,7 +121,7 @@ func getDBDataByID[T any](ctx context.Context, opt *getDataByKeysOpt, table, idF for i, key := range opt.Keys { id, err := strconv.ParseInt(key, 10, 64) if err != nil { - blog.Errorf("parse id (index: %d, key: %s) failed, err: %v, rid: %s", i, key, err, rid) + blog.Errorf("parse id (index: %d, key: %s) failed, err: %v, rid: %s", i, key, err, kit.Rid) return nil, err } ids[i] = id @@ -133,19 +132,19 @@ func getDBDataByID[T any](ctx context.Context, opt *getDataByKeysOpt, table, idF } dataArr := make([]T, 0) - if err := mongodb.Client().Table(table).Find(cond).All(ctx, &dataArr); err != nil { - blog.Errorf("get %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, rid) + if err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond).All(kit.Ctx, &dataArr); err != nil { + blog.Errorf("get %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, kit.Rid) return nil, err } return dataArr, nil } func listDataWithID[T any](table, idField string) dataLister { - return func(ctx context.Context, opt *listDataOpt, rid string) 
(*listDataRes, error) { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + return func(kit *rest.Kit, opt *listDataOpt) (*listDataRes, error) { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) if rawErr := opt.Validate(false); rawErr.ErrCode != 0 { - blog.Errorf("list general data option is invalid, err: %v, opt: %+v, rid: %s", rawErr, opt, rid) + blog.Errorf("list general data option is invalid, err: %v, opt: %+v, rid: %s", rawErr, opt, kit.Rid) return nil, fmt.Errorf("list data option is invalid") } @@ -153,7 +152,7 @@ func listDataWithID[T any](table, idField string) dataLister { opt.Fields = []string{idField} } - cnt, dataArr, err := listDBDataWithID[T](ctx, opt, table, idField, rid) + cnt, dataArr, err := listDBDataWithID[T](kit, opt, table, idField) if err != nil { return nil, err } @@ -162,18 +161,16 @@ func listDataWithID[T any](table, idField string) dataLister { } } -func listDBDataWithID[T any](ctx context.Context, opt *listDataOpt, table, idField string, rid string) (uint64, []T, - error) { - +func listDBDataWithID[T any](kit *rest.Kit, opt *listDataOpt, table, idField string) (uint64, []T, error) { cond := opt.Cond if cond == nil { cond = make(mapstr.MapStr) } if opt.Page.EnableCount { - cnt, err := mongodb.Client().Table(table).Find(cond).Count(ctx) + cnt, err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond).Count(kit.Ctx) if err != nil { - blog.Errorf("count %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, rid) + blog.Errorf("count %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, kit.Rid) return 0, nil, err } @@ -198,10 +195,10 @@ func listDBDataWithID[T any](ctx context.Context, opt *listDataOpt, table, idFie } dataArr := make([]T, 0) - err := mongodb.Client().Table(table).Find(cond).Sort(idField).Start(uint64(opt.Page.StartIndex)). - Limit(uint64(opt.Page.Limit)).Fields(opt.Fields...).All(ctx, &dataArr) + err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond).Sort(idField).Start(uint64(opt.Page.StartIndex)). 
+ Limit(uint64(opt.Page.Limit)).Fields(opt.Fields...).All(kit.Ctx, &dataArr) if err != nil { - blog.Errorf("list %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, rid) + blog.Errorf("list %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, kit.Rid) return 0, nil, err } diff --git a/src/source_controller/cacheservice/cache/general/cache/cache_with_id_and_sub_res.go b/src/source_controller/cacheservice/cache/general/cache/cache_with_id_and_sub_res.go index 861ad57a27..db7923196c 100644 --- a/src/source_controller/cacheservice/cache/general/cache/cache_with_id_and_sub_res.go +++ b/src/source_controller/cacheservice/cache/general/cache/cache_with_id_and_sub_res.go @@ -18,13 +18,13 @@ package cache import ( - "context" "fmt" "time" "configcenter/pkg/cache/general" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/json" "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/general/types" @@ -32,8 +32,8 @@ import ( // newCacheWithID new general cache whose data uses id as id key func newCacheWithIDAndSubRes[T any](key *general.Key, idField string, subResFields []string, - getTable func(ctx context.Context, filter *types.BasicFilter, rid string) (string, error), - parseData func(data dataWithTable[T]) (*basicInfo, error)) *Cache { + getTable func(kit *rest.Kit, filter *types.BasicFilter) (string, error), + parseData func(data dataWithTenant[T]) (*basicInfo, error)) *Cache { cache := NewCache() cache.key = key @@ -46,23 +46,23 @@ func newCacheWithIDAndSubRes[T any](key *general.Key, idField string, subResFiel return cache } -type dataWithTable[T any] struct { - Table string `json:"-" bson:"-"` - Data T `json:",inline" bson:",inline"` +type dataWithTenant[T any] struct { + TenantID string `json:"-" bson:"-"` + Data T `json:",inline" bson:",inline"` } // MarshalJSON marshal json -func (data dataWithTable[T]) MarshalJSON() ([]byte, error) { +func (data dataWithTenant[T]) MarshalJSON() ([]byte, error) { return json.Marshal(data.Data) } -func parseDataWithIDAndSubRes[T any](parser func(data dataWithTable[T]) (*basicInfo, error)) dataParser { +func parseDataWithIDAndSubRes[T any](parser func(data dataWithTenant[T]) (*basicInfo, error)) dataParser { return func(data any) (*basicInfo, error) { var info *basicInfo var err error switch val := data.(type) { - case dataWithTable[T]: + case dataWithTenant[T]: info, err = parser(val) case types.WatchEventData: info, err = parseWatchChainNode(val.ChainNode) @@ -87,25 +87,25 @@ func parseDataWithIDAndSubRes[T any](parser func(data dataWithTable[T]) (*basicI } func getDataByIDAndSubRes[T any](idField string, - getTable func(ctx context.Context, filter *types.BasicFilter, rid string) (string, error)) dataGetterByKeys { + getTable func(kit *rest.Kit, filter *types.BasicFilter) (string, error)) dataGetterByKeys { - return func(ctx context.Context, opt *getDataByKeysOpt, rid string) ([]any, error) { - table, err := getTable(ctx, opt.BasicFilter, rid) + return func(kit *rest.Kit, opt *getDataByKeysOpt) ([]any, error) { + table, err := getTable(kit, opt.BasicFilter) if err != nil { - blog.Errorf("get table by basic filter(%+v) failed, err: %v, rid: %s", opt.BasicFilter, err, rid) + blog.Errorf("get table by basic filter(%+v) failed, err: %v, rid: %s", opt.BasicFilter, err, kit.Rid) return nil, err } - dataArr, err := getDBDataByID[T](ctx, opt, table, idField, rid) + dataArr, err := getDBDataByID[T](kit, opt, table, idField) if err != nil { 
return nil, err } allData := make([]interface{}, 0) for _, data := range dataArr { - allData = append(allData, dataWithTable[T]{ - Table: table, - Data: data, + allData = append(allData, dataWithTenant[T]{ + TenantID: kit.TenantID, + Data: data, }) } return allData, nil @@ -113,19 +113,19 @@ func getDataByIDAndSubRes[T any](idField string, } func listDataWithIDAndSubRes[T any](idField string, subResFields []string, - getTable func(ctx context.Context, filter *types.BasicFilter, rid string) (string, error)) dataLister { + getTable func(kit *rest.Kit, filter *types.BasicFilter) (string, error)) dataLister { - return func(ctx context.Context, opt *listDataOpt, rid string) (*listDataRes, error) { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + return func(kit *rest.Kit, opt *listDataOpt) (*listDataRes, error) { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) if rawErr := opt.Validate(true); rawErr.ErrCode != 0 { - blog.Errorf("list general data option is invalid, err: %v, opt: %+v, rid: %s", rawErr, opt, rid) + blog.Errorf("list general data option is invalid, err: %v, opt: %+v, rid: %s", rawErr, opt, kit.Rid) return nil, fmt.Errorf("list data option is invalid") } - table, err := getTable(ctx, opt.BasicFilter, rid) + table, err := getTable(kit, opt.BasicFilter) if err != nil { - blog.Errorf("get table by basic filter(%+v) failed, err: %v, rid: %s", opt.BasicFilter, err, rid) + blog.Errorf("get table by basic filter(%+v) failed, err: %v, rid: %s", opt.BasicFilter, err, kit.Rid) return nil, err } @@ -133,16 +133,16 @@ func listDataWithIDAndSubRes[T any](idField string, subResFields []string, opt.Fields = append(subResFields, idField) } - cnt, dataArr, err := listDBDataWithID[T](ctx, opt, table, idField, rid) + cnt, dataArr, err := listDBDataWithID[T](kit, opt, table, idField) if err != nil { return nil, err } allData := make([]interface{}, 0) for _, data := range dataArr { - allData = append(allData, dataWithTable[T]{ - Table: table, - Data: data, + allData = append(allData, dataWithTenant[T]{ + TenantID: kit.TenantID, + Data: data, }) } diff --git a/src/source_controller/cacheservice/cache/general/cache/cache_with_oid.go b/src/source_controller/cacheservice/cache/general/cache/cache_with_oid.go index 36e8a44f1c..b7c8e8119b 100644 --- a/src/source_controller/cacheservice/cache/general/cache/cache_with_oid.go +++ b/src/source_controller/cacheservice/cache/general/cache/cache_with_oid.go @@ -18,7 +18,6 @@ package cache import ( - "context" "fmt" "time" @@ -26,6 +25,7 @@ import ( "configcenter/pkg/filter" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/json" "configcenter/src/common/mapstr" "configcenter/src/common/util" @@ -74,8 +74,7 @@ func parseDataWithOid[T any](parser func(data T) (*basicInfo, error)) dataParser info.oid = val.Oid.Hex() case filter.JsonString: info = &basicInfo{ - oid: gjson.Get(string(val), common.MongoMetaID).String(), - tenant: gjson.Get(string(val), common.TenantID).String(), + oid: gjson.Get(string(val), common.MongoMetaID).String(), } default: return nil, fmt.Errorf("data type %T is invalid", data) @@ -90,8 +89,8 @@ func parseDataWithOid[T any](parser func(data T) (*basicInfo, error)) dataParser } func getDataByOid[T any](table string) dataGetterByKeys { - return func(ctx context.Context, opt *getDataByKeysOpt, rid string) ([]any, error) { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + return func(kit *rest.Kit, opt 
*getDataByKeysOpt) ([]any, error) { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) if len(opt.Keys) == 0 { return make([]any, 0), nil @@ -101,7 +100,7 @@ func getDataByOid[T any](table string) dataGetterByKeys { for i, key := range opt.Keys { oid, err := primitive.ObjectIDFromHex(key) if err != nil { - blog.Errorf("parse oid (index: %d, key: %s) failed, err: %v, rid: %s", i, key, err, rid) + blog.Errorf("parse oid (index: %d, key: %s) failed, err: %v, rid: %s", i, key, err, kit.Rid) return nil, err } oids[i] = oid @@ -114,8 +113,8 @@ func getDataByOid[T any](table string) dataGetterByKeys { dbOpts := dbtypes.NewFindOpts().SetWithObjectID(true) dataArr := make([]dataWithOid[T], 0) - if err := mongodb.Client().Table(table).Find(cond, dbOpts).All(ctx, &dataArr); err != nil { - blog.Errorf("get %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, rid) + if err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond, dbOpts).All(kit.Ctx, &dataArr); err != nil { + blog.Errorf("get %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, kit.Rid) return nil, err } @@ -124,11 +123,11 @@ func getDataByOid[T any](table string) dataGetterByKeys { } func listDataWithOid[T any](table string) dataLister { - return func(ctx context.Context, opt *listDataOpt, rid string) (*listDataRes, error) { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + return func(kit *rest.Kit, opt *listDataOpt) (*listDataRes, error) { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) if rawErr := opt.Validate(false); rawErr.ErrCode != 0 { - blog.Errorf("list general data option is invalid, err: %v, opt: %+v, rid: %s", rawErr, opt, rid) + blog.Errorf("list general data option is invalid, err: %v, opt: %+v, rid: %s", rawErr, opt, kit.Rid) return nil, fmt.Errorf("list data option is invalid") } @@ -142,9 +141,9 @@ func listDataWithOid[T any](table string) dataLister { } if opt.Page.EnableCount { - cnt, err := mongodb.Client().Table(table).Find(cond).Count(ctx) + cnt, err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond).Count(kit.Ctx) if err != nil { - blog.Errorf("count %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, rid) + blog.Errorf("count %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, kit.Rid) return nil, err } @@ -154,7 +153,7 @@ func listDataWithOid[T any](table string) dataLister { if opt.Page.StartOid != "" { oid, err := primitive.ObjectIDFromHex(opt.Page.StartOid) if err != nil { - blog.Errorf("parse start oid %s failed, err: %v, rid: %s", opt.Page.StartOid, err, rid) + blog.Errorf("parse start oid %s failed, err: %v, rid: %s", opt.Page.StartOid, err, kit.Rid) return nil, err } @@ -171,10 +170,10 @@ func listDataWithOid[T any](table string) dataLister { dbOpts := dbtypes.NewFindOpts().SetWithObjectID(true) dataArr := make([]dataWithOid[T], 0) - err := mongodb.Client().Table(table).Find(cond, dbOpts).Sort(common.MongoMetaID). 
- Start(uint64(opt.Page.StartIndex)).Limit(uint64(opt.Page.Limit)).Fields(opt.Fields...).All(ctx, &dataArr) + err := mongodb.Shard(kit.ShardOpts()).Table(table).Find(cond, dbOpts).Sort(common.MongoMetaID).Start( + uint64(opt.Page.StartIndex)).Limit(uint64(opt.Page.Limit)).Fields(opt.Fields...).All(kit.Ctx, &dataArr) if err != nil { - blog.Errorf("list %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, rid) + blog.Errorf("list %s data by cond(%+v) failed, err: %v, rid: %s", table, cond, err, kit.Rid) return nil, err } diff --git a/src/source_controller/cacheservice/cache/general/cache/detail.go b/src/source_controller/cacheservice/cache/general/cache/detail.go index ad5b1153fe..5a75b177c6 100644 --- a/src/source_controller/cacheservice/cache/general/cache/detail.go +++ b/src/source_controller/cacheservice/cache/general/cache/detail.go @@ -18,7 +18,6 @@ package cache import ( - "context" "fmt" "configcenter/pkg/cache/general" @@ -29,8 +28,6 @@ import ( "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/general/types" "configcenter/src/storage/driver/redis" - - "github.com/tidwall/gjson" ) // ListDetailByIDs list general resource detail cache by ids @@ -66,9 +63,9 @@ func (c *Cache) listDetailByIDs(kit *rest.Kit, opt *types.ListDetailByIDsOpt) (m detailKeys := make([]string, len(idKeys)) for i, id := range idKeys { if opt.SubRes == "" { - detailKeys[i] = c.key.DetailKey(id) + detailKeys[i] = c.key.DetailKey(kit.TenantID, id) } else { - detailKeys[i] = c.key.DetailKey(id, opt.SubRes) + detailKeys[i] = c.key.DetailKey(kit.TenantID, id, opt.SubRes) } } @@ -99,13 +96,6 @@ func (c *Cache) listDetailByIDs(kit *rest.Kit, opt *types.ListDetailByIDsOpt) (m continue } - if !opt.IsSystem && kit.TenantID != common.BKSuperTenantID { - tenantID := gjson.Get(detail, common.TenantID).String() - if tenantID != common.BKDefaultTenantID && tenantID != kit.TenantID { - continue - } - } - if len(opt.Fields) != 0 { idDetailMap[idKeys[idx]] = *json.CutJsonDataWithFields(&detail, opt.Fields) } else { @@ -120,19 +110,17 @@ func (c *Cache) listDetailByIDs(kit *rest.Kit, opt *types.ListDetailByIDsOpt) (m // can not find detail in cache, need refresh the cache getDataOpt := &getDataByKeysOpt{ BasicFilter: &types.BasicFilter{ - SubRes: opt.SubRes, - TenantID: kit.TenantID, - IsSystem: opt.IsSystem, + SubRes: opt.SubRes, }, Keys: needRefreshIDs, } - dbData, err := c.getDataByID(kit.Ctx, getDataOpt, kit.Rid) + dbData, err := c.getDataByID(kit, getDataOpt) if err != nil { return nil, err } - c.tryRefreshDetail(&tryRefreshDetailOpt{toRefreshKeys: needRefreshKeys, dbData: dbData, fields: opt.Fields, - idDetailMap: idDetailMap}, kit.Rid) + c.tryRefreshDetail(kit, &tryRefreshDetailOpt{toRefreshKeys: needRefreshKeys, dbData: dbData, fields: opt.Fields, + idDetailMap: idDetailMap}) return idDetailMap, nil } @@ -147,7 +135,7 @@ type tryRefreshDetailOpt struct { } // tryRefreshDetail try refresh the general resource detail cache if it's not locked -func (c *Cache) tryRefreshDetail(opt *tryRefreshDetailOpt, rid string) { +func (c *Cache) tryRefreshDetail(kit *rest.Kit, opt *tryRefreshDetailOpt) { toRefreshKeyMap := make(map[string]struct{}) for _, key := range opt.toRefreshKeys { toRefreshKeyMap[key] = struct{}{} @@ -157,7 +145,7 @@ func (c *Cache) tryRefreshDetail(opt *tryRefreshDetailOpt, rid string) { // generate id detail map info, err := c.parseData(data) if err != nil { - blog.Errorf("parse %s data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, rid) + 
blog.Errorf("parse %s data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, kit.Rid) continue } @@ -165,7 +153,7 @@ func (c *Cache) tryRefreshDetail(opt *tryRefreshDetailOpt, rid string) { detailJs, err := json.Marshal(data) if err != nil { - blog.Errorf("marshal %s mongo data %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, rid) + blog.Errorf("marshal %s mongo data %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, kit.Rid) continue } detailStr := string(detailJs) @@ -181,12 +169,12 @@ func (c *Cache) tryRefreshDetail(opt *tryRefreshDetailOpt, rid string) { redisKeys, _ := lgc.genKey(data, info) for _, redisKey := range redisKeys { opt.keyDetailMap[redisKey] = opt.idDetailMap[idKey] - delete(toRefreshKeyMap, c.key.UniqueKey(string(opt.uniqueKeyType), redisKey)) + delete(toRefreshKeyMap, c.key.UniqueKey(string(opt.uniqueKeyType), kit.TenantID, redisKey)) } } // refresh the general resource detail cache when we had the lock - detailKey := c.key.DetailKey(idKey, info.subRes...) + detailKey := c.key.DetailKey(kit.TenantID, idKey, info.subRes...) delete(toRefreshKeyMap, detailKey) if !c.refreshingLock.CanRefresh(detailKey) { continue @@ -204,12 +192,12 @@ func (c *Cache) tryRefreshDetail(opt *tryRefreshDetailOpt, rid string) { redisKeys, err := lgc.genKey(data, info) if err != nil { blog.Errorf("generate %s %s key failed, err: %v, data: %+v, rid: %s", c.key.Resource(), typ, err, - data, rid) + data, kit.Rid) continue } for _, redisKey := range redisKeys { - pipeline.SetNX(c.key.UniqueKey(string(typ), redisKey), idKey, ttl) + pipeline.SetNX(c.key.UniqueKey(string(typ), kit.TenantID, redisKey), idKey, ttl) } } @@ -219,19 +207,19 @@ func (c *Cache) tryRefreshDetail(opt *tryRefreshDetailOpt, rid string) { _, err = pipeline.Exec() if err != nil { blog.Errorf("refresh %s cache failed, err: %v, data: %s, rid: %s", c.key.Resource(), idKey, err, - detailStr, rid) + detailStr, kit.Rid) return } - blog.V(4).Infof("refresh %s cache success, id: %s, rid: %s", c.key.Resource(), idKey, rid) + blog.V(4).Infof("refresh %s cache success, id: %s, rid: %s", c.key.Resource(), idKey, kit.Rid) }(data) } - go c.handleNotExistKey(toRefreshKeyMap, rid) + go c.handleNotExistKey(kit, toRefreshKeyMap) } // handleNotExistKey set not exist refresh key cache to empty string to avoid cache penetration -func (c *Cache) handleNotExistKey(notExistKeyMap map[string]struct{}, rid string) error { +func (c *Cache) handleNotExistKey(kit *rest.Kit, notExistKeyMap map[string]struct{}) error { if len(notExistKeyMap) == 0 { return nil } @@ -243,11 +231,12 @@ func (c *Cache) handleNotExistKey(notExistKeyMap map[string]struct{}, rid string if _, err := pipeline.Exec(); err != nil { blog.Errorf("refresh not exist %s cache failed, err: %v, key info: %+v, rid: %s", c.key.Resource(), err, - notExistKeyMap, rid) + notExistKeyMap, kit.Rid) return err } - blog.V(4).Infof("refresh not exist %s cache success, key info: %+v, rid: %s", c.key.Resource(), notExistKeyMap, rid) + blog.V(4).Infof("refresh not exist %s cache success, key info: %+v, rid: %s", c.key.Resource(), notExistKeyMap, + kit.Rid) return nil } @@ -290,7 +279,7 @@ func (c *Cache) listDetailByUniqueKey(kit *rest.Kit, opt *types.ListDetailByUniq keys := util.StrArrayUnique(opt.Keys) uniqueKeys := make([]string, len(keys)) for i, key := range keys { - uniqueKeys[i] = c.key.UniqueKey(string(opt.Type), key) + uniqueKeys[i] = c.key.UniqueKey(string(opt.Type), kit.TenantID, key) } results, err := redis.Client().MGet(kit.Ctx, uniqueKeys...).Result() @@ -329,8 
+318,7 @@ func (c *Cache) listDetailByUniqueKey(kit *rest.Kit, opt *types.ListDetailByUniq idDetailMap := make(map[string]string) keyDetailMap := make(map[string]string) if len(idKeys) > 0 { - listByIDOpt := &types.ListDetailByIDsOpt{SubRes: opt.SubRes, IsSystem: opt.IsSystem, IDKeys: idKeys, - Fields: opt.Fields} + listByIDOpt := &types.ListDetailByIDsOpt{SubRes: opt.SubRes, IDKeys: idKeys, Fields: opt.Fields} idDetailMap, err = c.listDetailByIDs(kit, listByIDOpt) if err != nil { blog.Errorf("list detail by ids(%+v) failed, err: %v, rid: %s", listByIDOpt, err, kit.Rid) @@ -348,17 +336,16 @@ func (c *Cache) listDetailByUniqueKey(kit *rest.Kit, opt *types.ListDetailByUniq // can not find detail in cache, need refresh the cache getDataOpt := &getDataByKeysOpt{ - BasicFilter: &types.BasicFilter{SubRes: opt.SubRes, TenantID: kit.TenantID, - IsSystem: opt.IsSystem}, - Keys: needRefreshKeys, + BasicFilter: &types.BasicFilter{SubRes: opt.SubRes}, + Keys: needRefreshKeys, } - dbData, err := uniqueKeyLgc.getData(kit.Ctx, getDataOpt, kit.Rid) + dbData, err := uniqueKeyLgc.getData(kit, getDataOpt) if err != nil { return nil, err } - c.tryRefreshDetail(&tryRefreshDetailOpt{toRefreshKeys: needRefreshRedisKeys, dbData: dbData, fields: opt.Fields, - idDetailMap: idDetailMap, uniqueKeyType: opt.Type, keyDetailMap: keyDetailMap}, kit.Rid) + c.tryRefreshDetail(kit, &tryRefreshDetailOpt{toRefreshKeys: needRefreshRedisKeys, dbData: dbData, + fields: opt.Fields, idDetailMap: idDetailMap, uniqueKeyType: opt.Type, keyDetailMap: keyDetailMap}) return keyDetailMap, nil } @@ -380,7 +367,7 @@ func (c *Cache) ListDetail(kit *rest.Kit, opt *types.ListDetailOpt) ([]string, e filterOpt: opt.IDListFilter, ttl: idListTTL, } - notExists, _, err := c.tryRefreshIDListIfNeeded(kit.Ctx, refreshOpt, kit.Rid) + notExists, _, err := c.tryRefreshIDListIfNeeded(kit, refreshOpt) if err != nil { blog.Errorf("try refresh id list failed, err: %v, opt: %+v, rid: %s", err, opt, kit.Rid) return nil, err @@ -388,7 +375,7 @@ func (c *Cache) ListDetail(kit *rest.Kit, opt *types.ListDetailOpt) ([]string, e // id list not exists, get detail from db if notExists { - dbRes, err := c.listDataFromDB(kit.Ctx, opt, kit.Rid) + dbRes, err := c.listDataFromDB(kit, opt) if err != nil { return nil, err } @@ -406,7 +393,7 @@ func (c *Cache) ListDetail(kit *rest.Kit, opt *types.ListDetailOpt) ([]string, e } // id list exists, get id list and detail from redis - idKeys, err := c.listIDsFromRedis(kit.Ctx, opt.IDListFilter.IDListKey, opt.Page, kit.Rid) + idKeys, err := c.listIDsFromRedis(kit, opt.IDListFilter.IDListKey, opt.Page) if err != nil { return nil, err } @@ -416,18 +403,18 @@ func (c *Cache) ListDetail(kit *rest.Kit, opt *types.ListDetailOpt) ([]string, e } listByIDsOpt := &types.ListDetailByIDsOpt{ - SubRes: opt.IDListFilter.SubRes, - IsSystem: opt.IDListFilter.IsSystem, - IDKeys: idKeys, - Fields: opt.Fields, + SubRes: opt.IDListFilter.SubRes, + IDKeys: idKeys, + Fields: opt.Fields, } return c.ListDetailByIDs(kit, listByIDsOpt) } // listDataFromDB list detail from db -func (c *Cache) listDataFromDB(ctx context.Context, opt *types.ListDetailOpt, rid string) (*listDataRes, error) { +func (c *Cache) listDataFromDB(kit *rest.Kit, opt *types.ListDetailOpt) (*listDataRes, error) { if rawErr := opt.Validate(c.key.HasSubRes()); rawErr.ErrCode != 0 { - blog.Errorf("list %s detail option is invalid, err: %v, opt: %+v, rid: %s", c.key.Resource(), rawErr, opt, rid) + blog.Errorf("list %s detail option is invalid, err: %v, opt: %+v, rid: %s", c.key.Resource(), 
rawErr, opt, + kit.Rid) return nil, fmt.Errorf("list detail option is invalid") } @@ -443,12 +430,12 @@ func (c *Cache) listDataFromDB(ctx context.Context, opt *types.ListDetailOpt, ri listOpt.Cond, err = opt.IDListFilter.Cond.ToMgo() if err != nil { blog.Errorf("parse list %s detail cond(%s) failed, err: %v, rid: %s", c.key.Resource(), - opt.IDListFilter.Cond, err, rid) + opt.IDListFilter.Cond, err, kit.Rid) return nil, err } } - return c.listData(ctx, listOpt, rid) + return c.listData(kit, listOpt) } // RefreshDetailByIDs refresh general resource detail cache by ids @@ -460,18 +447,16 @@ func (c *Cache) RefreshDetailByIDs(kit *rest.Kit, opt *types.RefreshDetailByIDsO getDataOpt := &getDataByKeysOpt{ BasicFilter: &types.BasicFilter{ - SubRes: opt.SubResource, - TenantID: kit.TenantID, - IsSystem: true, + SubRes: opt.SubResource, }, Keys: opt.IDKeys, } - dbData, err := c.getDataByID(kit.Ctx, getDataOpt, kit.Rid) + dbData, err := c.getDataByID(kit, getDataOpt) if err != nil { return err } - c.tryRefreshDetail(&tryRefreshDetailOpt{dbData: dbData, idDetailMap: make(map[string]string)}, kit.Rid) + c.tryRefreshDetail(kit, &tryRefreshDetailOpt{dbData: dbData, idDetailMap: make(map[string]string)}) return nil } @@ -488,14 +473,14 @@ func (c *Cache) CountData(kit *rest.Kit, opt *types.ListDetailOpt) (int64, error return 0, err } - exists, err := isIDListExists(kit.Ctx, opt.IDListFilter.IDListKey, kit.Rid) + exists, err := isIDListExists(kit, opt.IDListFilter.IDListKey) if err != nil { return 0, err } // id list not exists, get data count from db if !exists { - dbRes, err := c.listDataFromDB(kit.Ctx, opt, kit.Rid) + dbRes, err := c.listDataFromDB(kit, opt) if err != nil { return 0, err } @@ -503,7 +488,7 @@ func (c *Cache) CountData(kit *rest.Kit, opt *types.ListDetailOpt) (int64, error } // id list exists, get id list count from redis - cnt, err := c.countIDsFromRedis(kit.Ctx, opt.IDListFilter.IDListKey, kit.Rid) + cnt, err := c.countIDsFromRedis(kit, opt.IDListFilter.IDListKey) if err != nil { return 0, err } diff --git a/src/source_controller/cacheservice/cache/general/cache/full_sync_cond.go b/src/source_controller/cacheservice/cache/general/cache/full_sync_cond.go index 94429dfbb5..b2fd78cf12 100644 --- a/src/source_controller/cacheservice/cache/general/cache/full_sync_cond.go +++ b/src/source_controller/cacheservice/cache/general/cache/full_sync_cond.go @@ -18,7 +18,6 @@ package cache import ( - "context" "fmt" "strconv" "sync" @@ -26,7 +25,7 @@ import ( fullsynccond "configcenter/pkg/cache/full-sync-cond" "configcenter/src/common/blog" - "configcenter/src/common/util" + "configcenter/src/common/http/rest" "configcenter/src/source_controller/cacheservice/cache/general/types" "configcenter/src/storage/driver/redis" ) @@ -87,8 +86,8 @@ func (c *Cache) handleFullSyncCondEvent() { for { select { case e := <-c.fullSyncCondCh: - rid := util.GenerateRID() - blog.V(4).Infof("received %s full sync cond event: %+v, rid: %s", c.key.Resource(), e, rid) + kit := rest.NewKit() + blog.V(4).Infof("received %s full sync cond event: %+v, rid: %s", c.key.Resource(), e, kit.Rid) for eventType, conds := range e.EventMap { switch eventType { @@ -97,9 +96,11 @@ func (c *Cache) handleFullSyncCondEvent() { fallthrough case types.Upsert: for _, cond := range conds { + kit = kit.WithTenant(cond.TenantID) idListKey, err := c.GenFullSyncCondIDListKey(cond) if err != nil { - blog.Errorf("gen full sync cond(%+v) id list key failed, err: %v, rid: %s", cond, err, rid) + blog.Errorf("gen full sync cond(%+v) id list key 
failed, err: %v, rid: %s", cond, err, + kit.Rid) continue } @@ -109,7 +110,7 @@ func (c *Cache) handleFullSyncCondEvent() { condInfo, exists := c.fullSyncCondMap.Get(idListKey) if !exists { if !cond.IsAll && cond.Condition == nil { - blog.Errorf("full sync cond %d is invalid, rid: %s", cond.ID, rid) + blog.Errorf("full sync cond %d is invalid, rid: %s", cond.ID, kit.Rid) continue } c.fullSyncCondMap.Set(idListKey, &types.FullSyncCondInfo{ @@ -128,7 +129,7 @@ func (c *Cache) handleFullSyncCondEvent() { c.fullSyncCondMap.Set(idListKey, condInfo) for retry := 0; retry < 3; retry++ { - if err = c.updateFullSyncCondTTL(idListKey, ttl, rid); err == nil { + if err = c.updateFullSyncCondTTL(kit, idListKey, ttl); err == nil { break } time.Sleep(100 * time.Millisecond * time.Duration(retry)) @@ -137,9 +138,11 @@ func (c *Cache) handleFullSyncCondEvent() { } case types.Delete: for _, cond := range conds { + kit = kit.WithTenant(cond.TenantID) idListKey, err := c.GenFullSyncCondIDListKey(cond) if err != nil { - blog.Errorf("gen full sync cond(%+v) id list key failed, err: %v, rid: %s", cond, err, rid) + blog.Errorf("gen full sync cond(%+v) id list key failed, err: %v, rid: %s", cond, err, + kit.Rid) continue } @@ -147,7 +150,7 @@ func (c *Cache) handleFullSyncCondEvent() { c.fullSyncCondMap.Remove(idListKey) for retry := 0; retry < 3; retry++ { - if err = c.deleteFullSyncCondIDList(idListKey, rid); err == nil { + if err = c.deleteFullSyncCondIDList(kit, idListKey); err == nil { break } time.Sleep(100 * time.Millisecond * time.Duration(retry)) @@ -160,36 +163,36 @@ func (c *Cache) handleFullSyncCondEvent() { } } -func (c *Cache) updateFullSyncCondTTL(idListKey string, ttl time.Duration, rid string) error { +func (c *Cache) updateFullSyncCondTTL(kit *rest.Kit, idListKey string, ttl time.Duration) error { // update id list ttl - err := redis.Client().Expire(context.Background(), idListKey, c.withRandomExpireSeconds(ttl*2)).Err() + err := redis.Client().Expire(kit.Ctx, idListKey, c.withRandomExpireSeconds(ttl*2)).Err() if err != nil { - blog.Errorf("update id list key: %s ttl to %s failed, err: %v, rid: %s", idListKey, ttl, err, rid) + blog.Errorf("update id list key: %s ttl to %s failed, err: %v, rid: %s", idListKey, ttl, err, kit.Rid) return err } // update id list expire key ttl expireKey := c.key.IDListExpireKey(idListKey) - err = redis.Client().Expire(context.Background(), expireKey, c.withRandomExpireSeconds(ttl)).Err() + err = redis.Client().Expire(kit.Ctx, expireKey, c.withRandomExpireSeconds(ttl)).Err() if err != nil { - blog.Errorf("update id list expire key: %s ttl to %s failed, err: %v, rid: %s", expireKey, ttl, err, rid) + blog.Errorf("update id list expire key: %s ttl to %s failed, err: %v, rid: %s", expireKey, ttl, err, kit.Rid) return err } return nil } -func (c *Cache) deleteFullSyncCondIDList(idListKey string, rid string) error { +func (c *Cache) deleteFullSyncCondIDList(kit *rest.Kit, idListKey string) error { // remove id list expire key, the id list will be treated as expired expireKey := c.key.IDListExpireKey(idListKey) - if err := redis.Client().Del(context.Background(), expireKey).Err(); err != nil { - blog.Errorf("delete expire key: %s failed, err: %v, rid: %s", expireKey, err, rid) + if err := redis.Client().Del(kit.Ctx, expireKey).Err(); err != nil { + blog.Errorf("delete expire key: %s failed, err: %v, rid: %s", expireKey, err, kit.Rid) return err } - exists, err := isIDListExists(context.Background(), idListKey, rid) + exists, err := isIDListExists(kit, idListKey) if err != 
nil { - blog.Errorf("check if id list key %s exists failed, err: %v, rid: %s", idListKey, err, rid) + blog.Errorf("check if id list key %s exists failed, err: %v, rid: %s", idListKey, err, kit.Rid) return err } @@ -198,14 +201,14 @@ func (c *Cache) deleteFullSyncCondIDList(idListKey string, rid string) error { } // rename the id list to avoid reusing the out-dated id list if same id list is watched again - oldIDListKey := fmt.Sprintf("%s-old", c.key.IDListTempKey(idListKey, rid)) - err = redis.Client().Rename(context.Background(), idListKey, oldIDListKey).Err() + oldIDListKey := fmt.Sprintf("%s-old", c.key.IDListTempKey(idListKey, kit.Rid)) + err = redis.Client().Rename(kit.Ctx, idListKey, oldIDListKey).Err() if err != nil { return err } // delete old id list in background - go c.deleteIDList(context.Background(), oldIDListKey, rid) + go c.deleteIDList(kit, oldIDListKey) return nil } @@ -220,7 +223,7 @@ func (c *Cache) GenFullSyncCondIDListKey(cond *fullsynccond.FullSyncCond) (strin } // generate id list key by sub resource and full sync cond id - keys := []string{cond.TenantID} + keys := make([]string, 0) if cond.SubResource != "" { keys = append(keys, cond.SubResource) @@ -230,7 +233,7 @@ func (c *Cache) GenFullSyncCondIDListKey(cond *fullsynccond.FullSyncCond) (strin keys = append(keys, strconv.FormatInt(cond.ID, 10)) } - return c.key.IDListKey(keys...), nil + return c.key.IDListKey(cond.TenantID, keys...), nil } // genFullSyncCondRefreshIDListOpt generate refresh id list option by full sync cond @@ -240,9 +243,7 @@ func genFullSyncCondRefreshIDListOpt(idListKey string, condInfo *types.FullSyncC filterOpt: &types.IDListFilterOpt{ IDListKey: idListKey, BasicFilter: &types.BasicFilter{ - SubRes: condInfo.SubResource, - TenantID: condInfo.TenantID, - IsSystem: false, + SubRes: condInfo.SubResource, }, IsAll: condInfo.IsAll, Cond: condInfo.Condition, diff --git a/src/source_controller/cacheservice/cache/general/cache/host.go b/src/source_controller/cacheservice/cache/general/cache/host.go index dcf8ca34fb..b6966067ac 100644 --- a/src/source_controller/cacheservice/cache/general/cache/host.go +++ b/src/source_controller/cacheservice/cache/general/cache/host.go @@ -18,7 +18,6 @@ package cache import ( - "context" "fmt" "strconv" "strings" @@ -26,6 +25,7 @@ import ( "configcenter/pkg/cache/general" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" "configcenter/src/common/util" @@ -43,8 +43,7 @@ func init() { return nil, fmt.Errorf("parse host id %+v failed, err: %v", data[idField], err) } return &basicInfo{ - id: id, - tenant: util.GetStrByInterface(data[common.TenantID]), + id: id, }, nil }) @@ -103,24 +102,24 @@ func genHostIPCloudIDKey(data any, info *basicInfo) ([]string, error) { return keys, nil } -func genHostByIPCloudIDKey(ctx context.Context, opt *getDataByKeysOpt, rid string) ([]any, error) { +func genHostByIPCloudIDKey(kit *rest.Kit, opt *getDataByKeysOpt) ([]any, error) { if len(opt.Keys) == 0 { return make([]any, 0), nil } - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) cloudIDIPMap := make(map[int64][]string) for i, key := range opt.Keys { pair := strings.Split(key, ":") if len(pair) != 2 { - blog.Errorf("host ip cloud id key %s is invalid, index: %d, rid: %s", key, i, rid) + blog.Errorf("host ip cloud id key %s is invalid, index: %d, rid: %s", key, i, kit.Rid) return 
nil, fmt.Errorf("ip cloud id key %s is invalid", key) } cloudID, err := strconv.ParseInt(pair[1], 10, 64) if err != nil { - blog.Errorf("parse cloud id (index: %d, key: %s) failed, err: %v, rid: %s", i, key, err, rid) + blog.Errorf("parse cloud id (index: %d, key: %s) failed, err: %v, rid: %s", i, key, err, kit.Rid) return nil, err } @@ -143,8 +142,9 @@ func genHostByIPCloudIDKey(ctx context.Context, opt *getDataByKeysOpt, rid strin } hosts := make([]metadata.HostMapStr, 0) - if err := mongodb.Client().Table(common.BKTableNameBaseHost).Find(cond).All(ctx, &hosts); err != nil { - blog.Errorf("get host data by cond(%+v) failed, err: %v, rid: %s", cond, err, rid) + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseHost).Find(cond).All(kit.Ctx, &hosts) + if err != nil { + blog.Errorf("get host data by cond(%+v) failed, err: %v, rid: %s", cond, err, kit.Rid) return nil, err } @@ -170,20 +170,21 @@ func genHostAgentIDKey(data any, info *basicInfo) ([]string, error) { return []string{general.AgentIDKey(agentID)}, nil } -func genHostByAgentIDKey(ctx context.Context, opt *getDataByKeysOpt, rid string) ([]any, error) { +func genHostByAgentIDKey(kit *rest.Kit, opt *getDataByKeysOpt) ([]any, error) { if len(opt.Keys) == 0 { return make([]any, 0), nil } - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) cond := mapstr.MapStr{ common.BKAgentIDField: mapstr.MapStr{common.BKDBType: "string", common.BKDBIN: opt.Keys}, } hosts := make([]metadata.HostMapStr, 0) - if err := mongodb.Client().Table(common.BKTableNameBaseHost).Find(cond).All(ctx, &hosts); err != nil { - blog.Errorf("get host data by cond(%+v) failed, err: %v, rid: %s", cond, err, rid) + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameBaseHost).Find(cond).All(kit.Ctx, &hosts) + if err != nil { + blog.Errorf("get host data by cond(%+v) failed, err: %v, rid: %s", cond, err, kit.Rid) return nil, err } diff --git a/src/source_controller/cacheservice/cache/general/cache/id_list.go b/src/source_controller/cacheservice/cache/general/cache/id_list.go index dba278b3e2..4c78b1aeb6 100644 --- a/src/source_controller/cacheservice/cache/general/cache/id_list.go +++ b/src/source_controller/cacheservice/cache/general/cache/id_list.go @@ -38,7 +38,7 @@ import ( ) // AddData add data to general resource cache -func (c *Cache) AddData(ctx context.Context, dataArr []types.WatchEventData, rid string) error { +func (c *Cache) AddData(kit *rest.Kit, dataArr []types.WatchEventData) error { if !c.NeedCache() { return nil } @@ -53,20 +53,20 @@ func (c *Cache) AddData(ctx context.Context, dataArr []types.WatchEventData, rid idKeyMap := make(map[string]*addIDToListOpt) for _, data := range dataArr { - pip, idKeyMap = c.parseAddData(data, pip, idKeyMap, rid) + pip, idKeyMap = c.parseAddData(kit, data, pip, idKeyMap) } var err error for _, addOpt := range idKeyMap { addOpt.pip = pip - pip, err = c.addIDToListWithRefresh(ctx, addOpt, rid) + pip, err = c.addIDToListWithRefresh(kit, addOpt) if err != nil { return err } } if _, err = pip.Exec(); err != nil { - blog.Errorf("add data to %s cache failed, err: %v, data: %+v, rid: %s", c.key.Resource(), err, dataArr, rid) + blog.Errorf("add data to %s cache failed, err: %v, data: %+v, rid: %s", c.key.Resource(), err, dataArr, kit.Rid) return err } @@ -74,12 +74,12 @@ func (c *Cache) AddData(ctx context.Context, dataArr []types.WatchEventData, rid } // parseAddData parse added event watch data -func (c *Cache) 
parseAddData(data types.WatchEventData, pip ccredis.Pipeliner, idKeyMap map[string]*addIDToListOpt, - rid string) (ccredis.Pipeliner, map[string]*addIDToListOpt) { +func (c *Cache) parseAddData(kit *rest.Kit, data types.WatchEventData, pip ccredis.Pipeliner, + idKeyMap map[string]*addIDToListOpt) (ccredis.Pipeliner, map[string]*addIDToListOpt) { info, err := c.parseData(data) if err != nil { - blog.Errorf("parse %s data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, rid) + blog.Errorf("parse %s data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, kit.Rid) return pip, idKeyMap } @@ -88,10 +88,10 @@ func (c *Cache) parseAddData(data types.WatchEventData, pip ccredis.Pipeliner, i // add id to system id lists if c.needCacheAll { if len(info.subRes) == 0 { - idKeyMap = c.recordIDToAddForSystem(idKeyMap, id, score, "", info.tenant) + idKeyMap = c.recordIDToAddForSystem(kit, idKeyMap, id, score, "") } else { for _, subRes := range info.subRes { - idKeyMap = c.recordIDToAddForSystem(idKeyMap, id, score, subRes, info.tenant) + idKeyMap = c.recordIDToAddForSystem(kit, idKeyMap, id, score, subRes) } } } @@ -99,7 +99,7 @@ func (c *Cache) parseAddData(data types.WatchEventData, pip ccredis.Pipeliner, i // generate full sync cond id list to id & score map c.fullSyncCondMap.Range(func(idListKey string, cond *types.FullSyncCondInfo) bool { // remove id from the id list if not matches the full sync cond - if !c.isFullSyncCondMatched(data.Data, info, cond, rid) { + if !c.isFullSyncCondMatched(data.Data, info, cond, kit.Rid) { pip.ZRem(idListKey, id) return false } @@ -120,24 +120,25 @@ func (c *Cache) parseAddData(data types.WatchEventData, pip ccredis.Pipeliner, i redisKeys, err := lgc.genKey(data, info) if err != nil { blog.Errorf("generate %s %s redis key from data: %+v failed, err: %v, rid: %s", c.key.Resource(), typ, - data, err, rid) + data, err, kit.Rid) continue } for _, redisKey := range redisKeys { - pip.Set(c.key.UniqueKey(string(typ), redisKey), id, c.withRandomExpireSeconds(c.expireSeconds)) + pip.Set(c.key.UniqueKey(string(typ), kit.TenantID, redisKey), id, + c.withRandomExpireSeconds(c.expireSeconds)) } } return pip, idKeyMap } -func (c *Cache) recordIDToAddForSystem(idKeyMap map[string]*addIDToListOpt, id string, score float64, - subRes, supplier string) map[string]*addIDToListOpt { +func (c *Cache) recordIDToAddForSystem(kit *rest.Kit, idKeyMap map[string]*addIDToListOpt, id string, score float64, + subRes string) map[string]*addIDToListOpt { - idListKey := c.Key().IDListKey() + idListKey := c.Key().IDListKey(kit.TenantID) if subRes != "" { - idListKey = c.Key().IDListKey(subRes) + idListKey = c.Key().IDListKey(kit.TenantID, subRes) } _, exists := idKeyMap[idListKey] @@ -149,9 +150,7 @@ func (c *Cache) recordIDToAddForSystem(idKeyMap map[string]*addIDToListOpt, id s filterOpt: &types.IDListFilterOpt{ IDListKey: idListKey, BasicFilter: &types.BasicFilter{ - SubRes: subRes, - TenantID: supplier, - IsSystem: true, + SubRes: subRes, }, IsAll: true, }, @@ -167,12 +166,6 @@ func (c *Cache) recordIDToAddForSystem(idKeyMap map[string]*addIDToListOpt, id s func (c *Cache) isFullSyncCondMatched(data filter.MatchedData, info *basicInfo, cond *types.FullSyncCondInfo, rid string) bool { - if info.tenant != common.BKDefaultTenantID && info.tenant != cond.TenantID && - cond.TenantID != common.BKSuperTenantID { - blog.V(4).Infof("%s data(%+v) tenant not matches cond(%+v), rid: %s", c.key.Resource(), data, cond, rid) - return false - } - subResMatched := true if cond.SubResource 
!= "" { subResMatched = false @@ -216,27 +209,27 @@ type addIDToListOpt struct { } // addIDToListWithRefresh add id to id list cache, refresh the id list if needed -func (c *Cache) addIDToListWithRefresh(ctx context.Context, opt *addIDToListOpt, rid string) (ccredis.Pipeliner, +func (c *Cache) addIDToListWithRefresh(kit *rest.Kit, opt *addIDToListOpt) (ccredis.Pipeliner, error) { idListKey := opt.filterOpt.IDListKey // try refresh id list if it's not exist or is expired - notExists, expired, err := c.tryRefreshIDListIfNeeded(ctx, opt.refreshIDListOpt, rid) + notExists, expired, err := c.tryRefreshIDListIfNeeded(kit, opt.refreshIDListOpt) if err != nil { - blog.Errorf("try refresh id list %s failed, err: %v, opt: %+v, rid: %s", idListKey, err, opt.filterOpt, rid) + blog.Errorf("try refresh id list %s failed, err: %v, opt: %+v, rid: %s", idListKey, err, opt.filterOpt, kit.Rid) return nil, err } // id list is refreshing not exist or expired, add id to temp id list key if notExists || expired { - tempKey, err := redis.Client().Get(ctx, c.key.IDListTempKey(idListKey)).Result() + tempKey, err := redis.Client().Get(kit.Ctx, c.key.IDListTempKey(idListKey)).Result() if err != nil { if !redis.IsNilErr(err) { - blog.Errorf("get id list %s temp key failed, err: %v, rid: %s", idListKey, err, rid) + blog.Errorf("get id list %s temp key failed, err: %v, rid: %s", idListKey, err, kit.Rid) return nil, err } - tempKey = c.key.IDListTempKey(idListKey, rid) + tempKey = c.key.IDListTempKey(idListKey, kit.Rid) } for id, score := range opt.idMap { @@ -270,7 +263,7 @@ func (c *Cache) addIDToList(opt *addIDToListOpt) ccredis.Pipeliner { } // RemoveData remove data from general resource cache -func (c *Cache) RemoveData(ctx context.Context, dataArr []types.WatchEventData, rid string) error { +func (c *Cache) RemoveData(kit *rest.Kit, dataArr []types.WatchEventData) error { if !c.NeedCache() { return nil } @@ -285,32 +278,33 @@ func (c *Cache) RemoveData(ctx context.Context, dataArr []types.WatchEventData, idKeyMap := make(map[string]*removeIDFromListOpt) for _, data := range dataArr { - pip, idKeyMap = c.parseRemoveData(data, pip, idKeyMap, rid) + pip, idKeyMap = c.parseRemoveData(kit, data, pip, idKeyMap) } var err error for _, delOpt := range idKeyMap { delOpt.pip = pip - pip, err = c.removeIDFromListWithRefresh(ctx, delOpt, rid) + pip, err = c.removeIDFromListWithRefresh(kit, delOpt) if err != nil { return err } } if _, err = pip.Exec(); err != nil { - blog.Errorf("del data from %s cache failed, err: %v, data: %+v, rid: %s", c.key.Resource(), err, dataArr, rid) + blog.Errorf("del data from %s cache failed, err: %v, data: %+v, rid: %s", c.key.Resource(), err, dataArr, + kit.Rid) return err } return nil } -func (c *Cache) parseRemoveData(data types.WatchEventData, pip ccredis.Pipeliner, - idKeyMap map[string]*removeIDFromListOpt, rid string) (ccredis.Pipeliner, map[string]*removeIDFromListOpt) { +func (c *Cache) parseRemoveData(kit *rest.Kit, data types.WatchEventData, pip ccredis.Pipeliner, + idKeyMap map[string]*removeIDFromListOpt) (ccredis.Pipeliner, map[string]*removeIDFromListOpt) { info, err := c.parseData(data) if err != nil { - blog.Errorf("parse %s data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, rid) + blog.Errorf("parse %s data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, kit.Rid) return pip, idKeyMap } @@ -319,17 +313,17 @@ func (c *Cache) parseRemoveData(data types.WatchEventData, pip ccredis.Pipeliner // remove id from system id lists if c.needCacheAll { if 
len(info.subRes) == 0 { - idKeyMap = c.recordIDToRemoveForSystem(idKeyMap, id, "", info.tenant) + idKeyMap = c.recordIDToRemoveForSystem(kit, idKeyMap, id, "") } else { for _, subRes := range info.subRes { - idKeyMap = c.recordIDToRemoveForSystem(idKeyMap, id, subRes, info.tenant) + idKeyMap = c.recordIDToRemoveForSystem(kit, idKeyMap, id, subRes) } } } // generate full sync cond id list to id & score map c.fullSyncCondMap.Range(func(idListKey string, cond *types.FullSyncCondInfo) bool { - if !c.isFullSyncCondMatched(data.Data, info, cond, rid) { + if !c.isFullSyncCondMatched(data.Data, info, cond, kit.Rid) { return false } @@ -349,23 +343,23 @@ func (c *Cache) parseRemoveData(data types.WatchEventData, pip ccredis.Pipeliner redisKeys, err := lgc.genKey(data, info) if err != nil { blog.Errorf("generate %s %s redis key from data: %+v failed, err: %v, rid: %s", c.key.Resource(), typ, - data, err, rid) + data, err, kit.Rid) continue } for _, redisKey := range redisKeys { - pip.Del(c.key.UniqueKey(string(typ), redisKey)) + pip.Del(c.key.UniqueKey(string(typ), kit.TenantID, redisKey)) } } return pip, idKeyMap } -func (c *Cache) recordIDToRemoveForSystem(idKeyMap map[string]*removeIDFromListOpt, id string, - subRes, supplier string) map[string]*removeIDFromListOpt { +func (c *Cache) recordIDToRemoveForSystem(kit *rest.Kit, idKeyMap map[string]*removeIDFromListOpt, id string, + subRes string) map[string]*removeIDFromListOpt { - idListKey := c.Key().IDListKey() + idListKey := c.Key().IDListKey(kit.TenantID) if subRes != "" { - idListKey = c.Key().IDListKey(subRes) + idListKey = c.Key().IDListKey(kit.TenantID, subRes) } _, exists := idKeyMap[idListKey] @@ -377,9 +371,7 @@ func (c *Cache) recordIDToRemoveForSystem(idKeyMap map[string]*removeIDFromListO filterOpt: &types.IDListFilterOpt{ IDListKey: idListKey, BasicFilter: &types.BasicFilter{ - SubRes: subRes, - TenantID: supplier, - IsSystem: true, + SubRes: subRes, }, IsAll: true, }, @@ -398,23 +390,23 @@ type removeIDFromListOpt struct { } // removeIDFromList remove id from id list cache, refresh the id list if needed -func (c *Cache) removeIDFromListWithRefresh(ctx context.Context, opt *removeIDFromListOpt, rid string) ( +func (c *Cache) removeIDFromListWithRefresh(kit *rest.Kit, opt *removeIDFromListOpt) ( ccredis.Pipeliner, error) { idListKey := opt.filterOpt.IDListKey // try refresh id list if it's not exist or is expired - notExists, expired, err := c.tryRefreshIDListIfNeeded(ctx, opt.refreshIDListOpt, rid) + notExists, expired, err := c.tryRefreshIDListIfNeeded(kit, opt.refreshIDListOpt) if err != nil { - blog.Errorf("try refresh id list %s failed, err: %v, opt: %+v, rid: %s", idListKey, err, opt.filterOpt, rid) + blog.Errorf("try refresh id list %s failed, err: %v, opt: %+v, rid: %s", idListKey, err, opt.filterOpt, kit.Rid) return nil, err } // id list is refreshing not exist or expired, remove id from temp id list key if notExists || expired { - tempKey, err := redis.Client().Get(ctx, c.key.IDListTempKey(idListKey)).Result() + tempKey, err := redis.Client().Get(kit.Ctx, c.key.IDListTempKey(idListKey)).Result() if err != nil { - blog.Errorf("get id list %s temp key failed, err: %v, rid: %s", idListKey, err, rid) + blog.Errorf("get id list %s temp key failed, err: %v, rid: %s", idListKey, err, kit.Rid) return nil, err } @@ -447,35 +439,35 @@ type refreshIDListOpt struct { // tryRefreshIDListIfNeeded try refresh id list cache if it's not exist or expired // return params: notExists: returns if the id list is not exist, expired: returns if the 
id list is expired -func (c *Cache) tryRefreshIDListIfNeeded(ctx context.Context, opt *refreshIDListOpt, rid string) (notExists bool, +func (c *Cache) tryRefreshIDListIfNeeded(kit *rest.Kit, opt *refreshIDListOpt) (notExists bool, expired bool, err error) { idListKey := opt.filterOpt.IDListKey - exists, err := isIDListExists(ctx, idListKey, rid) + exists, err := isIDListExists(kit, idListKey) if err != nil { return false, false, err } if !exists { - c.tryRefreshIDList(ctx, opt, rid) + c.tryRefreshIDList(kit, opt) return true, false, nil } - expire, err := redis.Client().Get(ctx, c.key.IDListExpireKey(idListKey)).Result() + expire, err := redis.Client().Get(kit.Ctx, c.key.IDListExpireKey(idListKey)).Result() if err != nil { if redis.IsNilErr(err) { - blog.V(4).Infof("id list %s expire key not exists, refresh it now. rid: %s", idListKey, rid) - c.tryRefreshIDList(ctx, opt, rid) + blog.V(4).Infof("id list %s expire key not exists, refresh it now. rid: %s", idListKey, kit.Rid) + c.tryRefreshIDList(kit, opt) return false, true, nil } - blog.Errorf("get host id list expire key failed, err: %v, rid :%v", err, rid) + blog.Errorf("get host id list expire key failed, err: %v, rid :%v", err, kit.Rid) return false, false, err } expireAt, err := strconv.ParseInt(expire, 10, 64) if err != nil { - blog.Errorf("parse id list %s expire time %s failed, err: %v, rid: %s", idListKey, expire, err, rid) + blog.Errorf("parse id list %s expire time %s failed, err: %v, rid: %s", idListKey, expire, err, kit.Rid) return false, false, err } @@ -488,19 +480,19 @@ func (c *Cache) tryRefreshIDListIfNeeded(ctx context.Context, opt *refreshIDList // set expire key with a value which will enforce the id list key to expire within one minute // which will block the refresh request for the next minute. This policy is used to avoid refreshing keys // when redis is under high pressure or not well performed. - redis.Client().Set(ctx, c.key.IDListExpireKey(idListKey), time.Now().Unix()-expireSeconds+60, time.Minute) + redis.Client().Set(kit.Ctx, c.key.IDListExpireKey(idListKey), time.Now().Unix()-expireSeconds+60, time.Minute) // expired, we refresh it now. - blog.V(4).Infof("id list %s is expired, refresh it now. rid: %s", idListKey, rid) - c.tryRefreshIDList(ctx, opt, rid) + blog.V(4).Infof("id list %s is expired, refresh it now. 
rid: %s", idListKey, kit.Rid) + c.tryRefreshIDList(kit, opt) return false, true, nil } // tryRefreshIDList try refresh the general resource id list cache if it's not locked -func (c *Cache) tryRefreshIDList(ctx context.Context, opt *refreshIDListOpt, rid string) { +func (c *Cache) tryRefreshIDList(kit *rest.Kit, opt *refreshIDListOpt) { idListKey := opt.filterOpt.IDListKey if idListKey == "" { - blog.Errorf("id list key is not set, opt: %+v, rid: %s", opt, rid) + blog.Errorf("id list key is not set, opt: %+v, rid: %s", opt, kit.Rid) return } @@ -508,7 +500,7 @@ func (c *Cache) tryRefreshIDList(ctx context.Context, opt *refreshIDListOpt, rid // get local lock if !c.refreshingLock.CanRefresh(lockKey) { - blog.V(4).Infof("%s id list lock %s is locked, skip refresh, rid: %s", c.key.Resource(), lockKey, rid) + blog.V(4).Infof("%s id list lock %s is locked, skip refresh, rid: %s", c.key.Resource(), lockKey, kit.Rid) return } @@ -516,57 +508,60 @@ func (c *Cache) tryRefreshIDList(ctx context.Context, opt *refreshIDListOpt, rid c.refreshingLock.SetRefreshing(lockKey) // then get distribute lock - locked, err := redis.Client().SetNX(ctx, lockKey, rid, 5*time.Minute).Result() + locked, err := redis.Client().SetNX(kit.Ctx, lockKey, kit.Rid, 5*time.Minute).Result() if err != nil { - blog.Errorf("get id list %s lock failed, err: %v, rid: %s", idListKey, err, rid) + blog.Errorf("get id list %s lock failed, err: %v, rid: %s", idListKey, err, kit.Rid) c.refreshingLock.SetUnRefreshing(lockKey) return } if !locked { - blog.V(4).Infof("%s id list key redis lock %s is locked, skip refresh, rid: %s", c.key.Resource(), lockKey, rid) + blog.V(4).Infof("%s id list key redis lock %s is locked, skip refresh, rid: %s", c.key.Resource(), lockKey, + kit.Rid) c.refreshingLock.SetUnRefreshing(lockKey) return } go func() { - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - blog.V(4).Infof("start refresh %s id list cache %s, rid: %s", c.key.Resource(), idListKey, rid) + kit.Ctx = util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + blog.V(4).Infof("start refresh %s id list cache %s, rid: %s", c.key.Resource(), idListKey, kit.Rid) defer c.refreshingLock.SetUnRefreshing(lockKey) - defer redis.Client().Del(ctx, lockKey) + defer redis.Client().Del(kit.Ctx, lockKey) // already get lock, refresh the id list cache now - err = c.refreshIDList(ctx, opt, rid) + err = c.refreshIDList(kit, opt) if err != nil { - blog.Errorf("refresh %s id list cache %s failed, err: %v, rid: %s", c.key.Resource(), idListKey, err, rid) + blog.Errorf("refresh %s id list cache %s failed, err: %v, rid: %s", c.key.Resource(), idListKey, err, + kit.Rid) return } - blog.V(4).Infof("refresh %s id list cache %s success, rid: %s", c.key.Resource(), idListKey, rid) + blog.V(4).Infof("refresh %s id list cache %s success, rid: %s", c.key.Resource(), idListKey, kit.Rid) }() } // refreshIDList refresh the general resource id list cache -func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid string) error { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) +func (c *Cache) refreshIDList(kit *rest.Kit, opt *refreshIDListOpt) error { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) idListKey := opt.filterOpt.IDListKey - tempKey := c.key.IDListTempKey(idListKey, rid) + tempKey := c.key.IDListTempKey(idListKey, kit.Rid) // set the temp id list key in redis for the event watch to judge which temp id list to write to - err := 
redis.Client().Set(ctx, c.key.IDListTempKey(idListKey), tempKey, c.withRandomExpireSeconds(opt.ttl)).Err() + err := redis.Client().Set(kit.Ctx, c.key.IDListTempKey(idListKey), tempKey, + c.withRandomExpireSeconds(opt.ttl)).Err() if err != nil { - blog.Errorf("set temp id list key %s failed, err: %v, rid: %s", tempKey, err, rid) + blog.Errorf("set temp id list key %s failed, err: %v, rid: %s", tempKey, err, kit.Rid) return err } defer func() { if err := redis.Client().Del(context.Background(), c.key.IDListTempKey(idListKey)).Err(); err != nil { - blog.Errorf("delete temp id list key %s failed, err: %v, rid: %s", tempKey, err, rid) + blog.Errorf("delete temp id list key %s failed, err: %v, rid: %s", tempKey, err, kit.Rid) } }() - blog.V(4).Infof("try to refresh id list %s with temp key: %s, rid: %s", idListKey, tempKey, rid) + blog.V(4).Infof("try to refresh id list %s with temp key: %s, rid: %s", idListKey, tempKey, kit.Rid) listOpt := &types.ListDetailOpt{ OnlyListID: true, @@ -575,7 +570,7 @@ func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid st } total := 0 for { - dbRes, err := c.listDataFromDB(ctx, listOpt, rid) + dbRes, err := c.listDataFromDB(kit, listOpt) if err != nil { return err } @@ -594,7 +589,8 @@ func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid st for _, data := range dbData { id, score, err := c.generateID(data) if err != nil { - blog.Errorf("generate %s id from data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, rid) + blog.Errorf("generate %s id from data: %+v failed, err: %v, rid: %s", c.key.Resource(), data, err, + kit.Rid) continue } @@ -606,7 +602,7 @@ func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid st } if _, err = pip.Exec(); err != nil { - blog.Errorf("update temp id list %s failed, err: %v, data: %+v, rid: %s", tempKey, err, dbData, rid) + blog.Errorf("update temp id list %s failed, err: %v, data: %+v, rid: %s", tempKey, err, dbData, kit.Rid) return err } @@ -616,7 +612,8 @@ func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid st info, err := c.parseData(dbData[stepLen-1]) if err != nil { - blog.Errorf("parse %s data(%+v) failed, err: %v, rid: %s", c.key.Resource(), dbData[stepLen-1], err, rid) + blog.Errorf("parse %s data(%+v) failed, err: %v, rid: %s", c.key.Resource(), dbData[stepLen-1], err, + kit.Rid) return err } @@ -630,7 +627,7 @@ func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid st } // if id list exists, we need to delete it - exists, err := isIDListExists(ctx, idListKey, rid) + exists, err := isIDListExists(kit, idListKey) if err != nil { return err } @@ -650,25 +647,26 @@ func (c *Cache) refreshIDList(ctx context.Context, opt *refreshIDListOpt, rid st pipe.Set(c.key.IDListExpireKey(idListKey), time.Now().Unix(), c.withRandomExpireSeconds(opt.ttl)) if _, err = pipe.Exec(); err != nil { - blog.Errorf("refresh id list %s with temp key: %s failed, err :%v, rid: %s", idListKey, tempKey, err, rid) + blog.Errorf("refresh id list %s with temp key: %s failed, err :%v, rid: %s", idListKey, tempKey, err, kit.Rid) return err } if exists { // remove the old id list key in background - go c.deleteIDList(context.Background(), tempOldKey, rid) + kit.Ctx = context.Background() + go c.deleteIDList(kit, tempOldKey) } - blog.V(4).Infof("refresh id list key: %s success, count: %d. rid: %s", idListKey, total, rid) + blog.V(4).Infof("refresh id list key: %s success, count: %d. 
rid: %s", idListKey, total, kit.Rid) return nil } // deleteIDList delete the general resource id list cache -func (c *Cache) deleteIDList(ctx context.Context, key string, rid string) error { +func (c *Cache) deleteIDList(kit *rest.Kit, key string) error { for { cnt, err := redis.Client().ZRemRangeByRank(key, 0, types.PageSize).Result() if err != nil { - blog.Errorf("delete id list: %s failed, err: %v, rid: %s", key, err, rid) + blog.Errorf("delete id list: %s failed, err: %v, rid: %s", key, err, kit.Rid) return err } @@ -681,7 +679,7 @@ func (c *Cache) deleteIDList(ctx context.Context, key string, rid string) error } // listIDsFromRedis list general resource id list from redis -func (c *Cache) listIDsFromRedis(ctx context.Context, key string, opt *general.PagingOption, rid string) ([]string, +func (c *Cache) listIDsFromRedis(kit *rest.Kit, key string, opt *general.PagingOption) ([]string, error) { if opt.Limit == 0 { @@ -695,9 +693,9 @@ func (c *Cache) listIDsFromRedis(ctx context.Context, key string, opt *general.P Max: "+inf", Count: opt.Limit, } - ids, err := redis.Client().ZRangeByScore(ctx, key, redisOpt).Result() + ids, err := redis.Client().ZRangeByScore(kit.Ctx, key, redisOpt).Result() if err != nil { - blog.Errorf("list %s ids from cache failed, err: %v, redis opt: %+v, rid: %s", key, err, redisOpt, rid) + blog.Errorf("list %s ids from cache failed, err: %v, redis opt: %+v, rid: %s", key, err, redisOpt, kit.Rid) return nil, err } return ids, nil @@ -710,28 +708,28 @@ func (c *Cache) listIDsFromRedis(ctx context.Context, key string, opt *general.P Max: "+", Count: opt.Limit, } - ids, err := redis.Client().ZRangeByLex(ctx, key, redisOpt).Result() + ids, err := redis.Client().ZRangeByLex(kit.Ctx, key, redisOpt).Result() if err != nil { - blog.Errorf("list %s ids from cache failed, err: %v, redis opt: %+v, rid: %s", key, err, redisOpt, rid) + blog.Errorf("list %s ids from cache failed, err: %v, redis opt: %+v, rid: %s", key, err, redisOpt, kit.Rid) return nil, err } return ids, nil } // list from start index - ids, err := redis.Client().ZRange(ctx, key, opt.StartIndex, opt.StartIndex+opt.Limit-1).Result() + ids, err := redis.Client().ZRange(kit.Ctx, key, opt.StartIndex, opt.StartIndex+opt.Limit-1).Result() if err != nil { - blog.Errorf("list %s ids from cache failed, err: %v, opt: %+v, rid: %s", key, err, opt, rid) + blog.Errorf("list %s ids from cache failed, err: %v, opt: %+v, rid: %s", key, err, opt, kit.Rid) return nil, err } return ids, nil } // countIDsFromRedis count general resource id list from redis -func (c *Cache) countIDsFromRedis(ctx context.Context, key string, rid string) (int64, error) { - cnt, err := redis.Client().ZCard(ctx, key).Result() +func (c *Cache) countIDsFromRedis(kit *rest.Kit, key string) (int64, error) { + cnt, err := redis.Client().ZCard(kit.Ctx, key).Result() if err != nil { - blog.Errorf("count %s ids from cache failed, err: %v, rid: %s", key, err, rid) + blog.Errorf("count %s ids from cache failed, err: %v, rid: %s", key, err, kit.Rid) return 0, err } return cnt, nil @@ -748,8 +746,7 @@ func (c *Cache) RefreshIDList(kit *rest.Kit, opt *general.RefreshIDListOpt, refreshOpt := &refreshIDListOpt{ filterOpt: &types.IDListFilterOpt{ BasicFilter: &types.BasicFilter{ - SubRes: opt.SubRes, - TenantID: kit.TenantID, + SubRes: opt.SubRes, }, IsAll: true, }, @@ -767,12 +764,11 @@ func (c *Cache) RefreshIDList(kit *rest.Kit, opt *general.RefreshIDListOpt, } refreshOpt.ttl = time.Duration(cond.Interval) * time.Hour - c.tryRefreshIDList(kit.Ctx, refreshOpt, 
kit.Rid) + c.tryRefreshIDList(kit, refreshOpt) return nil } // refresh system id list cache - refreshOpt.filterOpt.IsSystem = true idListTTL, err := c.validateIDList(refreshOpt.filterOpt) if err != nil { blog.Errorf("id list filter option is invalid, err: %v, opt: %+v, rid: %s", err, opt, kit.Rid) @@ -781,11 +777,11 @@ func (c *Cache) RefreshIDList(kit *rest.Kit, opt *general.RefreshIDListOpt, refreshOpt.ttl = idListTTL if opt.SubRes != "" { - refreshOpt.filterOpt.IDListKey = c.Key().IDListKey(opt.SubRes) + refreshOpt.filterOpt.IDListKey = c.Key().IDListKey(kit.TenantID, opt.SubRes) } else { - refreshOpt.filterOpt.IDListKey = c.Key().IDListKey() + refreshOpt.filterOpt.IDListKey = c.Key().IDListKey(kit.TenantID) } - c.tryRefreshIDList(kit.Ctx, refreshOpt, kit.Rid) + c.tryRefreshIDList(kit, refreshOpt) return nil } diff --git a/src/source_controller/cacheservice/cache/general/cache/obj_inst.go b/src/source_controller/cacheservice/cache/general/cache/obj_inst.go index d1f2bf2a52..43d430858e 100644 --- a/src/source_controller/cacheservice/cache/general/cache/obj_inst.go +++ b/src/source_controller/cacheservice/cache/general/cache/obj_inst.go @@ -18,18 +18,14 @@ package cache import ( - "context" "fmt" "configcenter/pkg/cache/general" "configcenter/src/common" - "configcenter/src/common/blog" "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" - "configcenter/src/common/metadata" "configcenter/src/common/util" "configcenter/src/source_controller/cacheservice/cache/general/types" - "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/mongodb/instancemapping" ) @@ -41,28 +37,17 @@ func init() { } // getObjInstTable get object instance table by objID and tenant account -// NOTE: obj with "0" tenant can have inst with other suppliers, so we need to search the obj for its actual tenant -func getObjInstTable(ctx context.Context, filter *types.BasicFilter, rid string) (string, error) { - cond := mapstr.MapStr{ - common.BKObjIDField: filter.SubRes, - } - - obj := new(metadata.Object) - err := mongodb.Client().Table(common.BKTableNameObjDes).Find(cond).Fields(common.TenantID).One(ctx, &obj) - if err != nil { - blog.Errorf("get object tenant account by cond(%+v) failed, err: %v, rid: %s", cond, err, rid) - return "", err - } - return common.GetInstTableName(filter.SubRes, obj.TenantID), nil +func getObjInstTable(kit *rest.Kit, filter *types.BasicFilter) (string, error) { + return common.GetInstTableName(filter.SubRes, kit.TenantID), nil } -func parseObjInstData(data dataWithTable[mapstr.MapStr]) (*basicInfo, error) { +func parseObjInstData(data dataWithTenant[mapstr.MapStr]) (*basicInfo, error) { instID, err := util.GetInt64ByInterface(data.Data[common.BKInstIDField]) if err != nil { return nil, fmt.Errorf("parse id %+v failed, err: %v", data.Data[common.BKInstIDField], err) } - kit := rest.NewKit() + kit := rest.NewKit().WithTenant(data.TenantID) instObjMappings, err := instancemapping.GetInstanceObjectMapping(kit, []int64{instID}) if err != nil { return nil, fmt.Errorf("get object ids from instance ids(%d) failed, err: %v", instID, err) @@ -75,6 +60,5 @@ func parseObjInstData(data dataWithTable[mapstr.MapStr]) (*basicInfo, error) { return &basicInfo{ id: instID, subRes: []string{instObjMappings[0].ObjectID}, - tenant: instObjMappings[0].TenantID, }, nil } diff --git a/src/source_controller/cacheservice/cache/general/cache/util.go b/src/source_controller/cacheservice/cache/general/cache/util.go index 600d19b169..a1ee178ddb 100644 --- 
a/src/source_controller/cacheservice/cache/general/cache/util.go
+++ b/src/source_controller/cacheservice/cache/general/cache/util.go
@@ -18,11 +18,11 @@
 package cache
 
 import (
-	"context"
 	"fmt"
 	"time"
 
 	"configcenter/src/common/blog"
+	"configcenter/src/common/http/rest"
 	"configcenter/src/common/watch"
 	"configcenter/src/source_controller/cacheservice/cache/general/types"
 	"configcenter/src/storage/driver/redis"
@@ -48,20 +48,19 @@ func parseWatchChainNode(node *watch.ChainNode) (*basicInfo, error) {
 		id:     node.InstanceID,
 		oid:    node.Oid,
 		subRes: node.SubResource,
-		tenant: node.TenantID,
 	}, nil
 }
 
 // isIDListExists check if id list exists
-func isIDListExists(ctx context.Context, key string, rid string) (bool, error) {
-	existRes, err := redis.Client().Exists(ctx, key).Result()
+func isIDListExists(kit *rest.Kit, key string) (bool, error) {
+	existRes, err := redis.Client().Exists(kit.Ctx, key).Result()
 	if err != nil {
-		blog.Errorf("check if id list %s exists failed, err: %v, opt: %+v, rid: %s", key, err, rid)
+		blog.Errorf("check if id list %s exists failed, err: %v, rid: %s", key, err, kit.Rid)
 		return false, err
 	}
 
 	if existRes != 1 {
-		blog.V(4).Infof("id list %s key not exists. rid: %s", key, rid)
+		blog.V(4).Infof("id list %s key does not exist. rid: %s", key, kit.Rid)
 		return false, nil
 	}
 
diff --git a/src/source_controller/cacheservice/cache/general/client.go b/src/source_controller/cacheservice/cache/general/client.go
index 11242b5958..71d8020366 100644
--- a/src/source_controller/cacheservice/cache/general/client.go
+++ b/src/source_controller/cacheservice/cache/general/client.go
@@ -34,7 +34,7 @@ func (c *Cache) FullSyncCond() *fullsynccondcli.FullSyncCond {
 // ListDetailByIDs list general resource detail cache by ids
 // NOTE: since event flow and cache are reused, this method may return deleted data
 // because event ttl is long and event detail cache will not be deleted
-func (c *Cache) ListDetailByIDs(kit *rest.Kit, opt *general.ListDetailByIDsOpt, isSystem bool) ([]string, error) {
+func (c *Cache) ListDetailByIDs(kit *rest.Kit, opt *general.ListDetailByIDsOpt) ([]string, error) {
 	cache, exists := c.cacheSet[opt.Resource]
 	if !exists {
 		return nil, kit.CCError.CCErrorf(common.CCErrCommParamsIsInvalid, general.ResourceField)
@@ -46,30 +46,26 @@ func (c *Cache) ListDetailByIDs(kit *rest.Kit, opt *general.ListDetailByIDsOpt,
 	}
 
 	listOpt := &types.ListDetailByIDsOpt{
-		SubRes:   opt.SubResource,
-		IsSystem: isSystem,
-		IDKeys:   idKeys,
-		Fields:   opt.Fields,
+		SubRes: opt.SubResource,
+		IDKeys: idKeys,
+		Fields: opt.Fields,
 	}
 
 	return cache.ListDetailByIDs(kit, listOpt)
 }
 
 // ListDetailByUniqueKey list general resource detail cache by unique keys
-func (c *Cache) ListDetailByUniqueKey(kit *rest.Kit, opt *general.ListDetailByUniqueKeyOpt,
-	isSystem bool) ([]string, error) {
-
+func (c *Cache) ListDetailByUniqueKey(kit *rest.Kit, opt *general.ListDetailByUniqueKeyOpt) ([]string, error) {
 	cache, exists := c.cacheSet[opt.Resource]
 	if !exists {
 		return nil, kit.CCError.CCErrorf(common.CCErrCommParamsIsInvalid, general.ResourceField)
 	}
 
 	listOpt := &types.ListDetailByUniqueKeyOpt{
-		SubRes:   opt.SubResource,
-		IsSystem: isSystem,
-		Type:     opt.Type,
-		Keys:     opt.Keys,
-		Fields:   opt.Fields,
+		SubRes: opt.SubResource,
+		Type:   opt.Type,
+		Keys:   opt.Keys,
+		Fields: opt.Fields,
 	}
 
 	return cache.ListDetailByUniqueKey(kit, listOpt)
@@ -94,8 +90,7 @@ func (c *Cache) ListCacheByFullSyncCond(kit *rest.Kit, opt *fullsynccond.ListCac
 		IDListFilter: &types.IDListFilterOpt{
 			IDListKey: idListKey,
 			BasicFilter:
&types.BasicFilter{ - SubRes: cond.SubResource, - TenantID: cond.TenantID, + SubRes: cond.SubResource, }, IsAll: cond.IsAll, Cond: cond.Condition, @@ -115,9 +110,9 @@ func (c *Cache) ListData(kit *rest.Kit, opt *general.ListDetailOpt) (int64, []st return 0, nil, kit.CCError.CCErrorf(common.CCErrCommParamsIsInvalid, general.ResourceField) } - idListKey := cache.Key().IDListKey() + idListKey := cache.Key().IDListKey(kit.TenantID) if opt.SubResource != "" { - idListKey = cache.Key().IDListKey(opt.SubResource) + idListKey = cache.Key().IDListKey(kit.TenantID, opt.SubResource) } listOpt := &types.ListDetailOpt{ @@ -125,9 +120,7 @@ func (c *Cache) ListData(kit *rest.Kit, opt *general.ListDetailOpt) (int64, []st IDListFilter: &types.IDListFilterOpt{ IDListKey: idListKey, BasicFilter: &types.BasicFilter{ - SubRes: opt.SubResource, - TenantID: kit.TenantID, - IsSystem: true, + SubRes: opt.SubResource, }, IsAll: true, }, diff --git a/src/source_controller/cacheservice/cache/general/full-sync-cond/client.go b/src/source_controller/cacheservice/cache/general/full-sync-cond/client.go index 5c360c2b4e..68afe31662 100644 --- a/src/source_controller/cacheservice/cache/general/full-sync-cond/client.go +++ b/src/source_controller/cacheservice/cache/general/full-sync-cond/client.go @@ -38,7 +38,7 @@ func (f *FullSyncCond) CreateFullSyncCond(kit *rest.Kit, opt *types.CreateFullSy cond[types.SubResField] = opt.SubResource } - cnt, err := mongodb.Client().Table(types.BKTableNameFullSyncCond).Find(cond).Count(kit.Ctx) + cnt, err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Find(cond).Count(kit.Ctx) if err != nil { blog.Errorf("count is_all=true full sync cond failed, err: %v, cond: %+v, rid: %s", err, cond, kit.Rid) return 0, kit.CCError.CCErrorf(common.CCErrCommDBSelectFailed) @@ -53,7 +53,7 @@ func (f *FullSyncCond) CreateFullSyncCond(kit *rest.Kit, opt *types.CreateFullSy cond := mapstr.MapStr{ types.IsAllField: false, } - cnt, err := mongodb.Client().Table(types.BKTableNameFullSyncCond).Find(cond).Count(kit.Ctx) + cnt, err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Find(cond).Count(kit.Ctx) if err != nil { blog.Errorf("count is_all=true full sync cond failed, err: %v, cond: %+v, rid: %s", err, cond, kit.Rid) return 0, kit.CCError.CCErrorf(common.CCErrCommDBSelectFailed) @@ -66,7 +66,7 @@ func (f *FullSyncCond) CreateFullSyncCond(kit *rest.Kit, opt *types.CreateFullSy } } - id, err := mongodb.Client().NextSequence(kit.Ctx, types.BKTableNameFullSyncCond) + id, err := mongodb.Shard(kit.SysShardOpts()).NextSequence(kit.Ctx, types.BKTableNameFullSyncCond) if err != nil { blog.Errorf("generate full sync cond id failed, err: %v, rid: %s", err, kit.Rid) return 0, kit.CCError.CCErrorf(common.CCErrCommGenerateRecordIDFailed) @@ -82,7 +82,7 @@ func (f *FullSyncCond) CreateFullSyncCond(kit *rest.Kit, opt *types.CreateFullSy TenantID: kit.TenantID, } - err = mongodb.Client().Table(types.BKTableNameFullSyncCond).Insert(kit.Ctx, data) + err = mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Insert(kit.Ctx, data) if err != nil { blog.Errorf("insert full sync cond failed, err: %v, data: %+v, rid: %s", err, data, kit.Rid) return 0, kit.CCError.CCErrorf(common.CCErrCommDBInsertFailed) @@ -101,7 +101,7 @@ func (f *FullSyncCond) UpdateFullSyncCond(kit *rest.Kit, opt *types.UpdateFullSy types.IntervalField: opt.Data.Interval, } - err := mongodb.Client().Table(types.BKTableNameFullSyncCond).Update(kit.Ctx, cond, data) + err := 
mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Update(kit.Ctx, cond, data) if err != nil { blog.Errorf("update full sync cond failed, err: %v, cond: %+v, data: %+v, rid: %s", err, cond, data, kit.Rid) return kit.CCError.CCErrorf(common.CCErrCommDBUpdateFailed) @@ -116,7 +116,7 @@ func (f *FullSyncCond) DeleteFullSyncCond(kit *rest.Kit, opt *types.DeleteFullSy types.IDField: opt.ID, } - err := mongodb.Client().Table(types.BKTableNameFullSyncCond).Delete(kit.Ctx, delCond) + err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Delete(kit.Ctx, delCond) if err != nil { blog.Errorf("delete full sync cond %d failed, err: %v, rid: %s", opt.ID, err, kit.Rid) return kit.CCError.CCErrorf(common.CCErrCommDBDeleteFailed) @@ -146,7 +146,7 @@ func (f *FullSyncCond) ListFullSyncCond(kit *rest.Kit, opt *types.ListFullSyncCo } result := make([]types.FullSyncCond, 0) - err := mongodb.Client().Table(types.BKTableNameFullSyncCond).Find(listCond).All(kit.Ctx, &result) + err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Find(listCond).All(kit.Ctx, &result) if err != nil { blog.Errorf("list full sync cond failed, err: %v, cond: %+v, rid: %s", err, listCond, kit.Rid) return nil, kit.CCError.CCErrorf(common.CCErrCommDBSelectFailed) @@ -167,7 +167,7 @@ func (f *FullSyncCond) GetFullSyncCond(kit *rest.Kit, id int64) (*types.FullSync } fullSyncCond := new(types.FullSyncCond) - err := mongodb.Client().Table(types.BKTableNameFullSyncCond).Find(cond).One(kit.Ctx, &fullSyncCond) + err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameFullSyncCond).Find(cond).One(kit.Ctx, &fullSyncCond) if err != nil { blog.Errorf("get full sync cond failed, err: %v, cond: %+v, rid: %s", err, cond, kit.Rid) return nil, kit.CCError.CCErrorf(common.CCErrCommDBSelectFailed) diff --git a/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go b/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go index 110ed7cde7..6b317c01d9 100644 --- a/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go +++ b/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go @@ -23,21 +23,19 @@ import ( "configcenter/pkg/cache/general" "configcenter/src/source_controller/cacheservice/cache/general/types" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" ) // FullSyncCond defines the full sync cond related logics type FullSyncCond struct { - loopW stream.LoopInterface + task *task.Task chMap map[general.ResType]chan<- types.FullSyncCondEvent } // New FullSyncCond -func New(loopW stream.LoopInterface, chMap map[general.ResType]chan<- types.FullSyncCondEvent) (*FullSyncCond, - error) { - +func New(watchTask *task.Task, chMap map[general.ResType]chan<- types.FullSyncCondEvent) (*FullSyncCond, error) { f := &FullSyncCond{ - loopW: loopW, + task: watchTask, chMap: chMap, } diff --git a/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go b/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go index e66f0a0869..a2a8dd4b24 100644 --- a/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go +++ b/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go @@ -31,6 +31,7 @@ import ( "configcenter/src/common/util" cachetypes "configcenter/src/source_controller/cacheservice/cache/general/types" tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler" + 
"configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) @@ -39,21 +40,19 @@ import ( func (f *FullSyncCond) Watch() error { tokenHandler := tokenhandler.NewMemoryTokenHandler() - startAtTime := &types.TimeStamp{Sec: uint32(time.Now().Unix())} - if err := f.initFullSyncCond(); err != nil { return err } - loopOptions := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ + opts := &types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ Name: "full-sync-cond", - WatchOpt: &types.WatchOptions{ - Options: types.Options{ - Filter: make(mapstr.MapStr), + CollOpts: &types.WatchCollOptions{ + CollectionOptions: types.CollectionOptions{ + CollectionFilter: &types.CollectionFilter{ + Regex: fullsynccond.BKTableNameFullSyncCond, + }, EventStruct: new(fullsynccond.FullSyncCond), - Collection: fullsynccond.BKTableNameFullSyncCond, - StartAtTime: startAtTime, }, }, TokenHandler: tokenHandler, @@ -62,14 +61,15 @@ func (f *FullSyncCond) Watch() error { RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ + EventHandler: &types.TaskBatchHandler{ DoBatch: f.doBatch, }, BatchSize: 200, } - if err := f.loopW.WithBatch(loopOptions); err != nil { - blog.Errorf("watch full sync cond failed, err: %v", err) + err := f.task.AddLoopBatchTask(opts) + if err != nil { + blog.Errorf("add watch full sync cond task failed, err: %v", err) return err } @@ -77,40 +77,9 @@ func (f *FullSyncCond) Watch() error { } // doBatch batch handle full sync cond event -func (f *FullSyncCond) doBatch(es []*types.Event) (retry bool) { - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - - // get deleted full sync cond oid to info map - delOids := make([]string, 0) - for _, e := range es { - if e.OperationType == types.Delete { - delOids = append(delOids, e.Oid) - } - } - - delOidCondMap := make(map[string]*fullsynccond.FullSyncCond) - if len(delOids) > 0 { - filter := mapstr.MapStr{ - "oid": mapstr.MapStr{common.BKDBIN: delOids}, - "coll": fullsynccond.BKTableNameFullSyncCond, - } - archives := make([]delArchive, 0) - err := mongodb.Client().Table(common.BKTableNameDelArchive).Find(filter).All(ctx, &archives) - if err != nil { - blog.Errorf("get deleted full sync cond failed, err: %v, oids: %+v", err, delOids) - return true - } - - for _, archive := range archives { - if archive.Detail == nil { - continue - } - delOidCondMap[archive.Oid] = archive.Detail - } - } - +func (f *FullSyncCond) doBatch(dbInfo *types.DBInfo, es []*types.Event) bool { // aggregate full sync cond event - condMap := f.aggregateEvent(es, delOidCondMap) + condMap := f.aggregateEvent(es) // generate full sync cond event resEventMap := make(map[general.ResType]map[cachetypes.EventType][]*fullsynccond.FullSyncCond) @@ -148,9 +117,7 @@ func (f *FullSyncCond) doBatch(es []*types.Event) (retry bool) { } // aggregateEvent aggregate full sync cond event -func (f *FullSyncCond) aggregateEvent(es []*types.Event, - delOidCondMap map[string]*fullsynccond.FullSyncCond) map[types.OperType]map[string]*fullsynccond.FullSyncCond { - +func (f *FullSyncCond) aggregateEvent(es []*types.Event) map[types.OperType]map[string]*fullsynccond.FullSyncCond { condMap := make(map[types.OperType]map[string]*fullsynccond.FullSyncCond) supportedOps := []types.OperType{types.Insert, types.Update, types.Delete} for _, op := range supportedOps { @@ -185,16 +152,16 @@ func (f *FullSyncCond) aggregateEvent(es []*types.Event, 
condMap[e.OperationType][condKey] = cond case types.Delete: - cond, exists := delOidCondMap[e.Oid] - if !exists { - blog.Errorf("delete event %s has no matching del archive", e.Oid) + cond, ok := e.Document.(*fullsynccond.FullSyncCond) + if !ok { + blog.Errorf("event document %+v type is invalid", e.Document) continue } condKey := genFullSyncCondUniqueKey(cond) // if full sync cond with same unique key is inserted before, treat these event as not exists - _, exists = condMap[types.Insert][condKey] + _, exists := condMap[types.Insert][condKey] if exists { delete(condMap[types.Insert], condKey) continue @@ -223,11 +190,6 @@ func genFullSyncCondUniqueKey(cond *fullsynccond.FullSyncCond) string { return fmt.Sprintf("%s:%s:%s", cond.Resource, cond.TenantID, cond.SubResource) } -type delArchive struct { - Oid string `bson:"oid"` - Detail *fullsynccond.FullSyncCond `bson:"detail"` -} - // initFullSyncCond get all full sync cond from db and send initialize event to channel func (f *FullSyncCond) initFullSyncCond() error { ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) @@ -235,26 +197,33 @@ func (f *FullSyncCond) initFullSyncCond() error { fullSyncCondMap := make(map[general.ResType][]*fullsynccond.FullSyncCond) - cond := make(mapstr.MapStr) + err := mongodb.Dal().ExecForAllDB(func(db local.DB) error { + cond := make(mapstr.MapStr) - for { - paged := make([]*fullsynccond.FullSyncCond, 0) - err := mongodb.Client().Table(fullsynccond.BKTableNameFullSyncCond).Find(cond).Sort(fullsynccond.IDField). - Limit(cachetypes.PageSize).All(ctx, &paged) - if err != nil { - blog.Errorf("paged get full sync cond data failed, cond: %+v, err: %v, rid: %s", cond, err, rid) - return err - } + for { + paged := make([]*fullsynccond.FullSyncCond, 0) + err := db.Table(fullsynccond.BKTableNameFullSyncCond).Find(cond).Sort(fullsynccond.IDField). 
+ Limit(cachetypes.PageSize).All(ctx, &paged) + if err != nil { + blog.Errorf("paged get full sync cond data failed, cond: %+v, err: %v, rid: %s", cond, err, rid) + return err + } - for _, data := range paged { - fullSyncCondMap[data.Resource] = append(fullSyncCondMap[data.Resource], data) - } + for _, data := range paged { + fullSyncCondMap[data.Resource] = append(fullSyncCondMap[data.Resource], data) + } - if len(paged) < cachetypes.PageSize { - break + if len(paged) < cachetypes.PageSize { + break + } + + cond[fullsynccond.IDField] = mapstr.MapStr{common.BKDBGT: paged[len(paged)-1].ID} } - cond[fullsynccond.IDField] = mapstr.MapStr{common.BKDBGT: paged[len(paged)-1].ID} + return nil + }) + if err != nil { + return err } // send init full sync cond event to channel diff --git a/src/source_controller/cacheservice/cache/general/types/types.go b/src/source_controller/cacheservice/cache/general/types/types.go index fd4e0b50a0..a9191e31e9 100644 --- a/src/source_controller/cacheservice/cache/general/types/types.go +++ b/src/source_controller/cacheservice/cache/general/types/types.go @@ -62,10 +62,9 @@ type FullSyncCondInfo struct { // ListDetailByIDsOpt is list general resource detail cache by ids option type ListDetailByIDsOpt struct { - SubRes string - IsSystem bool - IDKeys []string - Fields []string + SubRes string + IDKeys []string + Fields []string } // Validate ListDetailByIDsOpt @@ -87,11 +86,10 @@ func (o *ListDetailByIDsOpt) Validate(hasSubRes bool) ccErr.RawErrorInfo { // ListDetailByUniqueKeyOpt is list general resource detail cache by unique keys option type ListDetailByUniqueKeyOpt struct { - SubRes string - IsSystem bool - Type general.UniqueKeyType - Keys []string - Fields []string + SubRes string + Type general.UniqueKeyType + Keys []string + Fields []string } // Validate ListDetailByUniqueKeyOpt @@ -226,11 +224,7 @@ func (o *IDListFilterOpt) Validate(hasSubRes bool) ccErr.RawErrorInfo { // BasicFilter is the basic filter for getting general resource data from db type BasicFilter struct { - SubRes string - TenantID string - - // IsSystem defines whether id list is for system use, system resource do not need to be filtered by TenantID - IsSystem bool + SubRes string } // Validate BasicFilter diff --git a/src/source_controller/cacheservice/cache/general/watch/watch.go b/src/source_controller/cacheservice/cache/general/watch/watch.go index 9c840ffb93..51ac4970c8 100644 --- a/src/source_controller/cacheservice/cache/general/watch/watch.go +++ b/src/source_controller/cacheservice/cache/general/watch/watch.go @@ -19,45 +19,36 @@ package watch import ( - "context" "fmt" - "net/http" "strings" - "time" "configcenter/pkg/cache/general/mapping" "configcenter/pkg/filter" + "configcenter/pkg/tenant" "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/errors" "configcenter/src/common/http/rest" "configcenter/src/common/util" "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/cache/general/cache" cachetypes "configcenter/src/source_controller/cacheservice/cache/general/types" tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler" - "configcenter/src/source_controller/cacheservice/event" + "configcenter/src/source_controller/cacheservice/event/loop" watchcli "configcenter/src/source_controller/cacheservice/event/watch" - "configcenter/src/storage/driver/mongodb" - "configcenter/src/storage/stream/types" ) // Watcher defines mongodb event watcher for general 
resource
type Watcher struct {
-	cache        *cache.Cache
-	isMaster     discovery.ServiceManageInterface
-	watchCli     *watchcli.Client
-	tokenHandler *tokenhandler.SingleHandler
-	eventKey     event.Key
+	cache       *cache.Cache
+	loopWatcher *loop.LoopWatcher
 }
 
 // Init general resource mongodb event watcher
 func Init(cache *cache.Cache, isMaster discovery.ServiceManageInterface, watchCli *watchcli.Client) error {
 	watcher := &Watcher{
-		cache:    cache,
-		isMaster: isMaster,
-		watchCli: watchCli,
+		cache:       cache,
+		loopWatcher: loop.NewLoopWatcher(isMaster, watchCli),
 	}
 
 	cache.CacheChangeCh() <- struct{}{}
@@ -78,134 +69,107 @@ func (w *Watcher) watch() error {
 		return err
 	}
 
-	w.eventKey, err = event.GetResourceKeyWithCursorType(cursorType)
+	name := fmt.Sprintf("%s%s:%s", common.BKCacheKeyV3Prefix, "common_res", resType)
+
+	loopEventChan := make(chan loop.TenantEvent)
+
+	go w.watchCacheChange(cursorType, name, loopEventChan)
+
+	opts := &loop.LoopWatchTaskOptions{
+		Name:         name,
+		CursorType:   cursorType,
+		TokenHandler: tokenhandler.NewSingleTokenHandler(name),
+		EventHandler: w.handleEvents,
+		TenantChan:   loopEventChan,
+	}
+
+	err = w.loopWatcher.AddLoopWatchTask(opts)
 	if err != nil {
-		blog.Errorf("get event key with cursor type %s failed, err: %v", cursorType, err)
+		blog.Errorf("add %s loop watch task failed, err: %v", cursorType, err)
 		return err
 	}
 
-	name := fmt.Sprintf("%s%s:%s", common.BKCacheKeyV3Prefix, "common_res", resType)
-	w.tokenHandler = tokenhandler.NewSingleTokenHandler(name, mongodb.Client())
-
-	ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)
-	go w.loopWatch(ctx, cursorType)
 	return nil
}

-func (w *Watcher) loopWatch(ctx context.Context, cursorType watch.CursorType) {
-	prevStatus := w.isMaster.IsMaster()
-
-	opts := &watch.WatchEventOptions{
-		Resource: cursorType,
-	}
-
+func (w *Watcher) watchCacheChange(cursorType watch.CursorType, name string, loopEventChan chan<- loop.TenantEvent) {
+	tenantChan := make(<-chan tenant.TenantEvent)
 	for {
-		// get need watched resource when initialization and cache changes, wait until the cache needs watch
 		select {
 		case <-w.cache.CacheChangeCh():
-			for {
-				watchAll, subRes := w.cache.NeedWatchRes()
-				if watchAll || len(subRes) > 0 {
-					opts.Filter.SubResources = subRes
-					break
-				}
-				blog.Infof("watch %s event, but do not need cache, wait until it needs cache again", cursorType)
-				prevStatus = false
-				select {
-				case <-w.cache.CacheChangeCh():
-				}
+			// cancel old loop watch task
+			loopEventChan <- loop.TenantEvent{
+				EventType:   watch.Delete,
+				IsAllTenant: true,
+			}
+			tenant.RemoveTenantEventChan(name)
+			tenantChan = make(<-chan tenant.TenantEvent)
+
+			// get need watched resource when initialization and cache changes
+			watchAll, tenantSubResMap := w.cache.NeedWatchRes()
+			if watchAll {
+				// watch all tenants' event
+				tenantChan = tenant.NewTenantEventChan(name)
+				continue
 			}
-		default:
-		}
-
-		isMaster := w.isMaster.IsMaster()
-		if !isMaster {
-			prevStatus = false
-			blog.V(4).Infof("watch %s event, but not master, skip.", cursorType)
-			time.Sleep(time.Minute)
-			continue
-		}
-
-		// need watch status changed, re-watch from the last cursor with renewed watch resource type
-		if !prevStatus {
-			prevStatus = isMaster
-			var err error
-			opts.Cursor, err = w.tokenHandler.GetStartWatchToken(ctx)
-			if err != nil {
-				blog.Errorf("get %s start watch token failed, err: %v", cursorType, err)
-				time.Sleep(500 * time.Millisecond)
+
+			// watch specific tenants' event
+			if len(tenantSubResMap) > 0 {
+				for tenantID, subRes := range 
tenantSubResMap { + loopEventChan <- loop.TenantEvent{ + EventType: watch.Update, + TenantID: tenantID, + WatchOpts: &watch.WatchEventOptions{ + Resource: cursorType, + Filter: watch.WatchEventFilter{ + SubResources: util.StrArrayUnique(subRes), + }, + }, + } + } continue } - select { - case w.cache.CacheChangeCh() <- struct{}{}: - default: + blog.Infof("watch %s event, but do not need cache, wait until it needs cache again", cursorType) + case e, ok := <-tenantChan: + if !ok { + // tenant chan is closed, wait until it needs cache again + tenantChan = make(<-chan tenant.TenantEvent) + continue } - continue - } - - retryWrapper(5, func() error { - return w.doWatch(ctx, opts) - }) - } -} -func (w *Watcher) doWatch(ctx context.Context, opts *watch.WatchEventOptions) error { - kit := &rest.Kit{ - Rid: util.GenerateRID(), - Header: make(http.Header), - Ctx: ctx, - CCError: errors.NewFromCtx(errors.EmptyErrorsSetting).CreateDefaultCCErrorIf("zh-cn"), - User: common.CCSystemOperatorUserName, - TenantID: common.BKSuperTenantID, - } - - var events []*watch.WatchEventDetail - var err error - if opts.Cursor == "" { - lastEvent, err := w.watchCli.WatchFromNow(kit, w.eventKey, opts) - if err != nil { - blog.Errorf("watch %s event from now failed, re-watch again, err: %v, rid: %s", opts.Resource, err, kit.Rid) - return err - } - events = []*watch.WatchEventDetail{lastEvent} - } else { - events, err = w.watchCli.WatchWithCursor(kit, w.eventKey, opts) - if err != nil { - if ccErr, ok := err.(errors.CCErrorCoder); ok && ccErr.GetCode() == common.CCErrEventChainNodeNotExist { - // the cursor does not exist, re-watch from now - opts.Cursor = "" - if err = w.tokenHandler.ResetWatchToken(types.TimeStamp{Sec: uint32(time.Now().Unix())}); err != nil { - blog.Errorf("reset %s watch token failed, err: %v, rid: %s", opts.Resource, err, kit.Rid) - return err + switch e.EventType { + case tenant.Create: + loopEventChan <- loop.TenantEvent{ + EventType: watch.Create, + TenantID: e.TenantID, + WatchOpts: &watch.WatchEventOptions{Resource: cursorType}, + } + case tenant.Delete: + loopEventChan <- loop.TenantEvent{ + EventType: watch.Delete, + TenantID: e.TenantID, } - - blog.Errorf("watch event failed, re-watch from now, err: %v, opt: %+v, rid: %s", err, opts, kit.Rid) - return ccErr } - blog.Errorf("watch event failed, err: %v, opt: %+v, rid: %s", err, opts, kit.Rid) - return err } } +} +func (w *Watcher) handleEvents(kit *rest.Kit, events []*watch.WatchEventDetail) error { if len(events) == 0 { return nil } upsertDataArr, delDataArr := w.aggregateEvent(events, kit.Rid) - if err = w.cache.AddData(ctx, upsertDataArr, kit.Rid); err != nil { - blog.Errorf("add %s cache data failed, err: %v, data: %+v, rid: %s", opts.Resource, err, upsertDataArr, kit.Rid) - return err - } - - if err = w.cache.RemoveData(ctx, delDataArr, kit.Rid); err != nil { - blog.Errorf("delete %s cache data failed, err: %v, data: %+v, rid: %s", opts.Resource, err, delDataArr, kit.Rid) + if err := w.cache.AddData(kit, upsertDataArr); err != nil { + blog.Errorf("add %s cache data failed, err: %v, data: %+v, rid: %s", w.cache.Key().Resource(), err, + upsertDataArr, kit.Rid) return err } - opts.Cursor = events[len(events)-1].Cursor - if err = w.tokenHandler.SetLastWatchToken(ctx, opts.Cursor); err != nil { - blog.Errorf("set %s watch token to %s failed, err: %v, rid: %s", opts.Resource, opts.Cursor, err, kit.Rid) + if err := w.cache.RemoveData(kit, delDataArr); err != nil { + blog.Errorf("delete %s cache data failed, err: %v, data: %+v, rid: %s", 
w.cache.Key().Resource(), err, + delDataArr, kit.Rid) return err } return nil @@ -260,13 +224,3 @@ func (w *Watcher) aggregateEvent(events []*watch.WatchEventDetail, rid string) ( } return upsertDataArr, delDataArr } - -func retryWrapper(maxRetry int, handler func() error) { - for retry := 0; retry < maxRetry; retry++ { - err := handler() - if err == nil { - return - } - time.Sleep(500 * time.Millisecond * time.Duration(retry)) - } -} diff --git a/src/source_controller/cacheservice/cache/mainline/biz.go b/src/source_controller/cacheservice/cache/mainline/biz.go deleted file mode 100644 index 89c1ecfd46..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/biz.go +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import ( - "context" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/redis" - "configcenter/src/storage/stream" - "configcenter/src/storage/stream/types" - "github.com/tidwall/gjson" -) - -// business is a instance to watch business's change event and -// then try to refresh it to the cache. -// it based one the event loop watch mechanism which can ensure -// all the event can be watched safely, which also means the cache -// can be refreshed without lost and immediately. -type business struct { - key keyGenerator - event stream.LoopInterface - rds redis.Client - db dal.DB -} - -// Run start to watch and refresh the business's cache. -func (b *business) Run() error { - - // initialize business token handler key. - handler := newTokenHandler(b.key) - startTime, err := handler.getStartTimestamp(context.Background()) - if err != nil { - blog.Errorf("get business cache event start at time failed, err: %v", err) - return err - } - - loopOpts := &types.LoopOneOptions{ - LoopOptions: types.LoopOptions{ - Name: "biz_cache", - WatchOpt: &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(map[string]interface{}), - Collection: common.BKTableNameBaseApp, - // start token will be automatically set when it's running, - // so we do not set here. - StartAfterToken: nil, - StartAtTime: startTime, - WatchFatalErrorCallback: handler.resetWatchTokenWithTimestamp, - }, - }, - TokenHandler: handler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 4, - RetryDuration: retryDuration, - }, - }, - EventHandler: &types.OneHandler{ - DoAdd: b.onUpsert, - DoUpdate: b.onUpsert, - DoDelete: b.onDelete, - }, - } - - return b.event.WithOne(loopOpts) -} - -// onUpsert set or update business cache when a add/update/upsert -// event is triggered. 
-func (b *business) onUpsert(e *types.Event) bool { - if blog.V(4) { - blog.Infof("received biz cache event, op: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, e.ID()) - } - - bizID := gjson.GetBytes(e.DocBytes, common.BKAppIDField).Int() - if bizID <= 0 { - blog.Errorf("received invalid biz event, skip, op: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, e.ID()) - return false - } - - // update the cache. - err := b.rds.Set(context.Background(), b.key.detailKey(bizID), e.DocBytes, b.key.detailExpireDuration).Err() - if err != nil { - blog.Errorf("set biz cache failed, op: %s, doc: %s, err: %v, rid: %s", e.OperationType, e.DocBytes, err, e.ID()) - return true - } - - return false -} - -// onDelete delete business cache when a business s delete. -func (b *business) onDelete(e *types.Event) bool { - - filter := mapstr.MapStr{ - "coll": common.BKTableNameBaseApp, - "oid": e.Oid, - } - - biz := new(bizArchive) - err := b.db.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail").One(context.Background(), biz) - if err != nil { - blog.Errorf("get biz del archive detail failed, err: %v, rid: %s", err, e.ID()) - if b.db.IsNotFoundError(err) { - return false - } - return true - } - - blog.Infof("received delete biz %d/%s event, rid: %s", biz.Detail.BusinessID, biz.Detail.BusinessName, e.ID()) - - // delete the cache. - if err := b.rds.Del(context.Background(), b.key.detailKey(biz.Detail.BusinessID)).Err(); err != nil { - blog.Errorf("delete biz cache failed, err: %v, rid: %s", err, e.ID()) - return true - } - - return false -} diff --git a/src/source_controller/cacheservice/cache/mainline/client.go b/src/source_controller/cacheservice/cache/mainline/client.go index ca8d12b4cd..df0808fe48 100644 --- a/src/source_controller/cacheservice/cache/mainline/client.go +++ b/src/source_controller/cacheservice/cache/mainline/client.go @@ -10,6 +10,7 @@ * limitations under the License. */ +// Package mainline is the mainline instance cache package mainline import ( @@ -18,90 +19,42 @@ import ( "strings" "sync" + "configcenter/pkg/cache/general" + "configcenter/src/apimachinery/discovery" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/json" + "configcenter/src/common/http/rest" "configcenter/src/common/metadata" - "configcenter/src/storage/dal" - dalrds "configcenter/src/storage/dal/redis" - "configcenter/src/storage/driver/mongodb" + generalcache "configcenter/src/source_controller/cacheservice/cache/general" "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream" ) var client *Client var clientOnce sync.Once -var cache *cacheCollection +var cache *mainlineCache // NewMainlineCache is to initialize a mainline cache handle instance. -// It will start to cache the business's mainline topology instance's -// cache when a instance's event is occurred. -// It's a event triggered and ttl refresh combined cache mechanism. -// which help us to refresh the cache in time with event, and refresh -// cache with ttl without event triggered. // Note: it can only be called for once. -func NewMainlineCache(event stream.LoopInterface) error { - +func NewMainlineCache(isMaster discovery.ServiceManageInterface) error { if cache != nil { return nil } - // cache has not been initialized. 
- biz := &business{ - key: bizKey, - event: event, - rds: redis.Client(), - db: mongodb.Client(), - } - - if err := biz.Run(); err != nil { - return fmt.Errorf("run biz cache failed, err: %v", err) - } - - module := &module{ - key: moduleKey, - event: event, - rds: redis.Client(), - db: mongodb.Client(), - } - if err := module.Run(); err != nil { - return fmt.Errorf("run module cache failed, err: %v", err) - } - - set := &set{ - key: setKey, - event: event, - rds: redis.Client(), - db: mongodb.Client(), - } - if err := set.Run(); err != nil { - return fmt.Errorf("run set cache failed, err: %v", err) - } - - custom := &customLevel{ - event: event, - rds: redis.Client(), - db: mongodb.Client(), + cache = &mainlineCache{ + isMaster: isMaster, } - if err := custom.Run(); err != nil { - return fmt.Errorf("run biz custom level cache failed, err: %v", err) + if err := cache.Run(); err != nil { + return fmt.Errorf("run mainline cache failed, err: %v", err) } - cache = &cacheCollection{ - business: biz, - set: set, - module: module, - custom: custom, - } return nil } // NewMainlineClient new a mainline cache client, which is used to get the business's // mainline topology's instance cache. // this client can only be initialized for once. -func NewMainlineClient() *Client { - +func NewMainlineClient(cache *generalcache.Cache) *Client { if client != nil { return client } @@ -109,8 +62,7 @@ func NewMainlineClient() *Client { // initialize for once. clientOnce.Do(func() { client = &Client{ - rds: redis.Client(), - db: mongodb.Client(), + cache: cache, } }) @@ -121,397 +73,114 @@ func NewMainlineClient() *Client { // which is used to get cache from redis and refresh cache // with ttl policy. type Client struct { - rds dalrds.Client - db dal.DB + cache *generalcache.Cache } // GetBusiness get a business's all info with business id -func (c *Client) GetBusiness(ctx context.Context, bizID int64) (string, error) { - rid := ctx.Value(common.ContextRequestIDField) +func (c *Client) GetBusiness(kit *rest.Kit, bizID int64) (string, error) { + return c.getCacheDetailByID(kit, general.Biz, "", bizID) +} - key := bizKey.detailKey(bizID) - biz, err := c.rds.Get(ctx, key).Result() - if err == nil { - return biz, nil +// getCacheDetailByID get resource cache detail by id +func (c *Client) getCacheDetailByID(kit *rest.Kit, resource general.ResType, subRes string, id int64) (string, error) { + listOpt := &general.ListDetailByIDsOpt{ + Resource: resource, + SubResource: subRes, + IDs: []int64{id}, } - - blog.Errorf("get business %d info from cache failed, will get from db, err: %v, rid: %v", bizID, err, rid) - - // error occurs, get from db directly. - biz, err = c.getBusinessFromMongo(bizID) + details, err := c.cache.ListDetailByIDs(kit, listOpt) if err != nil { - blog.Errorf("get biz detail from db failed, err: %v, rid: %v", err, rid) + blog.Errorf("get %s:%s %d from cache failed, err: %v, rid: %v", resource, subRes, id, err, kit.Rid) return "", err } - // refresh biz cache with the latest info. - err = c.rds.Set(ctx, bizKey.detailKey(bizID), biz, bizKey.detailExpireDuration).Err() - if err != nil { - blog.Errorf("update biz cache failed, err: %v, rid: %s", err, rid) - // do not return, cache will be refresh with next round. 
+ if len(details) != 1 || details[0] == "" { + blog.Errorf("%s:%s %d cache detail %+v is invalid, rid: %s", resource, subRes, id, details, kit.Rid) + return "", kit.CCError.CCErrorf(common.CCErrCommParamsInvalid, "cache detail") } - - // get from db - return biz, nil + return details[0], nil } // ListBusiness list business's cache with options. -func (c *Client) ListBusiness(ctx context.Context, opt *metadata.ListWithIDOption) ([]string, error) { - rid := ctx.Value(common.ContextRequestIDField) +func (c *Client) ListBusiness(kit *rest.Kit, opt *metadata.ListWithIDOption) ([]string, error) { + return c.listCacheDetail(kit, general.Biz, "", opt) +} + +// listCacheDetail list resource cache detail +func (c *Client) listCacheDetail(kit *rest.Kit, res general.ResType, subRes string, opt *metadata.ListWithIDOption) ( + []string, error) { if len(opt.IDs) == 0 { return make([]string, 0), nil } - keys := make([]string, len(opt.IDs)) - for idx, bizID := range opt.IDs { - keys[idx] = bizKey.detailKey(bizID) + listOpt := &general.ListDetailByIDsOpt{ + Resource: res, + SubResource: subRes, + IDs: opt.IDs, + Fields: opt.Fields, } - - bizList, err := c.rds.MGet(context.Background(), keys...).Result() + details, err := c.cache.ListDetailByIDs(kit, listOpt) if err != nil { - blog.Errorf("get business %d info from cache failed, get from db directly, err: %v, rid: %v", opt.IDs, err, rid) - return c.listBusinessWithRefreshCache(ctx, opt.IDs, opt.Fields) + blog.Errorf("list %s:%s from cache failed, err: %v, opt: %+v, rid: %v", res, subRes, err, opt, kit.Rid) + return nil, err } - all := make([]string, 0) - toAdd := make([]int64, 0) - for idx, biz := range bizList { - if biz == nil { - // can not find in cache - toAdd = append(toAdd, opt.IDs[idx]) - continue - } - - detail, ok := biz.(string) - if !ok { - blog.Errorf("got invalid biz cache %v, rid: %v", biz, rid) - return nil, fmt.Errorf("got invalid biz cache %v", biz) - } - - if len(opt.Fields) != 0 { - all = append(all, *json.CutJsonDataWithFields(&detail, opt.Fields)) - } else { - all = append(all, detail) - } - - } - - if len(toAdd) != 0 { - // several business caches is not hit, try get from db and refresh them to cache. - details, err := c.listBusinessWithRefreshCache(ctx, toAdd, opt.Fields) - if err != nil { - blog.Errorf("get business list from db failed, err: %v, rid: %v", err, rid) - return nil, err - } - - all = append(all, details...) - } - - return all, nil + return details, nil } // ListModules list modules cache with options from redis. 
-func (c *Client) ListModules(ctx context.Context, opt *metadata.ListWithIDOption) ([]string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - if len(opt.IDs) == 0 { - return make([]string, 0), nil - } - - keys := make([]string, len(opt.IDs)) - list, err := c.rds.MGet(context.Background(), keys...).Result() - if err != nil { - blog.Errorf("list module %d info from cache failed, get from db directly, err: %v, rid: %v", opt.IDs, err, rid) - return c.listModuleWithRefreshCache(ctx, opt.IDs, opt.Fields) - } - - all := make([]string, 0) - toAdd := make([]int64, 0) - for idx, module := range list { - if module == nil { - // can not find in cache - toAdd = append(toAdd, opt.IDs[idx]) - continue - } - - detail, ok := module.(string) - if !ok { - blog.Errorf("got invalid module cache %v, rid: %v", module, rid) - return nil, fmt.Errorf("got invalid module cache %v", module) - } - - if len(opt.Fields) != 0 { - all = append(all, *json.CutJsonDataWithFields(&detail, opt.Fields)) - } else { - all = append(all, detail) - } - } - - if len(toAdd) != 0 { - // several module caches is not hit, try get from db and refresh them to cache. - details, err := c.listModuleWithRefreshCache(ctx, toAdd, opt.Fields) - if err != nil { - blog.Errorf("get module list from db failed, err: %v, rid: %v", err, rid) - return nil, err - } - - all = append(all, details...) - } - - return all, nil +func (c *Client) ListModules(kit *rest.Kit, opt *metadata.ListWithIDOption) ([]string, error) { + return c.listCacheDetail(kit, general.Module, "", opt) } // ListSets list sets from cache with options. -func (c *Client) ListSets(ctx context.Context, opt *metadata.ListWithIDOption) ([]string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - if len(opt.IDs) == 0 { - return make([]string, 0), nil - } - - keys := make([]string, len(opt.IDs)) - for idx, id := range opt.IDs { - keys[idx] = setKey.detailKey(id) - } - - list, err := c.rds.MGet(ctx, keys...).Result() - if err != nil { - blog.Errorf("list set %d info from cache failed, get from db directly, err: %v, rid: %v", opt.IDs, err, rid) - return c.listSetWithRefreshCache(ctx, opt.IDs, opt.Fields) - } - - all := make([]string, 0) - toAdd := make([]int64, 0) - for idx, set := range list { - if set == nil { - // can not find in cache - toAdd = append(toAdd, opt.IDs[idx]) - continue - } - - detail, ok := set.(string) - if !ok { - blog.Errorf("got invalid set cache %v, rid: %v", set, rid) - return nil, fmt.Errorf("got invalid set cache %v", set) - } - - if len(opt.Fields) != 0 { - all = append(all, *json.CutJsonDataWithFields(&detail, opt.Fields)) - } else { - all = append(all, detail) - } - } - - if len(toAdd) != 0 { - // several set caches is not hit, try get from db and refresh them to cache. - details, err := c.listSetWithRefreshCache(ctx, toAdd, opt.Fields) - if err != nil { - blog.Errorf("get set list from db failed, err: %v, rid: %v", err, rid) - return nil, err - } - - all = append(all, details...) - } - - return all, nil +func (c *Client) ListSets(kit *rest.Kit, opt *metadata.ListWithIDOption) ([]string, error) { + return c.listCacheDetail(kit, general.Set, "", opt) } // ListModuleDetails list module's all details from cache with module ids. 
-func (c *Client) ListModuleDetails(ctx context.Context, moduleIDs []int64) ([]string, error) { - if len(moduleIDs) == 0 { - return make([]string, 0), nil - } - - rid := ctx.Value(common.ContextRequestIDField) - - keys := make([]string, len(moduleIDs)) - for idx, id := range moduleIDs { - keys[idx] = moduleKey.detailKey(id) - } - - modules, err := c.rds.MGet(context.Background(), keys...).Result() - if err == nil { - list := make([]string, 0) - for idx, m := range modules { - if m == nil { - detail, isNotFound, err := c.getModuleDetailCheckNotFoundWithRefreshCache(ctx, moduleIDs[idx]) - // 跳过不存在的模块,因为作为批量查询的API,调用方希望查询到存在的资源,并自动过滤掉不存在的资源 - if isNotFound { - blog.Errorf("module %d not exist, err: %v, rid: %v", moduleIDs[idx], err, rid) - continue - } - - if err != nil { - blog.Errorf("get module %d detail from db failed, err: %v, rid: %v", moduleIDs[idx], err, rid) - return nil, err - } - - list = append(list, detail) - continue - } - list = append(list, m.(string)) - } - return list, nil - } - blog.Errorf("get modules details from redis failed, err: %v, rid: %v", err, rid) - - // can not get from redis, get from db directly and refresh cache. - return c.listModuleWithRefreshCache(ctx, moduleIDs, nil) +func (c *Client) ListModuleDetails(kit *rest.Kit, moduleIDs []int64) ([]string, error) { + return c.ListModules(kit, &metadata.ListWithIDOption{IDs: moduleIDs}) } // GetModuleDetail get a module's details with id from cache. -func (c *Client) GetModuleDetail(ctx context.Context, moduleID int64) (string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - mod, err := c.rds.Get(ctx, moduleKey.detailKey(moduleID)).Result() - if err == nil { - return mod, nil - } - - blog.Errorf("get module: %d failed from redis, err: %v, rid: %v", moduleID, err, rid) - // get from db directly and refresh the cache. - detail, _, err := c.getModuleDetailCheckNotFoundWithRefreshCache(ctx, moduleID) - return detail, err +func (c *Client) GetModuleDetail(kit *rest.Kit, moduleID int64) (string, error) { + return c.getCacheDetailByID(kit, general.Module, "", moduleID) } // GetSet get a set's details from cache with id. -func (c *Client) GetSet(ctx context.Context, setID int64) (string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - set, err := c.rds.Get(context.Background(), setKey.detailKey(setID)).Result() - if err == nil { - return set, nil - } - - blog.Errorf("get set: %d failed from redis failed, err: %v, rid: %v", setID, err, rid) - - // can not get set from cache, get from db directly and refresh cache. - detail, _, err := c.getSetDetailCheckNotFoundWithRefreshCache(ctx, setID) - return detail, err +func (c *Client) GetSet(kit *rest.Kit, setID int64) (string, error) { + return c.getCacheDetailByID(kit, general.Set, "", setID) } // ListSetDetails list set's details from cache with ids. 
-func (c *Client) ListSetDetails(ctx context.Context, setIDs []int64) ([]string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - if len(setIDs) == 0 { - return make([]string, 0), nil - } - - keys := make([]string, len(setIDs)) - for idx, set := range setIDs { - keys[idx] = setKey.detailKey(set) - } - - sets, err := c.rds.MGet(context.Background(), keys...).Result() - if err == nil && len(sets) != 0 { - all := make([]string, 0) - for idx, s := range sets { - if s == nil { - detail, isNotFound, err := c.getSetDetailCheckNotFoundWithRefreshCache(ctx, setIDs[idx]) - // 跳过不存在的集群,因为作为批量查询的API,调用方希望查询到存在的资源,并自动过滤掉不存在的资源 - if isNotFound { - blog.Errorf("set %d not exist, err: %v, rid: %v", setIDs[idx], err, rid) - continue - } - - if err != nil { - blog.Errorf("get set %d from mongodb failed, err: %v, rid: %v", setIDs[idx], err, rid) - return nil, err - } - all = append(all, detail) - continue - } - all = append(all, s.(string)) - } - - return all, nil - } - - blog.Errorf("get sets: %v failed from redis failed, err: %v, rid: %v", setIDs, err, rid) - - // get from db directly and refresh the cache. - return c.listSetWithRefreshCache(ctx, setIDs, nil) +func (c *Client) ListSetDetails(kit *rest.Kit, setIDs []int64) ([]string, error) { + return c.ListSets(kit, &metadata.ListWithIDOption{IDs: setIDs}) } // GetCustomLevelDetail get business's custom level object's instance detail information with instance id. -func (c *Client) GetCustomLevelDetail(ctx context.Context, objID, supplierAccount string, instID int64) ( - string, error) { - - rid := ctx.Value(common.ContextRequestIDField) - key := newCustomKey(objID) - custom, err := c.rds.Get(context.Background(), key.detailKey(instID)).Result() - if err == nil { - return custom, nil - } - - blog.Errorf("get biz custom level, obj:%s, inst: %d failed from redis, err: %v, rid: %v", objID, instID, err, rid) - - detail, _, err := c.getCustomDetailCheckNotFoundWithRefreshCache(ctx, key, objID, supplierAccount, instID) - return detail, err +func (c *Client) GetCustomLevelDetail(kit *rest.Kit, objID string, instID int64) (string, error) { + return c.getCacheDetailByID(kit, general.MainlineInstance, objID, instID) } // ListCustomLevelDetail business's custom level object's instance detail information with id list. 
-func (c *Client) ListCustomLevelDetail(ctx context.Context, objID, supplierAccount string, instIDs []int64) ( - []string, error) { - - if len(instIDs) == 0 { - return make([]string, 0), nil - } - - rid := ctx.Value(common.ContextRequestIDField) - - customKey := newCustomKey(objID) - keys := make([]string, len(instIDs)) - for idx, instID := range instIDs { - keys[idx] = customKey.detailKey(instID) - } - - customs, err := c.rds.MGet(context.Background(), keys...).Result() - if err == nil && len(customs) != 0 { - all := make([]string, 0) - for idx, cu := range customs { - if cu == nil { - detail, isNotFound, err := c.getCustomDetailCheckNotFoundWithRefreshCache(ctx, customKey, objID, - supplierAccount, instIDs[idx]) - // 跳过不存在的自定义节点,因为作为批量查询的API,调用方希望查询到存在的资源,并自动过滤掉不存在的资源 - if isNotFound { - blog.Errorf("custom layer %s/%d not exist, err: %v, rid: %v", objID, instIDs[idx], err, rid) - continue - } - - if err != nil { - blog.Errorf("get %s/%d detail from mongodb failed, err: %v, rid: %v", objID, instIDs[idx], err, rid) - return nil, err - } - all = append(all, detail) - continue - } - - all = append(all, cu.(string)) - } - return all, nil - } - - blog.Errorf("get biz custom level, obj:%s, inst: %v failed from redis, err: %v, rid: %v", objID, instIDs, err, rid) - // get from db directly and try refresh the cache. - return c.listCustomLevelDetailWithRefreshCache(ctx, customKey, objID, supplierAccount, instIDs) +func (c *Client) ListCustomLevelDetail(kit *rest.Kit, objID string, instIDs []int64) ([]string, error) { + return c.listCacheDetail(kit, general.MainlineInstance, objID, &metadata.ListWithIDOption{IDs: instIDs}) } // GetTopology get business's mainline topology with rank from biz model to host model. -func (c *Client) GetTopology() ([]string, error) { - - rank, err := c.rds.Get(context.Background(), topologyKey).Result() +func (c *Client) GetTopology(kit *rest.Kit) ([]string, error) { + rank, err := redis.Client().Get(context.Background(), genTopologyKey(kit)).Result() if err != nil { - blog.Errorf("get mainline topology from cache failed, get from db directly. err: %v", err) - return c.refreshAndGetTopologyRank() + blog.Errorf("get mainline topology from cache failed, get from db directly. err: %v, rid: %s", err, kit.Rid) + return refreshAndGetTopologyRank(kit) } topo := strings.Split(rank, ",") if len(topo) < 4 { // invalid topology - return c.refreshAndGetTopologyRank() + return refreshAndGetTopologyRank(kit) } return topo, nil diff --git a/src/source_controller/cacheservice/cache/mainline/custom.go b/src/source_controller/cacheservice/cache/mainline/custom.go deleted file mode 100644 index f4445df4c8..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/custom.go +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package mainline - -import ( - "context" - "fmt" - "strings" - "time" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/common/util" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/redis" - "configcenter/src/storage/driver/mongodb" - drvredis "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream" - "configcenter/src/storage/stream/types" - - "github.com/tidwall/gjson" -) - -// customLevel is a instance to watch custom object instance's change -// -// event and then try to refresh it to the cache. -// -// it based one the event loop watch mechanism which can ensure -// all the event can be watched safely, which also means the cache -// can be refreshed without lost and immediately. -type customLevel struct { - rds redis.Client - db dal.DB - observer *watchObserver - event stream.LoopInterface -} - -// Run start to watch and refresh the custom object instance's cache. -func (m *customLevel) Run() error { - // initialize observer at first. - m.observer = &watchObserver{ - observer: make(map[string]chan struct{}), - } - - rid := util.GenerateRID() - if err := m.runCustomLevelInstance(rid); err != nil { - return fmt.Errorf("run mainline instance watch failed, err: %v, rid: %s", err, rid) - } - - go m.runObserver() - - return nil -} - -// runObserver start to watch if the custom level is changed not. -// if yes, it will reset the watch and add or drop the watch accordingly. -// This will help us to re-watch the new custom level object's instance -// event, and refresh it's cache. -func (m *customLevel) runObserver() { - blog.Infof("start run custom level object watch observer.") - // wait for a moment and then start loop. - time.Sleep(5 * time.Minute) - for { - rid := util.GenerateRID() - blog.Infof("start run biz custom level cache observer, rid: %s", rid) - if err := m.runCustomLevelInstance(rid); err != nil { - blog.Errorf("run mainline instance watch failed, err: %v, rid: %s", err) - time.Sleep(time.Minute) - continue - } - - blog.Infof("finished run biz custom level cache observer, rid: %s", rid) - time.Sleep(5 * time.Minute) - } -} - -// runCustomLevelInstance to watch each custom level object instance's change -// for cache update. -func (m *customLevel) runCustomLevelInstance(rid string) error { - - relations, err := getMainlineTopology() - if err != nil { - blog.Errorf("get mainline topology from mongodb failed, err: %v, rid: %s", err, rid) - return err - } - - // rank start from biz to host - rank := rankMainlineTopology(relations) - - // refresh topology cache. - refreshTopologyRank(rank) - - // reconcile the custom watch - if err := m.reconcileCustomWatch(rid, rank); err != nil { - return err - } - - return nil -} - -// reconcileCustomWatch to check if the custom watch is already exist or not. -// if not exist, then do loop watch, otherwise, if the custom level object is -// deleted then stop the watch. -func (m *customLevel) reconcileCustomWatch(rid string, rank []string) error { - rankMap := make(map[string]bool) - for _, objID := range rank { - if objID == "biz" || objID == "module" || objID == "set" || objID == "host" { - // skip system embed object - continue - } - rankMap[objID] = true - } - - // check if custom watch need to be stopped at first. - for _, obj := range m.observer.getAllObjects() { - if _, exist := rankMap[obj]; !exist { - // a redundant watch exist, stop watch it. - blog.Warnf("reconcile custom watch, find a redundant one with object %s, stop it now. 
rid: %s", obj, rid) - - // delete resume token and start time at first, because it should not be reused. - key := newCustomKey(obj) - pipe := m.rds.Pipeline() - pipe.Del(key.resumeAtTimeKey()) - pipe.Del(key.resumeTokenKey()) - _, err := pipe.Exec() - if err != nil { - blog.Errorf("delete resume token and start time key failed, err: %v, rid: %s", err, key) - // try next round. - return err - } - - // stop watch now. - stopNotifier := m.observer.delete(obj) - if stopNotifier != nil { - // cancel the watch immediately. - close(stopNotifier) - } - - } - } - - if len(rankMap) == 0 { - // no business custom level exist, do nothing. - return nil - } - - // check if new watch is need secondly. - for objID := range rankMap { - if m.observer.exist(objID) { - // already exist, check next - continue - } - - // object watch not exist, need to add a new watch immediately. - stopNotifier := make(chan struct{}) - if err := m.runCustomWatch(rid, objID, stopNotifier); err != nil { - // close the notifier channel - close(stopNotifier) - - blog.Errorf("reconcile custom watch, run new watch with object %s, but failed, err: %v, rid: %s", - objID, err, rid) - return err - } - - blog.Infof("run new custom level object: %s instance watch to cache success. rid: %s", objID, rid) - - // loop watch success, it's time to add this object watch to observer for now. - m.observer.add(objID, stopNotifier) - } - - return nil -} - -// runCustomWatch launch a new custom object's instance watch, which will refresh -// the cache when a event is occurred. -func (m *customLevel) runCustomWatch(rid, objID string, stopNotifier chan struct{}) error { - key := newCustomKey(objID) - - handler := newTokenHandler(*key) - startTime, err := handler.getStartTimestamp(context.Background()) - if err != nil { - blog.Errorf("get biz custom object %s cache event start at time failed, err: %v, rid :%s", objID, err, rid) - return err - } - - loopOpts := &types.LoopOneOptions{ - LoopOptions: types.LoopOptions{ - Name: fmt.Sprintf("biz_custom_obj_%s_cache", objID), - WatchOpt: &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(map[string]interface{}), - Collection: common.GetInstTableName(objID, common.BKDefaultTenantID), - // start token will be automatically set when it's running, - // so we do not set here. - StartAfterToken: nil, - StartAtTime: startTime, - WatchFatalErrorCallback: handler.resetWatchTokenWithTimestamp, - }, - }, - TokenHandler: handler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 4, - RetryDuration: retryDuration, - }, - StopNotifier: stopNotifier, - }, - EventHandler: &types.OneHandler{ - DoAdd: func(event *types.Event) (retry bool) { - return m.onUpsert(key, event) - }, - DoUpdate: func(event *types.Event) (retry bool) { - return m.onUpsert(key, event) - }, - DoDelete: func(event *types.Event) (retry bool) { - return m.onDelete(key, event) - }, - }, - } - - blog.Infof("start run new custom level object: %s instance to cache with watch. rid: %s", objID, rid) - return m.event.WithOne(loopOpts) -} - -// onUpsert is to upsert the custom object instance cache when a -// add/update/upsert event is triggered. 
-func (m *customLevel) onUpsert(key *keyGenerator, e *types.Event) bool { - if blog.V(4) { - blog.Infof("received biz custom cache event, op: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, e.ID()) - } - - instID := gjson.GetBytes(e.DocBytes, common.BKInstIDField).Int() - if instID <= 0 { - blog.Errorf("received invalid biz custom object instance event, skip, op: %s, doc: %s, rid: %s", - e.OperationType, e.DocBytes, e.ID()) - return false - } - - // update the cache. - err := m.rds.Set(context.Background(), key.detailKey(instID), e.DocBytes, key.detailExpireDuration).Err() - if err != nil { - blog.Errorf("update module cache failed, op: %s, doc: %s, err: %v, rid: %s", - e.OperationType, e.DocBytes, err, e.ID()) - return true - } - - return false -} - -// onDelete delete business cache when a custom object's instance is delete. -func (m *customLevel) onDelete(key *keyGenerator, e *types.Event) bool { - filter := mapstr.MapStr{ - "coll": e.Collection, - "oid": e.Oid, - } - - module := new(customArchive) - err := m.db.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail").One(context.Background(), module) - if err != nil { - blog.Errorf("get biz custom level archive detail failed, err: %v, rid: %s", err, e.ID()) - if m.db.IsNotFoundError(err) { - return false - } - return true - } - - blog.Infof("received delete custom instance %d/%s event, rid: %s", module.Detail.InstanceID, - module.Detail.InstanceName, e.ID()) - - // delete the cache. - if err := m.rds.Del(context.Background(), key.detailKey(module.Detail.InstanceID)).Err(); err != nil { - blog.Errorf("delete custom instance cache failed, err: %v, rid: %s", err, e.ID()) - return true - } - - return false -} - -// getMainlineTopology get mainline topology's association details. -func getMainlineTopology() ([]mainlineAssociation, error) { - relations := make([]mainlineAssociation, 0) - filter := mapstr.MapStr{ - common.AssociationKindIDField: common.AssociationKindMainline, - } - err := mongodb.Client().Table(common.BKTableNameObjAsst).Find(filter).All(context.Background(), &relations) - if err != nil { - blog.Errorf("get mainline topology association failed, err: %v", err) - return nil, err - } - return relations, nil -} - -// rankMainlineTopology TODO -// rankTopology is to rank the biz topology to a array, start from biz to host -func rankMainlineTopology(relations []mainlineAssociation) []string { - rank := make([]string, 0) - next := "biz" - rank = append(rank, next) - for _, relation := range relations { - if relation.AssociateTo == next { - rank = append(rank, relation.ObjectID) - next = relation.ObjectID - continue - } else { - for _, rel := range relations { - if rel.AssociateTo == next { - rank = append(rank, rel.ObjectID) - next = rel.ObjectID - break - } - } - } - } - return rank -} - -// refreshTopologyRank is to refresh the business's rank informations. -func refreshTopologyRank(rank []string) { - // then set the rank to cache - value := strings.Join(rank, ",") - err := drvredis.Client().Set(context.Background(), topologyKey, value, detailTTLDuration).Err() - if err != nil { - blog.Errorf("refresh mainline topology rank, but update to cache failed, err: %v", err) - // do not return, it will be refreshed next round. 
- } -} diff --git a/src/source_controller/cacheservice/cache/mainline/handler.go b/src/source_controller/cacheservice/cache/mainline/handler.go deleted file mode 100644 index 983b0531e2..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/handler.go +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import ( - "context" - "encoding/json" - "time" - - "configcenter/src/storage/dal/redis" - drv "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream/types" -) - -// newTokenHandler initialize a token handler. -func newTokenHandler(key keyGenerator) *tokenHandler { - return &tokenHandler{ - key: key, - rds: drv.Client(), - } -} - -// tokenHandler is used to handle all the watch token related operations. -// which help the cache instance to manage its token, so that it can be -// re-watched event from where they stopped when the task is restarted or -// some unexpected exceptions happens. -type tokenHandler struct { - key keyGenerator - rds redis.Client -} - -// SetLastWatchToken set watch token and resume time at the same time. -func (t *tokenHandler) SetLastWatchToken(_ context.Context, token string) error { - stamp := &types.TimeStamp{ - Sec: uint32(time.Now().Unix()), - Nano: 0, - } - atTime, err := json.Marshal(stamp) - if err != nil { - return err - } - - pipe := t.rds.Pipeline() - pipe.Set(t.key.resumeTokenKey(), token, 0) - pipe.Set(t.key.resumeAtTimeKey(), string(atTime), 0) - _, err = pipe.Exec() - if err != nil { - return err - } - - return nil -} - -// GetStartWatchToken get the last watched token, it can be empty. -func (t *tokenHandler) GetStartWatchToken(ctx context.Context) (token string, err error) { - token, err = t.rds.Get(ctx, t.key.resumeTokenKey()).Result() - if err != nil { - if redis.IsNilErr(err) { - return "", nil - } - return "", err - } - return token, err -} - -// getStartTimestamp get the last event's timestamp. -func (t *tokenHandler) getStartTimestamp(ctx context.Context) (*types.TimeStamp, error) { - js, err := t.rds.Get(ctx, t.key.resumeAtTimeKey()).Result() - if err != nil { - if redis.IsNilErr(err) { - // start from now. - return &types.TimeStamp{Sec: uint32(time.Now().Unix())}, nil - } - return nil, err - } - - stamp := new(types.TimeStamp) - if len(js) == 0 { - // it will be empty when it is never set. - return stamp, nil - } - - if err := json.Unmarshal([]byte(js), stamp); err != nil { - return nil, err - } - - return stamp, nil -} - -// resetWatchTokenWithTimestamp TODO -// resetWatchToken reset the watch token, and update startAtTime time, so that we can -// re-watch from the timestamp we set now. 
-func (t *tokenHandler) resetWatchTokenWithTimestamp(startAtTime types.TimeStamp) error { - atTime, err := json.Marshal(startAtTime) - if err != nil { - return err - } - - pipe := t.rds.Pipeline() - pipe.Set(t.key.resumeTokenKey(), "", 0) - pipe.Set(t.key.resumeAtTimeKey(), string(atTime), 0) - _, err = pipe.Exec() - if err != nil { - return err - } - - return nil -} diff --git a/src/source_controller/cacheservice/cache/mainline/key.go b/src/source_controller/cacheservice/cache/mainline/key.go deleted file mode 100644 index 671eaaace0..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/key.go +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import ( - "fmt" - "time" - - "configcenter/src/common" -) - -// TODO replace these cache with the general resource cache -// key file is used to manage all the business's mainline topology -// cache keys with keyGenerator. - -const ( - bizNamespace = common.BKCacheKeyV3Prefix + "biz" - detailTTLDuration = 180 * time.Minute -) - -// business's cache key generator instance. -var bizKey = keyGenerator{ - namespace: bizNamespace, - name: bizKeyName, - detailExpireDuration: detailTTLDuration, -} - -// module's cache key generator instance. -var moduleKey = keyGenerator{ - namespace: bizNamespace, - name: moduleKeyName, - detailExpireDuration: detailTTLDuration, -} - -// set's cache key generator instance. -var setKey = keyGenerator{ - namespace: bizNamespace, - name: setKeyName, - detailExpireDuration: detailTTLDuration, -} - -type keyName string - -const ( - bizKeyName keyName = common.BKInnerObjIDApp - setKeyName keyName = common.BKInnerObjIDSet - moduleKeyName keyName = common.BKInnerObjIDModule -) - -// newCustomKey initialize a custom object's key generator with object. -func newCustomKey(objID string) *keyGenerator { - return &keyGenerator{ - namespace: bizNamespace, - name: keyName(objID), - detailExpireDuration: detailTTLDuration, - } -} - -// keyGenerator is a mainline instance's cache key generator. -type keyGenerator struct { - namespace string - name keyName - detailExpireDuration time.Duration -} - -// resumeTokenKey is used to store the event resume token data -func (k keyGenerator) resumeTokenKey() string { - return fmt.Sprintf("%s:%s:resume_token", k.namespace, k.name) -} - -// resumeAtTimeKey is used to store the time where to resume the event stream. -func (k keyGenerator) resumeAtTimeKey() string { - return fmt.Sprintf("%s:%s:resume_at_time", k.namespace, k.name) -} - -// detailKey is to generate the key to store the instance's detail information. 
-func (k keyGenerator) detailKey(instID int64) string { - return fmt.Sprintf("%s:%s_detail:%d", k.namespace, k.name, instID) -} diff --git a/src/source_controller/cacheservice/cache/mainline/key_test.go b/src/source_controller/cacheservice/cache/mainline/key_test.go deleted file mode 100644 index 7b41d4cbd1..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/key_test.go +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import "testing" - -func TestBusinessKey(t *testing.T) { - // test resume token key - if bizKey.resumeTokenKey() != "cc:v3:biz:biz:resume_token" { - t.Fatalf("invalid biz resume token key") - } - - // test resume at time key - if bizKey.resumeAtTimeKey() != "cc:v3:biz:biz:resume_at_time" { - t.Fatalf("invalid biz resume at time key") - } - - // test detail key - if bizKey.detailKey(1) != "cc:v3:biz:biz_detail:1" { - t.Fatalf("invalid biz resume at time key") - } -} - -func TestCustomKey(t *testing.T) { - country := newCustomKey("country") - // test resume token key - if country.resumeTokenKey() != "cc:v3:biz:country:resume_token" { - t.Fatalf("invalid custom country object instance resume token key") - } - - // test resume at time key - if country.resumeAtTimeKey() != "cc:v3:biz:country:resume_at_time" { - t.Fatalf("invalid custom country object instance resume at time key") - } - - // test detail key - if country.detailKey(1) != "cc:v3:biz:country_detail:1" { - t.Fatalf("invalid custom country object instance resume at time key") - } - -} diff --git a/src/source_controller/cacheservice/cache/mainline/logic.go b/src/source_controller/cacheservice/cache/mainline/logic.go deleted file mode 100644 index c7c611a558..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/logic.go +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import ( - "context" - "errors" - - "configcenter/src/common" - "configcenter/src/common/blog" - errs "configcenter/src/common/errors" - "configcenter/src/common/json" - "configcenter/src/common/mapstr" - "configcenter/src/common/util" -) - -// getBusinessFromMongo get business detail from cache directly. 
-func (c *Client) getBusinessFromMongo(bizID int64) (string, error) { - biz := make(map[string]interface{}) - filter := mapstr.MapStr{ - common.BKAppIDField: bizID, - } - err := c.db.Table(common.BKTableNameBaseApp).Find(filter).One(context.Background(), &biz) - if err != nil { - blog.Errorf("get business %d info from db, but failed, err: %v", bizID, err) - return "", errs.New(common.CCErrCommDBSelectFailed, err.Error()) - } - - js, err := json.Marshal(biz) - if err != nil { - return "", err - } - - return string(js), nil -} - -// listBusinessWithRefreshCache list business detail from db and refresh cache at the same time. -func (c *Client) listBusinessWithRefreshCache(ctx context.Context, ids []int64, fields []string) ([]string, error) { - if len(ids) == 0 { - return nil, errors.New("id list is empty") - } - - rid := ctx.Value(common.ContextRequestIDField) - - list := make([]map[string]interface{}, 0) - filter := mapstr.MapStr{ - common.BKAppIDField: mapstr.MapStr{ - common.BKDBIN: ids, - }, - } - - err := c.db.Table(common.BKTableNameBaseApp).Find(filter).All(context.Background(), &list) - if err != nil { - blog.Errorf("list business info from db failed, err: %v, rid: %v", err, rid) - return nil, errs.New(common.CCErrCommDBSelectFailed, err.Error()) - } - - pipe := c.rds.Pipeline() - all := make([]string, len(list)) - for idx, biz := range list { - - id, err := util.GetInt64ByInterface(biz[common.BKAppIDField]) - if err != nil { - return nil, err - } - - js, err := json.Marshal(biz) - if err != nil { - return nil, err - } - - pipe.Set(bizKey.detailKey(id), js, detailTTLDuration) - - all[idx] = string(js) - if len(fields) != 0 { - all[idx] = *json.CutJsonDataWithFields(&all[idx], fields) - } - } - - _, err = pipe.Exec() - if err != nil { - blog.Errorf("update biz cache failed, err: %v, rid: %v", err, rid) - // do not return, cache will be refresh for the next round - } - - return all, nil -} - -// listModuleWithRefreshCache list modules detail from db and refresh the cache at the same time. -func (c *Client) listModuleWithRefreshCache(ctx context.Context, ids []int64, fields []string) ([]string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - list := make([]map[string]interface{}, 0) - filter := mapstr.MapStr{ - common.BKModuleIDField: mapstr.MapStr{ - common.BKDBIN: ids, - }, - } - - err := c.db.Table(common.BKTableNameBaseModule).Find(filter).All(context.Background(), &list) - if err != nil { - blog.Errorf("list module info from db failed, err: %v, rid: %v", err, rid) - return nil, errs.New(common.CCErrCommDBSelectFailed, err.Error()) - } - - pipe := c.rds.Pipeline() - all := make([]string, len(list)) - for idx, mod := range list { - id, err := util.GetInt64ByInterface(mod[common.BKModuleIDField]) - if err != nil { - return nil, err - } - - js, err := json.Marshal(mod) - if err != nil { - return nil, err - } - - pipe.Set(moduleKey.detailKey(id), js, detailTTLDuration) - - all[idx] = string(js) - if len(fields) != 0 { - all[idx] = *json.CutJsonDataWithFields(&all[idx], fields) - } - } - - _, err = pipe.Exec() - if err != nil { - blog.Errorf("update module cache failed, err: %v, rid: %v", err, rid) - // do not return, cache will be refresh for the next round - } - - return all, nil -} - -// listSetWithRefreshCache list set from db directly and refresh cache at the same time. 
-func (c *Client) listSetWithRefreshCache(ctx context.Context, ids []int64, fields []string) ([]string, error) { - rid := ctx.Value(common.ContextRequestIDField) - - list := make([]map[string]interface{}, 0) - filter := mapstr.MapStr{ - common.BKSetIDField: mapstr.MapStr{ - common.BKDBIN: ids, - }, - } - - err := c.db.Table(common.BKTableNameBaseSet).Find(filter).All(context.Background(), &list) - if err != nil { - blog.Errorf("list set info from db failed, err: %v, rid: %v", err, rid) - return nil, errs.New(common.CCErrCommDBSelectFailed, err.Error()) - } - - pipe := c.rds.Pipeline() - all := make([]string, len(list)) - for idx, set := range list { - id, err := util.GetInt64ByInterface(set[common.BKSetIDField]) - if err != nil { - return nil, err - } - - js, err := json.Marshal(set) - if err != nil { - return nil, err - } - - pipe.Set(setKey.detailKey(id), js, detailTTLDuration) - - all[idx] = string(js) - if len(fields) != 0 { - all[idx] = *json.CutJsonDataWithFields(&all[idx], fields) - } - } - - _, err = pipe.Exec() - if err != nil { - blog.Errorf("update set cache failed, err: %v, rid: %v", err, rid) - // do not return, cache will be refresh for the next round - } - - return all, nil -} - -// getModuleDetailCheckNotFoundWithRefreshCache get module from db directly and refresh cache at the same time. -func (c *Client) getModuleDetailCheckNotFoundWithRefreshCache(ctx context.Context, id int64) (string, bool, error) { - rid := ctx.Value(common.ContextRequestIDField) - - mod := make(map[string]interface{}) - filter := mapstr.MapStr{ - common.BKModuleIDField: id, - } - - if err := c.db.Table(common.BKTableNameBaseModule).Find(filter).One(context.Background(), &mod); err != nil { - blog.Errorf("get module %d detail from mongo failed, err: %v, rid: %v", id, err, rid) - - // if module is not found, returns not found flag - if c.db.IsNotFoundError(err) { - return "", true, err - } - return "", false, err - } - - js, err := json.Marshal(mod) - if err != nil { - return "", false, err - } - - // refresh cache - err = c.rds.Set(ctx, moduleKey.detailKey(id), js, detailTTLDuration).Err() - if err != nil { - blog.Errorf("update module: %d cache failed, err: %v, rid: %v", id, err, rid) - // do not return, cache will be refresh for the next round - } - - return string(js), false, nil -} - -// getSetDetailCheckNotFoundWithRefreshCache get set from db directly and refresh cache at the same time. -func (c *Client) getSetDetailCheckNotFoundWithRefreshCache(ctx context.Context, id int64) (string, bool, error) { - rid := ctx.Value(common.ContextRequestIDField) - - set := make(map[string]interface{}) - filter := mapstr.MapStr{ - common.BKSetIDField: id, - } - - if err := c.db.Table(common.BKTableNameBaseSet).Find(filter).One(context.Background(), &set); err != nil { - blog.Errorf("get set %d detail from mongo failed, err: %v, rid: %v", id, err, rid) - - // if set is not found, returns not found flag - if c.db.IsNotFoundError(err) { - return "", true, err - } - return "", false, err - } - - js, err := json.Marshal(set) - if err != nil { - return "", false, err - } - - // refresh cache - err = c.rds.Set(ctx, setKey.detailKey(id), js, detailTTLDuration).Err() - if err != nil { - blog.Errorf("update set: %d cache failed, err: %v, rid: %v", id, err, rid) - // do not return, cache will be refresh for the next round - } - - return string(js), false, nil -} - -// getCustomDetailCheckNotFoundWithRefreshCache get custom instance -// from db directly and refresh cache at the same time. 
-func (c *Client) getCustomDetailCheckNotFoundWithRefreshCache(ctx context.Context, key *keyGenerator, objID, - supplierAccount string, instID int64) (string, bool, error) { - - rid := ctx.Value(common.ContextRequestIDField) - - filter := mapstr.MapStr{ - common.BKObjIDField: objID, - common.BKInstIDField: instID, - } - instance := make(map[string]interface{}) - instTableName := common.GetObjectInstTableName(objID, supplierAccount) - - err := c.db.Table(instTableName).Find(filter).One(context.Background(), &instance) - // if module is not found, returns not found flag - if c.db.IsNotFoundError(err) { - return "", true, err - } - - if err != nil { - blog.Errorf("get custom level object: %s, inst: %d from db failed, err: %v, rid: %v", objID, instID, err, rid) - return "", false, err - } - - js, err := json.Marshal(instance) - if err != nil { - return "", false, err - } - - // refresh cache - err = c.rds.Set(ctx, key.detailKey(instID), js, detailTTLDuration).Err() - if err != nil { - blog.Errorf("update object: %s, inst: %d cache failed, err: %v, rid: %v", objID, instID, err, rid) - // do not return - } - - return string(js), false, nil -} - -// listCustomLevelDetailWithRefreshCache list custom instance from db directly and refresh cache at the same time. -func (c *Client) listCustomLevelDetailWithRefreshCache(ctx context.Context, key *keyGenerator, objID, - supplierAccount string, instIDs []int64) ([]string, error) { - - rid := ctx.Value(common.ContextRequestIDField) - - filter := mapstr.MapStr{ - common.BKObjIDField: objID, - common.BKInstIDField: mapstr.MapStr{ - common.BKDBIN: instIDs, - }, - } - - tableName := common.GetObjectInstTableName(objID, supplierAccount) - instance := make([]map[string]interface{}, 0) - err := c.db.Table(tableName).Find(filter).All(ctx, &instance) - if err != nil { - blog.Errorf("get custom level object: %s, inst: %v from db failed, err: %v, rid: %v", objID, instIDs, err, rid) - return nil, err - } - - pipe := c.rds.Pipeline() - all := make([]string, len(instance)) - for idx := range instance { - js, err := json.Marshal(instance[idx]) - if err != nil { - return nil, err - } - all[idx] = string(js) - - id, err := util.GetInt64ByInterface(instance[idx][common.BKInstIDField]) - if err != nil { - return nil, err - } - - pipe.Set(key.detailKey(id), js, detailTTLDuration) - } - - _, err = pipe.Exec() - if err != nil { - blog.Errorf("update custom object instance cache failed, err: %v, rid: %v", err, rid) - // do not return, cache will be refresh for the next round - } - - return all, nil -} - -// refreshAndGetTopologyRank refresh the business's topology rank to cache, from biz model to host model. 
-func (c *Client) refreshAndGetTopologyRank() ([]string, error) { - // read information from mongodb - relations, err := getMainlineTopology() - if err != nil { - blog.Errorf("refresh mainline topology rank, but get it from mongodb failed, err: %v", err) - return nil, err - } - // rank start from biz to host - rank := rankMainlineTopology(relations) - refreshTopologyRank(rank) - - return rank, nil -} diff --git a/src/source_controller/cacheservice/cache/mainline/mainline.go b/src/source_controller/cacheservice/cache/mainline/mainline.go index 25ae1d758f..927683eb3c 100644 --- a/src/source_controller/cacheservice/cache/mainline/mainline.go +++ b/src/source_controller/cacheservice/cache/mainline/mainline.go @@ -1,2 +1,144 @@ -// Package mainline TODO +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. + */ + +// Package mainline is the mainline association cache package mainline + +import ( + "strings" + "time" + + "configcenter/pkg/tenant" + "configcenter/src/apimachinery/discovery" + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/http/rest" + "configcenter/src/common/mapstr" + "configcenter/src/storage/driver/mongodb" + "configcenter/src/storage/driver/redis" +) + +// mainlineCache is an instance to refresh mainline topology cache. +type mainlineCache struct { + isMaster discovery.ServiceManageInterface +} + +// Run start to watch and refresh the mainline topology cache. +func (m *mainlineCache) Run() error { + kit := rest.NewKit() + if err := m.refreshMainlineTopoCache(kit); err != nil { + return err + } + + go func() { + // wait for a moment and then start loop. + time.Sleep(5 * time.Minute) + for { + if !m.isMaster.IsMaster() { + blog.V(4).Infof("loop refresh mainline topology cache, but not master, skip.") + time.Sleep(time.Minute) + continue + } + + kit := rest.NewKit() + blog.V(4).Infof("start refresh mainline topology cache, rid: %s", kit.Rid) + if err := m.refreshMainlineTopoCache(kit); err != nil { + time.Sleep(time.Minute) + continue + } + + blog.V(4).Infof("finished refresh mainline topology cache, rid: %s", kit.Rid) + time.Sleep(5 * time.Minute) + } + }() + + return nil +} + +// refreshMainlineTopoCache refresh mainline topology cache for all tenants. 
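The rankMainlineTopology helper added below replaces the old nested-loop ranking with a single parent-to-child map walk starting from biz. A standalone sketch of that walk, with made-up relation values ("country" is a hypothetical custom level):

package main

import "fmt"

// mainlineAssociation mirrors the cache's struct: ObjectID associates to AssociateTo (its parent).
type mainlineAssociation struct {
	AssociateTo string
	ObjectID    string
}

// rank walks the associations from biz downwards, the same way rankMainlineTopology does.
func rank(relations []mainlineAssociation) []string {
	asstMap := make(map[string]string)
	for _, r := range relations {
		asstMap[r.AssociateTo] = r.ObjectID
	}

	out := make([]string, 0)
	for next := "biz"; next != ""; next = asstMap[next] {
		out = append(out, next)
	}
	return out
}

func main() {
	relations := []mainlineAssociation{
		{AssociateTo: "set", ObjectID: "module"},
		{AssociateTo: "biz", ObjectID: "country"},
		{AssociateTo: "country", ObjectID: "set"},
		{AssociateTo: "module", ObjectID: "host"},
	}
	fmt.Println(rank(relations)) // [biz country set module host]
}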
+func (m *mainlineCache) refreshMainlineTopoCache(kit *rest.Kit) error { + err := tenant.ExecForAllTenants(func(tenantID string) error { + kit = kit.WithTenant(tenantID) + _, err := refreshAndGetTopologyRank(kit) + if err != nil { + blog.Errorf("refresh tenant: %s mainline topology cache failed, err: %v, rid: %s", tenantID, err, kit.Rid) + return err + } + return nil + }) + if err != nil { + return err + } + + return nil +} + +// getMainlineTopology get mainline topology's association details. +func getMainlineTopology(kit *rest.Kit) ([]mainlineAssociation, error) { + relations := make([]mainlineAssociation, 0) + filter := mapstr.MapStr{ + common.AssociationKindIDField: common.AssociationKindMainline, + } + err := mongodb.Shard(kit.ShardOpts()).Table(common.BKTableNameObjAsst).Find(filter).All(kit.Ctx, &relations) + if err != nil { + blog.Errorf("get mainline topology association failed, err: %v, rid: %s", err, kit.Rid) + return nil, err + } + return relations, nil +} + +// rankMainlineTopology ranks the biz topology to an array, starting from biz to host +func rankMainlineTopology(relations []mainlineAssociation) []string { + asstMap := make(map[string]string) + for _, relation := range relations { + asstMap[relation.AssociateTo] = relation.ObjectID + } + + rank := make([]string, 0) + + for next := "biz"; next != ""; next = asstMap[next] { + rank = append(rank, next) + } + + return rank +} + +// refreshTopologyRank is to refresh the business's rank information. +func refreshTopologyRank(kit *rest.Kit, rank []string) { + // then set the rank to cache + value := strings.Join(rank, ",") + err := redis.Client().Set(kit.Ctx, genTopologyKey(kit), value, detailTTLDuration).Err() + if err != nil { + blog.Errorf("refresh mainline topology rank, but update to cache failed, err: %v", err) + // do not return, it will be refreshed next round. + } +} + +// refreshAndGetTopologyRank refresh the business's topology rank to cache, from biz model to host model. +func refreshAndGetTopologyRank(kit *rest.Kit) ([]string, error) { + // read information from mongodb + relations, err := getMainlineTopology(kit) + if err != nil { + blog.Errorf("refresh mainline topology rank, but get it from mongodb failed, err: %v, rid: %s", err, kit.Rid) + return nil, err + } + // rank start from biz to host + rank := rankMainlineTopology(relations) + refreshTopologyRank(kit, rank) + + return rank, nil +} diff --git a/src/source_controller/cacheservice/cache/mainline/module.go b/src/source_controller/cacheservice/cache/mainline/module.go deleted file mode 100644 index 1250e1dba2..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/module.go +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package mainline - -import ( - "context" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/redis" - "configcenter/src/storage/stream" - "configcenter/src/storage/stream/types" - - "github.com/tidwall/gjson" -) - -// module is a instance to watch module's change event and -// then try to refresh it to the cache. -// it based one the event loop watch mechanism which can ensure -// all the event can be watched safely, which also means the cache -// can be refreshed without lost and immediately. -type module struct { - key keyGenerator - event stream.LoopInterface - rds redis.Client - db dal.DB -} - -// Run start to watch and refresh the module's cache. -func (m *module) Run() error { - - // initialize module token handler key. - handler := newTokenHandler(m.key) - startTime, err := handler.getStartTimestamp(context.Background()) - if err != nil { - blog.Errorf("get module cache event start at time failed, err: %v", err) - return err - } - - loopOpts := &types.LoopOneOptions{ - LoopOptions: types.LoopOptions{ - Name: "module_cache", - WatchOpt: &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(map[string]interface{}), - Collection: common.BKTableNameBaseModule, - // start token will be automatically set when it's running, - // so we do not set here. - StartAfterToken: nil, - StartAtTime: startTime, - WatchFatalErrorCallback: handler.resetWatchTokenWithTimestamp, - }, - }, - TokenHandler: handler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 4, - RetryDuration: retryDuration, - }, - }, - EventHandler: &types.OneHandler{ - DoAdd: m.onUpsert, - DoUpdate: m.onUpsert, - DoDelete: m.onDelete, - }, - } - - return m.event.WithOne(loopOpts) -} - -// onUpsert set or update module cache. -func (m *module) onUpsert(e *types.Event) bool { - if blog.V(4) { - blog.Infof("received module cache event, op: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, e.ID()) - } - - moduleID := gjson.GetBytes(e.DocBytes, common.BKModuleIDField).Int() - if moduleID <= 0 { - blog.Errorf("received invalid module event, skip, op: %s, doc: %s, rid: %s", - e.OperationType, e.DocBytes, e.ID()) - return false - } - - // update the cache. - err := m.rds.Set(context.Background(), m.key.detailKey(moduleID), e.DocBytes, m.key.detailExpireDuration).Err() - if err != nil { - blog.Errorf("update module cache failed, op: %s, doc: %s, err: %v, rid: %s", - e.OperationType, e.DocBytes, err, e.ID()) - return true - } - - return false -} - -// onDelete delete module cache. -func (m *module) onDelete(e *types.Event) bool { - - filter := mapstr.MapStr{ - "coll": common.BKTableNameBaseModule, - "oid": e.Oid, - } - - module := new(moduleArchive) - err := m.db.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail").One(context.Background(), module) - if err != nil { - blog.Errorf("get module del archive detail failed, err: %v, rid: %s", err, e.ID()) - if m.db.IsNotFoundError(err) { - return false - } - return true - } - - blog.Infof("received delete module %d/%s event, rid: %s", module.Detail.ModuleID, module.Detail.ModuleName, e.ID()) - - // delete the cache. 
- if err := m.rds.Del(context.Background(), m.key.detailKey(module.Detail.ModuleID)).Err(); err != nil { - blog.Errorf("delete module cache failed, err: %v, rid: %s", err, e.ID()) - return true - } - - return false -} diff --git a/src/source_controller/cacheservice/cache/mainline/set.go b/src/source_controller/cacheservice/cache/mainline/set.go deleted file mode 100644 index 32b07dcab3..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/set.go +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import ( - "context" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/redis" - "configcenter/src/storage/stream" - "configcenter/src/storage/stream/types" - - "github.com/tidwall/gjson" -) - -// set is a instance to watch set's change event and -// then try to refresh it to the cache. -// it based one the event loop watch mechanism which can ensure -// all the event can be watched safely, which also means the cache -// can be refreshed without lost and immediately. -type set struct { - key keyGenerator - event stream.LoopInterface - rds redis.Client - db dal.DB -} - -// Run start to watch and refresh the set's cache. -func (s *set) Run() error { - - // initialize set token handler key. - handler := newTokenHandler(s.key) - startTime, err := handler.getStartTimestamp(context.Background()) - if err != nil { - blog.Errorf("get set cache event start at time failed, err: %v", err) - return err - } - - loopOpts := &types.LoopOneOptions{ - LoopOptions: types.LoopOptions{ - Name: "set_cache", - WatchOpt: &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(map[string]interface{}), - Collection: common.BKTableNameBaseSet, - // start token will be automatically set when it's running, - // so we do not set here. - StartAfterToken: nil, - StartAtTime: startTime, - WatchFatalErrorCallback: handler.resetWatchTokenWithTimestamp, - }, - }, - TokenHandler: handler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 4, - RetryDuration: retryDuration, - }, - }, - EventHandler: &types.OneHandler{ - DoAdd: s.onUpsert, - DoUpdate: s.onUpsert, - DoDelete: s.onDelete, - }, - } - - return s.event.WithOne(loopOpts) -} - -// onUpsert set or update set cache. -func (s *set) onUpsert(e *types.Event) bool { - if blog.V(4) { - blog.Infof("received set cache event, op: %s, doc: %s, rid: %s", e.OperationType, e.DocBytes, e.ID()) - } - - setID := gjson.GetBytes(e.DocBytes, common.BKSetIDField).Int() - if setID <= 0 { - blog.Errorf("received invalid set event, skip, op: %s, doc: %s, rid: %s", - e.OperationType, e.DocBytes, e.ID()) - return false - } - - // update the cache. 
- err := s.rds.Set(context.Background(), s.key.detailKey(setID), string(e.DocBytes), s.key.detailExpireDuration).Err() - if err != nil { - blog.Errorf("update set cache failed, op: %s, doc: %s, err: %v, rid: %s", - e.OperationType, string(e.DocBytes), err, e.ID()) - return true - } - - return false -} - -// onDelete delete set cache. -func (s *set) onDelete(e *types.Event) bool { - - filter := mapstr.MapStr{ - "coll": common.BKTableNameBaseSet, - "oid": e.Oid, - } - - set := new(setArchive) - err := s.db.Table(common.BKTableNameDelArchive).Find(filter).Fields("detail").One(context.Background(), set) - if err != nil { - blog.Errorf("get set del archive detail failed, err: %v, rid: %s", err, e.ID()) - if s.db.IsNotFoundError(err) { - return false - } - return true - } - - blog.Infof("received delete set %d/%s event, rid: %s", set.Detail.SetID, set.Detail.SetName, e.ID()) - - // delete the cache. - if err := s.rds.Del(context.Background(), s.key.detailKey(set.Detail.SetID)).Err(); err != nil { - blog.Errorf("delete set cache failed, err: %v, rid: %s", err, e.ID()) - return true - } - - return false -} diff --git a/src/source_controller/cacheservice/cache/mainline/types.go b/src/source_controller/cacheservice/cache/mainline/types.go index 32e1892683..2f0fec8e00 100644 --- a/src/source_controller/cacheservice/cache/mainline/types.go +++ b/src/source_controller/cacheservice/cache/mainline/types.go @@ -13,103 +13,22 @@ limitations under the License. package mainline import ( - "sync" "time" -) - -const topologyKey = bizNamespace + ":custom:topology" -const retryDuration = 500 * time.Millisecond - -type cacheCollection struct { - business *business - set *set - module *module - custom *customLevel -} - -type bizArchive struct { - Detail bizBaseInfo `json:"detail" bson:"detail"` -} - -type bizBaseInfo struct { - BusinessID int64 `json:"bk_biz_id" bson:"bk_biz_id"` - BusinessName string `json:"bk_biz_name" bson:"bk_biz_name"` -} -type moduleArchive struct { - Detail moduleBaseInfo `json:"detail" bson:"detail"` -} - -type moduleBaseInfo struct { - ModuleID int64 `json:"bk_module_id" bson:"bk_module_id"` - ModuleName string `json:"bk_module_name" bson:"bk_module_name"` -} + "configcenter/src/common" + "configcenter/src/common/http/rest" +) -type setArchive struct { - Detail setBaseInfo `json:"detail" bson:"detail"` -} +const ( + topologyKey = common.BKCacheKeyV3Prefix + "biz:custom:topology" + detailTTLDuration = 180 * time.Minute +) -type setBaseInfo struct { - SetID int64 `json:"bk_set_id" bson:"bk_set_id"` - SetName string `json:"bk_set_name" bson:"bk_set_name"` - ParentID int64 `json:"bk_parent_id" bson:"bk_parent_id"` +func genTopologyKey(kit *rest.Kit) string { + return topologyKey + ":" + kit.TenantID } type mainlineAssociation struct { AssociateTo string `json:"bk_asst_obj_id" bson:"bk_asst_obj_id"` ObjectID string `json:"bk_obj_id" bson:"bk_obj_id"` } - -type customArchive struct { - Detail customInstanceBase `json:"detail" bson:"detail"` -} - -type customInstanceBase struct { - ObjectID string `json:"bk_obj_id" bson:"bk_obj_id"` - InstanceID int64 `json:"bk_inst_id" bson:"bk_inst_id"` - InstanceName string `json:"bk_inst_name" bson:"bk_inst_name"` - ParentID int64 `json:"bk_parent_id" bson:"bk_parent_id"` -} - -// watchObserver is to observe and manage the change of the business's -// mainline custom topology. -type watchObserver struct { - // key is biz custom object id - // value is this watch's stop channel notifier. 
- observer map[string]chan struct{} - lock sync.Mutex -} - -func (w *watchObserver) add(objID string, stopNotifier chan struct{}) { - w.lock.Lock() - w.observer[objID] = stopNotifier - w.lock.Unlock() - return -} - -func (w *watchObserver) exist(objID string) bool { - w.lock.Lock() - _, exist := w.observer[objID] - w.lock.Unlock() - return exist -} - -func (w *watchObserver) delete(objID string) chan struct{} { - - w.lock.Lock() - stopNotifier := w.observer[objID] - delete(w.observer, objID) - w.lock.Unlock() - return stopNotifier -} - -func (w *watchObserver) getAllObjects() []string { - all := make([]string, 0) - w.lock.Lock() - for obj := range w.observer { - all = append(all, obj) - } - w.lock.Unlock() - - return all -} diff --git a/src/source_controller/cacheservice/cache/mainline/types_test.go b/src/source_controller/cacheservice/cache/mainline/types_test.go deleted file mode 100644 index 326320aa07..0000000000 --- a/src/source_controller/cacheservice/cache/mainline/types_test.go +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package mainline - -import ( - "testing" -) - -func TestObserver(t *testing.T) { - observer := watchObserver{ - observer: make(map[string]chan struct{}), - } - - // add a custom object - notifier := make(chan struct{}) - custom := "country" - observer.add("country", notifier) - - // test a custom is exist or not - if !observer.exist(custom) { - t.Fatal("test country custom level exist failed, not exist") - } - - // test get custom object - if observer.getAllObjects()[0] != custom { - t.Fatal("test country custom level get failed, not equal") - } - - // test delete custom object - observer.delete(custom) - if observer.exist(custom) { - t.Fatal("test country custom level delete failed, still exist") - } - -} diff --git a/src/source_controller/cacheservice/cache/token-handler/memory.go b/src/source_controller/cacheservice/cache/token-handler/memory.go index f07674d069..935c11ef81 100644 --- a/src/source_controller/cacheservice/cache/token-handler/memory.go +++ b/src/source_controller/cacheservice/cache/token-handler/memory.go @@ -20,28 +20,35 @@ package tokenhandler import ( "context" + "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/stream/types" ) -var _ types.TokenHandler = new(MemoryHandler) +var _ types.TaskTokenHandler = new(MemoryHandler) // MemoryHandler is a token handler that stores the token in process memory type MemoryHandler struct { - token string + dbTokenMap map[string]*types.TokenInfo } // NewMemoryTokenHandler generate a new memory event token handler func NewMemoryTokenHandler() *MemoryHandler { - return new(MemoryHandler) + return &MemoryHandler{ + dbTokenMap: make(map[string]*types.TokenInfo), + } } // SetLastWatchToken set last event watch token -func (m *MemoryHandler) SetLastWatchToken(ctx context.Context, token string) error { - m.token = token 
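For reference, a minimal sketch of the reworked uuid-keyed memory token handler (the same per-watch-db pattern is applied to the Mix and Single handlers below); the uuid and token values are made up, and nil is passed for the local.DB argument this handler does not use:

func memoryHandlerSketch() {
	handler := NewMemoryTokenHandler()

	// store a resume token for one watch db, keyed by its uuid
	info := &types.TokenInfo{Token: "resume-token-1", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}
	_ = handler.SetLastWatchToken(context.Background(), "watch-db-uuid", nil, info)

	// reading the same uuid returns the stored token; an unknown uuid returns an empty TokenInfo
	got, _ := handler.GetStartWatchToken(context.Background(), "watch-db-uuid", nil)
	_ = got.Token // "resume-token-1"
}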
+func (m *MemoryHandler) SetLastWatchToken(_ context.Context, uuid string, _ local.DB, token *types.TokenInfo) error { + m.dbTokenMap[uuid] = token return nil } // GetStartWatchToken get event start watch token -func (m *MemoryHandler) GetStartWatchToken(ctx context.Context) (string, error) { - return m.token, nil +func (m *MemoryHandler) GetStartWatchToken(_ context.Context, uuid string, _ local.DB) (*types.TokenInfo, error) { + token, exists := m.dbTokenMap[uuid] + if !exists { + return new(types.TokenInfo), nil + } + return token, nil } diff --git a/src/source_controller/cacheservice/cache/token-handler/mix.go b/src/source_controller/cacheservice/cache/token-handler/mix.go index 62cbdf3f7a..20ec4f173f 100644 --- a/src/source_controller/cacheservice/cache/token-handler/mix.go +++ b/src/source_controller/cacheservice/cache/token-handler/mix.go @@ -20,126 +20,78 @@ package tokenhandler import ( "context" + "time" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/mapstr" - "configcenter/src/common/metadata" - "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) -var _ types.TokenHandler = new(MixHandler) +var _ types.TaskTokenHandler = new(MixHandler) // MixHandler is a token handler for mix event composed of multiple types of events // token data: {"_id": $mixKey, $collection: {"token": $token, "start_at_time": $start_at_time}} type MixHandler struct { mixKey string collection string - db dal.DB } // NewMixTokenHandler generate a new mix event token handler -func NewMixTokenHandler(mixKey, collection string, db dal.DB) *MixHandler { +func NewMixTokenHandler(mixKey, collection string) *MixHandler { return &MixHandler{ mixKey: mixKey, collection: collection, - db: db, } } // SetLastWatchToken set last mix event watch token -func (m *MixHandler) SetLastWatchToken(ctx context.Context, token string) error { +func (m *MixHandler) SetLastWatchToken(ctx context.Context, uuid string, watchDB local.DB, + token *types.TokenInfo) error { + filter := map[string]interface{}{ - "_id": m.mixKey, + "_id": m.genWatchTokenID(uuid), } tokenInfo := mapstr.MapStr{ - m.collection + ".token": token, + m.collection: token, } - if err := m.db.Table(common.BKTableNameSystem).Upsert(ctx, filter, tokenInfo); err != nil { - blog.Errorf("set mix event %s last watch token failed, data: %+v, err: %v", m.mixKey, tokenInfo, err) + if err := watchDB.Table(common.BKTableNameCacheWatchToken).Upsert(ctx, filter, tokenInfo); err != nil { + blog.Errorf("set mix event %s:%s last watch token failed, data: %+v, err: %v", m.mixKey, uuid, tokenInfo, err) return err } return nil } // GetStartWatchToken get mix event start watch token -func (m *MixHandler) GetStartWatchToken(ctx context.Context) (string, error) { +func (m *MixHandler) GetStartWatchToken(ctx context.Context, uuid string, watchDB local.DB) (*types.TokenInfo, error) { filter := map[string]interface{}{ - "_id": m.mixKey, + "_id": m.genWatchTokenID(uuid), } - data := make(map[string]map[string]string) - err := m.db.Table(common.BKTableNameSystem).Find(filter).Fields(m.collection+".token").One(ctx, &data) + data := make(map[string]*types.TokenInfo) + err := watchDB.Table(common.BKTableNameCacheWatchToken).Find(filter).Fields(m.collection).One(ctx, &data) if err != nil { - if !m.db.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.Errorf("get mix event start watch token by filter: %+v failed, err: %v", filter, err) - return 
"", err - } - - return "", nil - } - - tokenInfo, exist := data[m.collection] - if !exist { - blog.Infof("mix event %s start watch token is not found", m.mixKey) - return "", nil - } - - return tokenInfo["token"], nil -} - -// ResetWatchToken reset watch token and start watch time -func (m *MixHandler) ResetWatchToken(startAtTime types.TimeStamp) error { - data := mapstr.MapStr{ - m.collection: mapstr.MapStr{ - common.BKTokenField: "", - common.BKStartAtTimeField: startAtTime, - }, - } - - filter := map[string]interface{}{ - "_id": m.mixKey, - } - - if err := m.db.Table(common.BKTableNameSystem).Upsert(context.Background(), filter, data); err != nil { - blog.Errorf("reset mix watch token %s failed, data: %+v, err: %v", m.mixKey, data, err) - return err - } - return nil -} - -// GetStartWatchTime get mix event start watch time -func (m *MixHandler) GetStartWatchTime(ctx context.Context) (*types.TimeStamp, error) { - filter := map[string]interface{}{ - "_id": m.mixKey, - } - - data := make(map[string]map[string]metadata.Time) - if err := m.db.Table(common.BKTableNameSystem).Find(filter).Fields(m.collection+".start_at_time"). - One(ctx, &data); err != nil { - - if !m.db.IsNotFoundError(err) { - blog.Errorf("get mix event start watch time by filter: %+v failed, err: %v", filter, err) return nil, err } - blog.Infof("mix event %s start watch time is not found", m.mixKey) - return new(types.TimeStamp), nil + return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil } tokenInfo, exist := data[m.collection] if !exist { - blog.Infof("mix event %s start watch time is not found", m.mixKey) - return new(types.TimeStamp), nil + blog.Infof("mix event %s:%s start watch token is not found", m.mixKey, uuid) + return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil } - time := tokenInfo["start_at_time"].Time + return tokenInfo, nil +} - return &types.TimeStamp{ - Sec: uint32(time.Unix()), - Nano: uint32(time.Nanosecond()), - }, nil +func (m *MixHandler) genWatchTokenID(uuid string) string { + return m.mixKey + ":" + uuid } diff --git a/src/source_controller/cacheservice/cache/token-handler/single.go b/src/source_controller/cacheservice/cache/token-handler/single.go index 97368a0f3d..7af6a3bdde 100644 --- a/src/source_controller/cacheservice/cache/token-handler/single.go +++ b/src/source_controller/cacheservice/cache/token-handler/single.go @@ -19,125 +19,97 @@ package tokenhandler import ( "context" + "fmt" + "time" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/common/metadata" "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) -var _ types.TokenHandler = new(SingleHandler) - // SingleHandler is a token handler for single event that watches db only once // token data: {"_id": $key, "token": $token, "start_at_time": $start_at_time} type SingleHandler struct { key string - db dal.DB } // NewSingleTokenHandler generate a new event token handler -func NewSingleTokenHandler(key string, db dal.DB) *SingleHandler { +func NewSingleTokenHandler(key string) *SingleHandler { return &SingleHandler{ key: key, - db: db, } } // SetLastWatchToken set last event watch token -func (m *SingleHandler) SetLastWatchToken(ctx context.Context, token string) error { - filter := map[string]interface{}{ - "_id": m.key, - } +func (m *SingleHandler) SetLastWatchToken(ctx 
context.Context, uuid string, watchDB local.DB, + token *types.TokenInfo) error { - tokenInfo := mapstr.MapStr{ - "token": token, + filter := map[string]interface{}{ + "_id": m.genWatchTokenID(uuid), } - if err := m.db.Table(common.BKTableNameSystem).Upsert(ctx, filter, tokenInfo); err != nil { - blog.Errorf("set event %s last watch token failed, data: %+v, err: %v", m.key, tokenInfo, err) + if err := watchDB.Table(common.BKTableNameCacheWatchToken).Upsert(ctx, filter, token); err != nil { + blog.Errorf("set event %s-%s last watch token failed, data: %+v, err: %v", m.key, uuid, *token, err) return err } return nil } // GetStartWatchToken get event start watch token -func (m *SingleHandler) GetStartWatchToken(ctx context.Context) (string, error) { +func (m *SingleHandler) GetStartWatchToken(ctx context.Context, uuid string, watchDB local.DB) (*types.TokenInfo, + error) { + filter := map[string]interface{}{ - "_id": m.key, + "_id": m.genWatchTokenID(uuid), } - tokenInfo := make(map[string]string) - err := m.db.Table(common.BKTableNameSystem).Find(filter).Fields("token").One(ctx, &tokenInfo) + tokenInfo := new(types.TokenInfo) + err := watchDB.Table(common.BKTableNameCacheWatchToken).Find(filter).One(ctx, &tokenInfo) if err != nil { - if !m.db.IsNotFoundError(err) { + if !mongodb.IsNotFoundError(err) { blog.Errorf("get event start watch token by filter: %+v failed, err: %v", filter, err) - return "", err + return nil, err } - return "", nil + return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil } - return tokenInfo["token"], nil + return tokenInfo, nil } -// ResetWatchToken reset watch token and start watch time -func (m *SingleHandler) ResetWatchToken(startAtTime types.TimeStamp) error { - data := mapstr.MapStr{ - common.BKTokenField: "", - common.BKStartAtTimeField: startAtTime, - } - - filter := map[string]interface{}{ - "_id": m.key, - } - - if err := m.db.Table(common.BKTableNameSystem).Upsert(context.Background(), filter, data); err != nil { - blog.Errorf("reset single watch token %s failed, data: %+v, err: %v", m.key, data, err) - return err - } - return nil +func (m *SingleHandler) genWatchTokenID(uuid string) string { + return m.key + ":" + uuid } -// GetStartWatchTime get event start watch time -func (m *SingleHandler) GetStartWatchTime(ctx context.Context) (*types.TimeStamp, error) { +// IsTokenExists check if any event token exists for all watch dbs +func (m *SingleHandler) IsTokenExists(ctx context.Context, watchDal dal.Dal) (bool, error) { filter := map[string]interface{}{ - "_id": m.key, + "_id": map[string]interface{}{ + common.BKDBLIKE: fmt.Sprintf("^%s:", m.key), + }, } - tokenInfo := make(map[string]metadata.Time) - if err := m.db.Table(common.BKTableNameSystem).Find(filter).Fields("start_at_time"). 
- One(ctx, &tokenInfo); err != nil { - - if !m.db.IsNotFoundError(err) { - blog.Errorf("get event start watch time by filter: %+v failed, err: %v", filter, err) - return nil, err + exists := false + err := watchDal.ExecForAllDB(func(db local.DB) error { + if exists { + return nil } - blog.Infof("event %s start watch time is not found", m.key) - return new(types.TimeStamp), nil - } - - time := tokenInfo["start_at_time"].Time - - return &types.TimeStamp{ - Sec: uint32(time.Unix()), - Nano: uint32(time.Nanosecond()), - }, nil -} - -// IsTokenExists check if event token exists -func (m *SingleHandler) IsTokenExists(ctx context.Context) (bool, error) { - filter := map[string]interface{}{ - "_id": m.key, - } + cnt, err := db.Table(common.BKTableNameCacheWatchToken).Find(filter).Count(ctx) + if err != nil { + blog.Errorf("check if event token exists failed, filter: %+v, err: %v", filter, err) + return err + } - cnt, err := m.db.Table(common.BKTableNameSystem).Find(filter).Fields("token").Count(ctx) + exists = cnt > 0 + return nil + }) if err != nil { - blog.Errorf("check if event token exists failed, filter: %+v, err: %v", filter, err) return false, err } - return cnt > 0, nil + return exists, nil } diff --git a/src/source_controller/cacheservice/cache/tools/kube.go b/src/source_controller/cacheservice/cache/tools/kube.go index c848adf3b5..f494401313 100644 --- a/src/source_controller/cacheservice/cache/tools/kube.go +++ b/src/source_controller/cacheservice/cache/tools/kube.go @@ -18,10 +18,9 @@ package tools import ( - "context" - "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/util" "configcenter/src/kube/types" @@ -29,16 +28,16 @@ import ( ) // GenKubeSharedNsCond generate shared namespace condition by biz id -func GenKubeSharedNsCond(ctx context.Context, bizID int64, nsIDField string, rid string) (mapstr.MapStr, error) { - ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) +func GenKubeSharedNsCond(kit *rest.Kit, bizID int64, nsIDField string) (mapstr.MapStr, error) { + kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) sharedCond := mapstr.MapStr{types.BKAsstBizIDField: bizID} relations := make([]types.NsSharedClusterRel, 0) - err := mongodb.Client().Table(types.BKTableNameNsSharedClusterRel).Find(sharedCond). - Fields(types.BKNamespaceIDField).All(ctx, &relations) + err := mongodb.Shard(kit.ShardOpts()).Table(types.BKTableNameNsSharedClusterRel).Find(sharedCond). + Fields(types.BKNamespaceIDField).All(kit.Ctx, &relations) if err != nil { - blog.Errorf("list kube shared namespace rel failed, err: %v, cond: %+v, rid: %v", err, sharedCond, rid) + blog.Errorf("list kube shared namespace rel failed, err: %v, cond: %+v, rid: %v", err, sharedCond, kit.Rid) return nil, err } diff --git a/src/source_controller/cacheservice/cache/topology/client.go b/src/source_controller/cacheservice/cache/topology/client.go deleted file mode 100644 index 13a30b1892..0000000000 --- a/src/source_controller/cacheservice/cache/topology/client.go +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. 
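A minimal sketch of how a caller might drive the reworked SingleHandler shown above, assuming it runs inside the configcenter source tree; the "host_identity" event key and the way `uuid` and `watchDB` are obtained are placeholders, not part of this patch:

```go
package example

import (
	"context"

	tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler"
	"configcenter/src/storage/dal/mongo/local"
	"configcenter/src/storage/stream/types"
)

// resumeToken loads the last persisted token for one watch db, identified by
// its uuid. When no token document exists yet, the handler itself returns an
// empty token whose start time is "now", so the caller can use it as-is.
func resumeToken(ctx context.Context, uuid string, watchDB local.DB) (*types.TokenInfo, error) {
	handler := tokenhandler.NewSingleTokenHandler("host_identity") // placeholder event key
	return handler.GetStartWatchToken(ctx, uuid, watchDB)
}

// saveToken persists the token after a batch of events has been handled, so
// that a restarted task resumes from the same position for this watch db.
func saveToken(ctx context.Context, uuid string, watchDB local.DB, token *types.TokenInfo) error {
	handler := tokenhandler.NewSingleTokenHandler("host_identity") // placeholder event key
	return handler.SetLastWatchToken(ctx, uuid, watchDB, token)
}
```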
You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package topology - -import ( - "encoding/json" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/http/rest" - "configcenter/src/common/util" -) - -// GetBizTopology TODO -func (t *Topology) GetBizTopology(kit *rest.Kit, biz int64) (*string, error) { - // read from secondary in mongodb cluster. - kit.Ctx = util.SetDBReadPreference(kit.Ctx, common.SecondaryPreferredMode) - - topology, err := t.briefBizKey.getTopology(kit.Ctx, biz) - if err == nil { - if len(*topology) != 0 { - // get data from cache success - return topology, nil - } - // get from db directly. - } - - blog.Errorf("get biz: %d topology from cache failed, get from db now, err: %v, rid: %s", biz, err, kit.Rid) - - // do not get biz topology from cache, get it from db directly. - topo, err := t.genBusinessTopology(kit.Ctx, biz) - if err != nil { - blog.Errorf("generate biz: %d topology from db failed, err: %v, rid: %s", biz, err, kit.Rid) - return nil, err - } - - // update it to cache directly. - if err := t.briefBizKey.updateTopology(kit.Ctx, topo); err != nil { - blog.Errorf("refresh biz: %d topology cache failed, err: %v, rid: %s", biz, err, kit.Rid) - // do not return error - } - - dat, err := json.Marshal(topo) - if err != nil { - blog.Errorf("marshal biz topology failed, err: %v, rid: %s", err, kit.Rid) - return nil, err - } - - topoStr := string(dat) - return &topoStr, nil -} diff --git a/src/source_controller/cacheservice/cache/topology/key.go b/src/source_controller/cacheservice/cache/topology/key.go deleted file mode 100644 index abf113994b..0000000000 --- a/src/source_controller/cacheservice/cache/topology/key.go +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package topology - -import ( - "context" - "fmt" - "time" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/json" - "configcenter/src/common/mapstr" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/redis" - "configcenter/src/storage/driver/mongodb" - drvRedis "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream/types" -) - -func newTokenHandler(object string) *tokenHandler { - return &tokenHandler{ - doc: "brief_topology_cache_watch_token", - key: object, - db: mongodb.Client(), - } -} - -type tokenHandler struct { - doc string - key string - db dal.DB -} - -// SetLastWatchToken TODO -func (w *tokenHandler) SetLastWatchToken(ctx context.Context, token string) error { - var err error - // do with retry - filter := map[string]interface{}{"_id": w.doc} - tokenData := mapstr.MapStr{w.key: token} - - for try := 0; try < 5; try++ { - err = w.db.Table(common.BKTableNameSystem).Upsert(ctx, filter, tokenData) - if err != nil { - time.Sleep(time.Duration(try/2+1) * time.Second) - continue - } - return nil - } - - return err -} - -// GetStartWatchToken TODO -// get the former watched token. -// if Key is not exist, then token is "". -func (w *tokenHandler) GetStartWatchToken(ctx context.Context) (token string, err error) { - // do with retry - filter := map[string]interface{}{"_id": w.doc} - for try := 0; try < 5; try++ { - tokenData := make(map[string]string) - err = w.db.Table(common.BKTableNameSystem).Find(filter).Fields(w.key).One(ctx, &tokenData) - if err != nil { - blog.Errorf("get %s start token failed, err: %v", w.key, err) - if !w.db.IsNotFoundError(err) { - time.Sleep(time.Duration(try/2+1) * time.Second) - continue - } - return "", nil - } - return tokenData[w.key], nil - } - - return "", err -} - -// resetWatchToken set watch token to empty and set the start watch time to the given one for next watch -func (w *tokenHandler) resetWatchToken(startAtTime types.TimeStamp) error { - filter := map[string]interface{}{"_id": w.doc} - tokenData := mapstr.MapStr{ - w.key: "", - w.key + "_start_time": startAtTime, - } - - return w.db.Table(common.BKTableNameSystem).Upsert(context.Background(), filter, tokenData) -} - -func (w *tokenHandler) getStartWatchTime(ctx context.Context) (*types.TimeStamp, error) { - filter := map[string]interface{}{"_id": w.doc} - - data := make(map[string]types.TimeStamp) - err := w.db.Table(common.BKTableNameSystem).Find(filter).Fields(w.key+"_start_time").One(ctx, &data) - if err != nil { - if !w.db.IsNotFoundError(err) { - blog.Errorf("get %s start time failed, err: %v", w.key, err) - return nil, err - } - return new(types.TimeStamp), nil - } - startTime := data[w.key+"_start_time"] - return &startTime, nil -} - -func newTopologyKey() *cacheKey { - return &cacheKey{ - namespace: common.BKCacheKeyV3Prefix + "topology:brief", - ttl: 24 * time.Hour, - rds: drvRedis.Client(), - } -} - -type cacheKey struct { - name string - namespace string - ttl time.Duration - rds redis.Client -} - -func (c *cacheKey) bizTopologyKey(biz int64) string { - return fmt.Sprintf("%s:%d", c.namespace, biz) -} - -// updateTopology update biz Topology cache -func (c *cacheKey) updateTopology(ctx context.Context, topo *BizBriefTopology) error { - - js, err := json.Marshal(topo) - if err != nil { - return fmt.Errorf("marshal topology failed, err: %v", err) - } - - return c.rds.Set(ctx, c.bizTopologyKey(topo.Biz.ID), string(js), c.ttl).Err() -} - -// getTopology get biz Topology from cache -func (c *cacheKey) 
getTopology(ctx context.Context, biz int64) (*string, error) { - dat, err := c.rds.Get(ctx, c.bizTopologyKey(biz)).Result() - if err != nil { - if redis.IsNilErr(err) { - empty := "" - return &empty, nil - } - - return nil, fmt.Errorf("get cache from redis failed, err: %v", err) - } - - return &dat, nil -} diff --git a/src/source_controller/cacheservice/cache/topology/logic.go b/src/source_controller/cacheservice/cache/topology/logic.go deleted file mode 100644 index c9833bb662..0000000000 --- a/src/source_controller/cacheservice/cache/topology/logic.go +++ /dev/null @@ -1,421 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package topology - -import ( - "context" - "errors" - "sort" - - "configcenter/src/common" - "configcenter/src/common/backbone/configcenter" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/common/util" -) - -// genBusinessTopology generate a fully business topology data. -func (t *Topology) genBusinessTopology(ctx context.Context, biz int64) (*BizBriefTopology, error) { - rid := ctx.Value(common.ContextRequestIDField) - - filter := mapstr.MapStr{ - common.BKAppIDField: biz, - } - detail := new(BizBase) - err := t.db.Table(common.BKTableNameBaseApp).Find(filter).Fields(bizBaseFields...).One(ctx, detail) - if err != nil { - blog.Errorf("get biz: %d detail failed, err: %v, rid: %v", biz, err, rid) - return nil, err - } - - idle, common, err := t.getBusinessTopology(ctx, biz, detail.OwnerID) - if err != nil { - blog.Errorf("get biz topology nodes from db failed, err: %v, rid: %v", err, rid) - return nil, err - } - - return &BizBriefTopology{ - Biz: detail, - Idle: idle, - Nodes: common, - }, nil -} - -// getBusinessTopology construct a business's fully Topology data, separated with inner set and -// other common nodes -func (t *Topology) getBusinessTopology(ctx context.Context, biz int64, supplierAccount string) ([]*Node, []*Node, - error) { - - rid := ctx.Value(common.ContextRequestIDField) - - // read from secondary in mongodb cluster. 
- ctx = util.SetDBReadPreference(ctx, common.SecondaryPreferredMode) - - reverseRank, err := t.getMainlineReverseRank(ctx) - if err != nil { - blog.Errorf("get biz: %d topology, but get mainline rank failed, err: %v, rid: %v", biz, err, rid) - return nil, nil, err - } - - var previousNodes map[int64][]*Node - var idleSetNodes map[int64][]*Node - // cycle from set to business - for _, level := range reverseRank[2:] { - if level == "set" { - idleNodes, normalNodes, err := t.genSetNodes(ctx, biz) - if err != nil { - return nil, nil, err - } - - idleSetNodes = idleNodes - - // update previous nodes - previousNodes = normalNodes - continue - } - - if level == "biz" { - break - } - - customNodes, err := t.genCustomNodes(ctx, biz, level, supplierAccount, previousNodes) - if err != nil { - return nil, nil, err - } - - // update previous nodes with custom nodes - previousNodes = customNodes - } - - if previousNodes == nil || idleSetNodes == nil { - return nil, nil, errors.New("invalid business topology") - } - - inner, exists := idleSetNodes[biz] - if !exists { - blog.ErrorJSON("can not find biz inner set topology, origin: %s, rid: %s", idleSetNodes, rid) - return nil, nil, errors.New("invalid biz inner set topology") - } - - return inner, previousNodes[biz], nil -} - -// genCustomNodes generate custom object's instance node with its parent map -func (t *Topology) genCustomNodes(ctx context.Context, biz int64, object, supplierAccount string, - previousNodes map[int64][]*Node) (map[int64][]*Node, error) { - - if previousNodes == nil { - previousNodes = make(map[int64][]*Node) - } - - customList, err := t.listCustomInstance(ctx, biz, object, supplierAccount) - if err != nil { - return nil, err - } - - reminder := make(map[int64]struct{}) - customParentMap := make(map[int64][]*Node) - for _, custom := range customList { - if _, exists := customParentMap[custom.ParentID]; !exists { - customParentMap[custom.ParentID] = make([]*Node, 0) - } - - if _, exists := reminder[custom.ID]; exists { - continue - } - reminder[custom.ID] = struct{}{} - - customParentMap[custom.ParentID] = append(customParentMap[custom.ParentID], &Node{ - Object: object, - ID: custom.ID, - Name: custom.Name, - // fill it later - SubNodes: previousNodes[custom.ID], - }) - } - - for _, nodes := range customParentMap { - sortNodes(nodes) - } - - return customParentMap, nil - -} - -// listCustomInstance list a biz's custom object's all instances -func (t *Topology) listCustomInstance(ctx context.Context, biz int64, object, supplierAccount string) ( - []*customBase, error) { - - filter := mapstr.MapStr{common.BKAppIDField: biz, "bk_obj_id": object} - all := make([]*customBase, 0) - start := uint64(0) - for { - oneStep := make([]*customBase, 0) - err := t.db.Table(common.GetObjectInstTableName(object, supplierAccount)).Find(filter). - Fields(customBaseFields...).Sort(common.BKInstIDField).Start(start). - Limit(step).All(ctx, &oneStep) - if err != nil { - blog.Errorf("get biz: %d custom object: %s instance list failed, err: %v", biz, object, err) - return nil, err - } - - all = append(all, oneStep...) 
- - if len(oneStep) < step { - // we got all the data - break - } - - // update start position - start += step - } - - return all, nil -} - -// genSetNodes generate set's node with it's parent map -func (t *Topology) genSetNodes(ctx context.Context, biz int64) (idle map[int64][]*Node, normal map[int64][]*Node, - err error) { - - moduleNodes, err := t.genModulesNodes(ctx, biz) - if err != nil { - return nil, nil, err - } - - setList, err := t.listSets(ctx, biz) - if err != nil { - return nil, nil, err - } - - reminder := make(map[int64]struct{}) - idleSetNodes := make(map[int64][]*Node) - normalSetParentMap := make(map[int64][]*Node) - var current map[int64][]*Node - for _, set := range setList { - - if set.Default > 0 { - // not the common set - current = idleSetNodes - } else { - // common sets - current = normalSetParentMap - } - - _, exists := current[set.ParentID] - if !exists { - current[set.ParentID] = make([]*Node, 0) - } - - if _, exists = reminder[set.ID]; exists { - continue - } - reminder[set.ID] = struct{}{} - - current[set.ParentID] = append(current[set.ParentID], &Node{ - Object: "set", - ID: set.ID, - Name: set.Name, - Default: &set.Default, - SubNodes: moduleNodes[set.ID], - }) - } - - for _, nodes := range idleSetNodes { - sortNodes(nodes) - } - - for _, nodes := range normalSetParentMap { - sortNodes(nodes) - } - - return idleSetNodes, normalSetParentMap, nil - -} - -func (t *Topology) listSets(ctx context.Context, biz int64) ([]*setBase, error) { - filter := mapstr.MapStr{common.BKAppIDField: biz} - all := make([]*setBase, 0) - start := uint64(0) - for { - oneStep := make([]*setBase, 0) - err := t.db.Table(common.BKTableNameBaseSet).Find(filter).Fields(setBaseFields...).Start(start). - Limit(step).Sort(common.BKSetIDField).All(ctx, &oneStep) - if err != nil { - blog.Errorf("get biz: %d set list failed, err: %v", biz, err) - return nil, err - } - - all = append(all, oneStep...) - - if len(oneStep) < step { - // we got all the data - break - } - - // update start position - start += step - } - - return all, nil -} - -// genModulesNodes generate module's node with it's parent set map -func (t *Topology) genModulesNodes(ctx context.Context, biz int64) (map[int64][]*Node, error) { - moduleList, err := t.listModules(ctx, biz) - if err != nil { - return nil, err - } - - reminder := make(map[int64]struct{}) - moduleParentMap := make(map[int64][]*Node) - for idx := range moduleList { - module := moduleList[idx] - _, exists := moduleParentMap[module.SetID] - if !exists { - moduleParentMap[module.SetID] = make([]*Node, 0) - } - - if _, exists = reminder[module.ID]; exists { - continue - } - reminder[module.ID] = struct{}{} - - moduleParentMap[module.SetID] = append(moduleParentMap[module.SetID], &Node{ - Object: "module", - ID: module.ID, - Name: module.Name, - Default: &module.Default, - SubNodes: nil, - }) - } - - for _, nodes := range moduleParentMap { - sortNodes(nodes) - } - - return moduleParentMap, nil - -} - -// listModules list a business's all modules -func (t *Topology) listModules(ctx context.Context, biz int64) ([]*moduleBase, error) { - filter := mapstr.MapStr{common.BKAppIDField: biz} - all := make([]*moduleBase, 0) - start := uint64(0) - for { - oneStep := make([]*moduleBase, 0) - err := t.db.Table(common.BKTableNameBaseModule).Find(filter).Fields(moduleBaseFields...).Start(start). 
- Limit(step).Sort(common.BKModuleIDField).All(ctx, &oneStep) - if err != nil { - blog.Errorf("get biz: %d module list failed, err: %v", biz, err) - return nil, err - } - - all = append(all, oneStep...) - - if len(oneStep) < step { - // we got all the data - break - } - - // update start position - start += step - } - - return all, nil -} - -// getMainlineReverseRank rank mainline object from module to biz -func (t *Topology) getMainlineReverseRank(ctx context.Context) ([]string, error) { - relations := make([]mainlineAssociation, 0) - filter := mapstr.MapStr{ - common.AssociationKindIDField: common.AssociationKindMainline, - } - err := t.db.Table(common.BKTableNameObjAsst).Find(filter).Fields(mainlineAsstFields...).All(ctx, &relations) - if err != nil { - blog.Errorf("get mainline topology association failed, err: %v", err) - return nil, err - } - - // rank mainline object - rank := make([]string, 0) - next := "biz" - rank = append(rank, next) - for _, relation := range relations { - if relation.AssociateTo == next { - rank = append(rank, relation.ObjectID) - next = relation.ObjectID - continue - } else { - for _, rel := range relations { - if rel.AssociateTo == next { - rank = append(rank, rel.ObjectID) - next = rel.ObjectID - break - } - } - } - } - - return util.ReverseArrayString(rank), nil -} - -// listAllBusiness list all business brief info -func (t *Topology) listAllBusiness(ctx context.Context) ([]*BizBase, error) { - - filter := mapstr.MapStr{} - all := make([]*BizBase, 0) - start := uint64(0) - for { - oneStep := make([]*BizBase, 0) - err := t.db.Table(common.BKTableNameBaseApp).Find(filter).Fields(bizBaseFields...).Start(start). - Limit(step).Sort(common.BKAppIDField).All(ctx, &oneStep) - if err != nil { - return nil, err - } - - all = append(all, oneStep...) - - if len(oneStep) < step { - // we got all the data - break - } - - // update start position - start += step - } - - return all, nil -} - -func getBreifTopoCacheRefreshMinutes() int { - duration, err := configcenter.Int("cacheService.briefTopologySyncIntervalMinutes") - if err != nil { - blog.Errorf("get brief biz topology cache refresh interval minutes failed, err: %v, use default value 15.", err) - return defaultRefreshIntervalMinutes - } - - if duration < 2 { - blog.Warnf("got invalid brief biz topology cache refresh interval minutes %d, < 2min, use default value 15.") - return defaultRefreshIntervalMinutes - } - - return duration -} - -// sortNodes sort nodes by name -func sortNodes(nodes []*Node) { - sort.Slice(nodes, func(i, j int) bool { - return nodes[i].Name < nodes[j].Name - }) -} diff --git a/src/source_controller/cacheservice/cache/topology/readme.md b/src/source_controller/cacheservice/cache/topology/readme.md deleted file mode 100644 index 63be389065..0000000000 --- a/src/source_controller/cacheservice/cache/topology/readme.md +++ /dev/null @@ -1,17 +0,0 @@ -## Business Topology Cache - This package used to cache all the business's topology from the root nodes business all the way -to the lowest node module. - It's used by various scenes, which is need to get business topology frequently and care about -the performance. like job's scheduled tasks. - - This business's topology cache has features as follows: - - the cache is a brief topology of this business, which contains the basic information with - the object, instance id and name. - - this cache is refreshed when a business's topology changed, such as a custom level instance is - added, removed. or a set, module is added or removed. 
this is an event-drive mechanism, so that - cache can be refreshed in time. - - this cache has a ttl for several hours, which help us to clean the cache automatically when a - business is deleted or archived. - - all the cache refreshed every 15 minutes no matter event occurred or not. it's a safety - mechanism to ensure the cache is correct. - - if we cannot find business topology from the cache, we read it from the db directly. \ No newline at end of file diff --git a/src/source_controller/cacheservice/cache/topology/topology.go b/src/source_controller/cacheservice/cache/topology/topology.go deleted file mode 100644 index a5b1fc02a0..0000000000 --- a/src/source_controller/cacheservice/cache/topology/topology.go +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package topology TODO -package topology - -import ( - "context" - "sync" - "time" - - "configcenter/src/apimachinery/discovery" - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/common/util" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/redis" - "configcenter/src/storage/driver/mongodb" - drvRedis "configcenter/src/storage/driver/redis" - "configcenter/src/storage/stream" -) - -// NewTopology TODO -func NewTopology(isMaster discovery.ServiceManageInterface, loopW stream.LoopInterface) (*Topology, error) { - - t := &Topology{ - db: mongodb.Client(), - rds: drvRedis.Client(), - loopW: loopW, - checkMaster: isMaster, - briefBizKey: newTopologyKey(), - } - - if err := t.watchCustom(); err != nil { - blog.Errorf("topology watch custom failed, err: %v", err) - return nil, err - } - - if err := t.watchSet(); err != nil { - blog.Errorf("topology watch set failed, err: %v", err) - return nil, err - } - - if err := t.watchModule(); err != nil { - blog.Errorf("topology watch module failed, err: %v", err) - return nil, err - } - - go t.loopBizBriefCache() - - return t, nil -} - -// Topology TODO -type Topology struct { - db dal.DB - rds redis.Client - loopW stream.LoopInterface - checkMaster discovery.ServiceManageInterface - briefBizKey *cacheKey -} - -// refreshBatch refresh business Topology with batch -func (t *Topology) refreshBatch(bizList []int64, rid string) error { - blog.Infof("try to refresh biz: %v topology, rid: %s", bizList, rid) - - if len(bizList) == 0 { - return nil - } - - filter := mapstr.MapStr{ - common.BKAppIDField: mapstr.MapStr{ - common.BKDBIN: bizList, - }, - } - list := make([]*BizBase, 0) - err := t.db.Table(common.BKTableNameBaseApp).Find(filter).Fields(bizBaseFields...).All(context.Background(), &list) - if err != nil { - blog.Errorf("list biz detail failed, err: %v, rid: %s", err, rid) - return err - } - - // set max goroutine number - pipeline := make(chan struct{}, 5) - wg := sync.WaitGroup{} - var hitErr error - for idx := range list { - pipeline <- 
struct{}{} - wg.Add(1) - - go func(biz *BizBase) { - err := t.refreshBizTopology(biz, rid) - if err != nil { - hitErr = err - } - <-pipeline - wg.Done() - }(list[idx]) - } - - wg.Wait() - if hitErr != nil { - blog.Errorf("refresh biz list failed, err: %v, rid: %s", err, rid) - return hitErr - } - - blog.Infof("try to refresh biz topology success, rid: %s", rid) - return nil -} - -// refreshBizTopology construct a business Topology and update it to cache. -func (t *Topology) refreshBizTopology(biz *BizBase, rid string) error { - ctx := context.WithValue(context.TODO(), common.ContextRequestIDField, rid) - idle, common, err := t.getBusinessTopology(ctx, biz.ID, biz.OwnerID) - if err != nil { - blog.Error("refresh biz %d/%s topology, but get topology failed, err: %v, rid: %s", biz.ID, biz.Name, err, rid) - return err - } - - topo := &BizBriefTopology{ - Biz: biz, - Idle: idle, - Nodes: common, - } - - err = t.briefBizKey.updateTopology(ctx, topo) - if err != nil { - blog.Error("update biz %d/%s topology to cache failed, err: %v, rid: %s", biz.ID, biz.Name, err, rid) - return err - } - - return nil -} - -// loopBizBriefCache launch the task to loop business's brief topology every interval minutes. -func (t *Topology) loopBizBriefCache() { - blog.Infof("loop refresh biz brief topology task every %d minutes.", getBreifTopoCacheRefreshMinutes()) - for { - - if !t.checkMaster.IsMaster() { - blog.V(4).Infof("loop biz brief cache, but not master, skip.") - time.Sleep(time.Minute) - continue - } - - interval := getBreifTopoCacheRefreshMinutes() - time.Sleep(time.Duration(interval) * time.Minute) - // time.Sleep(30 * time.Second) - rid := util.GenerateRID() - - blog.Infof("start loop refresh biz brief topology task, interval: %d, rid: %s", interval, rid) - t.doLoopBizBriefTopologyToCache(rid) - blog.Infof("finished loop refresh biz brief topology task, rid: %s", rid) - } -} - -func (t *Topology) doLoopBizBriefTopologyToCache(rid string) { - // read from secondary in mongodb cluster. - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - - all, err := t.listAllBusiness(ctx) - if err != nil { - blog.Errorf("loop biz brief topology, but list all business failed, err: %v, rid: %s", err, rid) - return - } - - for _, biz := range all { - time.Sleep(50 * time.Millisecond) - - err := t.refreshBizTopology(biz, rid) - if err != nil { - blog.Errorf("loop refresh biz %d/%s topology failed, err: %v, rid: %s", biz.ID, biz.Name, err, rid) - } else { - blog.Infof("loop refresh biz %d/%s brief topology success, rid: %s", biz.ID, biz.Name, rid) - } - - } - -} diff --git a/src/source_controller/cacheservice/cache/topology/types.go b/src/source_controller/cacheservice/cache/topology/types.go deleted file mode 100644 index f086c69ae4..0000000000 --- a/src/source_controller/cacheservice/cache/topology/types.go +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. 
See the License for the specific language governing permissions and - * limitations under the License. - */ - -package topology - -// BizBriefTopology TODO -type BizBriefTopology struct { - // basic business info - Biz *BizBase `json:"biz"` - // the idle set nodes info - Idle []*Node `json:"idle"` - // the other common nodes - Nodes []*Node `json:"nds"` -} - -// Node TODO -type Node struct { - // the object of this node, like set or module - Object string `json:"obj"` - // the node's instance id, like set id or module id - ID int64 `json:"id"` - // the node's name, like set name or module name - Name string `json:"nm"` - // only set, module has this field. - // describe what kind of set or module this node is. - // 0: normal module or set. - // >1: special set or module - Default *int `json:"dft,omitempty"` - // the sub-nodes of current node - SubNodes []*Node `json:"nds"` -} - -var bizBaseFields = []string{"bk_biz_id", "bk_biz_name", "default", "bk_supplier_account"} - -// BizBase TODO -type BizBase struct { - // business id - ID int64 `json:"id" bson:"bk_biz_id"` - // business name - Name string `json:"nm" bson:"bk_biz_name"` - // describe it's a resource pool business or normal business. - // 0: normal business - // >0: special business, like resource pool business. - Default int `json:"dft" bson:"default"` - - OwnerID string `json:"bk_supplier_account" bson:"bk_supplier_account"` -} - -var customBaseFields = []string{"bk_biz_id", "bk_parent_id", "bk_inst_id", "bk_inst_name", "bk_obj_id"} - -type customBase struct { - Business int64 `bson:"bk_biz_id"` - ParentID int64 `bson:"bk_parent_id"` - ID int64 `bson:"bk_inst_id"` - Name string `bson:"bk_inst_name"` - Object string `bson:"bk_obj_id"` -} - -type customArchive struct { - Oid string `bson:"oid"` - Detail *customBase `bson:"detail"` -} - -var setBaseFields = []string{"bk_biz_id", "bk_parent_id", "bk_set_id", "bk_set_name", "default"} - -type setBase struct { - Business int64 `bson:"bk_biz_id"` - ParentID int64 `bson:"bk_parent_id"` - ID int64 `bson:"bk_set_id"` - Name string `bson:"bk_set_name"` - Default int `bson:"default"` -} - -type setArchive struct { - Oid string `bson:"oid"` - Detail *setBase `bson:"detail"` -} - -var moduleBaseFields = []string{"bk_biz_id", "bk_set_id", "bk_module_id", "bk_module_name", "default"} - -type moduleBase struct { - Business int64 `bson:"bk_biz_id"` - SetID int64 `bson:"bk_set_id"` - ID int64 `bson:"bk_module_id"` - Name string `bson:"bk_module_name"` - Default int `bson:"default"` -} - -type moduleArchive struct { - Oid string `bson:"oid"` - Detail *moduleBase `bson:"detail"` -} - -var mainlineAsstFields = []string{"bk_asst_obj_id", "bk_obj_id"} - -type mainlineAssociation struct { - AssociateTo string `bson:"bk_asst_obj_id"` - ObjectID string `bson:"bk_obj_id"` -} - -// page step -const ( - step = 100 - defaultRefreshIntervalMinutes = 15 -) diff --git a/src/source_controller/cacheservice/cache/topology/watch.go b/src/source_controller/cacheservice/cache/topology/watch.go deleted file mode 100644 index 199a26ef31..0000000000 --- a/src/source_controller/cacheservice/cache/topology/watch.go +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. 
You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -package topology - -import ( - "context" - "time" - - "configcenter/src/common" - "configcenter/src/common/blog" - "configcenter/src/common/mapstr" - "configcenter/src/common/util" - "configcenter/src/storage/stream/types" -) - -func (t *Topology) watchSet() error { - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(setBase), - Collection: common.BKTableNameBaseSet, - Filter: mapstr.MapStr{}, - }, - } - - tokenHandler := newTokenHandler("set") - startAtTime, err := tokenHandler.getStartWatchTime(context.Background()) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", watchOpts.Collection, err) - return err - } - watchOpts.StartAtTime = startAtTime - watchOpts.WatchFatalErrorCallback = tokenHandler.resetWatchToken - - loopOptions := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: "topology cache with set", - WatchOpt: watchOpts, - TokenHandler: tokenHandler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 10, - RetryDuration: 1 * time.Second, - }, - }, - EventHandler: &types.BatchHandler{ - DoBatch: t.onSetChange, - }, - BatchSize: 50, - } - - return t.loopW.WithBatch(loopOptions) -} - -func (t *Topology) onSetChange(es []*types.Event) (retry bool) { - if len(es) == 0 { - return false - } - - rid := es[0].ID() - bizList := make([]int64, 0) - for idx := range es { - one := es[idx] - - var set *setBase - switch one.OperationType { - case types.Insert: - set = one.Document.(*setBase) - - case types.Update: - // when a custom level is deleted from mainline topology, then - // we will change it's children's parent id, we will received - // it's parent's update event, and bk_parent_id is changed. - if _, exists := one.ChangeDesc.UpdatedFields[common.BKParentIDField]; !exists { - // only handle bk_parent_id changed events - continue - } - - set = one.Document.(*setBase) - - case types.Delete: - filter := mapstr.MapStr{ - "oid": one.Oid, - "coll": common.BKTableNameBaseSet, - } - archive := new(setArchive) - err := t.db.Table(common.BKTableNameDelArchive).Find(filter).One(context.TODO(), archive) - if err != nil { - blog.Errorf("topology cache, get deleted set %s failed, err: %v, rid: %s", one.Oid, err, rid) - if t.db.IsNotFoundError(err) { - blog.Errorf("can not find deleted set %s detail, skip, rid: %s", one.Oid, rid) - continue - } else { - return true - } - } - - set = archive.Detail - - default: - // only handle insert and delete event. 
- continue - } - - blog.Infof("topology cache, received biz: %d, set: %d/%s, op-time: %s, changed event, rid: %s", - set.Business, set.ID, set.Name, one.ClusterTime.String(), rid) - - bizList = append(bizList, set.Business) - } - - bizList = util.IntArrayUnique(bizList) - - err := t.refreshBatch(bizList, rid) - if err != nil { - return true - } - - return false -} - -func (t *Topology) watchModule() error { - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(moduleBase), - Collection: common.BKTableNameBaseModule, - Filter: mapstr.MapStr{}, - }, - } - - tokenHandler := newTokenHandler("module") - startAtTime, err := tokenHandler.getStartWatchTime(context.Background()) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", watchOpts.Collection, err) - return err - } - watchOpts.StartAtTime = startAtTime - watchOpts.WatchFatalErrorCallback = tokenHandler.resetWatchToken - - loopOptions := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: "topology cache with module", - WatchOpt: watchOpts, - TokenHandler: tokenHandler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 10, - RetryDuration: 1 * time.Second, - }, - }, - EventHandler: &types.BatchHandler{ - DoBatch: t.onModuleChange, - }, - BatchSize: 100, - } - - return t.loopW.WithBatch(loopOptions) -} - -func (t *Topology) onModuleChange(es []*types.Event) (retry bool) { - if len(es) == 0 { - return false - } - - rid := es[0].ID() - bizList := make([]int64, 0) - for idx := range es { - one := es[idx] - - var module *moduleBase - switch one.OperationType { - case types.Insert: - module = one.Document.(*moduleBase) - - case types.Delete: - filter := mapstr.MapStr{ - "oid": one.Oid, - "coll": common.BKTableNameBaseModule, - } - archive := new(moduleArchive) - err := t.db.Table(common.BKTableNameDelArchive).Find(filter).One(context.TODO(), archive) - if err != nil { - blog.Errorf("topology cache, get deleted module %s failed, err: %v, rid: %s", one.Oid, err, rid) - if t.db.IsNotFoundError(err) { - blog.Errorf("can not find deleted module %s detail, skip, rid: %s", one.Oid, rid) - continue - } else { - return true - } - } - - module = archive.Detail - - default: - // only handle insert and delete event. 
- continue - } - - blog.Infof("topology cache, received biz: %d, module: %d/%s, op-time: %s, changed event, rid: %s", - module.Business, module.ID, module.Name, one.ClusterTime.String(), rid) - - bizList = append(bizList, module.Business) - } - - bizList = util.IntArrayUnique(bizList) - - err := t.refreshBatch(bizList, rid) - if err != nil { - return true - } - - return false -} - -// watchCustom watch business custom change event -func (t *Topology) watchCustom() error { - watchOpts := &types.WatchOptions{ - Options: types.Options{ - EventStruct: new(customBase), - Collection: common.BKTableNameBaseInst, - Filter: mapstr.MapStr{ - common.BKAppIDField: mapstr.MapStr{ - common.BKDBGT: 0, - }, - common.BKInstIDField: mapstr.MapStr{ - common.BKDBGT: 0, - }, - common.BKParentIDField: mapstr.MapStr{ - common.BKDBGT: 0, - }, - common.BKObjIDField: mapstr.MapStr{ - common.BKDBExists: true, - }, - }, - }, - } - - tokenHandler := newTokenHandler("custom_level") - startAtTime, err := tokenHandler.getStartWatchTime(context.Background()) - if err != nil { - blog.Errorf("get start watch time for %s failed, err: %v", watchOpts.Collection, err) - return err - } - watchOpts.StartAtTime = startAtTime - watchOpts.WatchFatalErrorCallback = tokenHandler.resetWatchToken - - loopOptions := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: "topology cache with custom level", - WatchOpt: watchOpts, - TokenHandler: tokenHandler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: 10, - RetryDuration: 1 * time.Second, - }, - }, - EventHandler: &types.BatchHandler{ - DoBatch: t.onCustomChange, - }, - BatchSize: 20, - } - - return t.loopW.WithBatch(loopOptions) -} - -func (t *Topology) onCustomChange(es []*types.Event) (retry bool) { - if len(es) == 0 { - return false - } - - rid := es[0].ID() - bizList := make([]int64, 0) - for idx := range es { - one := es[idx] - - var custom *customBase - switch one.OperationType { - case types.Insert: - custom = one.Document.(*customBase) - - case types.Update: - // when a custom level is deleted from mainline topology, then - // we will change it's children's parent id, we will received - // it's parent's update event, and bk_parent_id is changed. - if _, exists := one.ChangeDesc.UpdatedFields[common.BKParentIDField]; !exists { - // only handle bk_parent_id changed events - continue - } - - custom = one.Document.(*customBase) - - case types.Delete: - filter := mapstr.MapStr{ - "oid": one.Oid, - "coll": common.BKTableNameBaseInst, - } - archive := new(customArchive) - err := t.db.Table(common.BKTableNameDelArchive).Find(filter).One(context.TODO(), archive) - if err != nil { - blog.Errorf("topology cache, get deleted custom level %s failed, err: %v, rid: %s", one.Oid, err, rid) - if t.db.IsNotFoundError(err) { - blog.Errorf("can not find deleted custom level %s detail, skip, rid: %s", one.Oid, rid) - continue - } else { - return true - } - } - - custom = archive.Detail - - default: - // only handle insert, update bk_parent_id , delete event, drop the other event. 
- continue - } - - blog.Infof("topology cache, received biz: %d, custom level %s: %d/%s, op-time: %s, changed event, rid: %s", - custom.Business, custom.Object, custom.ID, custom.Name, one.ClusterTime.String(), rid) - - bizList = append(bizList, custom.Business) - } - - bizList = util.IntArrayUnique(bizList) - - err := t.refreshBatch(bizList, rid) - if err != nil { - return true - } - - return false -} diff --git a/src/source_controller/cacheservice/cache/topotree/path.go b/src/source_controller/cacheservice/cache/topotree/path.go index 25ed3f52e1..cb324f05a5 100644 --- a/src/source_controller/cacheservice/cache/topotree/path.go +++ b/src/source_controller/cacheservice/cache/topotree/path.go @@ -13,13 +13,13 @@ package topotree import ( - "context" "errors" "fmt" "configcenter/src/common" "configcenter/src/common/blog" ccError "configcenter/src/common/errors" + "configcenter/src/common/http/rest" "configcenter/src/common/json" "configcenter/src/source_controller/cacheservice/cache/mainline" ) @@ -35,14 +35,12 @@ type TopologyTree struct { } // SearchNodePath TODO -func (t *TopologyTree) SearchNodePath(ctx context.Context, opt *SearchNodePathOption, - supplierAccount string) ([]NodePaths, error) { - +func (t *TopologyTree) SearchNodePath(kit *rest.Kit, opt *SearchNodePathOption) ([]NodePaths, error) { if opt.Business <= 0 { return nil, ccError.New(common.CCErrCommParamsIsInvalid, fmt.Sprintf("invalid bk_biz_id: %d", opt.Business)) } - topo, err := t.bizCache.GetTopology() + topo, err := t.bizCache.GetTopology(kit) if err != nil { return nil, err } @@ -81,50 +79,13 @@ func (t *TopologyTree) SearchNodePath(ctx context.Context, opt *SearchNodePathOp objNameMap := make(map[string]map[int64]string) all := make(map[string]map[int64][]Node) for object, instances := range objects { - - switch object { - case "host": - // TODO: support this later - return nil, ccError.New(common.CCErrCommParamsInvalid, "host") - - case "module": - nameMap, paths, err = t.genModuleParentPaths(ctx, opt.Business, supplierAccount, reverseTopo, instances) - if err != nil { - return nil, err - } - - all[object] = paths - objNameMap[object] = nameMap - - case "set": - nameMap, paths, err = t.genSetParentPaths(ctx, opt.Business, supplierAccount, reverseTopo, instances) - if err != nil { - return nil, err - } - - all[object] = paths - objNameMap[object] = nameMap - - default: - nameMap, paths, err = t.genCustomParentPaths(ctx, opt.Business, "set", - supplierAccount, reverseTopo, instances) - if err != nil { - return nil, err - } - - // trim the head path - for id, nodes := range paths { - if len(nodes) < 1 { - // normally, this can not be happen - continue - } - paths[id] = nodes[1:] - } - - all[object] = paths - objNameMap[object] = nameMap - + nameMap, paths, err = t.genParentPaths(kit, object, opt.Business, reverseTopo, instances) + if err != nil { + return nil, err } + + all[object] = paths + objNameMap[object] = nameMap } for obj, paths := range all { @@ -150,17 +111,48 @@ func (t *TopologyTree) SearchNodePath(ctx context.Context, opt *SearchNodePathOp return nodePath, nil } -func (t *TopologyTree) genModuleParentPaths(ctx context.Context, biz int64, supplierAccount string, revTopo []string, - moduleIDs []int64) (names map[int64]string, paths map[int64][]Node, err error) { +func (t *TopologyTree) genParentPaths(kit *rest.Kit, object string, bizID int64, reverseTopo []string, + instances []int64) (map[int64]string, map[int64][]Node, error) { - rid := ctx.Value(common.ContextRequestIDField) + switch object { + case 
"host": + // TODO: support this later + return nil, nil, ccError.New(common.CCErrCommParamsInvalid, "host") + + case "module": + return t.genModuleParentPaths(kit, bizID, reverseTopo, instances) + + case "set": + return t.genSetParentPaths(kit, bizID, reverseTopo, instances) + + default: + nameMap, paths, err := t.genCustomParentPaths(kit, bizID, "set", reverseTopo, instances) + if err != nil { + return nil, nil, err + } - moduleMap, setList, err := t.searchModules(ctx, biz, moduleIDs) + // trim the head path + for id, nodes := range paths { + if len(nodes) < 1 { + // normally, this can not be happened + continue + } + paths[id] = nodes[1:] + } + + return nameMap, paths, nil + } +} + +func (t *TopologyTree) genModuleParentPaths(kit *rest.Kit, biz int64, revTopo []string, + moduleIDs []int64) (names map[int64]string, paths map[int64][]Node, err error) { + + moduleMap, setList, err := t.searchModules(kit, biz, moduleIDs) if err != nil { return nil, nil, err } - setMap, _, err := t.searchSets(ctx, biz, setList) + setMap, _, err := t.searchSets(kit, biz, setList) if err != nil { return nil, nil, err } @@ -177,9 +169,9 @@ func (t *TopologyTree) genModuleParentPaths(ctx context.Context, biz int64, supp }) } - _, setPaths, err := t.genSetParentPaths(ctx, biz, supplierAccount, revTopo, setList) + _, setPaths, err := t.genSetParentPaths(kit, biz, revTopo, setList) if err != nil { - blog.Errorf("gen set paths failed, err: %v, rid: %v", err, rid) + blog.Errorf("gen set paths failed, err: %v, rid: %v", err, kit.Rid) return nil, nil, err } @@ -198,17 +190,15 @@ func (t *TopologyTree) genModuleParentPaths(ctx context.Context, biz int64, supp return nameMap, paths, nil } -func (t *TopologyTree) genSetParentPaths(ctx context.Context, biz int64, supplierAccount string, revTopo []string, - previousList []int64) (names map[int64]string, paths map[int64][]Node, err error) { - - rid := ctx.Value(common.ContextRequestIDField) +func (t *TopologyTree) genSetParentPaths(kit *rest.Kit, biz int64, revTopo []string, previousList []int64) ( + names map[int64]string, paths map[int64][]Node, err error) { nextNode, err := nextNode("set", revTopo) if err != nil { return nil, nil, err } - setMap, customList, err := t.searchSets(ctx, biz, previousList) + setMap, customList, err := t.searchSets(kit, biz, previousList) if err != nil { return nil, nil, err } @@ -219,7 +209,7 @@ func (t *TopologyTree) genSetParentPaths(ctx context.Context, biz int64, supplie paths = make(map[int64][]Node) // loop until we go to biz node. 
if nextNode == "biz" { - bizDetail, err := t.bizDetail(ctx, biz) + bizDetail, err := t.bizDetail(kit, biz) if err != nil { return nil, nil, err } @@ -239,9 +229,9 @@ func (t *TopologyTree) genSetParentPaths(ctx context.Context, biz int64, supplie return nameMap, paths, nil } - _, customPaths, err := t.genCustomParentPaths(ctx, biz, "set", supplierAccount, revTopo, customList) + _, customPaths, err := t.genCustomParentPaths(kit, biz, "set", revTopo, customList) if err != nil { - blog.Errorf("gen custom parent %s/%v paths failed, err: %v, rid: %v", nextNode, previousList, err, rid) + blog.Errorf("gen custom parent %s/%v paths failed, err: %v, rid: %v", nextNode, previousList, err, kit.Rid) return nil, nil, err } @@ -261,21 +251,17 @@ func (t *TopologyTree) genSetParentPaths(ctx context.Context, biz int64, supplie return nameMap, paths, nil } -func (t *TopologyTree) genCustomParentPaths(ctx context.Context, biz int64, prevNode, supplierAccount string, - revTopo []string, previousList []int64) (names map[int64]string, paths map[int64][]Node, err error) { - - rid := ctx.Value(common.ContextRequestIDField) +func (t *TopologyTree) genCustomParentPaths(kit *rest.Kit, biz int64, prevNode string, revTopo []string, + previousList []int64) (names map[int64]string, paths map[int64][]Node, err error) { - bizDetail, err := t.bizDetail(ctx, biz) + bizDetail, err := t.bizDetail(kit, biz) if err != nil { return nil, nil, err } nameMap := make(map[int64]string, 0) - paths = make(map[int64][]Node) for { - nextNode, err := nextNode(prevNode, revTopo) if err != nil { return nil, nil, err @@ -285,32 +271,24 @@ func (t *TopologyTree) genCustomParentPaths(ctx context.Context, biz int64, prev if nextNode == "biz" { // add biz path for _, id := range previousList { - paths[id] = append(paths[id], Node{ - Object: "biz", - InstanceID: bizDetail.ID, - InstanceName: bizDetail.Name, - ParentID: 0, - }) + paths[id] = append(paths[id], Node{Object: "biz", InstanceID: bizDetail.ID, + InstanceName: bizDetail.Name}) } return nameMap, paths, nil } - customMap, prevList, err := t.searchCustomInstances(ctx, nextNode, supplierAccount, previousList) + customMap, prevList, err := t.searchCustomInstances(kit, nextNode, previousList) if err != nil { - blog.Errorf("search supplier account %s custom instance %s/%v failed, err: %v, rid: %v", - supplierAccount, nextNode, previousList, err, rid) + blog.Errorf("search tenant %s custom instance %s/%v failed, err: %v, rid: %v", kit.TenantID, nextNode, + previousList, err, kit.Rid) return nil, nil, err } // first paths, as is the bottom topology if len(paths) == 0 { for id, cu := range customMap { - paths[id] = append(paths[id], Node{ - Object: nextNode, - InstanceID: cu.ID, - InstanceName: cu.Name, - ParentID: cu.ParentID, - }) + paths[id] = append(paths[id], Node{Object: nextNode, InstanceID: cu.ID, InstanceName: cu.Name, + ParentID: cu.ParentID}) // first custom's name id map, it's all we need. 
nameMap[id] = cu.Name } @@ -334,38 +312,31 @@ func (t *TopologyTree) genCustomParentPaths(ctx context.Context, biz int64, prev } if hit < 0 { - blog.Errorf("gen custom topo instance path, but got invalid nodes: %v, rid: %v", nodes, rid) + blog.Errorf("gen custom topo instance path, but got invalid nodes: %v, rid: %v", nodes, kit.Rid) return nil, nil, errors.New("got invalid custom topo nodes") } custom, exist := customMap[prev.ParentID] if !exist { - blog.Errorf("gen custom topo instance path, but can not find node: %v parent, rid: %v", prev, rid) + blog.Errorf("gen custom topo instance path, but can not find node: %v parent, rid: %v", prev, kit.Rid) return nil, nil, fmt.Errorf("can not find node %v parent", nodes) } - paths[id] = append(paths[id], Node{ - Object: nextNode, - InstanceID: custom.ID, - InstanceName: custom.Name, - ParentID: custom.ParentID, - }) + paths[id] = append(paths[id], Node{Object: nextNode, InstanceID: custom.ID, InstanceName: custom.Name, + ParentID: custom.ParentID}) } prevNode = nextNode previousList = prevList } - } -func (t *TopologyTree) searchModules(ctx context.Context, biz int64, moduleIDs []int64) ( +func (t *TopologyTree) searchModules(kit *rest.Kit, biz int64, moduleIDs []int64) ( moduleMap map[int64]*module, setList []int64, err error) { - rid := ctx.Value(common.ContextRequestIDField) - - ms, err := t.bizCache.ListModuleDetails(ctx, moduleIDs) + ms, err := t.bizCache.ListModuleDetails(kit, moduleIDs) if err != nil { - blog.Errorf("list module detail from cache failed, err: %v, rid: %v", err, rid) + blog.Errorf("list module detail from cache failed, err: %v, rid: %v", err, kit.Rid) return nil, nil, err } moduleMap = make(map[int64]*module) @@ -373,7 +344,7 @@ func (t *TopologyTree) searchModules(ctx context.Context, biz int64, moduleIDs [ for _, m := range ms { mod := new(module) if err := json.Unmarshal([]byte(m), mod); err != nil { - blog.Errorf("unmarshal module failed, err: %v, rid: %v", err, rid) + blog.Errorf("unmarshal module failed, err: %v, rid: %v", err, kit.Rid) return nil, nil, err } @@ -394,14 +365,12 @@ func (t *TopologyTree) searchModules(ctx context.Context, biz int64, moduleIDs [ return moduleMap, setList, nil } -func (t *TopologyTree) searchSets(ctx context.Context, biz int64, setIDs []int64) ( +func (t *TopologyTree) searchSets(kit *rest.Kit, biz int64, setIDs []int64) ( setMap map[int64]*set, parentList []int64, err error) { - rid := ctx.Value(common.ContextRequestIDField) - - setDetails, err := t.bizCache.ListSetDetails(ctx, setIDs) + setDetails, err := t.bizCache.ListSetDetails(kit, setIDs) if err != nil { - blog.Errorf("construct module path, but get set details failed, err: %v, rid: %v", err, rid) + blog.Errorf("construct module path, but get set details failed, err: %v, rid: %v", err, kit.Rid) return nil, nil, err } setMap = make(map[int64]*set) @@ -409,7 +378,7 @@ func (t *TopologyTree) searchSets(ctx context.Context, biz int64, setIDs []int64 for _, s := range setDetails { set := new(set) if err := json.Unmarshal([]byte(s), set); err != nil { - blog.Errorf("unmarshal set failed, err: %v, rid: %s", err, rid) + blog.Errorf("unmarshal set failed, err: %v, rid: %s", err, kit.Rid) return nil, nil, err } @@ -428,12 +397,10 @@ func (t *TopologyTree) searchSets(ctx context.Context, biz int64, setIDs []int64 return setMap, parentList, nil } -func (t *TopologyTree) searchCustomInstances(ctx context.Context, object, supplierAccount string, instIDs []int64) ( +func (t *TopologyTree) searchCustomInstances(kit *rest.Kit, object string, 
instIDs []int64) ( instMap map[int64]*custom, parentList []int64, err error) { - rid := ctx.Value(common.ContextRequestIDField) - - instances, err := t.bizCache.ListCustomLevelDetail(ctx, object, supplierAccount, instIDs) + instances, err := t.bizCache.ListCustomLevelDetail(kit, object, instIDs) if err != nil { blog.Errorf("list custom level %s instances: %v failed, err: %v", object, instIDs, err) return nil, nil, err @@ -443,7 +410,7 @@ func (t *TopologyTree) searchCustomInstances(ctx context.Context, object, suppli for _, inst := range instances { c := new(custom) if err := json.Unmarshal([]byte(inst), c); err != nil { - blog.Errorf("unmarshal custom level failed, detail: %s, err: %v, rid: %v", inst, err, rid) + blog.Errorf("unmarshal custom level failed, detail: %s, err: %v, rid: %v", inst, err, kit.Rid) return nil, nil, err } @@ -454,19 +421,17 @@ func (t *TopologyTree) searchCustomInstances(ctx context.Context, object, suppli return instMap, parentList, nil } -func (t *TopologyTree) bizDetail(ctx context.Context, bizID int64) ( +func (t *TopologyTree) bizDetail(kit *rest.Kit, bizID int64) ( *biz, error) { - rid := ctx.Value(common.ContextRequestIDField) - - business, err := t.bizCache.GetBusiness(ctx, bizID) + business, err := t.bizCache.GetBusiness(kit, bizID) if err != nil { - return nil, fmt.Errorf("get biz: %d detail failed, err: %v, rid: %v", bizID, err, rid) + return nil, fmt.Errorf("get biz: %d detail failed, err: %v, rid: %v", bizID, err, kit.Rid) } detail := new(biz) if err := json.Unmarshal([]byte(business), detail); err != nil { - blog.Errorf("unmarshal business %s failed, err: %v, rid: %v", business, err, rid) + blog.Errorf("unmarshal business %s failed, err: %v, rid: %v", business, err, kit.Rid) return nil, err } diff --git a/src/source_controller/cacheservice/event/loop/loop_watch.go b/src/source_controller/cacheservice/event/loop/loop_watch.go new file mode 100644 index 0000000000..0bbbd60ac3 --- /dev/null +++ b/src/source_controller/cacheservice/event/loop/loop_watch.go @@ -0,0 +1,84 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
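A short sketch of what a caller of the kit-based topotree API might look like after this change; the tenant id, business id and `tree` value are placeholders, and SearchNodePathOption fields other than Business are omitted because they are not shown in this patch:

```go
package example

import (
	"context"

	"configcenter/src/common/http/rest"
	"configcenter/src/source_controller/cacheservice/cache/topotree"
)

// searchPaths resolves topology paths for nodes of one business, passing
// tenant and request scoping through rest.Kit instead of a bare context plus
// supplier account.
func searchPaths(tree *topotree.TopologyTree, tenantID string, bizID int64) ([]topotree.NodePaths, error) {
	kit := rest.NewKit().WithCtx(context.Background()).WithTenant(tenantID)

	opt := &topotree.SearchNodePathOption{
		Business: bizID,
		// node selection fields omitted here; see the option definition
	}
	return tree.SearchNodePath(kit, opt)
}
```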
+ */ + +// Package loop defines event loop watcher +package loop + +import ( + "context" + + "configcenter/src/apimachinery/discovery" + "configcenter/src/common/blog" + "configcenter/src/common/watch" + "configcenter/src/source_controller/cacheservice/event" + watchcli "configcenter/src/source_controller/cacheservice/event/watch" + "configcenter/src/storage/stream/types" +) + +// LoopWatcher is the loop watch event flow client +type LoopWatcher struct { + isMaster discovery.ServiceManageInterface + watchCli *watchcli.Client +} + +// NewLoopWatcher new loop watch event flow client +func NewLoopWatcher(isMaster discovery.ServiceManageInterface, watchCli *watchcli.Client) *LoopWatcher { + return &LoopWatcher{ + isMaster: isMaster, + watchCli: watchCli, + } +} + +// LoopWatchTaskOptions is the loop watch event flow task options +type LoopWatchTaskOptions struct { + Name string + CursorType watch.CursorType + TokenHandler types.TaskTokenHandler + EventHandler EventHandler + TenantChan <-chan TenantEvent +} + +// TenantEvent is the tenant change event for loop watch task +type TenantEvent struct { + EventType watch.EventType + TenantID string + IsAllTenant bool + WatchOpts *watch.WatchEventOptions +} + +// AddLoopWatchTask add a loop watch task +func (w *LoopWatcher) AddLoopWatchTask(opts *LoopWatchTaskOptions) error { + key, err := event.GetResourceKeyWithCursorType(opts.CursorType) + if err != nil { + blog.Errorf("get task %s resource key with cursor type %s failed, err: %v", opts.Name, opts.CursorType, err) + return err + } + + task := &loopWatchTask{ + name: opts.Name, + key: key, + isMaster: w.isMaster, + watchCli: w.watchCli, + tokenHandler: opts.TokenHandler, + eventHandler: opts.EventHandler, + tenantChan: opts.TenantChan, + tenantCancelFunc: make(map[string]context.CancelFunc), + } + go task.run() + + return nil +} diff --git a/src/source_controller/cacheservice/event/loop/task.go b/src/source_controller/cacheservice/event/loop/task.go new file mode 100644 index 0000000000..a631b2366f --- /dev/null +++ b/src/source_controller/cacheservice/event/loop/task.go @@ -0,0 +1,207 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
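A sketch of how a consumer might register a task with the new LoopWatcher and feed it tenant events; `isMaster`, `watchCli`, `cursorType` and `tokenHandler` stand for values the caller already owns, the task and tenant names are placeholders, and the handler body is illustrative only:

```go
package example

import (
	"configcenter/src/apimachinery/discovery"
	"configcenter/src/common/blog"
	"configcenter/src/common/http/rest"
	"configcenter/src/common/watch"
	"configcenter/src/source_controller/cacheservice/event/loop"
	watchcli "configcenter/src/source_controller/cacheservice/event/watch"
	"configcenter/src/storage/stream/types"
)

// handleEvents satisfies the loop.EventHandler signature; a real handler
// would refresh its cache based on the event details.
func handleEvents(kit *rest.Kit, events []*watch.WatchEventDetail) error {
	blog.Infof("received %d watch events, rid: %s", len(events), kit.Rid)
	return nil
}

// registerTask wires one loop watch task and starts it for a single tenant.
func registerTask(isMaster discovery.ServiceManageInterface, watchCli *watchcli.Client,
	cursorType watch.CursorType, tokenHandler types.TaskTokenHandler) (chan<- loop.TenantEvent, error) {

	watcher := loop.NewLoopWatcher(isMaster, watchCli)
	tenantChan := make(chan loop.TenantEvent, 10)

	err := watcher.AddLoopWatchTask(&loop.LoopWatchTaskOptions{
		Name:         "example_task", // placeholder task name
		CursorType:   cursorType,
		TokenHandler: tokenHandler,
		EventHandler: handleEvents,
		TenantChan:   tenantChan,
	})
	if err != nil {
		return nil, err
	}

	// start watching for one tenant; sending a watch.Delete event later stops
	// that tenant's task again.
	tenantChan <- loop.TenantEvent{
		EventType: watch.Create,
		TenantID:  "tenant1", // placeholder tenant id
		WatchOpts: &watch.WatchEventOptions{},
	}

	return tenantChan, nil
}
```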
+ */ + +// Package loop defines event loop watcher +package loop + +import ( + "context" + "sync" + "time" + + "configcenter/src/apimachinery/discovery" + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/errors" + "configcenter/src/common/http/rest" + "configcenter/src/common/util" + "configcenter/src/common/watch" + "configcenter/src/source_controller/cacheservice/event" + watchcli "configcenter/src/source_controller/cacheservice/event/watch" + "configcenter/src/storage/driver/mongodb" + "configcenter/src/storage/stream/types" +) + +type loopWatchTask struct { + name string + key event.Key + isMaster discovery.ServiceManageInterface + watchCli *watchcli.Client + tokenHandler types.TaskTokenHandler + eventHandler EventHandler + tenantChan <-chan TenantEvent + + mu sync.Mutex + tenantCancelFunc map[string]context.CancelFunc +} + +// run loop watch task +func (t *loopWatchTask) run() { + for e := range t.tenantChan { + switch e.EventType { + case watch.Create: + t.startTenantTask(e.TenantID, e.WatchOpts) + case watch.Delete: + t.stopTenantTask(e.TenantID, e.IsAllTenant) + case watch.Update: + t.stopTenantTask(e.TenantID, e.IsAllTenant) + t.startTenantTask(e.TenantID, e.WatchOpts) + } + } +} + +// startTenantTask start loop watch task for new tenant +func (t *loopWatchTask) startTenantTask(tenantID string, opts *watch.WatchEventOptions) { + t.mu.Lock() + defer t.mu.Unlock() + + if _, exists := t.tenantCancelFunc[tenantID]; exists { + return + } + + ctx, cancel := context.WithCancel(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode)) + kit := rest.NewKit().WithCtx(ctx).WithTenant(tenantID) + + t.tenantCancelFunc[tenantID] = cancel + + go t.loopWatch(kit, opts) + blog.Infof("start tenant %s loop watch task %s, rid: %s", tenantID, t.name, kit.Rid) +} + +// stopTenantTask stop loop watch task for removed or disabled tenant +func (t *loopWatchTask) stopTenantTask(tenantID string, isAllTenant bool) { + t.mu.Lock() + defer t.mu.Unlock() + + if isAllTenant { + for id, cancel := range t.tenantCancelFunc { + cancel() + blog.Infof("stop tenant %s loop watch task %s", id, t.name) + } + t.tenantCancelFunc = make(map[string]context.CancelFunc) + return + } + + if cancel, exists := t.tenantCancelFunc[tenantID]; exists { + cancel() + delete(t.tenantCancelFunc, tenantID) + blog.Infof("stop tenant %s loop watch task %s", tenantID, t.name) + } +} + +// LoopWatch loop watch event flow +func (t *loopWatchTask) loopWatch(kit *rest.Kit, opts *watch.WatchEventOptions) { + go func() { + prevStatus := t.isMaster.IsMaster() + for { + select { + case <-kit.Ctx.Done(): + return + default: + } + + isMaster := t.isMaster.IsMaster() + if !isMaster { + prevStatus = false + blog.V(4).Infof("watch %s event, but not master, skip.", t.name) + time.Sleep(time.Minute) + continue + } + + // need watch status changed, re-watch from the last cursor with renewed watch resource type + if !prevStatus { + prevStatus = isMaster + var err error + token, err := t.tokenHandler.GetStartWatchToken(kit.Ctx, kit.TenantID, + mongodb.Dal("watch").Shard(kit.ShardOpts())) + if err != nil { + blog.Errorf("get %s start watch token failed, err: %v, rid: %s", t.name, err, kit.Rid) + time.Sleep(500 * time.Millisecond) + continue + } + opts.Cursor = token.Token + } + + retryWrapper(5, func() error { + return t.doWatch(kit, opts) + }) + } + }() +} + +// EventHandler is the event handler +type EventHandler func(kit *rest.Kit, events []*watch.WatchEventDetail) error + +// DoWatch do watch event 
for one step +func (t *loopWatchTask) doWatch(kit *rest.Kit, opts *watch.WatchEventOptions) error { + var events []*watch.WatchEventDetail + var err error + if opts.Cursor == "" { + lastEvent, err := t.watchCli.WatchFromNow(kit, t.key, opts) + if err != nil { + blog.Errorf("watch %s event from now failed, re-watch again, err: %v, rid: %s", t.name, err, kit.Rid) + return err + } + events = []*watch.WatchEventDetail{lastEvent} + } else { + events, err = t.watchCli.WatchWithCursor(kit, t.key, opts) + if err != nil { + if ccErr, ok := err.(errors.CCErrorCoder); ok && ccErr.GetCode() == common.CCErrEventChainNodeNotExist { + // the cursor does not exist, re-watch from now + opts.Cursor = "" + if err = t.tokenHandler.SetLastWatchToken(kit.Ctx, kit.TenantID, + mongodb.Dal("watch").Shard(kit.ShardOpts()), &types.TokenInfo{Token: ""}); err != nil { + blog.Errorf("reset %s watch token failed, err: %v, rid: %s", t.name, err, kit.Rid) + return err + } + + blog.Errorf("watch event failed, re-watch from now, err: %v, opt: %+v, rid: %s", err, + *opts, kit.Rid) + return ccErr + } + blog.Errorf("watch event failed, err: %v, opt: %+v, rid: %s", err, *opts, kit.Rid) + return err + } + } + + if len(events) == 0 { + return nil + } + + if err = t.eventHandler(kit, events); err != nil { + blog.Errorf("handle %s events failed, err: %v, events: %+v, rid: %s", t.name, err, events, kit.Rid) + return err + } + + opts.Cursor = events[len(events)-1].Cursor + if err = t.tokenHandler.SetLastWatchToken(kit.Ctx, kit.TenantID, mongodb.Dal("watch").Shard(kit.ShardOpts()), + &types.TokenInfo{Token: opts.Cursor}); err != nil { + blog.Errorf("set %s watch token to %s failed, err: %v, rid: %s", t.name, opts.Cursor, err, kit.Rid) + return err + } + return nil +} + +func retryWrapper(maxRetry int, handler func() error) { + for retry := 0; retry < maxRetry; retry++ { + err := handler() + if err == nil { + return + } + time.Sleep(500 * time.Millisecond * time.Duration(retry)) + } +} diff --git a/src/source_controller/cacheservice/service/cache.go b/src/source_controller/cacheservice/service/cache.go index 62b2538f87..1b561a86ca 100644 --- a/src/source_controller/cacheservice/service/cache.go +++ b/src/source_controller/cacheservice/service/cache.go @@ -56,7 +56,7 @@ func (s *cacheService) SearchHostWithInnerIPInCache(ctx *rest.Contexts) { Keys: []string{general.IPCloudIDKey(opt.InnerIP, opt.CloudID)}, Fields: opt.Fields, } - details, err := s.cacheSet.General.ListDetailByUniqueKey(ctx.Kit, listOpt, true) + details, err := s.cacheSet.General.ListDetailByUniqueKey(ctx.Kit, listOpt) if err != nil { ctx.RespAutoError(err) return @@ -92,7 +92,7 @@ func (s *cacheService) SearchHostWithAgentIDInCache(ctx *rest.Contexts) { Keys: []string{general.AgentIDKey(opt.AgentID)}, Fields: opt.Fields, } - details, err := s.cacheSet.General.ListDetailByUniqueKey(ctx.Kit, listOpt, true) + details, err := s.cacheSet.General.ListDetailByUniqueKey(ctx.Kit, listOpt) if err != nil { ctx.RespAutoError(err) return @@ -126,7 +126,7 @@ func (s *cacheService) SearchHostWithHostIDInCache(ctx *rest.Contexts) { IDs: []int64{opt.HostID}, Fields: opt.Fields, } - details, err := s.cacheSet.General.ListDetailByIDs(ctx.Kit, listOpt, true) + details, err := s.cacheSet.General.ListDetailByIDs(ctx.Kit, listOpt) if err != nil { ctx.RespAutoError(err) return @@ -171,7 +171,7 @@ func (s *cacheService) listHostWithHostIDInCache(kit *rest.Kit, ids []int64, fie IDs: ids, Fields: fields, } - details, err := s.cacheSet.General.ListDetailByIDs(kit, listOpt, true) + details, err := 
s.cacheSet.General.ListDetailByIDs(kit, listOpt) if err != nil { return nil, err } @@ -191,7 +191,7 @@ func (s *cacheService) ListHostWithPageInCache(ctx *rest.Contexts) { if len(opt.HostIDs) > 0 { cntCond := mapstr.MapStr{common.BKHostIDField: mapstr.MapStr{common.BKDBIN: opt.HostIDs}} - cnt, err := mongodb.Client().Table(common.BKTableNameBaseHost).Find(cntCond).Count(ctx.Kit.Ctx) + cnt, err := mongodb.Shard(ctx.Kit.ShardOpts()).Table(common.BKTableNameBaseHost).Find(cntCond).Count(ctx.Kit.Ctx) if err != nil { blog.Errorf("count host failed, err: %v, cond: %+v, rid: %s", err, cntCond, ctx.Kit.Rid) ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, err.Error()) @@ -249,7 +249,7 @@ func (s *cacheService) ListBusinessInCache(ctx *rest.Contexts) { return } - details, err := s.cacheSet.Business.ListBusiness(ctx.Kit.Ctx, opt) + details, err := s.cacheSet.Business.ListBusiness(ctx.Kit, opt) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "list business with id in cache failed, err: %v", err) return @@ -266,7 +266,7 @@ func (s *cacheService) ListModulesInCache(ctx *rest.Contexts) { return } - details, err := s.cacheSet.Business.ListModules(ctx.Kit.Ctx, opt) + details, err := s.cacheSet.Business.ListModules(ctx.Kit, opt) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "list modules with id in cache failed, err: %v", err) return @@ -283,7 +283,7 @@ func (s *cacheService) ListSetsInCache(ctx *rest.Contexts) { return } - details, err := s.cacheSet.Business.ListSets(ctx.Kit.Ctx, opt) + details, err := s.cacheSet.Business.ListSets(ctx.Kit, opt) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "list sets with id in cache failed, err: %v", err) return @@ -298,7 +298,7 @@ func (s *cacheService) SearchBusinessInCache(ctx *rest.Contexts) { ctx.RespErrorCodeOnly(common.CCErrCommParamsIsInvalid, "invalid biz id") return } - biz, err := s.cacheSet.Business.GetBusiness(ctx.Kit.Ctx, bizID) + biz, err := s.cacheSet.Business.GetBusiness(ctx.Kit, bizID) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "search biz with id in cache, but get biz failed, err: %v", err) @@ -315,7 +315,7 @@ func (s *cacheService) SearchSetInCache(ctx *rest.Contexts) { return } - set, err := s.cacheSet.Business.GetSet(ctx.Kit.Ctx, setID) + set, err := s.cacheSet.Business.GetSet(ctx.Kit, setID) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "search set with id in cache failed, err: %v", err) return @@ -331,7 +331,7 @@ func (s *cacheService) SearchModuleInCache(ctx *rest.Contexts) { return } - module, err := s.cacheSet.Business.GetModuleDetail(ctx.Kit.Ctx, moduleID) + module, err := s.cacheSet.Business.GetModuleDetail(ctx.Kit, moduleID) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "search module with id in cache failed, err: %v", err) return @@ -349,7 +349,7 @@ func (s *cacheService) SearchCustomLayerInCache(ctx *rest.Contexts) { return } - inst, err := s.cacheSet.Business.GetCustomLevelDetail(ctx.Kit.Ctx, objID, ctx.Kit.TenantID, instID) + inst, err := s.cacheSet.Business.GetCustomLevelDetail(ctx.Kit, objID, instID) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "search custom layer with id in cache failed, err: %v", err) @@ -380,7 +380,7 @@ func (s *cacheService) SearchBizTopologyNodePath(ctx *rest.Contexts) { opt.Business = bizID - paths, err := s.cacheSet.Tree.SearchNodePath(ctx.Kit.Ctx, opt, ctx.Kit.TenantID) + paths, err := s.cacheSet.Tree.SearchNodePath(ctx.Kit, opt) if err 
!= nil { ctx.RespAutoError(err) return @@ -399,7 +399,7 @@ func (s *cacheService) SearchBusinessBriefTopology(ctx *rest.Contexts) { return } - topo, err := s.cacheSet.Topology.GetBizTopology(ctx.Kit, bizID) + topo, err := s.cacheSet.Topo.GetBizTopo(ctx.Kit, string(types.BriefType), &types.GetBizTopoOption{BizID: bizID}) if err != nil { ctx.RespErrorCodeOnly(common.CCErrCommDBSelectFailed, "search biz topology, select db failed, err: %v", err) return @@ -861,7 +861,7 @@ func (s *cacheService) ListGeneralCacheByIDs(cts *rest.Contexts) { return } - details, err := s.cacheSet.General.ListDetailByIDs(cts.Kit, opt, false) + details, err := s.cacheSet.General.ListDetailByIDs(cts.Kit, opt) if err != nil { cts.RespAutoError(err) return @@ -894,7 +894,7 @@ func (s *cacheService) ListGeneralCacheByUniqueKey(cts *rest.Contexts) { return } - details, err := s.cacheSet.General.ListDetailByUniqueKey(cts.Kit, opt, true) + details, err := s.cacheSet.General.ListDetailByUniqueKey(cts.Kit, opt) if err != nil { cts.RespAutoError(err) return diff --git a/src/source_controller/cacheservice/service/service.go b/src/source_controller/cacheservice/service/service.go index 1a3a92f2ff..86fb56f36f 100644 --- a/src/source_controller/cacheservice/service/service.go +++ b/src/source_controller/cacheservice/service/service.go @@ -16,7 +16,6 @@ package service import ( "fmt" "net/http" - "time" "configcenter/src/ac/extensions" "configcenter/src/ac/iam" @@ -36,8 +35,7 @@ import ( "configcenter/src/source_controller/cacheservice/event/flow" "configcenter/src/source_controller/cacheservice/event/identifier" "configcenter/src/source_controller/coreservice/core" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/reflector" + "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" "configcenter/src/thirdparty/logplatform/opentelemetry" @@ -98,20 +96,13 @@ func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, er watchTaskOpt := &types.NewTaskOptions{ StopNotifier: make(<-chan struct{}), } - - event, eventErr := reflector.NewReflector(s.cfg.Mongo.GetMongoConf()) - if eventErr != nil { - blog.Errorf("new reflector failed, err: %v", eventErr) - return eventErr - } - - watchDB, dbErr := local.NewMgo(s.cfg.WatchMongo.GetMongoConf(), time.Minute) - if dbErr != nil { - blog.Errorf("new watch mongo client failed, err: %v", dbErr) - return dbErr + watchTask, taskErr := task.New(mongodb.Dal(), mongodb.Dal("watch"), engine.ServiceManageInterface, watchTaskOpt) + if taskErr != nil { + blog.Errorf("new watch task instance failed, err: %v", taskErr) + return taskErr } - c, cacheErr := cacheop.NewCache(event, loopW, engine.ServiceManageInterface, watchDB) + c, cacheErr := cacheop.NewCache(watchTask, engine.ServiceManageInterface) if cacheErr != nil { blog.Errorf("new cache instance failed, err: %v", cacheErr) return cacheErr From e85097879ea4be16af85fab2af31c577b280d6e7 Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 17:38:07 +0800 Subject: [PATCH 07/10] feat: transfer service watch logics support sharding --story=120702866 --- src/common/tablenames.go | 2 + .../transfer-service/app/server.go | 9 +- .../transfer-service/service/service.go | 17 ++- .../transfer-service/sync/sync.go | 15 +-- .../sync/watch/token_handler.go | 97 ++++++++-------- .../transfer-service/sync/watch/watch.go | 18 +-- .../transfer-service/sync/watch/watch_api.go | 22 ++-- .../transfer-service/sync/watch/watch_db.go | 
106 ++++-------------- 8 files changed, 121 insertions(+), 165 deletions(-) diff --git a/src/common/tablenames.go b/src/common/tablenames.go index f642f0cbcc..b73f773947 100644 --- a/src/common/tablenames.go +++ b/src/common/tablenames.go @@ -307,6 +307,8 @@ var platformTableWithTenantMap = map[string]struct{}{ BKTableNameAPITaskSyncHistory: {}, BKTableNameFullSyncCond: {}, BKTableNameCacheWatchToken: {}, + "SrcSyncDataToken": {}, + "SrcSyncDataCursor": {}, } // IsPlatformTableWithTenant returns if the target table is a platform table with tenant id field diff --git a/src/source_controller/transfer-service/app/server.go b/src/source_controller/transfer-service/app/server.go index 3feb6a92f6..0c38c0aca6 100644 --- a/src/source_controller/transfer-service/app/server.go +++ b/src/source_controller/transfer-service/app/server.go @@ -120,7 +120,12 @@ func (s *TransferService) initResource(exSyncConfFile string) error { return fmt.Errorf("get mongo config failed, err: %v", err) } - if err = mongodb.InitClient("", &s.Config.Mongo); err != nil { + cryptoConf, err := cc.Crypto("crypto") + if err != nil { + return fmt.Errorf("get crypto config failed, err: %v", err) + } + + if err = mongodb.SetShardingCli("", &s.Config.Mongo, cryptoConf); err != nil { return fmt.Errorf("init mongo client failed, err: %v", err) } @@ -129,7 +134,7 @@ func (s *TransferService) initResource(exSyncConfFile string) error { return fmt.Errorf("get watch mongo config failed, err: %v", err) } - if err = mongodb.InitClient("watch", &s.Config.WatchMongo); err != nil { + if err = mongodb.SetWatchCli("watch", &s.Config.WatchMongo, cryptoConf); err != nil { return fmt.Errorf("init watch mongo client failed, err: %v", err) } diff --git a/src/source_controller/transfer-service/service/service.go b/src/source_controller/transfer-service/service/service.go index f0ad220ab2..3a0d1430fd 100644 --- a/src/source_controller/transfer-service/service/service.go +++ b/src/source_controller/transfer-service/service/service.go @@ -29,7 +29,9 @@ import ( "configcenter/src/common/webservice/restfulservice" "configcenter/src/source_controller/transfer-service/app/options" "configcenter/src/source_controller/transfer-service/sync" - "configcenter/src/storage/stream" + "configcenter/src/storage/driver/mongodb" + "configcenter/src/storage/stream/task" + "configcenter/src/storage/stream/types" "configcenter/src/thirdparty/logplatform/opentelemetry" "github.com/emicklei/go-restful/v3" @@ -43,13 +45,16 @@ type Service struct { // New Service func New(conf *options.Config, engine *backbone.Engine) (*Service, error) { - loopW, err := stream.NewLoopStream(conf.Mongo.GetMongoConf(), engine.ServiceManageInterface) - if err != nil { - blog.Errorf("new loop stream failed, err: %v", err) - return nil, err + watchTaskOpt := &types.NewTaskOptions{ + StopNotifier: make(<-chan struct{}), + } + watchTask, taskErr := task.New(mongodb.Dal(), mongodb.Dal("watch"), engine.ServiceManageInterface, watchTaskOpt) + if taskErr != nil { + blog.Errorf("new watch task instance failed, err: %v", taskErr) + return nil, taskErr } - syncer, err := sync.NewSyncer(conf, engine.ServiceManageInterface, loopW, engine.CoreAPI.CacheService(), + syncer, err := sync.NewSyncer(conf, engine.ServiceManageInterface, watchTask, engine.CoreAPI.CacheService(), engine.Metric().Registry()) if err != nil { blog.Errorf("new syncer failed, err: %v", err) diff --git a/src/source_controller/transfer-service/sync/sync.go b/src/source_controller/transfer-service/sync/sync.go index 9d39fd33c8..473ddf57c5 
100644 --- a/src/source_controller/transfer-service/sync/sync.go +++ b/src/source_controller/transfer-service/sync/sync.go @@ -34,8 +34,9 @@ import ( "configcenter/src/source_controller/transfer-service/sync/medium" "configcenter/src/source_controller/transfer-service/sync/metadata" "configcenter/src/source_controller/transfer-service/sync/watch" + "configcenter/src/storage/dal/mongo/sharding" "configcenter/src/storage/driver/mongodb" - "configcenter/src/storage/stream" + "configcenter/src/storage/stream/task" "github.com/prometheus/client_golang/prometheus" "github.com/tidwall/gjson" @@ -50,7 +51,7 @@ type Syncer struct { } // NewSyncer new cmdb data syncer -func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, loopW stream.LoopInterface, +func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, task *task.Task, cacheCli cacheservice.CacheServiceClientInterface, reg prometheus.Registerer) (*Syncer, error) { if !conf.Sync.EnableSync { @@ -60,8 +61,8 @@ func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, // check if id generator is enabled, can only start syncing when id generator is enabled configAdminCond := map[string]interface{}{"_id": common.ConfigAdminID} configAdminData := make(map[string]string) - err := mongodb.Client().Table(common.BKTableNameSystem).Find(configAdminCond).Fields(common.ConfigAdminValueField). - One(context.Background(), &configAdminData) + err := mongodb.Shard(sharding.NewShardOpts().WithIgnoreTenant()).Table(common.BKTableNameSystem). + Find(configAdminCond).Fields(common.ConfigAdminValueField).One(context.Background(), &configAdminData) if err != nil { blog.Errorf("get config admin data failed, err: %v, cond: %+v", err, configAdminCond) return nil, err @@ -111,7 +112,7 @@ func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, } } - err = syncer.run(conf, loopW, transMedium, cacheCli) + err = syncer.run(conf, task, transMedium, cacheCli) if err != nil { return nil, err } @@ -163,7 +164,7 @@ func parseDestExConf(conf *options.Config) (map[types.ResType]map[string][]optio return idRuleMap, innerDataIDMap } -func (s *Syncer) run(conf *options.Config, loopW stream.LoopInterface, transMedium medium.ClientI, +func (s *Syncer) run(conf *options.Config, task *task.Task, transMedium medium.ClientI, cacheCli cacheservice.CacheServiceClientInterface) error { switch conf.Sync.Role { @@ -174,7 +175,7 @@ func (s *Syncer) run(conf *options.Config, loopW stream.LoopInterface, transMedi return nil } - watcher, err := watch.New(conf.Sync.Name, loopW, s.isMaster, s.metadata, cacheCli, transMedium) + watcher, err := watch.New(conf.Sync.Name, task, s.isMaster, s.metadata, cacheCli, transMedium) if err != nil { blog.Errorf("new watcher failed, err: %v", err) return err diff --git a/src/source_controller/transfer-service/sync/watch/token_handler.go b/src/source_controller/transfer-service/sync/watch/token_handler.go index b320c0f213..12c061350a 100644 --- a/src/source_controller/transfer-service/sync/watch/token_handler.go +++ b/src/source_controller/transfer-service/sync/watch/token_handler.go @@ -19,20 +19,26 @@ package watch import ( "context" + "time" synctypes "configcenter/pkg/synchronize/types" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" "configcenter/src/common/watch" + "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/driver/mongodb" 
"configcenter/src/storage/stream/types" ) -const tokenTable = "cc_SrcSyncDataToken" +const ( + tokenTable = "SrcSyncDataToken" + cursorTable = "SrcSyncDataCursor" +) -var _ types.TokenHandler = new(tokenHandler) +var _ types.TaskTokenHandler = new(tokenHandler) // tokenHandler is cmdb data syncer event token handler type tokenHandler struct { @@ -44,62 +50,65 @@ func newTokenHandler(resource synctypes.ResType) *tokenHandler { return &tokenHandler{resource: resource} } -// tokenInfo is cmdb data syncer event token info -type tokenInfo struct { - Resource synctypes.ResType `bson:"resource"` - Token string `bson:"token"` - Cursor map[watch.CursorType]string `bson:"cursor"` - StartAtTime *metadata.Time `bson:"start_at_time"` -} - // SetLastWatchToken set last event watch token -func (t *tokenHandler) SetLastWatchToken(ctx context.Context, token string) error { +func (t *tokenHandler) SetLastWatchToken(ctx context.Context, uuid string, watchDB local.DB, + token *types.TokenInfo) error { + tokenData := mapstr.MapStr{ - common.BKTokenField: token, + common.BKTokenField: token, + common.BKStartAtTimeField: token.StartAtTime, } - return t.setWatchTokenInfo(ctx, tokenData) -} - -// GetStartWatchToken get event start watch token -func (t *tokenHandler) GetStartWatchToken(ctx context.Context) (string, error) { - info, err := t.getWatchTokenInfo(ctx, common.BKTokenField) - if err != nil { - return "", err + filter := map[string]interface{}{ + "resource": watch.GenDBWatchTokenID(uuid, string(t.resource)), } - return info.Token, nil + if err := watchDB.Table(tokenTable).Upsert(ctx, filter, tokenData); err != nil { + blog.Errorf("set %s watch token info failed, data: %+v, err: %v", t.resource, tokenData, err) + return err + } + return nil } -// resetWatchToken reset watch token and start watch time -func (t *tokenHandler) resetWatchToken(startAtTime types.TimeStamp) error { +// GetStartWatchToken get event start watch token +func (t *tokenHandler) GetStartWatchToken(ctx context.Context, uuid string, watchDB local.DB) (*types.TokenInfo, + error) { + filter := map[string]interface{}{ - "resource": t.resource, - } - data := mapstr.MapStr{ - common.BKCursorField: make(map[watch.CursorType]string), - common.BKTokenField: "", - common.BKStartAtTimeField: startAtTime, + "resource": watch.GenDBWatchTokenID(uuid, string(t.resource)), } - if err := mongodb.Client("watch").Table(tokenTable).Upsert(context.Background(), filter, data); err != nil { - blog.Errorf("reset %s watch token failed, data: %+v, err: %v", t.resource, data, err) - return err + info := new(types.TokenInfo) + if err := watchDB.Table(tokenTable).Find(filter).One(ctx, &info); err != nil { + if mongodb.IsNotFoundError(err) { + return &types.TokenInfo{Token: "", StartAtTime: &types.TimeStamp{Sec: uint32(time.Now().Unix())}}, nil + } + blog.Errorf("get %s event watch token info failed, err: %v", t.resource, err) + return nil, err } - return nil + + return info, nil +} + +// cursorInfo is cmdb data syncer event token info +type cursorInfo struct { + Resource synctypes.ResType `bson:"resource"` + Cursor map[watch.CursorType]string `bson:"cursor"` + StartAtTime *metadata.Time `bson:"start_at_time"` } -// getWatchTokenInfo get event watch token info -func (t *tokenHandler) getWatchTokenInfo(ctx context.Context, fields ...string) (*tokenInfo, error) { +// getWatchCursorInfo get event watch token info +func (t *tokenHandler) getWatchCursorInfo(kit *rest.Kit) (*cursorInfo, error) { filter := map[string]interface{}{ "resource": t.resource, } - info := 
new(tokenInfo)
-	if err := mongodb.Client("watch").Table(tokenTable).Find(filter).Fields(fields...).One(ctx, &info); err != nil {
-		if mongodb.Client("watch").IsNotFoundError(err) {
-			return new(tokenInfo), nil
+	info := new(cursorInfo)
+	err := mongodb.Dal("watch").Shard(kit.ShardOpts()).Table(cursorTable).Find(filter).One(kit.Ctx, &info)
+	if err != nil {
+		if mongodb.IsNotFoundError(err) {
+			return new(cursorInfo), nil
 		}
-		blog.Errorf("get %s event watch token info failed, err: %v", t.resource, err)
+		blog.Errorf("get %s event watch token info failed, err: %v, rid: %s", t.resource, err, kit.Rid)
 		return nil, err
 	}
@@ -110,14 +119,14 @@ func (t *tokenHandler) getWatchTokenInfo(ctx context.Context, fields ...string)
 	return info, nil
 }
 
-// getWatchTokenInfo get event watch token info
-func (t *tokenHandler) setWatchTokenInfo(ctx context.Context, data mapstr.MapStr) error {
+// setWatchCursorInfo set event watch cursor info
+func (t *tokenHandler) setWatchCursorInfo(kit *rest.Kit, data mapstr.MapStr) error {
 	filter := map[string]interface{}{
 		"resource": t.resource,
 	}
 
-	if err := mongodb.Client("watch").Table(tokenTable).Upsert(ctx, filter, data); err != nil {
-		blog.Errorf("set %s watch token info failed, data: %+v, err: %v", t.resource, data, err)
+	if err := mongodb.Dal("watch").Shard(kit.ShardOpts()).Table(cursorTable).Upsert(kit.Ctx, filter, data); err != nil {
+		blog.Errorf("set %s watch token info failed, data: %+v, err: %v, rid: %s", t.resource, data, err, kit.Rid)
 		return err
 	}
 
diff --git a/src/source_controller/transfer-service/sync/watch/watch.go b/src/source_controller/transfer-service/sync/watch/watch.go
index 3d80cc60dd..48135628ae 100644
--- a/src/source_controller/transfer-service/sync/watch/watch.go
+++ b/src/source_controller/transfer-service/sync/watch/watch.go
@@ -27,18 +27,19 @@ import (
 	"configcenter/src/apimachinery/discovery"
 	"configcenter/src/common"
 	"configcenter/src/common/blog"
+	"configcenter/src/common/http/rest"
 	"configcenter/src/common/metadata"
 	"configcenter/src/common/util"
 	"configcenter/src/source_controller/transfer-service/sync/medium"
 	syncmeta "configcenter/src/source_controller/transfer-service/sync/metadata"
 	"configcenter/src/storage/driver/mongodb"
-	"configcenter/src/storage/stream"
+	"configcenter/src/storage/stream/task"
 )
 
 // Watcher is cmdb data syncer event watcher
 type Watcher struct {
 	name     string
-	loopW    stream.LoopInterface
+	task     *task.Task
 	isMaster discovery.ServiceManageInterface
 	metadata *syncmeta.Metadata
 	cacheCli cacheservice.CacheServiceClientInterface
@@ -47,7 +48,7 @@ type Watcher struct {
 }
 
 // New new cmdb data syncer event watcher
-func New(name string, loopW stream.LoopInterface, isMaster discovery.ServiceManageInterface, meta *syncmeta.Metadata,
+func New(name string, task *task.Task, isMaster discovery.ServiceManageInterface, meta *syncmeta.Metadata,
 	cacheCli cacheservice.CacheServiceClientInterface, transMedium medium.ClientI) (*Watcher, error) {
 
 	// create cmdb data syncer event watch token table
@@ -60,19 +61,19 @@ func New(name string, loopW stream.LoopInterface, isMaster discovery.ServiceMana
 
 	if !exists {
 		err = mongodb.Client("watch").CreateTable(ctx, tokenTable)
-		if err != nil && !mongodb.Client("watch").IsDuplicatedError(err) {
+		if err != nil && !mongodb.IsDuplicatedError(err) {
 			blog.Errorf("create %s table failed, err: %v", tokenTable, err)
 			return nil, err
 		}
 
 		for _, resType := range types.ListAllResTypeForIncrSync() {
-			token := &tokenInfo{
+			token := &cursorInfo{
 				Resource:    resType,
 				StartAtTime: &metadata.Time{Time: 
time.Now()}, } err = mongodb.Client("watch").Table(tokenTable).Insert(ctx, token) - if err != nil && !mongodb.Client("watch").IsDuplicatedError(err) { + if err != nil && !mongodb.IsDuplicatedError(err) { blog.Errorf("init %s watch token failed, data: %+v, err: %v", resType, token, err) return nil, err } @@ -82,7 +83,7 @@ func New(name string, loopW stream.LoopInterface, isMaster discovery.ServiceMana // generate cmdb data syncer event watcher watcher := &Watcher{ name: name, - loopW: loopW, + task: task, isMaster: isMaster, metadata: meta, cacheCli: cacheCli, @@ -103,7 +104,8 @@ func (w *Watcher) Watch() error { cursorTypes, exists := resTypeCursorMap[resType] if exists { for _, cursorType := range cursorTypes { - go w.watchAPI(resType, cursorType) + kit := rest.NewKit() + go w.watchAPI(kit, resType, cursorType) } continue } diff --git a/src/source_controller/transfer-service/sync/watch/watch_api.go b/src/source_controller/transfer-service/sync/watch/watch_api.go index e6332cbd48..7157b14f43 100644 --- a/src/source_controller/transfer-service/sync/watch/watch_api.go +++ b/src/source_controller/transfer-service/sync/watch/watch_api.go @@ -18,7 +18,6 @@ package watch import ( - "context" "encoding/json" "errors" "fmt" @@ -27,6 +26,7 @@ import ( "configcenter/pkg/synchronize/types" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" "configcenter/src/common/metadata" "configcenter/src/common/watch" @@ -47,7 +47,7 @@ var resTypeCursorMap = map[types.ResType][]watch.CursorType{ } // watchAPI watch events by api -func (w *Watcher) watchAPI(resType types.ResType, cursorType watch.CursorType) { +func (w *Watcher) watchAPI(kit *rest.Kit, resType types.ResType, cursorType watch.CursorType) { handler := w.tokenHandlers[resType] prevStatus := false @@ -68,10 +68,9 @@ func (w *Watcher) watchAPI(resType types.ResType, cursorType watch.CursorType) { // is master status changed, re-watch event from new cursor if !prevStatus { prevStatus = true - token, err := handler.getWatchTokenInfo(context.Background(), common.BKCursorField, - common.BKStartAtTimeField) + token, err := handler.getWatchCursorInfo(kit) if err != nil { - blog.Errorf("get %s watch token info failed, err: %v", resType, err) + blog.Errorf("get %s watch cursor info failed, err: %v, rid: %s", resType, err, kit.Rid) time.Sleep(time.Second) continue } @@ -82,15 +81,14 @@ func (w *Watcher) watchAPI(resType types.ResType, cursorType watch.CursorType) { } } - kit := util.NewKit() - // watch events by api and set next watch cursor var lastCursor string var err error util.RetryWrapper(5, func() (bool, error) { lastCursor, err = w.doWatchAPI(kit, resType, opt) if err != nil { - blog.Errorf("watch %s events by api failed, err: %v, opt: %+v, rid: %s", resType, err, *opt, kit.Rid) + blog.Errorf("watch %s events by api failed, err: %v, opt: %+v, rid: %s", resType, err, *opt, + kit.Rid) return true, err } return false, nil @@ -103,7 +101,7 @@ func (w *Watcher) watchAPI(resType types.ResType, cursorType watch.CursorType) { fmt.Sprintf("%s.%s", common.BKCursorField, cursorType): lastCursor, common.BKStartAtTimeField: &metadata.Time{Time: time.Now()}, } - err = handler.setWatchTokenInfo(kit.Ctx, watchTokenInfo) + err = handler.setWatchCursorInfo(kit, watchTokenInfo) if err != nil { blog.Errorf("set %s watch cursor %s failed, err: %v, rid: %s", resType, lastCursor, err, kit.Rid) time.Sleep(time.Second) @@ -112,7 +110,7 @@ func (w *Watcher) watchAPI(resType types.ResType, cursorType 
watch.CursorType) { } } -func (w *Watcher) doWatchAPI(kit *util.Kit, resType types.ResType, opt *watch.WatchEventOptions) (string, error) { +func (w *Watcher) doWatchAPI(kit *rest.Kit, resType types.ResType, opt *watch.WatchEventOptions) (string, error) { // watch events by api watchRes, ccErr := w.cacheCli.Cache().Event().InnerWatchEvent(kit.Ctx, kit.Header, opt) if ccErr != nil { @@ -179,7 +177,7 @@ type eventDetailMap struct { update, create, delete map[string]json.RawMessage } -func (w *Watcher) pushSyncData(kit *util.Kit, events []*types.EventInfo) error { +func (w *Watcher) pushSyncData(kit *rest.Kit, events []*types.EventInfo) error { eventInfoMap := w.classifyEvents(kit, events) // push upsert and delete event info to transfer medium @@ -222,7 +220,7 @@ func (w *Watcher) pushSyncData(kit *util.Kit, events []*types.EventInfo) error { } // classify events by resource type and sub resources and event type -func (w *Watcher) classifyEvents(kit *util.Kit, events []*types.EventInfo) map[types.ResType]map[string]eventDetailMap { +func (w *Watcher) classifyEvents(kit *rest.Kit, events []*types.EventInfo) map[types.ResType]map[string]eventDetailMap { eventInfoMap := make(map[types.ResType]map[string]eventDetailMap) for _, event := range events { diff --git a/src/source_controller/transfer-service/sync/watch/watch_db.go b/src/source_controller/transfer-service/sync/watch/watch_db.go index c95d8bb72f..afa2ba0568 100644 --- a/src/source_controller/transfer-service/sync/watch/watch_db.go +++ b/src/source_controller/transfer-service/sync/watch/watch_db.go @@ -18,26 +18,27 @@ package watch import ( - "context" - "encoding/json" + "fmt" "time" synctypes "configcenter/pkg/synchronize/types" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/mapstr" + "configcenter/src/common/http/rest" "configcenter/src/common/metadata" "configcenter/src/common/watch" - "configcenter/src/source_controller/transfer-service/sync/util" - "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) // resTypeWatchOptMap is cmdb data sync resource type to db watch options map -var resTypeWatchOptMap = map[synctypes.ResType]types.Options{ +var resTypeWatchOptMap = map[synctypes.ResType]*types.WatchCollOptions{ synctypes.ServiceInstance: { - EventStruct: new(metadata.ServiceInstance), - Collection: common.BKTableNameServiceInstance, + CollectionOptions: types.CollectionOptions{ + CollectionFilter: &types.CollectionFilter{ + Regex: fmt.Sprintf("_%s$", common.BKTableNameServiceInstance), + }, + EventStruct: new(metadata.ServiceInstance), + }, }, } @@ -45,45 +46,26 @@ var resTypeWatchOptMap = map[synctypes.ResType]types.Options{ func (w *Watcher) watchDB(resType synctypes.ResType) error { handler := w.tokenHandlers[resType] - token, err := handler.getWatchTokenInfo(context.Background(), common.BKStartAtTimeField) - if err != nil { - blog.Errorf("get %s watch db token info failed, err: %v", resType, err) - return err - } - - startAtTime := &types.TimeStamp{Sec: uint32(time.Now().Unix())} - if token.StartAtTime != nil { - startAtTime = &types.TimeStamp{ - Sec: uint32(token.StartAtTime.Unix()), - Nano: uint32(token.StartAtTime.Nanosecond()), - } - } - - watchOpts := resTypeWatchOptMap[resType] - watchOpts.StartAtTime = startAtTime - watchOpts.WatchFatalErrorCallback = handler.resetWatchToken - - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: string(resType), - WatchOpt: &types.WatchOptions{ - Options: watchOpts, - }, + opts := 
&types.LoopBatchTaskOptions{ + WatchTaskOptions: &types.WatchTaskOptions{ + Name: string(resType), + CollOpts: resTypeWatchOptMap[resType], TokenHandler: handler, RetryOptions: &types.RetryOptions{ MaxRetryCount: 3, RetryDuration: 1 * time.Second, }, }, - EventHandler: &types.BatchHandler{ - DoBatch: func(es []*types.Event) (retry bool) { - return w.handleDBEvents(resType, watchOpts.Collection, es) + EventHandler: &types.TaskBatchHandler{ + DoBatch: func(dbInfo *types.DBInfo, es []*types.Event) bool { + return w.handleDBEvents(resType, es) }, }, BatchSize: common.BKMaxLimitSize, } - if err = w.loopW.WithBatch(opts); err != nil { + err := w.task.AddLoopBatchTask(opts) + if err != nil { blog.Errorf("watch %s events from db failed, err: %v", resType, err) return err } @@ -92,55 +74,12 @@ func (w *Watcher) watchDB(resType synctypes.ResType) error { } // handleDBEvents handle db events -func (w *Watcher) handleDBEvents(resType synctypes.ResType, coll string, es []*types.Event) (retry bool) { - kit := util.NewKit() - - // get deleted event oid to detail map - delOids := make([]string, 0) - for _, e := range es { - if e.OperationType == types.Delete { - delOids = append(delOids, e.Oid) - } - } - - delOidMap := make(map[string]json.RawMessage) - if len(delOids) > 0 { - cond := mapstr.MapStr{ - "oid": mapstr.MapStr{common.BKDBIN: delOids}, - "coll": coll, - } - archives := make([]delArchiveInfo, 0) - err := mongodb.Client().Table(common.BKTableNameDelArchive).Find(cond).All(kit.Ctx, &archives) - if err != nil { - blog.Errorf("get del archive failed, err: %v, cond: %+v, rid: %s", err, cond, kit.Rid) - return true - } - - for _, archive := range archives { - if archive.Detail == nil { - continue - } - - detail, err := json.Marshal(archive.Detail) - if err != nil { - blog.Errorf("marshal del archive detail failed, err: %v, archive: %+v, rid: %s", err, archive, kit.Rid) - return true - } - delOidMap[archive.Oid] = detail - } - } +func (w *Watcher) handleDBEvents(resType synctypes.ResType, es []*types.Event) bool { + kit := rest.NewKit() eventInfos := make([]*synctypes.EventInfo, 0) for _, e := range es { eventType := watch.ConvertOperateType(e.OperationType) - if eventType == watch.Delete { - delDetail, exists := delOidMap[e.Oid] - if !exists { - continue - } - e.DocBytes = delDetail - } - eventInfo, needSync := w.metadata.ParseEventDetail(eventType, resType, e.Oid, e.DocBytes) if !needSync { continue @@ -156,8 +95,3 @@ func (w *Watcher) handleDBEvents(resType synctypes.ResType, coll string, es []*t return false } - -type delArchiveInfo struct { - Oid string `bson:"oid"` - Detail mapstr.MapStr `bson:"detail"` -} From 93ebe26c2395ea75eb7d32bc71250be8dcebc33f Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 13 Mar 2025 19:40:52 +0800 Subject: [PATCH 08/10] feat: update migrate watch data logics --story=120702860 --- src/scene_server/admin_server/app/server.go | 7 +- .../admin_server/service/migrate.go | 271 +++++++++++++++--- .../admin_server/service/past_migrate.go | 9 - .../admin_server/service/sharding.go | 39 +-- 4 files changed, 247 insertions(+), 79 deletions(-) diff --git a/src/scene_server/admin_server/app/server.go b/src/scene_server/admin_server/app/server.go index 29c984b67b..cff4ce13fa 100644 --- a/src/scene_server/admin_server/app/server.go +++ b/src/scene_server/admin_server/app/server.go @@ -16,7 +16,6 @@ package app import ( "context" "fmt" - "time" iamcli "configcenter/src/ac/iam" "configcenter/src/common/auth" @@ -31,7 +30,6 @@ import ( 
"configcenter/src/scene_server/admin_server/iam" "configcenter/src/scene_server/admin_server/logics" svc "configcenter/src/scene_server/admin_server/service" - "configcenter/src/storage/dal/mongo/sharding" "configcenter/src/storage/dal/redis" "configcenter/src/storage/driver/mongodb" "configcenter/src/thirdparty/monitor" @@ -64,11 +62,10 @@ func Run(ctx context.Context, cancel context.CancelFunc, op *options.ServerOptio db := mongodb.Dal() process.Service.SetDB(db) - watchDB, err := sharding.NewDisableDBShardingMongo(process.Config.WatchDB.GetMongoConf(), time.Minute) - if err != nil { + if err = mongodb.SetWatchCli("watch", &process.Config.WatchDB, process.Config.Crypto); err != nil { return fmt.Errorf("connect watch mongo server failed, err: %v", err) } - process.Service.SetWatchDB(watchDB) + process.Service.SetWatchDB(mongodb.Dal("watch")) cache, err := redis.NewFromConfig(process.Config.Redis) if err != nil { diff --git a/src/scene_server/admin_server/service/migrate.go b/src/scene_server/admin_server/service/migrate.go index 676a893de4..605020a5c9 100644 --- a/src/scene_server/admin_server/service/migrate.go +++ b/src/scene_server/admin_server/service/migrate.go @@ -33,10 +33,13 @@ import ( "configcenter/src/common/mapstr" "configcenter/src/common/metadata" commontype "configcenter/src/common/types" + "configcenter/src/common/util" "configcenter/src/common/version" "configcenter/src/common/watch" "configcenter/src/scene_server/admin_server/upgrader" "configcenter/src/source_controller/cacheservice/event" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/dal/mongo/sharding" daltypes "configcenter/src/storage/dal/types" "configcenter/src/storage/driver/mongodb" streamtypes "configcenter/src/storage/stream/types" @@ -72,25 +75,24 @@ func (s *Service) migrateDatabase(req *restful.Request, resp *restful.Response) return } - if err := s.createWatchDBChainCollections(kit); err != nil { - blog.Errorf("create watch db chain collections failed, err: %v", err) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommMigrateFailed, err.Error()), - } - resp.WriteError(http.StatusInternalServerError, result) - return - } - result, err := upgrader.Upgrade(kit, s.db, nil) if err != nil { blog.Errorf("db upgrade failed, err: %v", err) result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommMigrateFailed, err.Error()), + Msg: kit.CCError.Errorf(common.CCErrCommMigrateFailed, err.Error()), } resp.WriteError(http.StatusInternalServerError, result) return } + if err = s.createWatchDBChainCollections(kit); err != nil { + blog.Errorf("create watch db chain collections failed, err: %v", err) + resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ + Msg: kit.CCError.Errorf(common.CCErrCommMigrateFailed, err.Error()), + }) + return + } + resp.WriteEntity(metadata.NewSuccessResp(result)) } @@ -98,19 +100,14 @@ func (s *Service) migrateDatabase(req *restful.Request, resp *restful.Response) const dbChainTTLTime = 5 * 24 * 60 * 60 func (s *Service) createWatchDBChainCollections(kit *rest.Kit) error { - // create watch token table to store the last watch token info for every collections - exists, err := s.watchDB.Shard(kit.SysShardOpts()).HasTable(s.ctx, common.BKTableNameWatchToken) + watchDBToDBRelation, err := s.getWatchDBToDBRelation(kit) if err != nil { - blog.Errorf("check if table %s exists failed, err: %v, rid: %s", common.BKTableNameWatchToken, err, kit.Rid) return err } - if !exists { - err = 
s.watchDB.Shard(kit.SysShardOpts()).CreateTable(s.ctx, common.BKTableNameWatchToken) - if err != nil && !mongodb.IsDuplicatedError(err) { - blog.Errorf("create table %s failed, err: %v, rid: %s", common.BKTableNameWatchToken, err, kit.Rid) - return err - } + // create watch token table and init the watch token for dbs + if err := s.createWatchToken(kit, watchDBToDBRelation); err != nil { + return err } // create watch chain node table and init the last token info as empty for all collections @@ -123,8 +120,8 @@ func (s *Service) createWatchDBChainCollections(kit *rest.Kit) error { } err = tenant.ExecForAllTenants(func(tenantID string) error { - kit = kit.NewKit() - kit.TenantID = tenantID + // TODO 在新增租户初始化时同时增加watch相关表,并刷新cache的tenant + kit = kit.NewKit().WithTenant(tenantID) exists, err := s.watchDB.Shard(kit.ShardOpts()).HasTable(s.ctx, key.ChainCollection()) if err != nil { blog.Errorf("check if table %s exists failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid) @@ -139,11 +136,11 @@ func (s *Service) createWatchDBChainCollections(kit *rest.Kit) error { } } - if err = s.createWatchIndexes(kit, tenantID, cursorType, key); err != nil { + if err = s.createWatchIndexes(kit, cursorType, key); err != nil { return err } - if err = s.createWatchToken(kit, tenantID, key); err != nil { + if err = s.createLastWatchEvent(kit, key); err != nil { return err } return nil @@ -151,11 +148,143 @@ func (s *Service) createWatchDBChainCollections(kit *rest.Kit) error { if err != nil { return err } + + // TODO 在新增DB时同时增加db relation和token数据 + err = s.createWatchTokenForEventKey(kit, key, watchDBToDBRelation) + if err != nil { + return err + } } return nil } -func (s *Service) createWatchIndexes(kit *rest.Kit, tenantID string, cursorType watch.CursorType, key event.Key) error { +// getWatchDBToDBRelation get watch db uuid to db uuids relation +func (s *Service) getWatchDBToDBRelation(kit *rest.Kit) (map[string][]string, error) { + // get all db uuids + uuidMap := make(map[string]struct{}) + err := s.db.ExecForAllDB(func(db local.DB) error { + dbClient, ok := db.(*local.Mongo) + if !ok { + return fmt.Errorf("db to be watched is not an instance of local mongo") + } + uuidMap[dbClient.GetMongoClient().UUID()] = struct{}{} + return nil + }) + if err != nil { + return nil, err + } + + // get watch db relations + relations := make([]sharding.WatchDBRelation, 0) + if err := s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameWatchDBRelation).Find(nil). 
+ All(kit.Ctx, &relations); err != nil { + blog.Errorf("get watch db relation failed, err: %v, rid: %s", err, kit.Rid) + return nil, err + } + + watchDBToDBRelation := make(map[string][]string) + for _, relation := range relations { + watchDBToDBRelation[relation.WatchDB] = append(watchDBToDBRelation[relation.WatchDB], relation.DB) + delete(uuidMap, relation.DB) + } + + // get default watch db uuid for new db to be watched + cond := map[string]any{common.MongoMetaID: common.ShardingDBConfID} + conf := new(sharding.ShardingDBConf) + err = s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameSystem).Find(cond).One(kit.Ctx, &conf) + if err != nil { + blog.Errorf("get sharding db conf failed, err: %v, rid: %s", err, kit.Rid) + return nil, err + } + defaultWatchDBUUID := conf.ForNewData + + // create watch db relation for dbs without watch db + newRelations := make([]sharding.WatchDBRelation, 0) + for uuid := range uuidMap { + watchDBToDBRelation[defaultWatchDBUUID] = append(watchDBToDBRelation[defaultWatchDBUUID], uuid) + newRelations = append(newRelations, sharding.WatchDBRelation{ + WatchDB: defaultWatchDBUUID, + DB: uuid, + }) + } + + if len(newRelations) > 0 { + err = s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameWatchDBRelation).Insert(kit.Ctx, newRelations) + if err != nil { + blog.Errorf("create watch db relations(%+v) failed, err: %v, rid: %s", newRelations, err, kit.Rid) + return nil, err + } + } + + return watchDBToDBRelation, nil +} + +func (s *Service) createWatchToken(kit *rest.Kit, watchDBToDBRelation map[string][]string) error { + return s.watchDB.ExecForAllDB(func(watchDB local.DB) error { + // create watch token table to store the last watch token info for db and every collection + exists, err := watchDB.HasTable(s.ctx, common.BKTableNameWatchToken) + if err != nil { + blog.Errorf("check if table %s exists failed, err: %v, rid: %s", common.BKTableNameWatchToken, err, kit.Rid) + return err + } + + if !exists { + err = watchDB.CreateTable(s.ctx, common.BKTableNameWatchToken) + if err != nil && !mongodb.IsDuplicatedError(err) { + blog.Errorf("create table %s failed, err: %v, rid: %s", common.BKTableNameWatchToken, err, kit.Rid) + return err + } + } + + // get all exist db watch tokens + mongo, ok := watchDB.(*local.Mongo) + if !ok { + return fmt.Errorf("db is not *local.Mongo type") + } + uuids := watchDBToDBRelation[mongo.GetMongoClient().UUID()] + if len(uuids) == 0 { + return nil + } + + filter := map[string]interface{}{ + common.MongoMetaID: map[string]interface{}{common.BKDBIN: uuids}, + } + + existUUIDs, err := watchDB.Table(common.BKTableNameWatchToken).Distinct(kit.Ctx, common.MongoMetaID, filter) + if err != nil { + blog.Errorf("check if dbs(%+v) watch token exists failed, err: %v, rid: %s", uuids, err, kit.Rid) + return err + } + + existUUIDMap := make(map[string]struct{}) + for _, uuid := range existUUIDs { + existUUIDMap[util.GetStrByInterface(uuid)] = struct{}{} + } + + // create watch token for dbs to be watched + for _, uuid := range uuids { + if _, exists := existUUIDMap[uuid]; exists { + continue + } + + data := mapstr.MapStr{ + common.MongoMetaID: uuid, + common.BKTokenField: "", + common.BKStartAtTimeField: streamtypes.TimeStamp{ + Sec: uint32(time.Now().Unix()), + Nano: 0, + }, + } + if err = watchDB.Table(common.BKTableNameWatchToken).Insert(s.ctx, data); err != nil { + blog.Errorf("create db watch token failed, err: %v, data: %+v, rid: %s", err, data, kit.Rid) + return err + } + } + return nil + }) +} + +func (s *Service) 
createWatchIndexes(kit *rest.Kit, cursorType watch.CursorType, key event.Key) error { indexes := []daltypes.Index{ {Name: "index_id", Keys: bson.D{{common.BKFieldID, -1}}, Background: true, Unique: true}, {Name: "index_cursor", Keys: bson.D{{common.BKCursorField, -1}}, Background: true, Unique: true}, @@ -195,12 +324,36 @@ func (s *Service) createWatchIndexes(kit *rest.Kit, tenantID string, cursorType return nil } -func (s *Service) createWatchToken(kit *rest.Kit, tenantID string, key event.Key) error { +func (s *Service) createWatchTokenForEventKey(kit *rest.Kit, key event.Key, + watchDBToDBRelation map[string][]string) error { + + // create watch token of this key for every db + err := s.watchDB.ExecForAllDB(func(db local.DB) error { + mongo, ok := db.(*local.Mongo) + if !ok { + return fmt.Errorf("db is not *local.Mongo type") + } + + for _, uuid := range watchDBToDBRelation[mongo.GetMongoClient().UUID()] { + if err := s.createWatchTokenForDB(kit, db, uuid, key); err != nil { + blog.Errorf("init %s key %s watch token failed, err: %v, rid: %s", uuid, key.Namespace(), err, kit.Rid) + return err + } + } + return nil + }) + if err != nil { + return err + } + return nil +} + +func (s *Service) createWatchTokenForDB(kit *rest.Kit, watchDB local.DB, uuid string, key event.Key) error { filter := map[string]interface{}{ - "_id": key.Collection(), + "_id": watch.GenDBWatchTokenID(uuid, key.Collection()), } - count, err := s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameWatchToken).Find(filter).Count(s.ctx) + count, err := watchDB.Table(common.BKTableNameWatchToken).Find(filter).Count(s.ctx) if err != nil { blog.Errorf("check if last watch token exists failed, err: %v, filter: %+v", err, filter) return err @@ -214,14 +367,15 @@ func (s *Service) createWatchToken(kit *rest.Kit, tenantID string, key event.Key // host identity's watch token is different with other identity. 
// only set coll is ok, the other fields is useless data := mapstr.MapStr{ - "_id": key.Collection(), - common.BKTableNameBaseHost: watch.LastChainNodeData{Coll: common.BKTableNameBaseHost}, - common.BKTableNameModuleHostConfig: watch.LastChainNodeData{Coll: common.BKTableNameModuleHostConfig}, - common.BKTableNameBaseProcess: watch.LastChainNodeData{Coll: common.BKTableNameBaseProcess}, + "_id": watch.GenDBWatchTokenID(uuid, key.Collection()), + common.BKTableNameBaseHost: new(streamtypes.TokenInfo), + common.BKTableNameModuleHostConfig: new(streamtypes.TokenInfo), + common.BKTableNameBaseProcess: new(streamtypes.TokenInfo), + common.BKTableNameProcessInstanceRelation: new(streamtypes.TokenInfo), } - err = s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameWatchToken).Insert(s.ctx, data) + err = watchDB.Table(common.BKTableNameWatchToken).Insert(s.ctx, data) if err != nil { - blog.Errorf("init last watch token failed, err: %v, data: %+v", err, data) + blog.Errorf("init last watch token failed, err: %v, data: %+v, rid: %s", err, data, kit.Rid) return err } return nil @@ -230,30 +384,55 @@ func (s *Service) createWatchToken(kit *rest.Kit, tenantID string, key event.Key if key.Collection() == event.BizSetRelationKey.Collection() { // biz set relation's watch token is generated in the same way with the host identity's watch token data := mapstr.MapStr{ - "_id": key.Collection(), - common.BKTableNameBaseApp: watch.LastChainNodeData{Coll: common.BKTableNameBaseApp}, - common.BKTableNameBaseBizSet: watch.LastChainNodeData{Coll: common.BKTableNameBaseBizSet}, - common.BKFieldID: 0, - common.BKTokenField: "", + "_id": watch.GenDBWatchTokenID(uuid, key.Collection()), + common.BKTableNameBaseApp: new(streamtypes.TokenInfo), + common.BKTableNameBaseBizSet: new(streamtypes.TokenInfo), } - err = s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameWatchToken).Insert(s.ctx, data) + err = watchDB.Table(common.BKTableNameWatchToken).Insert(s.ctx, data) if err != nil { - blog.Errorf("init last biz set relation watch token failed, err: %v, data: %+v", err, data) + blog.Errorf("init last biz set rel watch token failed, err: %v, data: %+v, rid: %s", err, data, kit.Rid) return err } return nil } - data := watch.LastChainNodeData{ - Coll: key.Collection(), - Token: "", - StartAtTime: streamtypes.TimeStamp{ + data := mapstr.MapStr{ + common.MongoMetaID: watch.GenDBWatchTokenID(uuid, key.Collection()), + common.BKTokenField: "", + common.BKStartAtTimeField: streamtypes.TimeStamp{ Sec: uint32(time.Now().Unix()), Nano: 0, }, } - if err = s.watchDB.Shard(kit.SysShardOpts()).Table(common.BKTableNameWatchToken).Insert(s.ctx, data); err != nil { - blog.Errorf("init last watch token failed, err: %v, data: %+v", err, data) + if err = watchDB.Table(common.BKTableNameWatchToken).Insert(s.ctx, data); err != nil { + blog.Errorf("init last watch token failed, err: %v, data: %+v, rid: %s", err, data, kit.Rid) + return err + } + return nil +} + +func (s *Service) createLastWatchEvent(kit *rest.Kit, key event.Key) error { + filter := map[string]interface{}{ + "_id": key.Collection(), + } + + count, err := s.watchDB.Shard(kit.ShardOpts()).Table(common.BKTableNameLastWatchEvent).Find(filter).Count(s.ctx) + if err != nil { + blog.Errorf("check if last watch event exists failed, err: %v, filter: %+v, rid: %s", err, filter, kit.Rid) + return err + } + + if count > 0 { + return nil + } + + data := watch.LastChainNodeData{ + Coll: key.Collection(), + ID: 0, + Cursor: "", + } + if err = 
s.watchDB.Shard(kit.ShardOpts()).Table(common.BKTableNameLastWatchEvent).Insert(s.ctx, data); err != nil { + blog.Errorf("create last watch event failed, err: %v, data: %+v, rid: %s", err, data, kit.Rid) return err } return nil diff --git a/src/scene_server/admin_server/service/past_migrate.go b/src/scene_server/admin_server/service/past_migrate.go index 91d3c73feb..613687cb96 100644 --- a/src/scene_server/admin_server/service/past_migrate.go +++ b/src/scene_server/admin_server/service/past_migrate.go @@ -42,15 +42,6 @@ func (s *Service) migrate(req *restful.Request, resp *restful.Response) { } kit := rest.NewKitFromHeader(rHeader, s.CCErr) - if err := s.createWatchDBChainCollections(kit); err != nil { - blog.Errorf("create watch db chain collections failed, err: %v, rid: %s", err, rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommMigrateFailed, err.Error()), - } - resp.WriteError(http.StatusInternalServerError, result) - return - } - preVersion, finishedVersions, err := history.Upgrade(s.ctx, s.db.Shard(kit.SysShardOpts()), s.cache, s.iam, updateCfg) if err != nil { diff --git a/src/scene_server/admin_server/service/sharding.go b/src/scene_server/admin_server/service/sharding.go index ca0b565fc3..9dbaa1684c 100644 --- a/src/scene_server/admin_server/service/sharding.go +++ b/src/scene_server/admin_server/service/sharding.go @@ -46,9 +46,9 @@ func (s *Service) initShardingApi(api *restful.WebService) { // ShardingDBConfig is the sharding db config for api type ShardingDBConfig struct { - MasterDB string `json:"master_db"` - ForNewTenant string `json:"for_new_tenant"` - SlaveDB map[string]SlaveDBConfig `json:"slave_db"` + MasterDB string `json:"master_db"` + ForNewData string `json:"for_new_data"` + SlaveDB map[string]SlaveDBConfig `json:"slave_db"` } // SlaveDBConfig is the slave db config for api @@ -69,9 +69,9 @@ func (s *Service) GetShardingDBConfig(req *restful.Request, resp *restful.Respon } result := &ShardingDBConfig{ - MasterDB: conf.MasterDB, - ForNewTenant: conf.ForNewTenant, - SlaveDB: make(map[string]SlaveDBConfig), + MasterDB: conf.MasterDB, + ForNewData: conf.ForNewData, + SlaveDB: make(map[string]SlaveDBConfig), } for uuid, mongoConf := range conf.SlaveDB { @@ -134,7 +134,7 @@ func (s *Service) getShardingDBConf(kit *rest.Kit) (*sharding.ShardingDBConf, er // UpdateShardingDBReq is the update sharding db config request type UpdateShardingDBReq struct { - ForNewTenant string `json:"for_new_tenant,omitempty"` + ForNewData string `json:"for_new_data,omitempty"` CreateSlaveDB []SlaveDBConfig `json:"create_slave_db,omitempty"` UpdateSlaveDB map[string]UpdateSlaveDBInfo `json:"update_slave_db,omitempty"` } @@ -177,8 +177,8 @@ func (s *Service) UpdateShardingDBConfig(req *restful.Request, resp *restful.Res cond := map[string]any{common.MongoMetaID: common.ShardingDBConfID} updateData := map[string]any{ - "for_new_tenant": updateConf.ForNewTenant, - "slave_db": updateConf.SlaveDB, + "for_new_data": updateConf.ForNewData, + "slave_db": updateConf.SlaveDB, } err = s.db.Shard(kit.SysShardOpts()).Table(common.BKTableNameSystem).Update(s.ctx, cond, updateData) if err != nil { @@ -247,23 +247,23 @@ func (s *Service) genUpdatedShardingDBConf(kit *rest.Kit, dbConf *sharding.Shard } // update new tenant db config, check if the new tenant db config exists - if conf.ForNewTenant != "" { + if conf.ForNewData != "" { // use uuid to specify the new tenant db config for db that already exists - _, uuidExists := dbConf.SlaveDB[conf.ForNewTenant] - if conf.ForNewTenant == 
dbConf.MasterDB || uuidExists { - dbConf.ForNewTenant = conf.ForNewTenant + _, uuidExists := dbConf.SlaveDB[conf.ForNewData] + if conf.ForNewData == dbConf.MasterDB || uuidExists { + dbConf.ForNewData = conf.ForNewData return dbConf, nil } // use name to specify the new tenant db config for new db that doesn't have uuid before creation - uuid, nameExists := nameUUIDMap[conf.ForNewTenant] + uuid, nameExists := nameUUIDMap[conf.ForNewData] if nameExists { - dbConf.ForNewTenant = uuid + dbConf.ForNewData = uuid return dbConf, nil } - blog.Errorf("add new tenant db %s is invalid, rid: %s", conf.ForNewTenant, kit.Rid) - return nil, kit.CCError.CCErrorf(common.CCErrCommParamsInvalid, "for_new_tenant") + blog.Errorf("add new tenant db %s is invalid, rid: %s", conf.ForNewData, kit.Rid) + return nil, kit.CCError.CCErrorf(common.CCErrCommParamsInvalid, "for_new_data") } return dbConf, nil } @@ -292,7 +292,7 @@ func (s *Service) genDBSlaveConf(kit *rest.Kit, name string, disabled bool, conf } func (s *Service) saveUpdateShardingDBAudit(kit *rest.Kit, preConf, curConf *sharding.ShardingDBConf) error { - id, err := s.db.Shard(kit.SysShardOpts()).NextSequence(kit.Ctx, common.BKTableNameAuditLog) + id, err := s.db.Shard(kit.SysShardOpts()).NextSequence(kit.Ctx, common.BKTableNamePlatformAuditLog) if err != nil { blog.Errorf("generate next audit log id failed, err: %v, rid: %s", err, kit.Rid) return err @@ -311,7 +311,8 @@ func (s *Service) saveUpdateShardingDBAudit(kit *rest.Kit, preConf, curConf *sha RequestID: kit.Rid, } - if err = s.db.Shard(kit.SysShardOpts()).Table(common.BKTableNameAuditLog).Insert(kit.Ctx, audit); err != nil { + err = s.db.Shard(kit.SysShardOpts()).Table(common.BKTableNamePlatformAuditLog).Insert(kit.Ctx, audit) + if err != nil { blog.Errorf("save sharding db config audit log failed, err: %v, rid: %s", err, kit.Rid) return err } From 4e89a4c05b0cd2c278c5d8f6e975a456ecee871c Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Thu, 3 Apr 2025 20:50:55 +0800 Subject: [PATCH 09/10] chore: ci & review bugfix --story=120702860 --- pkg/tenant/event.go | 2 +- .../admin_server/service/migrate.go | 51 +- .../admin_server/service/tenant.go | 35 ++ .../cacheservice/app/server.go | 1 + .../cacheservice/cache/biz-topo/topo.go | 12 +- .../cache/biz-topo/watch/brief.go | 2 +- .../cacheservice/cache/biz-topo/watch/kube.go | 6 +- .../cache/biz-topo/watch/watch.go | 13 +- .../cacheservice/cache/cache.go | 16 +- .../cacheservice/cache/custom/cache.go | 13 +- .../cacheservice/cache/custom/watch/watch.go | 22 +- .../cacheservice/cache/general/cache.go | 5 +- .../general/full-sync-cond/full_sync_cond.go | 10 +- .../cache/general/full-sync-cond/watch.go | 6 +- .../cacheservice/cache/general/watch/watch.go | 2 +- .../event/bsrelation/bsrelation.go | 26 +- .../cacheservice/event/bsrelation/event.go | 12 +- .../cacheservice/event/flow/event.go | 143 +++-- .../cacheservice/event/flow/flow.go | 25 +- .../cacheservice/event/flow/inst_asst_flow.go | 27 +- .../cacheservice/event/flow/instance_flow.go | 29 +- .../cacheservice/event/identifier/event.go | 14 +- .../event/identifier/identifier.go | 38 +- .../cacheservice/event/loop/loop_watch.go | 4 +- .../cacheservice/event/loop/task.go | 41 +- .../cacheservice/event/mix-event/flow.go | 14 +- .../cacheservice/service/service.go | 53 +- .../transfer-service/service/service.go | 26 +- .../transfer-service/sync/sync.go | 17 +- .../transfer-service/sync/watch/watch.go | 17 +- .../transfer-service/sync/watch/watch_db.go | 12 +- 
 src/storage/stream/loop/loop_watch.go        | 519 ------------------
 src/storage/stream/scheduler/scheduler.go    | 273 +++++++++
 .../stream/{task => scheduler}/token.go      | 105 +---
 src/storage/stream/scheduler/util.go         |  84 +++
 src/storage/stream/scheduler/watch.go        | 253 +++++++++
 src/storage/stream/stream.go                 |  23 -
 .../stream/task/{watch_task.go => db_task.go} |  80 ++-
 src/storage/stream/task/task.go              | 330 ++---
 src/storage/stream/task/util.go              |  23 -
 src/storage/stream/types/task.go             |  14 -
 src/storage/stream/types/types.go            | 152 -----
 src/test/test.go                             |   1 +
 43 files changed, 1131 insertions(+), 1420 deletions(-)
 delete mode 100644 src/storage/stream/loop/loop_watch.go
 create mode 100644 src/storage/stream/scheduler/scheduler.go
 rename src/storage/stream/{task => scheduler}/token.go (57%)
 create mode 100644 src/storage/stream/scheduler/util.go
 create mode 100644 src/storage/stream/scheduler/watch.go
 rename src/storage/stream/task/{watch_task.go => db_task.go} (71%)

diff --git a/pkg/tenant/event.go b/pkg/tenant/event.go
index 828794b0c6..2fcd5e4232 100644
--- a/pkg/tenant/event.go
+++ b/pkg/tenant/event.go
@@ -54,7 +54,7 @@ func NewTenantEventChan(name string) <-chan TenantEvent {
 		return ch
 	}
 
-	eventChan := make(chan TenantEvent)
+	eventChan := make(chan TenantEvent, 1)
 	tenantEventChannels[name] = eventChan
 	go func() {
 		for _, tenant := range allTenants {
diff --git a/src/scene_server/admin_server/service/migrate.go b/src/scene_server/admin_server/service/migrate.go
index 605020a5c9..0bc1fb51d8 100644
--- a/src/scene_server/admin_server/service/migrate.go
+++ b/src/scene_server/admin_server/service/migrate.go
@@ -121,29 +121,7 @@ func (s *Service) createWatchDBChainCollections(kit *rest.Kit) error {
 
 		err = tenant.ExecForAllTenants(func(tenantID string) error {
 			// TODO 在新增租户初始化时同时增加watch相关表,并刷新cache的tenant
-			kit = kit.NewKit().WithTenant(tenantID)
-			exists, err := s.watchDB.Shard(kit.ShardOpts()).HasTable(s.ctx, key.ChainCollection())
-			if err != nil {
-				blog.Errorf("check if table %s exists failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid)
-				return err
-			}
-
-			if !exists {
-				err = s.watchDB.Shard(kit.ShardOpts()).CreateTable(s.ctx, key.ChainCollection())
-				if err != nil && !mongodb.IsDuplicatedError(err) {
-					blog.Errorf("create table %s failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid)
-					return err
-				}
-			}
-
-			if err = s.createWatchIndexes(kit, cursorType, key); err != nil {
-				return err
-			}
-
-			if err = s.createLastWatchEvent(kit, key); err != nil {
-				return err
-			}
-			return nil
+			return s.addTenantWatchToken(kit.NewKit().WithTenant(tenantID), cursorType, key)
 		})
 		if err != nil {
 			return err
@@ -284,6 +262,33 @@ func (s *Service) createWatchToken(kit *rest.Kit, watchDBToDBRelation map[string
 	})
 }
 
+// addTenantWatchToken add watch chain collection, indexes and last watch event for a tenant
+func (s *Service) addTenantWatchToken(kit *rest.Kit, cursorType watch.CursorType, key event.Key) error {
+	exists, err := s.watchDB.Shard(kit.ShardOpts()).HasTable(s.ctx, key.ChainCollection())
+	if err != nil {
+		blog.Errorf("check if table %s exists failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid)
+		return err
+	}
+
+	if !exists {
+		err = s.watchDB.Shard(kit.ShardOpts()).CreateTable(s.ctx, key.ChainCollection())
+		if err != nil && !mongodb.IsDuplicatedError(err) {
+			blog.Errorf("create table %s failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid)
+			return err
+		}
+	}
+
+	if err = s.createWatchIndexes(kit, cursorType, key); err != nil {
+		return err
+	}
+
+	if err = 
s.createLastWatchEvent(kit, key); err != nil { + return err + } + return nil + +} + func (s *Service) createWatchIndexes(kit *rest.Kit, cursorType watch.CursorType, key event.Key) error { indexes := []daltypes.Index{ {Name: "index_id", Keys: bson.D{{common.BKFieldID, -1}}, Background: true, Unique: true}, diff --git a/src/scene_server/admin_server/service/tenant.go b/src/scene_server/admin_server/service/tenant.go index ddc1c62d96..52643eb809 100644 --- a/src/scene_server/admin_server/service/tenant.go +++ b/src/scene_server/admin_server/service/tenant.go @@ -35,7 +35,9 @@ import ( "configcenter/src/common/index" "configcenter/src/common/metadata" apigwcli "configcenter/src/common/resource/apigw" + "configcenter/src/common/watch" "configcenter/src/scene_server/admin_server/logics" + "configcenter/src/source_controller/cacheservice/event" "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/driver/mongodb" "configcenter/src/thirdparty/apigw/user" @@ -60,6 +62,14 @@ func (s *Service) addTenant(req *restful.Request, resp *restful.Response) { _, exist := tenant.GetTenant(kit.TenantID) if exist { + // add watch token for new tenant + // TODO 如果租户已经存在的情况下也调一下,防止之前新增租户了但是这个失败了 + if err := s.addWatchTokenForNewTenant(kit); err != nil { + blog.Errorf("add watch token for new tenant %s failed, err: %v, rid: %s", kit.TenantID, err, kit.Rid) + result := &metadata.RespError{Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error())} + resp.WriteError(http.StatusInternalServerError, result) + return + } resp.WriteEntity(metadata.NewSuccessResp("tenant exist")) return } @@ -150,9 +160,34 @@ func (s *Service) addTenant(req *restful.Request, resp *restful.Response) { blog.Errorf("refresh tenants failed, err: %v, rid: %s", err, kit.Rid) } + // add watch token for new tenant + // TODO 如果租户已经存在的情况下也调一下,防止之前新增租户了但是这个失败了 + if err = s.addWatchTokenForNewTenant(kit); err != nil { + blog.Errorf("add watch token for new tenant %s failed, err: %v, rid: %s", kit.TenantID, err, kit.Rid) + result := &metadata.RespError{Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error())} + resp.WriteError(http.StatusInternalServerError, result) + return + } + resp.WriteEntity(metadata.NewSuccessResp("add tenant success")) } +func (s *Service) addWatchTokenForNewTenant(kit *rest.Kit) error { + cursorTypes := watch.ListCursorTypes() + for _, cursorType := range cursorTypes { + key, err := event.GetResourceKeyWithCursorType(cursorType) + if err != nil { + blog.Errorf("get resource key with cursor type %s failed, err: %v, rid: %s", cursorType, err, kit.Rid) + return err + } + + if err = s.addTenantWatchToken(kit, cursorType, key); err != nil { + return err + } + } + return nil +} + func addDefaultArea(kit *rest.Kit, db local.DB) error { // add default area cond := map[string]interface{}{"bk_cloud_name": "Default Area"} diff --git a/src/source_controller/cacheservice/app/server.go b/src/source_controller/cacheservice/app/server.go index 3e0b71081c..9b58ee381e 100644 --- a/src/source_controller/cacheservice/app/server.go +++ b/src/source_controller/cacheservice/app/server.go @@ -103,6 +103,7 @@ func Run(ctx context.Context, cancel context.CancelFunc, op *options.ServerOptio } select { case <-ctx.Done(): + cacheService.Scheduler().Stop() } return nil } diff --git a/src/source_controller/cacheservice/cache/biz-topo/topo.go b/src/source_controller/cacheservice/cache/biz-topo/topo.go index eb82933a8b..b28c22b092 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/topo.go +++ 
b/src/source_controller/cacheservice/cache/biz-topo/topo.go @@ -37,7 +37,6 @@ import ( "configcenter/src/source_controller/cacheservice/cache/custom/cache" watchcli "configcenter/src/source_controller/cacheservice/event/watch" "configcenter/src/storage/driver/mongodb" - "configcenter/src/storage/stream/task" ) // Topo defines the business topology caching logics @@ -47,14 +46,14 @@ type Topo struct { } // New Topo -func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, cacheSet *cache.CacheSet, - watchCli *watchcli.Client) (*Topo, error) { +func New(isMaster discovery.ServiceManageInterface, cacheSet *cache.CacheSet, watchCli *watchcli.Client) (*Topo, + error) { t := &Topo{ isMaster: isMaster, } - watcher, err := watch.New(isMaster, watchTask, cacheSet, watchCli) + watcher, err := watch.New(isMaster, cacheSet, watchCli) if err != nil { return nil, fmt.Errorf("new watcher failed, err: %v", err) } @@ -67,6 +66,11 @@ func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, cacheS return t, nil } +// Watcher returns the business topology event watcher +func (t *Topo) Watcher() *watch.Watcher { + return t.watcher +} + // loopBizTopoCache launch the task to loop business's brief topology every interval minutes. func (t *Topo) loopBizTopoCache(topoKey key.Key) { for { diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go b/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go index 7c7672a8e7..dd41aa69b9 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go +++ b/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go @@ -63,7 +63,7 @@ func (w *Watcher) watchBrief() error { } func (w *briefWatcher) watchEvents(cursorType watch.CursorType) error { - loopEventChan := make(chan loop.TenantEvent) + loopEventChan := make(chan loop.TenantEvent, 1) name := fmt.Sprintf("biz topo %s %s", w.cacheKey.Type(), cursorType) tenantChan := tenant.NewTenantEventChan(name) diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go b/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go index c85a9c3176..1f3c11edd3 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go +++ b/src/source_controller/cacheservice/cache/biz-topo/watch/kube.go @@ -36,6 +36,7 @@ import ( tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler" dbtypes "configcenter/src/storage/dal/types" "configcenter/src/storage/driver/mongodb" + "configcenter/src/storage/stream/task" streamtypes "configcenter/src/storage/stream/types" "go.mongodb.org/mongo-driver/bson/primitive" @@ -113,11 +114,12 @@ func (w *kubeWatcher) watchTopo(obj string, doBatch func(*streamtypes.DBInfo, [] opts.WatchTaskOptions.CollOpts.EventStruct = new(kubetypes.Pod) } - err = w.watcher.task.AddLoopBatchTask(opts) + watchTask, err := task.NewLoopBatchTask(opts) if err != nil { - blog.Errorf("watch kube biz topo collection %s failed, err: %v", collection, err) + blog.Errorf("generate kube biz topo collection %s watch task failed, err: %v", collection, err) return err } + w.watcher.tasks = append(w.watcher.tasks, watchTask) } return nil diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go b/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go index 31b7107f6d..df57d89377 100644 --- a/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go +++ b/src/source_controller/cacheservice/cache/biz-topo/watch/watch.go @@ -28,20 +28,20 @@ import ( // Watcher defines 
mongodb event watcher for biz topology type Watcher struct { isMaster discovery.ServiceManageInterface - task *task.Task cacheSet *cache.CacheSet watchCli *watchcli.Client + tasks []*task.Task } // New biz topology mongodb event watcher -func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, cacheSet *cache.CacheSet, - watchCli *watchcli.Client) (*Watcher, error) { +func New(isMaster discovery.ServiceManageInterface, cacheSet *cache.CacheSet, watchCli *watchcli.Client) (*Watcher, + error) { watcher := &Watcher{ isMaster: isMaster, - task: watchTask, cacheSet: cacheSet, watchCli: watchCli, + tasks: make([]*task.Task, 0), } if err := watcher.watchKube(); err != nil { @@ -54,3 +54,8 @@ func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, cacheS return watcher, nil } + +// GetWatchTasks returns the event watch tasks +func (w *Watcher) GetWatchTasks() []*task.Task { + return w.tasks +} diff --git a/src/source_controller/cacheservice/cache/cache.go b/src/source_controller/cacheservice/cache/cache.go index 0edd9c9e39..881545ca00 100644 --- a/src/source_controller/cacheservice/cache/cache.go +++ b/src/source_controller/cacheservice/cache/cache.go @@ -29,24 +29,24 @@ import ( ) // NewCache new cache service -func NewCache(watchTask *task.Task, isMaster discovery.ServiceManageInterface) (*ClientSet, error) { +func NewCache(isMaster discovery.ServiceManageInterface) (*ClientSet, error) { if err := mainline.NewMainlineCache(isMaster); err != nil { return nil, fmt.Errorf("new mainline cache failed, err: %v", err) } - customCache, err := custom.New(isMaster, watchTask) + customCache, err := custom.New(isMaster) if err != nil { return nil, fmt.Errorf("new custom resource cache failed, err: %v", err) } watchCli := watch.NewClient(mongodb.Dal("watch"), mongodb.Dal(), redis.Client()) - generalCache, err := general.New(isMaster, watchTask, watchCli) + generalCache, err := general.New(isMaster, watchCli) if err != nil { return nil, fmt.Errorf("new general resource cache failed, err: %v", err) } - topoTreeClient, err := biztopo.New(isMaster, watchTask, customCache.CacheSet(), watchCli) + topoTreeClient, err := biztopo.New(isMaster, customCache.CacheSet(), watchCli) if err != nil { return nil, fmt.Errorf("new common topo cache failed, err: %v", err) } @@ -73,3 +73,11 @@ type ClientSet struct { Custom *custom.Cache General *general.Cache } + +// GetWatchTasks returns the event watch tasks +func (c *ClientSet) GetWatchTasks() []*task.Task { + tasks := c.Topo.Watcher().GetWatchTasks() + tasks = append(tasks, c.Custom.Watcher().GetWatchTasks()...) + tasks = append(tasks, c.General.FullSyncCond().GetWatchTasks()...) 
+ return tasks +} diff --git a/src/source_controller/cacheservice/cache/custom/cache.go b/src/source_controller/cacheservice/cache/custom/cache.go index 02cd56fef1..3954cc72ad 100644 --- a/src/source_controller/cacheservice/cache/custom/cache.go +++ b/src/source_controller/cacheservice/cache/custom/cache.go @@ -24,23 +24,25 @@ import ( "configcenter/src/apimachinery/discovery" "configcenter/src/source_controller/cacheservice/cache/custom/cache" "configcenter/src/source_controller/cacheservice/cache/custom/watch" - "configcenter/src/storage/stream/task" ) // Cache defines the custom resource caching logics type Cache struct { cacheSet *cache.CacheSet + watcher *watch.Watcher } // New Cache -func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task) (*Cache, error) { +func New(isMaster discovery.ServiceManageInterface) (*Cache, error) { t := &Cache{ cacheSet: cache.New(isMaster), } - if err := watch.Init(watchTask, t.cacheSet); err != nil { + watcher, err := watch.Init(t.cacheSet) + if err != nil { return nil, fmt.Errorf("initialize custom resource watcher failed, err: %v", err) } + t.watcher = watcher t.cacheSet.LoopRefreshCache() return t, nil @@ -50,3 +52,8 @@ func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task) (*Cach func (c *Cache) CacheSet() *cache.CacheSet { return c.cacheSet } + +// Watcher returns custom resource event watcher +func (c *Cache) Watcher() *watch.Watcher { + return c.watcher +} diff --git a/src/source_controller/cacheservice/cache/custom/watch/watch.go b/src/source_controller/cacheservice/cache/custom/watch/watch.go index fc724b1eb1..bfba129623 100644 --- a/src/source_controller/cacheservice/cache/custom/watch/watch.go +++ b/src/source_controller/cacheservice/cache/custom/watch/watch.go @@ -35,26 +35,31 @@ import ( // Watcher defines mongodb event watcher for custom resource type Watcher struct { - task *task.Task cacheSet *cache.CacheSet + tasks []*task.Task } // Init custom resource mongodb event watcher -func Init(watchTask *task.Task, cacheSet *cache.CacheSet) error { +func Init(cacheSet *cache.CacheSet) (*Watcher, error) { watcher := &Watcher{ - task: watchTask, cacheSet: cacheSet, + tasks: make([]*task.Task, 0), } if err := watcher.watchPodLabel(); err != nil { - return err + return nil, err } if err := watcher.watchSharedNsRel(); err != nil { - return err + return nil, err } - return nil + return watcher, nil +} + +// GetWatchTasks returns the event watch tasks +func (w *Watcher) GetWatchTasks() []*task.Task { + return w.tasks } type watchOptions struct { @@ -102,11 +107,12 @@ func (w *Watcher) watchCustomResource(opt *watchOptions) (bool, error) { BatchSize: 200, } - err = w.task.AddLoopBatchTask(opts) + watchTask, err := task.NewLoopBatchTask(opts) if err != nil { - blog.Errorf("watch custom resource %s, but add loop batch task failed, err: %v", name, err) + blog.Errorf("watch custom resource %s, but generate loop batch task failed, err: %v", name, err) return false, err } + w.tasks = append(w.tasks, watchTask) return exists, nil } diff --git a/src/source_controller/cacheservice/cache/general/cache.go b/src/source_controller/cacheservice/cache/general/cache.go index a6758ad03d..b98857eda3 100644 --- a/src/source_controller/cacheservice/cache/general/cache.go +++ b/src/source_controller/cacheservice/cache/general/cache.go @@ -28,7 +28,6 @@ import ( "configcenter/src/source_controller/cacheservice/cache/general/types" "configcenter/src/source_controller/cacheservice/cache/general/watch" watchcli 
"configcenter/src/source_controller/cacheservice/event/watch" - "configcenter/src/storage/stream/task" ) // Cache defines the general resource caching logics @@ -38,7 +37,7 @@ type Cache struct { } // New Cache -func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, watchCli *watchcli.Client) (*Cache, error) { +func New(isMaster discovery.ServiceManageInterface, watchCli *watchcli.Client) (*Cache, error) { cacheSet := cache.GetAllCache() fullSyncCondChMap := make(map[general.ResType]chan<- types.FullSyncCondEvent) @@ -49,7 +48,7 @@ func New(isMaster discovery.ServiceManageInterface, watchTask *task.Task, watchC fullSyncCondChMap[resType] = cacheInst.FullSyncCondCh() } - fullSyncCondCli, err := fullsynccond.New(watchTask, fullSyncCondChMap) + fullSyncCondCli, err := fullsynccond.New(fullSyncCondChMap) if err != nil { return nil, fmt.Errorf("init full sync cond failed, err: %v", err) } diff --git a/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go b/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go index 6b317c01d9..24752231ab 100644 --- a/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go +++ b/src/source_controller/cacheservice/cache/general/full-sync-cond/full_sync_cond.go @@ -28,14 +28,13 @@ import ( // FullSyncCond defines the full sync cond related logics type FullSyncCond struct { - task *task.Task chMap map[general.ResType]chan<- types.FullSyncCondEvent + tasks []*task.Task } // New FullSyncCond -func New(watchTask *task.Task, chMap map[general.ResType]chan<- types.FullSyncCondEvent) (*FullSyncCond, error) { +func New(chMap map[general.ResType]chan<- types.FullSyncCondEvent) (*FullSyncCond, error) { f := &FullSyncCond{ - task: watchTask, chMap: chMap, } @@ -45,3 +44,8 @@ func New(watchTask *task.Task, chMap map[general.ResType]chan<- types.FullSyncCo return f, nil } + +// GetWatchTasks returns the event watch tasks +func (f *FullSyncCond) GetWatchTasks() []*task.Task { + return f.tasks +} diff --git a/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go b/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go index a2a8dd4b24..c8fa8bdec1 100644 --- a/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go +++ b/src/source_controller/cacheservice/cache/general/full-sync-cond/watch.go @@ -33,6 +33,7 @@ import ( tokenhandler "configcenter/src/source_controller/cacheservice/cache/token-handler" "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/driver/mongodb" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" ) @@ -67,11 +68,12 @@ func (f *FullSyncCond) Watch() error { BatchSize: 200, } - err := f.task.AddLoopBatchTask(opts) + watchTask, err := task.NewLoopBatchTask(opts) if err != nil { - blog.Errorf("add watch full sync cond task failed, err: %v", err) + blog.Errorf("generate watch full sync cond task failed, err: %v", err) return err } + f.tasks = append(f.tasks, watchTask) return nil } diff --git a/src/source_controller/cacheservice/cache/general/watch/watch.go b/src/source_controller/cacheservice/cache/general/watch/watch.go index 51ac4970c8..3f58118033 100644 --- a/src/source_controller/cacheservice/cache/general/watch/watch.go +++ b/src/source_controller/cacheservice/cache/general/watch/watch.go @@ -71,7 +71,7 @@ func (w *Watcher) watch() error { name := fmt.Sprintf("%s%s:%s", common.BKCacheKeyV3Prefix, "common_res", resType) - loopEventChan := make(chan 
loop.TenantEvent) + loopEventChan := make(chan loop.TenantEvent, 1) go w.watchCacheChange(cursorType, name, loopEventChan) diff --git a/src/source_controller/cacheservice/event/bsrelation/bsrelation.go b/src/source_controller/cacheservice/event/bsrelation/bsrelation.go index 55f1584b2d..6c11e33c64 100644 --- a/src/source_controller/cacheservice/event/bsrelation/bsrelation.go +++ b/src/source_controller/cacheservice/event/bsrelation/bsrelation.go @@ -14,7 +14,6 @@ package bsrelation import ( - "context" "time" "configcenter/src/common" @@ -29,11 +28,22 @@ const ( bizSetRelationLockTTL = 1 * time.Minute ) +// BizSetRelation is the biz set relation event flow struct +type BizSetRelation struct { + tasks []*task.Task +} + +// GetWatchTasks returns the event watch tasks +func (b *BizSetRelation) GetWatchTasks() []*task.Task { + return b.tasks +} + // NewBizSetRelation init and run biz set relation event watch -func NewBizSetRelation(task *task.Task) error { +func NewBizSetRelation() (*BizSetRelation, error) { + bizSetRel := &BizSetRelation{tasks: make([]*task.Task, 0)} + base := mixevent.MixEventFlowOptions{ MixKey: event.BizSetRelationKey, - Task: task, EventLockKey: bizSetRelationLockKey, EventLockTTL: bizSetRelationLockTTL, } @@ -42,20 +52,20 @@ func NewBizSetRelation(task *task.Task) error { bizSet := base bizSet.Key = event.BizSetKey bizSet.WatchFields = []string{common.BKBizSetIDField, common.BKBizSetScopeField} - if err := newBizSetRelation(context.Background(), bizSet); err != nil { + if err := bizSetRel.addWatchTask(bizSet); err != nil { blog.Errorf("watch biz set event for biz set relation failed, err: %v", err) - return err + return nil, err } blog.Info("watch biz set relation events, watch biz set success") // watch biz event biz := base biz.Key = event.BizKey - if err := newBizSetRelation(context.Background(), biz); err != nil { + if err := bizSetRel.addWatchTask(biz); err != nil { blog.Errorf("watch biz event for biz set relation failed, err: %v", err) - return err + return nil, err } blog.Info("watch biz set relation events, watch biz success") - return nil + return bizSetRel, nil } diff --git a/src/source_controller/cacheservice/event/bsrelation/event.go b/src/source_controller/cacheservice/event/bsrelation/event.go index 448b1bc807..db2e373aca 100644 --- a/src/source_controller/cacheservice/event/bsrelation/event.go +++ b/src/source_controller/cacheservice/event/bsrelation/event.go @@ -33,8 +33,8 @@ import ( "configcenter/src/storage/stream/types" ) -// newBizSetRelation init and run biz set relation event watch with sub event key -func newBizSetRelation(ctx context.Context, opts mixevent.MixEventFlowOptions) error { +// addWatchTask add biz set relation event watch task with sub event key +func (b *BizSetRelation) addWatchTask(opts mixevent.MixEventFlowOptions) error { relation := bizSetRelation{ mixKey: opts.MixKey, key: opts.Key, @@ -66,7 +66,13 @@ func newBizSetRelation(ctx context.Context, opts mixevent.MixEventFlowOptions) e return err } - return flow.RunFlow(ctx) + flowTask, err := flow.GenWatchTask() + if err != nil { + return err + } + + b.tasks = append(b.tasks, flowTask) + return nil } // bizSetRelation biz set relation event watch logic struct diff --git a/src/source_controller/cacheservice/event/flow/event.go b/src/source_controller/cacheservice/event/flow/event.go index 2efe2aed8c..6dc2052850 100644 --- a/src/source_controller/cacheservice/event/flow/event.go +++ b/src/source_controller/cacheservice/event/flow/event.go @@ -22,193 +22,186 @@ import ( ) // NewEvent 
new event flow -func NewEvent(watchTask *task.Task) error { - e := Event{ - task: watchTask, +func NewEvent() (*Event, error) { + e := &Event{ + tasks: make([]*task.Task, 0), } - if err := e.runHost(context.Background()); err != nil { - blog.Errorf("run host event flow failed, err: %v", err) - return err + if err := e.addHostTask(); err != nil { + blog.Errorf("add host event flow task failed, err: %v", err) + return nil, err } - if err := e.runModuleHostRelation(context.Background()); err != nil { - blog.Errorf("run module host config event flow failed, err: %v", err) - return err + if err := e.addModuleHostRelationTask(); err != nil { + blog.Errorf("add module host config event flow task failed, err: %v", err) + return nil, err } - if err := e.runBizSet(context.Background()); err != nil { - blog.Errorf("run biz set event flow failed, err: %v", err) - return err + if err := e.addBizSetTask(); err != nil { + blog.Errorf("add biz set event flow task failed, err: %v", err) + return nil, err } - if err := e.runBiz(context.Background()); err != nil { - blog.Errorf("run biz event flow failed, err: %v", err) - return err + if err := e.addBizTask(); err != nil { + blog.Errorf("add biz event flow task failed, err: %v", err) + return nil, err } - if err := e.runSet(context.Background()); err != nil { - blog.Errorf("run set event flow failed, err: %v", err) - return err + if err := e.addSetTask(); err != nil { + blog.Errorf("add set event flow task failed, err: %v", err) + return nil, err } - if err := e.runModule(context.Background()); err != nil { - blog.Errorf("run module event flow failed, err: %v", err) - return err + if err := e.addModuleTask(); err != nil { + blog.Errorf("add module event flow task failed, err: %v", err) + return nil, err } - if err := e.runObjectBase(context.Background()); err != nil { - blog.Errorf("run object base event flow failed, err: %v", err) - return err + if err := e.addObjectBaseTask(context.Background()); err != nil { + blog.Errorf("add object base event flow task failed, err: %v", err) + return nil, err } - if err := e.runProcess(context.Background()); err != nil { - blog.Errorf("run process event flow failed, err: %v", err) - return err + if err := e.addProcessTask(); err != nil { + blog.Errorf("add process event flow task failed, err: %v", err) + return nil, err } - if err := e.runProcessInstanceRelation(context.Background()); err != nil { - blog.Errorf("run process instance relation event flow failed, err: %v", err) - return err + if err := e.addProcessInstanceRelationTask(); err != nil { + blog.Errorf("add process instance relation event flow task failed, err: %v", err) + return nil, err } - if err := e.runInstAsst(context.Background()); err != nil { - blog.Errorf("run instance association event flow failed, err: %v", err) - return err + if err := e.addInstAsstTask(); err != nil { + blog.Errorf("add instance association event flow task failed, err: %v", err) + return nil, err } - if err := e.runPlat(context.Background()); err != nil { - blog.Errorf("run plat event flow failed, err: %v", err) + if err := e.addPlatTask(); err != nil { + blog.Errorf("add plat event flow task failed, err: %v", err) } - if err := e.runProject(context.Background()); err != nil { - blog.Errorf("run project event flow failed, err: %v", err) + if err := e.addProjectTask(); err != nil { + blog.Errorf("add project event flow task failed, err: %v", err) } - return nil + return e, nil } // Event is the event flow struct type Event struct { - task *task.Task + tasks []*task.Task } -func (e 
*Event) runHost(ctx context.Context) error { +// GetWatchTasks returns the event flow tasks +func (e *Event) GetWatchTasks() []*task.Task { + return e.tasks +} + +func (e *Event) addHostTask() error { opts := flowOptions{ key: event.HostKey, - task: e.task, EventStruct: new(metadata.HostMapStr), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runModuleHostRelation(ctx context.Context) error { +func (e *Event) addModuleHostRelationTask() error { opts := flowOptions{ key: event.ModuleHostRelationKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runBiz(ctx context.Context) error { +func (e *Event) addBizTask() error { opts := flowOptions{ key: event.BizKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runSet(ctx context.Context) error { +func (e *Event) addSetTask() error { opts := flowOptions{ key: event.SetKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runModule(ctx context.Context) error { +func (e *Event) addModuleTask() error { opts := flowOptions{ key: event.ModuleKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runObjectBase(ctx context.Context) error { +func (e *Event) addObjectBaseTask(ctx context.Context) error { opts := flowOptions{ key: event.ObjectBaseKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newInstanceFlow(ctx, opts, parseEvent) + return e.addInstanceFlowTask(ctx, opts, parseEvent) } -func (e *Event) runProcess(ctx context.Context) error { +func (e *Event) addProcessTask() error { opts := flowOptions{ key: event.ProcessKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runProcessInstanceRelation(ctx context.Context) error { +func (e *Event) addProcessInstanceRelationTask() error { opts := flowOptions{ key: event.ProcessInstanceRelationKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runInstAsst(ctx context.Context) error { +func (e *Event) addInstAsstTask() error { opts := flowOptions{ key: event.InstAsstKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newInstAsstFlow(ctx, opts, parseInstAsstEvent) + return e.addInstAsstFlowTask(opts, parseInstAsstEvent) } -func (e *Event) runBizSet(ctx context.Context) error { +func (e *Event) addBizSetTask() error { opts := flowOptions{ key: event.BizSetKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runPlat(ctx context.Context) error { +func (e *Event) addPlatTask() error { opts := flowOptions{ key: event.PlatKey, - task: e.task, EventStruct: new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } -func (e *Event) runProject(ctx context.Context) error { +func (e *Event) addProjectTask() error { opts := flowOptions{ key: event.ProjectKey, - task: e.task, EventStruct: 
new(map[string]interface{}), } - return newFlow(ctx, opts, parseEvent) + return e.addFlowTask(opts, parseEvent) } diff --git a/src/source_controller/cacheservice/event/flow/flow.go b/src/source_controller/cacheservice/event/flow/flow.go index 11f497dae3..6b7ed08084 100644 --- a/src/source_controller/cacheservice/event/flow/flow.go +++ b/src/source_controller/cacheservice/event/flow/flow.go @@ -35,17 +35,22 @@ import ( type flowOptions struct { key event.Key - task *task.Task EventStruct interface{} } -func newFlow(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { +func (e *Event) addFlowTask(opts flowOptions, parseEvent parseEventFunc) error { flow, err := NewFlow(opts, parseEvent) if err != nil { return err } - return flow.RunFlow(ctx) + flowTask, err := flow.GenWatchTask() + if err != nil { + return err + } + + e.tasks = append(e.tasks, flowTask) + return nil } // NewFlow create a new event watch flow @@ -122,9 +127,9 @@ const ( cursorQueueSize = 50000 ) -// RunFlow run event flow -func (f *Flow) RunFlow(ctx context.Context) error { - blog.Infof("start run flow for key: %s.", f.key.Namespace()) +// GenWatchTask generate event flow watch task +func (f *Flow) GenWatchTask() (*task.Task, error) { + blog.Infof("generate flow watch task for key: %s.", f.key.Namespace()) f.tokenHandler = NewFlowTokenHandler(f.key, f.metrics) @@ -151,12 +156,12 @@ func (f *Flow) RunFlow(ctx context.Context) error { BatchSize: batchSize, } - err := f.task.AddLoopBatchTask(opts) + flowTask, err := task.NewLoopBatchTask(opts) if err != nil { - blog.Errorf("run %s flow, but add loop batch task failed, err: %v", f.key.Namespace(), err) - return err + blog.Errorf("run %s flow, but generate loop batch task failed, err: %v", f.key.Namespace(), err) + return nil, err } - return nil + return flowTask, nil } func (f *Flow) doBatch(dbInfo *types.DBInfo, es []*types.Event) (retry bool) { diff --git a/src/source_controller/cacheservice/event/flow/inst_asst_flow.go b/src/source_controller/cacheservice/event/flow/inst_asst_flow.go index f886d02178..19741b2b78 100644 --- a/src/source_controller/cacheservice/event/flow/inst_asst_flow.go +++ b/src/source_controller/cacheservice/event/flow/inst_asst_flow.go @@ -13,16 +13,16 @@ package flow import ( - "context" "fmt" "time" "configcenter/src/common" "configcenter/src/common/blog" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" ) -func newInstAsstFlow(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { +func (e *Event) addInstAsstFlowTask(opts flowOptions, parseEvent parseEventFunc) error { flow, err := NewFlow(opts, parseEvent) if err != nil { return err @@ -31,7 +31,13 @@ func newInstAsstFlow(ctx context.Context, opts flowOptions, parseEvent parseEven Flow: flow, } - return instAsstFlow.RunFlow(ctx) + flowTask, err := instAsstFlow.GenWatchTask() + if err != nil { + return err + } + + e.tasks = append(e.tasks, flowTask) + return nil } // InstAsstFlow instance association event watch flow @@ -39,9 +45,9 @@ type InstAsstFlow struct { Flow } -// RunFlow run instance association event watch flow -func (f *InstAsstFlow) RunFlow(ctx context.Context) error { - blog.Infof("start run flow for key: %s.", f.key.Namespace()) +// GenWatchTask generate instance association event watch flow task +func (f *InstAsstFlow) GenWatchTask() (*task.Task, error) { + blog.Infof("generate flow watch task for key: %s.", f.key.Namespace()) f.tokenHandler = NewFlowTokenHandler(f.key, f.metrics) @@ -68,11 +74,10 @@ func (f 
*InstAsstFlow) RunFlow(ctx context.Context) error { BatchSize: batchSize, } - err := f.task.AddLoopBatchTask(opts) + flowTask, err := task.NewLoopBatchTask(opts) if err != nil { - blog.Errorf("run %s flow, but add loop batch task failed, err: %v", f.key.Namespace(), err) - return err + blog.Errorf("run %s flow, but generate loop batch task failed, err: %v", f.key.Namespace(), err) + return nil, err } - - return nil + return flowTask, nil } diff --git a/src/source_controller/cacheservice/event/flow/instance_flow.go b/src/source_controller/cacheservice/event/flow/instance_flow.go index c8aba7a364..0a4d75d3e8 100644 --- a/src/source_controller/cacheservice/event/flow/instance_flow.go +++ b/src/source_controller/cacheservice/event/flow/instance_flow.go @@ -33,6 +33,7 @@ import ( dbtypes "configcenter/src/storage/dal/types" "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/driver/redis" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" "github.com/tidwall/gjson" @@ -40,7 +41,7 @@ import ( "go.mongodb.org/mongo-driver/mongo" ) -func newInstanceFlow(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { +func (e *Event) addInstanceFlowTask(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { flow, err := NewFlow(opts, parseEvent) if err != nil { return err @@ -67,7 +68,13 @@ func newInstanceFlow(ctx context.Context, opts flowOptions, parseEvent parseEven return err } - return instFlow.RunFlow(ctx) + flowTask, err := instFlow.GenWatchTask() + if err != nil { + return err + } + + e.tasks = append(e.tasks, flowTask) + return nil } // syncMainlineObjectMap refresh mainline object ID map every 5 minutes @@ -120,8 +127,8 @@ type InstanceFlow struct { *mainlineObjectMap } -// RunFlow TODO -func (f *InstanceFlow) RunFlow(ctx context.Context) error { +// GenWatchTask generate instance event watch flow task +func (f *InstanceFlow) GenWatchTask() (*task.Task, error) { blog.Infof("start run flow for key: %s.", f.key.Namespace()) f.tokenHandler = NewFlowTokenHandler(f.key, f.metrics) @@ -149,13 +156,12 @@ func (f *InstanceFlow) RunFlow(ctx context.Context) error { BatchSize: batchSize, } - err := f.task.AddLoopBatchTask(opts) + flowTask, err := task.NewLoopBatchTask(opts) if err != nil { - blog.Errorf("run %s flow, but add loop batch task failed, err: %v", f.key.Namespace(), err) - return err + blog.Errorf("run %s flow, but generate loop batch task failed, err: %v", f.key.Namespace(), err) + return nil, err } - - return nil + return flowTask, nil } func (f *InstanceFlow) doBatch(dbInfo *types.DBInfo, es []*types.Event) (retry bool) { @@ -420,6 +426,11 @@ func (f *InstanceFlow) convertTableInstEvent(es []*types.Event, rid string) ([]* } continue } + + _, exists = srcObjIDInstIDsMap[tenantID] + if !exists { + srcObjIDInstIDsMap[tenantID] = make(map[string][]int64) + } srcObjIDInstIDsMap[tenantID][srcObjID] = append(srcObjIDInstIDsMap[tenantID][srcObjID], tenantObjIDInstIDsMap[tenantID][objID]...) 
} diff --git a/src/source_controller/cacheservice/event/identifier/event.go b/src/source_controller/cacheservice/event/identifier/event.go index 8c1949341b..c7062bed5d 100644 --- a/src/source_controller/cacheservice/event/identifier/event.go +++ b/src/source_controller/cacheservice/event/identifier/event.go @@ -13,7 +13,6 @@ package identifier import ( - "context" "fmt" "time" @@ -22,7 +21,6 @@ import ( "configcenter/src/common/watch" "configcenter/src/source_controller/cacheservice/event" mixevent "configcenter/src/source_controller/cacheservice/event/mix-event" - "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" ) @@ -34,10 +32,9 @@ const ( type identityOptions struct { key event.Key watchFields []string - task *task.Task } -func newIdentity(ctx context.Context, opts identityOptions) error { +func (i *Identity) addWatchTask(opts identityOptions) error { identity := hostIdentity{ identityOptions: opts, metrics: event.InitialMetrics(opts.key.Collection(), "host_identifier"), @@ -47,7 +44,6 @@ func newIdentity(ctx context.Context, opts identityOptions) error { MixKey: event.HostIdentityKey, Key: opts.key, WatchFields: opts.watchFields, - Task: opts.task, EventLockTTL: hostIdentityLockTTL, EventLockKey: hostIdentityLockKey, } @@ -57,7 +53,13 @@ func newIdentity(ctx context.Context, opts identityOptions) error { return err } - return flow.RunFlow(ctx) + flowTask, err := flow.GenWatchTask() + if err != nil { + return err + } + + i.tasks = append(i.tasks, flowTask) + return nil } type hostIdentity struct { diff --git a/src/source_controller/cacheservice/event/identifier/identifier.go b/src/source_controller/cacheservice/event/identifier/identifier.go index 79bd1035ad..88c9c9cba2 100644 --- a/src/source_controller/cacheservice/event/identifier/identifier.go +++ b/src/source_controller/cacheservice/event/identifier/identifier.go @@ -14,55 +14,63 @@ package identifier import ( - "context" - "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/source_controller/cacheservice/event" "configcenter/src/storage/stream/task" ) +// Identity is the host identity event flow struct +type Identity struct { + tasks []*task.Task +} + +// GetWatchTasks returns the event watch tasks +func (i *Identity) GetWatchTasks() []*task.Task { + return i.tasks +} + // NewIdentity new host identifier event watch -func NewIdentity(task *task.Task) error { - base := identityOptions{ - task: task, - } +func NewIdentity() (*Identity, error) { + identity := &Identity{tasks: make([]*task.Task, 0)} + + base := identityOptions{} host := base host.key = event.HostKey host.watchFields = needCareHostFields - if err := newIdentity(context.Background(), host); err != nil { + if err := identity.addWatchTask(host); err != nil { blog.Errorf("new host identify host event failed, err: %v", err) - return err + return nil, err } blog.Info("host identity events, watch host success.") relation := base relation.key = event.ModuleHostRelationKey relation.watchFields = []string{common.BKHostIDField} - if err := newIdentity(context.Background(), relation); err != nil { + if err := identity.addWatchTask(relation); err != nil { blog.Errorf("new host identify host relation event failed, err: %v", err) - return err + return nil, err } blog.Info("host identity events, watch host relation success.") process := base process.key = event.ProcessKey process.watchFields = []string{common.BKProcessIDField} - if err := newIdentity(context.Background(), process); err != nil { + if err := 
identity.addWatchTask(process); err != nil {
 		blog.Errorf("new host identify process event failed, err: %v", err)
-		return err
+		return nil, err
 	}
 	blog.Info("host identity events, watch process success.")
 
 	procRel := base
 	procRel.key = event.ProcessInstanceRelationKey
 	procRel.watchFields = []string{common.BKHostIDField}
-	if err := newIdentity(context.Background(), procRel); err != nil {
+	if err := identity.addWatchTask(procRel); err != nil {
 		blog.Errorf("new host identify process relation event failed, err: %v", err)
-		return err
+		return nil, err
 	}
 	blog.Info("host identity events, watch process relation success.")
 
-	return nil
+	return identity, nil
 }
diff --git a/src/source_controller/cacheservice/event/loop/loop_watch.go b/src/source_controller/cacheservice/event/loop/loop_watch.go
index 0bbbd60ac3..1d06c2d1af 100644
--- a/src/source_controller/cacheservice/event/loop/loop_watch.go
+++ b/src/source_controller/cacheservice/event/loop/loop_watch.go
@@ -19,7 +19,7 @@ package loop
 import (
-	"context"
+	"sync"
 
 	"configcenter/src/apimachinery/discovery"
 	"configcenter/src/common/blog"
@@ -76,7 +76,7 @@ func (w *LoopWatcher) AddLoopWatchTask(opts *LoopWatchTaskOptions) error {
 		tokenHandler:     opts.TokenHandler,
 		eventHandler:     opts.EventHandler,
 		tenantChan:       opts.TenantChan,
-		tenantCancelFunc: make(map[string]context.CancelFunc),
+		tenantCancelFunc: sync.Map{},
 	}
 
 	go task.run()
diff --git a/src/source_controller/cacheservice/event/loop/task.go b/src/source_controller/cacheservice/event/loop/task.go
index a631b2366f..8427b21fbf 100644
--- a/src/source_controller/cacheservice/event/loop/task.go
+++ b/src/source_controller/cacheservice/event/loop/task.go
@@ -45,8 +45,7 @@ type loopWatchTask struct {
 	eventHandler EventHandler
 	tenantChan   <-chan TenantEvent
 
-	mu               sync.Mutex
-	tenantCancelFunc map[string]context.CancelFunc
+	tenantCancelFunc sync.Map
 }
 
 // run loop watch task
@@ -66,17 +65,14 @@ func (t *loopWatchTask) run() {
 
 // startTenantTask start loop watch task for new tenant
 func (t *loopWatchTask) startTenantTask(tenantID string, opts *watch.WatchEventOptions) {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-
-	if _, exists := t.tenantCancelFunc[tenantID]; exists {
+	if _, exists := t.tenantCancelFunc.Load(tenantID); exists {
 		return
 	}
 
 	ctx, cancel := context.WithCancel(util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode))
 	kit := rest.NewKit().WithCtx(ctx).WithTenant(tenantID)
 
-	t.tenantCancelFunc[tenantID] = cancel
+	t.tenantCancelFunc.Store(tenantID, cancel)
 
 	go t.loopWatch(kit, opts)
 	blog.Infof("start tenant %s loop watch task %s, rid: %s", tenantID, t.name, kit.Rid)
@@ -84,23 +80,30 @@ func (t *loopWatchTask) startTenantTask(tenantID string, opts *watch.WatchEventO
 
 // stopTenantTask stop loop watch task for removed or disabled tenant
 func (t *loopWatchTask) stopTenantTask(tenantID string, isAllTenant bool) {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-
 	if isAllTenant {
-		for id, cancel := range t.tenantCancelFunc {
-			cancel()
-			blog.Infof("stop tenant %s loop watch task %s", id, t.name)
-		}
-		t.tenantCancelFunc = make(map[string]context.CancelFunc)
+		t.tenantCancelFunc.Range(func(id, cancel any) bool {
+			t.cancelTenantTask(id, cancel)
+			return true
+		})
 		return
 	}
 
-	if cancel, exists := t.tenantCancelFunc[tenantID]; exists {
-		cancel()
-		delete(t.tenantCancelFunc, tenantID)
-		blog.Infof("stop tenant %s loop watch task %s", tenantID, t.name)
+	if cancel, exists := t.tenantCancelFunc.Load(tenantID); exists {
+		t.cancelTenantTask(tenantID, cancel)
+	}
+}
+
+func (t *loopWatchTask) 
cancelTenantTask(tenantID, cancel any) {
+	cancelFunc, ok := cancel.(context.CancelFunc)
+	if !ok {
+		blog.Errorf("tenant %s loop watch task %s cancel func is invalid", tenantID, t.name)
+		t.tenantCancelFunc.Delete(tenantID)
+		return
+	}
+
+	cancelFunc()
+	t.tenantCancelFunc.Delete(tenantID)
+	blog.Infof("stop tenant %s loop watch task %s", tenantID, t.name)
 }
 
 // LoopWatch loop watch event flow
diff --git a/src/source_controller/cacheservice/event/mix-event/flow.go b/src/source_controller/cacheservice/event/mix-event/flow.go
index 9efaca0aa2..a2612f92a8 100644
--- a/src/source_controller/cacheservice/event/mix-event/flow.go
+++ b/src/source_controller/cacheservice/event/mix-event/flow.go
@@ -37,7 +37,6 @@ type MixEventFlowOptions struct {
 	MixKey       event.Key
 	Key          event.Key
 	WatchFields  []string
-	Task         *task.Task
 	EventLockTTL time.Duration
 	EventLockKey string
 }
@@ -81,9 +80,9 @@ func NewMixEventFlow(opts MixEventFlowOptions, rearrangeEvents rearrangeEventsFu
 
 const batchSize = 500
 
-// RunFlow run mix event flow
-func (f *MixEventFlow) RunFlow(ctx context.Context) error {
-	blog.Infof("start run %s event flow for key: %s.", f.MixKey.Namespace(), f.Key.Namespace())
+// GenWatchTask generate event flow watch task
+func (f *MixEventFlow) GenWatchTask() (*task.Task, error) {
+	blog.Infof("generate %s flow watch task for key: %s.", f.MixKey.Namespace(), f.Key.Namespace())
 
 	es := make(map[string]interface{})
 
@@ -117,12 +116,7 @@ func (f *MixEventFlow) RunFlow(ctx context.Context) error {
 		opts.CollOpts.EventStruct = new(metadata.HostMapStr)
 	}
 
-	err := f.Task.AddLoopBatchTask(opts)
-	if err != nil {
-		blog.Errorf("watch %s events, but add watch batch task failed, err: %v", f.MixKey.Namespace(), err)
-		return err
-	}
-	return nil
+	return task.NewLoopBatchTask(opts)
 }
 
 // doBatch batch handle events
diff --git a/src/source_controller/cacheservice/service/service.go b/src/source_controller/cacheservice/service/service.go
index 86fb56f36f..16f6939771 100644
--- a/src/source_controller/cacheservice/service/service.go
+++ b/src/source_controller/cacheservice/service/service.go
@@ -36,8 +36,8 @@ import (
 	"configcenter/src/source_controller/cacheservice/event/identifier"
 	"configcenter/src/source_controller/coreservice/core"
 	"configcenter/src/storage/driver/mongodb"
+	"configcenter/src/storage/stream/scheduler"
 	"configcenter/src/storage/stream/task"
-	"configcenter/src/storage/stream/types"
 	"configcenter/src/thirdparty/logplatform/opentelemetry"
 
 	"github.com/emicklei/go-restful/v3"
@@ -47,6 +47,7 @@ type CacheServiceInterface interface {
 	WebService() *restful.Container
 	SetConfig(cfg options.Config, engine *backbone.Engine, err errors.CCErrorIf, language language.CCLanguageIf) error
+	Scheduler() *scheduler.Scheduler
 }
 
 // New create cache service instance
@@ -64,17 +65,18 @@ type cacheService struct {
 	core        core.Core
 	cacheSet    *cache.ClientSet
 	authManager *extensions.AuthManager
+	scheduler   *scheduler.Scheduler
 }
 
 // SetConfig TODO
-func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, err errors.CCErrorIf,
+func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, errf errors.CCErrorIf,
 	lang language.CCLanguageIf) error {
 
 	s.cfg = cfg
 	s.engine = engine
 
-	if nil != err {
-		s.err = err
+	if errf != nil {
+		s.err = errf
 	}
 
 	if nil != lang {
@@ -93,41 +95,51 @@ func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, er
 	}
 	s.authManager = extensions.NewAuthManager(engine.CoreAPI, iamCli)
-	watchTaskOpt := &types.NewTaskOptions{
-		StopNotifier: 
make(<-chan struct{}), - } - watchTask, taskErr := task.New(mongodb.Dal(), mongodb.Dal("watch"), engine.ServiceManageInterface, watchTaskOpt) - if taskErr != nil { - blog.Errorf("new watch task instance failed, err: %v", taskErr) - return taskErr + taskScheduler, err := scheduler.New(mongodb.Dal(), mongodb.Dal("watch"), engine.ServiceManageInterface) + if err != nil { + blog.Errorf("new watch task scheduler instance failed, err: %v", err) + return err } + s.scheduler = taskScheduler + watchTasks := make([]*task.Task, 0) - c, cacheErr := cacheop.NewCache(watchTask, engine.ServiceManageInterface) + c, cacheErr := cacheop.NewCache(engine.ServiceManageInterface) if cacheErr != nil { blog.Errorf("new cache instance failed, err: %v", cacheErr) return cacheErr } s.cacheSet = c + watchTasks = append(watchTasks, c.GetWatchTasks()...) - flowErr := flow.NewEvent(watchTask) + flowEvent, flowErr := flow.NewEvent() if flowErr != nil { blog.Errorf("new watch event failed, err: %v", flowErr) return flowErr } + watchTasks = append(watchTasks, flowEvent.GetWatchTasks()...) - if err := identifier.NewIdentity(watchTask); err != nil { + hostIdentity, err := identifier.NewIdentity() + if err != nil { blog.Errorf("new host identity event failed, err: %v", err) return err } + watchTasks = append(watchTasks, hostIdentity.GetWatchTasks()...) - if err := bsrelation.NewBizSetRelation(watchTask); err != nil { + bsRelation, err := bsrelation.NewBizSetRelation() + if err != nil { blog.Errorf("new biz set relation event failed, err: %v", err) return err } + watchTasks = append(watchTasks, bsRelation.GetWatchTasks()...) + + if err = taskScheduler.AddTasks(watchTasks...); err != nil { + blog.Errorf("add event watch tasks failed, err: %v", err) + return err + } - taskErr = watchTask.Start() - if taskErr != nil { - return taskErr + if err = taskScheduler.Start(); err != nil { + blog.Errorf("start event watch task scheduler failed, err: %v", err) + return err } return nil @@ -159,6 +171,11 @@ func (s *cacheService) WebService() *restful.Container { return container } +// Scheduler returns the watch task scheduler +func (s *cacheService) Scheduler() *scheduler.Scheduler { + return s.scheduler +} + // Language TODO func (s *cacheService) Language(header http.Header) language.DefaultCCLanguageIf { lang := httpheader.GetLanguage(header) diff --git a/src/source_controller/transfer-service/service/service.go b/src/source_controller/transfer-service/service/service.go index 3a0d1430fd..fe4042981d 100644 --- a/src/source_controller/transfer-service/service/service.go +++ b/src/source_controller/transfer-service/service/service.go @@ -30,8 +30,7 @@ import ( "configcenter/src/source_controller/transfer-service/app/options" "configcenter/src/source_controller/transfer-service/sync" "configcenter/src/storage/driver/mongodb" - "configcenter/src/storage/stream/task" - "configcenter/src/storage/stream/types" + "configcenter/src/storage/stream/scheduler" "configcenter/src/thirdparty/logplatform/opentelemetry" "github.com/emicklei/go-restful/v3" @@ -45,22 +44,29 @@ type Service struct { // New Service func New(conf *options.Config, engine *backbone.Engine) (*Service, error) { - watchTaskOpt := &types.NewTaskOptions{ - StopNotifier: make(<-chan struct{}), - } - watchTask, taskErr := task.New(mongodb.Dal(), mongodb.Dal("watch"), engine.ServiceManageInterface, watchTaskOpt) - if taskErr != nil { - blog.Errorf("new watch task instance failed, err: %v", taskErr) - return nil, taskErr + taskScheduler, err := scheduler.New(mongodb.Dal(), 
mongodb.Dal("watch"), engine.ServiceManageInterface) + if err != nil { + blog.Errorf("new watch task scheduler instance failed, err: %v", err) + return nil, err } - syncer, err := sync.NewSyncer(conf, engine.ServiceManageInterface, watchTask, engine.CoreAPI.CacheService(), + syncer, err := sync.NewSyncer(conf, engine.ServiceManageInterface, engine.CoreAPI.CacheService(), engine.Metric().Registry()) if err != nil { blog.Errorf("new syncer failed, err: %v", err) return nil, err } + if err = taskScheduler.AddTasks(syncer.GetWatchTasks()...); err != nil { + blog.Errorf("add event watch tasks to scheduler failed, err: %v", err) + return nil, err + } + + if err = taskScheduler.Start(); err != nil { + blog.Errorf("start event watch task scheduler failed, err: %v", err) + return nil, err + } + return &Service{ engine: engine, syncer: syncer, diff --git a/src/source_controller/transfer-service/sync/sync.go b/src/source_controller/transfer-service/sync/sync.go index 473ddf57c5..ea4ecd6f4a 100644 --- a/src/source_controller/transfer-service/sync/sync.go +++ b/src/source_controller/transfer-service/sync/sync.go @@ -48,10 +48,11 @@ type Syncer struct { isMaster discovery.ServiceManageInterface metadata *metadata.Metadata resSyncerMap map[types.ResType]*resSyncer + tasks []*task.Task } // NewSyncer new cmdb data syncer -func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, task *task.Task, +func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, cacheCli cacheservice.CacheServiceClientInterface, reg prometheus.Registerer) (*Syncer, error) { if !conf.Sync.EnableSync { @@ -112,7 +113,7 @@ func NewSyncer(conf *options.Config, isMaster discovery.ServiceManageInterface, } } - err = syncer.run(conf, task, transMedium, cacheCli) + err = syncer.run(conf, transMedium, cacheCli) if err != nil { return nil, err } @@ -164,7 +165,7 @@ func parseDestExConf(conf *options.Config) (map[types.ResType]map[string][]optio return idRuleMap, innerDataIDMap } -func (s *Syncer) run(conf *options.Config, task *task.Task, transMedium medium.ClientI, +func (s *Syncer) run(conf *options.Config, transMedium medium.ClientI, cacheCli cacheservice.CacheServiceClientInterface) error { switch conf.Sync.Role { @@ -175,13 +176,14 @@ func (s *Syncer) run(conf *options.Config, task *task.Task, transMedium medium.C return nil } - watcher, err := watch.New(conf.Sync.Name, task, s.isMaster, s.metadata, cacheCli, transMedium) + watcher, err := watch.New(conf.Sync.Name, s.isMaster, s.metadata, cacheCli, transMedium) if err != nil { blog.Errorf("new watcher failed, err: %v", err) return err } - if err = watcher.Watch(); err != nil { + s.tasks, err = watcher.Watch() + if err != nil { blog.Errorf("watch src event failed, err: %v", err) return err } @@ -207,3 +209,8 @@ type resSyncer struct { lgc logics.Logics metadata *metadata.Metadata } + +// GetWatchTasks returns the event watch tasks +func (s *Syncer) GetWatchTasks() []*task.Task { + return s.tasks +} diff --git a/src/source_controller/transfer-service/sync/watch/watch.go b/src/source_controller/transfer-service/sync/watch/watch.go index 48135628ae..d3581531cd 100644 --- a/src/source_controller/transfer-service/sync/watch/watch.go +++ b/src/source_controller/transfer-service/sync/watch/watch.go @@ -39,7 +39,6 @@ import ( // Watcher is cmdb data syncer event watcher type Watcher struct { name string - task *task.Task isMaster discovery.ServiceManageInterface metadata *syncmeta.Metadata cacheCli cacheservice.CacheServiceClientInterface @@ -48,7 
+47,7 @@ type Watcher struct { } // New new cmdb data syncer event watcher -func New(name string, task *task.Task, isMaster discovery.ServiceManageInterface, meta *syncmeta.Metadata, +func New(name string, isMaster discovery.ServiceManageInterface, meta *syncmeta.Metadata, cacheCli cacheservice.CacheServiceClientInterface, transMedium medium.ClientI) (*Watcher, error) { // create cmdb data syncer event watch token table @@ -83,7 +82,6 @@ func New(name string, task *task.Task, isMaster discovery.ServiceManageInterface // generate cmdb data syncer event watcher watcher := &Watcher{ name: name, - task: task, isMaster: isMaster, metadata: meta, cacheCli: cacheCli, @@ -99,7 +97,8 @@ func New(name string, task *task.Task, isMaster discovery.ServiceManageInterface } // Watch cmdb data syncer events and push the events to transfer medium -func (w *Watcher) Watch() error { +func (w *Watcher) Watch() ([]*task.Task, error) { + tasks := make([]*task.Task, 0) for _, resType := range types.ListAllResTypeForIncrSync() { cursorTypes, exists := resTypeCursorMap[resType] if exists { @@ -115,11 +114,13 @@ func (w *Watcher) Watch() error { continue } - if err := w.watchDB(resType); err != nil { - blog.Errorf("watch %s events from db failed, err: %v", resType, err) - return err + watchTask, err := w.watchDB(resType) + if err != nil { + blog.Errorf("new watch %s events task failed, err: %v", resType, err) + return nil, err } + tasks = append(tasks, watchTask) } - return nil + return tasks, nil } diff --git a/src/source_controller/transfer-service/sync/watch/watch_db.go b/src/source_controller/transfer-service/sync/watch/watch_db.go index afa2ba0568..9e736c62b5 100644 --- a/src/source_controller/transfer-service/sync/watch/watch_db.go +++ b/src/source_controller/transfer-service/sync/watch/watch_db.go @@ -23,10 +23,10 @@ import ( synctypes "configcenter/pkg/synchronize/types" "configcenter/src/common" - "configcenter/src/common/blog" "configcenter/src/common/http/rest" "configcenter/src/common/metadata" "configcenter/src/common/watch" + "configcenter/src/storage/stream/task" "configcenter/src/storage/stream/types" ) @@ -43,7 +43,7 @@ var resTypeWatchOptMap = map[synctypes.ResType]*types.WatchCollOptions{ } // watchDB watch db events for resource that are not watched by flow -func (w *Watcher) watchDB(resType synctypes.ResType) error { +func (w *Watcher) watchDB(resType synctypes.ResType) (*task.Task, error) { handler := w.tokenHandlers[resType] opts := &types.LoopBatchTaskOptions{ @@ -64,13 +64,7 @@ func (w *Watcher) watchDB(resType synctypes.ResType) error { BatchSize: common.BKMaxLimitSize, } - err := w.task.AddLoopBatchTask(opts) - if err != nil { - blog.Errorf("watch %s events from db failed, err: %v", resType, err) - return err - } - - return nil + return task.NewLoopBatchTask(opts) } // handleDBEvents handle db events diff --git a/src/storage/stream/loop/loop_watch.go b/src/storage/stream/loop/loop_watch.go deleted file mode 100644 index 066ea02f93..0000000000 --- a/src/storage/stream/loop/loop_watch.go +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making 蓝鲸 available. - * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved. - * Licensed under the MIT License (the "License"); you may not use this file except - * in compliance with the License. 
You may obtain a copy of the License at - * http://opensource.org/licenses/MIT - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied. See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package loop defines loop watch logics -package loop - -import ( - "context" - "time" - - "configcenter/src/apimachinery/discovery" - "configcenter/src/common/blog" - "configcenter/src/storage/stream/event" - "configcenter/src/storage/stream/types" -) - -// NewLoopWatch TODO -func NewLoopWatch(streamW *event.Event, isMaster discovery.ServiceManageInterface) (*LoopsWatch, error) { - loops := &LoopsWatch{ - streamWatch: streamW, - isMaster: isMaster, - } - - return loops, nil -} - -// LoopsWatch TODO -type LoopsWatch struct { - streamWatch *event.Event - isMaster discovery.ServiceManageInterface -} - -// WithOne allows users to watch events one by one. -func (lw *LoopsWatch) WithOne(opts *types.LoopOneOptions) error { - if err := opts.Validate(); err != nil { - blog.Errorf("run loop watch, but option is invalid, err: %v", err) - return err - } - - watchOpt, err := lw.updateStartTokenInfo(&opts.LoopOptions) - if err != nil { - return err - } - watchOpt.WatchFatalErrorCallback = opts.TokenHandler.ResetWatchToken - - var cancel func() - var cancelCtx context.Context - cancelCtx, cancel = context.WithCancel(context.Background()) - - watcher, err := lw.streamWatch.Watch(cancelCtx, watchOpt) - if err != nil { - blog.Errorf("%s job, run loop, but watch failed, err: %v", opts.Name, err) - cancel() - return err - } - - retrySignal := make(chan struct{}) - retryObserver := &retryHandler{ - retryCounter: 0, - maxRetryCnt: opts.RetryOptions.MaxRetryCount, - } - - handler := func(newCancelCtx context.Context, newWatcher *types.Watcher, newRetrySignal chan struct{}) { - lw.tryLoopWithOne(newCancelCtx, newWatcher, newRetrySignal, retryObserver, opts) - } - - // start watch retry signal - go lw.watchRetry(cancel, retrySignal, &opts.LoopOptions, handler) - - // start loop with events one by one. - go lw.tryLoopWithOne(cancelCtx, watcher, retrySignal, retryObserver, opts) - - return nil -} - -func (lw *LoopsWatch) updateStartTokenInfo(opts *types.LoopOptions) (*types.WatchOptions, error) { - startToken, err := opts.TokenHandler.GetStartWatchToken(context.Background()) - if err != nil { - blog.Errorf("%s job, loop watch db %s, but get start watch token failed, err: %v", opts.Name, - lw.streamWatch.DBName, err) - return nil, err - } - - // update the start token. - if len(startToken.Token) != 0 { - opts.WatchOpt.StartAfterToken = &types.EventToken{Data: startToken.Token} - } - if startToken.StartAtTime != nil { - opts.WatchOpt.StartAtTime = startToken.StartAtTime - } - - return opts.WatchOpt, nil -} - -// WithBatch allows users to watch events with batch. 
-func (lw *LoopsWatch) WithBatch(opts *types.LoopBatchOptions) error { - if err := opts.Validate(); err != nil { - blog.Errorf("run loop watch batch, but option is invalid, err: %v", err) - return err - } - - watchOpt, err := lw.updateStartTokenInfo(&opts.LoopOptions) - if err != nil { - return err - } - watchOpt.WatchFatalErrorCallback = opts.TokenHandler.ResetWatchToken - - var cancel func() - var cancelCtx context.Context - cancelCtx, cancel = context.WithCancel(context.Background()) - - watcher, err := lw.streamWatch.Watch(cancelCtx, watchOpt) - if err != nil { - blog.Errorf("%s job, run loop, but watch failed, err: %v", opts.Name, err) - cancel() - return err - } - - retrySignal := make(chan struct{}) - retryObserver := &retryHandler{ - retryCounter: 0, - maxRetryCnt: opts.RetryOptions.MaxRetryCount, - } - - handler := func(newCancelCtx context.Context, newWatcher *types.Watcher, newRetrySignal chan struct{}) { - lw.tryLoopWithBatch(newCancelCtx, newWatcher, newRetrySignal, retryObserver, opts) - } - - // start watch retry signal - go lw.watchRetry(cancel, retrySignal, &opts.LoopOptions, handler) - - // start loop with events one by one. - go lw.tryLoopWithBatch(cancelCtx, watcher, retrySignal, retryObserver, opts) - - return nil -} - -type handlerFunc func(ctxWithCancel context.Context, watcher *types.Watcher, retrySignal chan struct{}) - -// watchRetry watch the loop jobs to check if a new watch is needed. if yes, then re-watch again. -func (lw *LoopsWatch) watchRetry(cancel context.CancelFunc, - retrySignal chan struct{}, - opts *types.LoopOptions, - doHandler handlerFunc) { - - for { - // initialize a new context - ctx := context.Background() - - select { - case <-opts.StopNotifier: - cancel() - blog.Warnf("received stop %s loop watch job notify, stopping now.", opts.Name) - return - - // wait for another retry - case <-retrySignal: - // wait for a well and then do the retry work. - time.Sleep(opts.RetryOptions.RetryDuration) - - // initialize a new retry signal, exit loop immediately for next usage. - retrySignal = make(chan struct{}) - - // cancel the former watch - cancel() - - // use the last token to resume so that we can start again from where we stopped. 
- watchOpt, err := lw.updateStartTokenInfo(opts) - if err != nil { - // notify retry signal, exit loop - close(retrySignal) - continue - } - opts.WatchOpt = watchOpt - - blog.Errorf("%s job, the former watch loop: %s failed, start retry again from token: %+v.", opts.Name, - lw.streamWatch.DBName, watchOpt.StartAfterToken) - - var cancelCtx context.Context - cancelCtx, cancel = context.WithCancel(ctx) - - watcher, err := lw.streamWatch.Watch(cancelCtx, opts.WatchOpt) - if err != nil { - cancel() - // notify retry signal, exit loop - close(retrySignal) - blog.Errorf("%s job, run retry loop, but watch failed, err: %v", err) - continue - } - - // start handle loop jobs - go doHandler(cancelCtx, watcher, retrySignal) - - blog.Warnf("%s job, retry loop %s from token: %+v success.", opts.Name, lw.streamWatch.DBName, - watchOpt.StartAfterToken) - } - } -} - -// tryLoopWithBatch try handle event with batch -func (lw *LoopsWatch) tryLoopWithBatch(ctxWithCancel context.Context, - watcher *types.Watcher, - retrySignal chan struct{}, - retryObserver *retryHandler, - opts *types.LoopBatchOptions) { - - ticker := time.NewTicker(50 * time.Millisecond) - - observer := &observer{ - isMaster: lw.isMaster, - previousStatus: lw.isMaster.IsMaster(), - } - - for { - reWatch, loop := observer.canLoop() - if reWatch { - // stop the tick to release resource. - ticker.Stop() - blog.Warnf("%s job, master status has changed, try to re-watch again, db:%s", opts.Name, - lw.streamWatch.DBName) - // trigger re-watch action now. - close(retrySignal) - // exit the for loop - return - } - - if !loop { - blog.V(5).Infof("%s job, loop %s event, but not master, skip.", opts.Name, lw.streamWatch.DBName) - time.Sleep(5 * time.Second) - continue - } - - batchEvents := make([]*types.Event, 0) - // get event with loop. - for { - select { - case <-ctxWithCancel.Done(): - // stop the tick to release resource. - ticker.Stop() - blog.Warnf("%s job, received cancel loop watch %s signal, exit loop.", opts.Name, lw.streamWatch.DBName) - // exist the goroutine - return - - case one := <-watcher.EventChan: - batchEvents = append(batchEvents, one) - if blog.V(4) { - blog.Infof("%s job, received %s event, detail: %s, op-time: %s, rid: %s", opts.Name, - lw.streamWatch.DBName, one.String(), one.ClusterTime.String(), one.ID()) - } - - // calculate event count, try to get more event for a batch - if len(batchEvents) < opts.BatchSize { - // continue to get more events - continue - } - case <-ticker.C: - // handle with batch event. - if len(batchEvents) == 0 { - // ticks, but no events received, loop next round to get events. - continue - } - case <-opts.StopNotifier: - ticker.Stop() - blog.Warnf("received stop %s loop watch job notify, stopping now.", opts.Name) - return - } - - // break the for loop to handle event for now. 
- break - } - - if lw.handleBatchEvents(ctxWithCancel, batchEvents, opts, retryObserver, retrySignal) { - return - } - } -} - -// handleBatchEvents handle batch events, returns if the loop watch needs retry -func (lw *LoopsWatch) handleBatchEvents(ctx context.Context, batchEvents []*types.Event, opts *types.LoopBatchOptions, - retryObserver *retryHandler, retrySignal chan struct{}) bool { - - // for safety guarantee - if len(batchEvents) == 0 { - return false - } - - first := batchEvents[0] - - blog.Infof("%s job, received %s batch %d events, first op-time: %s rid: %s.", opts.Name, lw.streamWatch.DBName, - len(batchEvents), first.ClusterTime.String(), first.ID()) - - retry := opts.EventHandler.DoBatch(batchEvents) - if retry { - if retryObserver.canStillRetry() { - blog.Warnf("%s job, received %s %d events in batch, but do batch failed, retry now, rid: %s", opts.Name, - lw.streamWatch.DBName, len(batchEvents), first.ID()) - // an error occurred, we need to retry it later. - // tell the schedule to re-watch again. - close(retrySignal) - // exit this goroutine. - return true - } - - blog.Warnf("%s job, collection %s batch watch retry exceed max count, skip, rid: %s.", opts.Name, - lw.streamWatch.DBName, first.ID()) - // save the event token now. - } - - // reset retry counter so that the previous retry count will not affect the next event - retryObserver.resetRetryCounter() - - last := batchEvents[len(batchEvents)-1] - // update the last watched token for resume usage. - lastToken := &types.TokenInfo{ - Token: last.Token.Data, - StartAtTime: &last.ClusterTime, - } - if err := opts.TokenHandler.SetLastWatchToken(ctx, lastToken); err != nil { - blog.Errorf("%s job, loop watch %s event, but set last token failed, err: %v, rid: %s, retry later.", - opts.Name, lw.streamWatch.DBName, err, first.ID()) - - // retry later. - close(retrySignal) - // exit this goroutine - return true - } - return false -} - -// tryLoopWithOne try handle event one by one -func (lw *LoopsWatch) tryLoopWithOne(ctxWithCancel context.Context, - watcher *types.Watcher, - retrySignal chan struct{}, - retryObserver *retryHandler, - opts *types.LoopOneOptions) { - - observer := &observer{ - isMaster: lw.isMaster, - previousStatus: lw.isMaster.IsMaster(), - } - - for one := range watcher.EventChan { - select { - case <-ctxWithCancel.Done(): - blog.Warnf("%s job, received cancel loop watch %s signal, exit loop, exit loop", opts.Name, - lw.streamWatch.DBName) - return - case <-opts.StopNotifier: - blog.Warnf("received stop %s loop watch job notify, stopping now.", opts.Name) - return - default: - } - - reWatch, loop := observer.canLoop() - if reWatch { - blog.Warnf("%s job, master status has changed, try to re-watch %s again", opts.Name, lw.streamWatch.DBName) - // trigger re-watch action now. - close(retrySignal) - // exit the for loop - return - } - - if !loop { - blog.Infof("%s job, received %s %s event, but not master, skip. details: %s, rid: %s", opts.Name, - lw.streamWatch.DBName, one.OperationType, one.String(), one.ID()) - continue - } - - blog.Infof("%s job, received %s event, type: %s, op-time: %s rid: %s", opts.Name, lw.streamWatch.DBName, - one.OperationType, one.ClusterTime.String(), one.ID()) - - if blog.V(4) { - blog.Infof("%s job, event details: %s, oid: %s", opts.Name, one.String(), one.ID()) - } - - retry := lw.tryOne(one, opts) - if retry { - if retryObserver.canStillRetry() { - blog.Warnf("%s job, retry watch %s later. 
rid: %s", opts.Name, lw.streamWatch.DBName, one.ID()) - // an error occurred, we need to retry it later. - // tell the schedule to re-watch again. - close(retrySignal) - // exist this goroutine. - return - } - - blog.Warnf("%s job, retry %s event exceed max count, skip, detail: %s, rid: %s", opts.Name, - lw.streamWatch.DBName, one.String(), one.ID()) - // save the event token now. - } - - // reset retry counter so that the previous retry count will not affect the next event - retryObserver.resetRetryCounter() - - // update the last watched token for resume usage. - lastToken := &types.TokenInfo{ - Token: one.Token.Data, - StartAtTime: &one.ClusterTime, - } - if err := opts.TokenHandler.SetLastWatchToken(ctxWithCancel, lastToken); err != nil { - blog.Errorf("%s job, loop watch %s event, but set last watched token failed, err: %v, rid: %s, "+ - "retry later.", lw.streamWatch.DBName, err, one.ID()) - - // retry later. - close(retrySignal) - // exist this goroutine - return - } - } -} - -// tryOne handle event one by one. -func (lw *LoopsWatch) tryOne(e *types.Event, opts *types.LoopOneOptions) (retry bool) { - switch e.OperationType { - case types.Insert: - retry := opts.EventHandler.DoAdd(e) - if retry { - blog.Warnf("%s job, received %s %s event, but do add job failed, retry now, rid: %s", opts.Name, - lw.streamWatch.DBName, e.OperationType, e.ID()) - - return retry - } - - case types.Update, types.Replace: - retry := opts.EventHandler.DoUpdate(e) - if retry { - blog.Warnf("%s job, received %s %s event, but do update job failed, retry now, rid: %s", opts.Name, - lw.streamWatch.DBName, e.OperationType, e.ID()) - - return retry - } - - case types.Delete: - retry := opts.EventHandler.DoDelete(e) - if retry { - blog.Warnf("%s job, received %s %s event, but do delete job failed, retry now, rid: %s", opts.Name, - lw.streamWatch.DBName, e.OperationType, e.ID()) - - return retry - } - - case types.Invalidate: - blog.Errorf("%s job, watch %s event, received invalid operation type, doc: %s, rid: %s", opts.Name, - lw.streamWatch.DBName, e.DocBytes, e.ID()) - return false - - default: - blog.Errorf("%s job, watch %s event, received unsupported operation type, doc: %s, rid: %s", opts.Name, - lw.streamWatch.DBName, e.DocBytes, e.ID()) - return false - } - - return false -} - -// describe the event retry policy -type retryHandler struct { - // current retry count - retryCounter int - // a event's max retry count - maxRetryCnt int -} - -// canStillRetry TODO -// check if this event can still retry -func (r *retryHandler) canStillRetry() bool { - r.retryCounter += 1 - - if r.retryCounter > r.maxRetryCnt { - r.retryCounter = 0 - return false - } - - return true -} - -func (r *retryHandler) resetRetryCounter() { - r.retryCounter = 0 -} - -type observer struct { - isMaster discovery.ServiceManageInterface - previousStatus bool -} - -// canLoop describe whether we can still loop the next event or next batch events. -// this is a master slave service. we should re-watch the event from the previous -// event token, only when we do this, we can loop the continuous events later which -// is no events is skipped or duplicated. 
-func (o *observer) canLoop() (reWatch bool, loop bool) { - current := o.isMaster.IsMaster() - - if o.previousStatus == current { - if !current { - // not master, status not changed, and can not loop - return false, false - } else { - // is master, status not changed, and can loop - return false, true - } - } - - blog.Infof("loop watch, is master status changed from %v to %v.", o.previousStatus, current) - - // update status - o.previousStatus = current - - // status already changed, and can not continue loop, need to re-watch again. - return true, false -} diff --git a/src/storage/stream/scheduler/scheduler.go b/src/storage/stream/scheduler/scheduler.go new file mode 100644 index 0000000000..2b4fd8d44e --- /dev/null +++ b/src/storage/stream/scheduler/scheduler.go @@ -0,0 +1,273 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. + */ + +// Package scheduler defines event watch task scheduler logics +package scheduler + +import ( + "context" + "fmt" + "time" + + "configcenter/src/apimachinery/discovery" + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/common/util" + "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/stream/event" + "configcenter/src/storage/stream/task" + "configcenter/src/storage/stream/types" +) + +// Scheduler is the event watch task scheduler +type Scheduler struct { + isMaster discovery.ServiceManageInterface + + // watchTasks is the task name to watch task map + watchTasks map[string]*task.Task + + // eventMap is the db uuid to event instance map + eventMap map[string]*event.Event + // dbClients is the db uuid to db client map + dbClients map[string]local.DB + // watchClients is the db uuid to watch client map + watchClients map[string]*local.Mongo + + // these options are used to generate loop watch options + majorityCommitted *bool + maxAwaitTime *time.Duration + + // stopNotifier is used when the user needs to stop loop events and release related resources. + // It's an optional option. When it's not set (i.e. is nil), the loop will never exit. + // Otherwise, the user can use it to stop loop events. + // When a user wants to stop the loop, the only thing that needs to be done is to just + // **close** this stop notifier channel. + // Attention: + // Closing this notifier channel is the only way to stop the loop correctly. + // Do not send data to this channel.
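+ // In this scheduler the channel is created in New and closed exactly once by the Stop method.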
+ stopNotifier chan struct{} +} + +// New creates a new watch task scheduler +func New(db, watchDB dal.Dal, isMaster discovery.ServiceManageInterface) (*Scheduler, error) { + t := &Scheduler{ + isMaster: isMaster, + eventMap: make(map[string]*event.Event), + dbClients: make(map[string]local.DB), + watchClients: make(map[string]*local.Mongo), + watchTasks: make(map[string]*task.Task), + stopNotifier: make(chan struct{}), + } + + watchDBRelation, defaultWatchDBUUID, err := genWatchDBRelationInfo(watchDB) + if err != nil { + return nil, err + } + + // generate watch db uuid to watch db client map + watchDBClientMap := make(map[string]*local.Mongo) + err = watchDB.ExecForAllDB(func(db local.DB) error { + dbClient, ok := db.(*local.Mongo) + if !ok { + return fmt.Errorf("watch db is not an instance of local mongo") + } + watchDBClientMap[dbClient.GetMongoClient().UUID()] = dbClient + return nil + }) + if err != nil { + blog.Errorf("get all watch db client failed, err: %v", err) + return nil, err + } + + // generate db uuid to db client & watch db client & loop watch instance map + err = db.ExecForAllDB(func(db local.DB) error { + dbClient, ok := db.(*local.Mongo) + if !ok { + return fmt.Errorf("db to be watched is not an instance of local mongo") + } + mongoClient := dbClient.GetMongoClient() + uuid := mongoClient.UUID() + + watchDBUUID, exists := watchDBRelation[uuid] + if !exists { + blog.Warnf("db %s has no watch db, use default watch db for new db", uuid) + watchDBUUID = defaultWatchDBUUID + } + + watchClient, exists := watchDBClientMap[watchDBUUID] + if !exists { + return fmt.Errorf("db %s related watch db %s is invalid", uuid, watchDBUUID) + } + t.watchClients[uuid] = watchClient + t.dbClients[uuid] = dbClient + + eventInst, err := event.NewEvent(mongoClient.Client(), mongoClient.DBName(), uuid) + if err != nil { + return fmt.Errorf("new event for db %s failed, err: %v", uuid, err) + } + t.eventMap[uuid] = eventInst + return nil + }) + if err != nil { + blog.Errorf("generate db uuid related map failed, err: %v", err) + return nil, err + } + + return t, nil +} + +// AddTasks add watch tasks to scheduler +func (s *Scheduler) AddTasks(tasks ...*task.Task) error { + for _, t := range tasks { + _, exists := s.watchTasks[t.Name] + if exists { + return fmt.Errorf("loop watch task %s already exists", t.Name) + } + + if t.MajorityCommitted != nil && *t.MajorityCommitted { + s.majorityCommitted = t.MajorityCommitted + } + if t.MaxAwaitTime != nil && (s.maxAwaitTime == nil || *t.MaxAwaitTime > *s.maxAwaitTime) { + s.maxAwaitTime = t.MaxAwaitTime + } + + s.watchTasks[t.Name] = t + } + return nil +} + +// Start execute all watch tasks +func (s *Scheduler) Start() error { + if len(s.watchTasks) == 0 { + blog.Warnf("no watch task to start") + return nil + } + + // generate task name to collection options map and db uuid to task name to db watch tasks map by watch task info + collOptions := make(map[string]types.WatchCollOptions) + listCollOptions := make(map[string]types.CollectionOptions) + dbWatchTasks := make(map[string]map[string]*task.DBWatchTask) + var batchSize int + for taskName, watchTask := range s.watchTasks { + collOptions[taskName] = *watchTask.CollOptions + if watchTask.NeedList { + listCollOptions[taskName] = watchTask.CollOptions.CollectionOptions + } + if watchTask.BatchSize > batchSize { + batchSize = watchTask.BatchSize + } + for uuid, dbClient := range s.dbClients { + dbTask, err := task.NewDBWatchTask(watchTask, &types.DBInfo{ + UUID: uuid, + WatchDB: s.watchClients[uuid], + CcDB: 
dbClient, + }) + if err != nil { + return err + } + if _, exists := dbWatchTasks[uuid]; !exists { + dbWatchTasks[uuid] = make(map[string]*task.DBWatchTask) + } + dbWatchTasks[uuid][taskName] = dbTask + } + } + + // list data for all list watch tasks + if len(listCollOptions) > 0 { + err := s.startList(listCollOptions, batchSize, dbWatchTasks) + if err != nil { + return err + } + } + + // loop watch all db events for all tasks + err := s.startLoopWatch(collOptions, dbWatchTasks, batchSize) + if err != nil { + return err + } + + // run watch tasks for all dbs + for _, dbTaskMap := range dbWatchTasks { + for _, dbTask := range dbTaskMap { + dbTask.Start(s.stopNotifier) + } + } + + return nil +} + +func (s *Scheduler) startList(listCollOptions map[string]types.CollectionOptions, batchSize int, + dbWatchTasks map[string]map[string]*task.DBWatchTask) error { + + for uuid, eventInst := range s.eventMap { + ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) + opt := &types.ListOptions{ + CollOpts: listCollOptions, + PageSize: &batchSize, + WithRetry: true, + } + listCh, err := eventInst.List(ctx, opt) + if err != nil { + blog.Errorf("list db %s failed, err: %v, options: %+v", uuid, err, *opt) + return err + } + + go func(uuid string) { + for e := range listCh { + task, exists := dbWatchTasks[uuid][e.TaskID] + if !exists { + blog.Warnf("loop watch task %s not exists, event: %+v", e.TaskID, *e) + continue + } + task.ListChan <- e + } + }(uuid) + } + return nil +} + +func (s *Scheduler) startLoopWatch(collOptions map[string]types.WatchCollOptions, + dbWatchTasks map[string]map[string]*task.DBWatchTask, batchSize int) error { + + for uuid, dbTaskMap := range dbWatchTasks { + watcher, err := s.newDBWatcher(uuid, dbTaskMap) + if err != nil { + blog.Errorf("new db watcher for db %s failed, err: %v", uuid, err) + return err + } + + opts := &types.WatchOptions{ + Options: types.Options{ + MajorityCommitted: s.majorityCommitted, + MaxAwaitTime: s.maxAwaitTime, + CollOpts: collOptions, + }, + } + err = watcher.loopWatch(opts, batchSize) + if err != nil { + blog.Errorf("start loop watch for db %s failed, err: %v", uuid, err) + return err + } + } + return nil +} + +// Stop the task scheduler +func (s *Scheduler) Stop() { + close(s.stopNotifier) +} diff --git a/src/storage/stream/task/token.go b/src/storage/stream/scheduler/token.go similarity index 57% rename from src/storage/stream/task/token.go rename to src/storage/stream/scheduler/token.go index 8eca422e19..317d69e956 100644 --- a/src/storage/stream/task/token.go +++ b/src/storage/stream/scheduler/token.go @@ -15,77 +15,31 @@ * to the current version of the project delivered to anyone in the future. 
*/ -package task +package scheduler import ( "context" - "sync" "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/types" ) -type dbTokenHandler struct { - uuid string - watchDB local.DB - - lastToken *types.TokenInfo - taskMap map[string]*dbWatchTask - lastTokenInfo map[string]string - mu sync.RWMutex +func (w *dbWatcher) setTaskLastTokenInfo(taskID, token string) { + w.mu.Lock() + defer w.mu.Unlock() + w.lastTokenInfo[taskID] = token } -// newDBTokenHandler new token handler for db watch task -func newDBTokenHandler(uuid string, watchDB local.DB, taskMap map[string]*dbWatchTask) (*dbTokenHandler, error) { - handler := &dbTokenHandler{ - uuid: uuid, - watchDB: watchDB, - taskMap: taskMap, - lastTokenInfo: make(map[string]string), - } - - lastToken, err := handler.GetStartWatchToken(context.Background()) - if err != nil { - return nil, err - } - handler.lastToken = lastToken - - tokenChan := make(chan struct{}) - - for taskID, task := range taskMap { - if task.lastToken != nil { - handler.lastTokenInfo[taskID] = task.lastToken.Token - } - task.tokenChan = tokenChan - } - - go func() { - for _ = range tokenChan { - handler.setLastWatchToken() - } - }() - return handler, nil -} - -func (d *dbTokenHandler) setTaskLastTokenInfo(taskLastTokenMap map[string]string) { - d.mu.Lock() - defer d.mu.Unlock() - for taskID, token := range taskLastTokenMap { - d.lastTokenInfo[taskID] = token - } -} - -func (d *dbTokenHandler) setLastWatchToken() { +func (w *dbWatcher) setLastWatchToken() { // update last token for db to the earliest last token of all db watch tasks // this token specifies the last event that all db watch tasks has handled var lastToken *types.TokenInfo allFinished := false - for taskID, task := range d.taskMap { - token := task.lastToken + for taskID, task := range w.taskMap { + token := task.LastToken // if token is nil, skip it if token == nil { @@ -93,11 +47,11 @@ func (d *dbTokenHandler) setLastWatchToken() { } isFinished := true - d.mu.RLock() - if token.Token < d.lastTokenInfo[taskID] { + w.mu.RLock() + if token.Token < w.lastTokenInfo[taskID] { isFinished = false } - d.mu.RUnlock() + w.mu.RUnlock() if lastToken == nil { lastToken = token @@ -131,12 +85,12 @@ func (d *dbTokenHandler) setLastWatchToken() { } // if no events are handled, do not update the last token - if lastToken == nil || lastToken.Token == "" || lastToken.Token <= d.lastToken.Token { + if lastToken == nil || lastToken.Token == "" || lastToken.Token <= w.lastToken.Token { return } filter := map[string]interface{}{ - "_id": d.uuid, + "_id": w.uuid, } data := map[string]interface{}{ @@ -144,29 +98,26 @@ func (d *dbTokenHandler) setLastWatchToken() { common.BKStartAtTimeField: lastToken.StartAtTime, } - if err := d.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { - blog.Errorf("set db %s last watch token failed, err: %v, data: %+v", d.uuid, err, data) + if err := w.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { + blog.Errorf("set db %s last watch token failed, err: %v, data: %+v", w.uuid, err, data) + // notify retry signal, retry loop watch logics + w.notifyRetry() return } - d.lastToken = lastToken -} - -// SetLastWatchToken set last watch token for db watch task -func (d *dbTokenHandler) SetLastWatchToken(ctx context.Context, token *types.TokenInfo) error { - return nil + w.lastToken = 
lastToken } -// GetStartWatchToken get start watch token of db watch task -func (d *dbTokenHandler) GetStartWatchToken(ctx context.Context) (*types.TokenInfo, error) { +// getStartWatchToken get start watch token of db watch task +func (w *dbWatcher) getStartWatchToken(ctx context.Context) (*types.TokenInfo, error) { filter := map[string]interface{}{ - "_id": d.uuid, + "_id": w.uuid, } data := new(types.TokenInfo) - err := d.watchDB.Table(common.BKTableNameWatchToken).Find(filter).One(ctx, data) + err := w.watchDB.Table(common.BKTableNameWatchToken).Find(filter).One(ctx, data) if err != nil { if !mongodb.IsNotFoundError(err) { - blog.Errorf("get db %s last watch token failed, err: %v", d.uuid, err) + blog.Errorf("get db %s last watch token failed, err: %v", w.uuid, err) return nil, err } return new(types.TokenInfo), nil @@ -174,10 +125,10 @@ func (d *dbTokenHandler) GetStartWatchToken(ctx context.Context) (*types.TokenIn return data, nil } -// ResetWatchToken set watch token to empty and set the start watch time to the given one for next watch -func (d *dbTokenHandler) ResetWatchToken(startAtTime types.TimeStamp) error { +// resetWatchToken set watch token to empty and set the start watch time to the given one for next watch +func (w *dbWatcher) resetWatchToken(startAtTime types.TimeStamp) error { filter := map[string]interface{}{ - "_id": d.uuid, + "_id": w.uuid, } data := map[string]interface{}{ @@ -185,8 +136,8 @@ func (d *dbTokenHandler) ResetWatchToken(startAtTime types.TimeStamp) error { common.BKStartAtTimeField: startAtTime, } - if err := d.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { - blog.Errorf("reset db %s watch token failed, err: %v, data: %+v", d.uuid, err, data) + if err := w.watchDB.Table(common.BKTableNameWatchToken).Update(context.Background(), filter, data); err != nil { + blog.Errorf("reset db %s watch token failed, err: %v, data: %+v", w.uuid, err, data) return err } return nil diff --git a/src/storage/stream/scheduler/util.go b/src/storage/stream/scheduler/util.go new file mode 100644 index 0000000000..2a9250caee --- /dev/null +++ b/src/storage/stream/scheduler/util.go @@ -0,0 +1,84 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. 
+ */ + +package scheduler + +import ( + "context" + "fmt" + + "configcenter/src/apimachinery/discovery" + "configcenter/src/common" + "configcenter/src/common/blog" + "configcenter/src/storage/dal" + "configcenter/src/storage/dal/mongo/sharding" +) + +// genWatchDBRelationInfo generates the db uuid to watch db uuid map and the default watch db uuid for dbs without a watch db +func genWatchDBRelationInfo(db dal.Dal) (map[string]string, string, error) { + ctx := context.Background() + masterDB := db.Shard(sharding.NewShardOpts().WithIgnoreTenant()) + + relations := make([]sharding.WatchDBRelation, 0) + if err := masterDB.Table(common.BKTableNameWatchDBRelation).Find(nil).All(ctx, &relations); err != nil { + return nil, "", fmt.Errorf("get db and watch db relation failed, err: %v", err) + } + + watchDBRelation := make(map[string]string) + for _, relation := range relations { + watchDBRelation[relation.DB] = relation.WatchDB + } + + cond := map[string]any{common.MongoMetaID: common.ShardingDBConfID} + conf := new(sharding.ShardingDBConf) + err := masterDB.Table(common.BKTableNameSystem).Find(cond).One(ctx, &conf) + if err != nil { + return nil, "", fmt.Errorf("get sharding db conf failed, err: %v", err) + } + return watchDBRelation, conf.ForNewData, nil +} + +type watchObserver struct { + isMaster discovery.ServiceManageInterface + previousStatus bool +} + +// canLoop describes whether we can still loop the next event or the next batch of events. +// this is a master-slave service. we should re-watch the events from the previous +// event token; only when we do this can we loop the subsequent events continuously, +// so that no events are skipped or duplicated. +func (o *watchObserver) canLoop() (reWatch bool, loop bool) { + current := o.isMaster.IsMaster() + + if o.previousStatus == current { + if !current { + // not master, status not changed, and can not loop + return false, false + } else { + // is master, status not changed, and can loop + return false, true + } + } + + blog.Infof("loop watch, is master status changed from %v to %v.", o.previousStatus, current) + + // update status + o.previousStatus = current + + // status already changed, and can not continue loop, need to re-watch again. + return true, false +} diff --git a/src/storage/stream/scheduler/watch.go b/src/storage/stream/scheduler/watch.go new file mode 100644 index 0000000000..c658855162 --- /dev/null +++ b/src/storage/stream/scheduler/watch.go @@ -0,0 +1,253 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future.
+ */ + +package scheduler + +import ( + "context" + "sync" + "time" + + "configcenter/src/apimachinery/discovery" + "configcenter/src/common/blog" + "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/stream/event" + "configcenter/src/storage/stream/task" + "configcenter/src/storage/stream/types" +) + +type dbWatcher struct { + uuid string + watchDB local.DB + streamWatch *event.Event + isMaster discovery.ServiceManageInterface + + // taskMap is the task id to db watch task map + taskMap map[string]*task.DBWatchTask + + // lastToken is the last db watch token that has been set + lastToken *types.TokenInfo + // lastTokenInfo is the task id to last event token map, which is used to judge if all events have been handled + lastTokenInfo map[string]string + mu sync.RWMutex + + // retryChan is used to notify the db watcher to retry watch + retryChan chan struct{} + // tokenChan is used to notify the db watcher to set the last token + tokenChan chan struct{} + // stopNotifier is used to notify the db watcher to stop + stopNotifier <-chan struct{} +} + +// newDBWatcher creates a new db event watcher for the task scheduler +func (s *Scheduler) newDBWatcher(uuid string, taskMap map[string]*task.DBWatchTask) (*dbWatcher, error) { + watcher := &dbWatcher{ + uuid: uuid, + watchDB: s.watchClients[uuid], + streamWatch: s.eventMap[uuid], + isMaster: s.isMaster, + taskMap: taskMap, + lastTokenInfo: make(map[string]string), + retryChan: make(chan struct{}, 1), + tokenChan: make(chan struct{}, 1), + stopNotifier: s.stopNotifier, + } + + lastToken, err := watcher.getStartWatchToken(context.Background()) + if err != nil { + return nil, err + } + watcher.lastToken = lastToken + + for taskID, task := range taskMap { + if task.LastToken != nil { + watcher.lastTokenInfo[taskID] = task.LastToken.Token + } + task.TokenChan = watcher.tokenChan + } + + go func() { + for range watcher.tokenChan { + watcher.setLastWatchToken() + } + }() + return watcher, nil +} + +func (w *dbWatcher) loopWatch(watchOpt *types.WatchOptions, batchSize int) error { + watchOpt.WatchFatalErrorCallback = w.resetWatchToken + + ctx, cancel := context.WithCancel(context.Background()) + + watcher, watchOpt, err := w.watch(ctx, watchOpt) + if err != nil { + cancel() + return err + } + + go func() { + ticker := time.NewTicker(50 * time.Millisecond) + observer := &watchObserver{ + isMaster: w.isMaster, + previousStatus: w.isMaster.IsMaster(), + } + + for { + select { + case <-w.stopNotifier: + cancel() + close(w.tokenChan) + blog.Warnf("received stop %s loop watch job notify, stopping now.", w.uuid) + return + case <-w.retryChan: + // wait for a while and then do the retry work. + time.Sleep(types.DefaultRetryDuration) + + // initialize a new retry signal, exit loop immediately for next usage. + w.retryChan = make(chan struct{}, 1) + + // cancel the former watch + cancel() + + ctx, cancel = context.WithCancel(context.Background()) + watcher, watchOpt, err = w.watch(ctx, watchOpt) + if err != nil { + // notify retry signal, exit loop + w.notifyRetry() + continue + } + + blog.Errorf("%s job, the former watch loop: %s failed, start retry again from token: %+v.", w.uuid, + w.streamWatch.DBName, watchOpt.StartAfterToken) + + ticker = time.NewTicker(50 * time.Millisecond) + observer = &watchObserver{ + isMaster: w.isMaster, + previousStatus: w.isMaster.IsMaster(), + } + default: + reWatch, loop := observer.canLoop() + if reWatch { + // stop the tick to release resource.
+ ticker.Stop() + blog.Warnf("%s job, master status has changed, try to re-watch again, db:%s", w.uuid, + w.streamWatch.DBName) + // trigger re-watch action now. + w.notifyRetry() + // exit the for loop + continue + } + + if !loop { + blog.V(5).Infof("%s job, loop %s event, but not master, skip.", w.uuid, w.streamWatch.DBName) + time.Sleep(5 * time.Second) + continue + } + + w.handleEvents(watcher, ticker, batchSize) + } + } + }() + + return nil +} + +func (w *dbWatcher) watch(ctx context.Context, watchOpt *types.WatchOptions) (*types.Watcher, *types.WatchOptions, + error) { + + startToken, err := w.getStartWatchToken(context.Background()) + if err != nil { + blog.Errorf("%s job, loop watch db %s, but get start watch token failed, err: %v", w.uuid, w.streamWatch.DBName, + err) + return nil, nil, err + } + w.lastToken = startToken + + // update the start token of the watch options + if len(startToken.Token) != 0 { + watchOpt.StartAfterToken = &types.EventToken{Data: startToken.Token} + } + if startToken.StartAtTime != nil { + watchOpt.StartAtTime = startToken.StartAtTime + } + + watcher, err := w.streamWatch.Watch(ctx, watchOpt) + if err != nil { + blog.Errorf("%s job, run loop, but watch failed, err: %v", w.uuid, err) + return nil, nil, err + } + + return watcher, watchOpt, nil +} + +// handleEvents handle events +func (w *dbWatcher) handleEvents(watcher *types.Watcher, ticker *time.Ticker, batchSize int) { + batchEventsLen := 0 + var first *types.Event + + // get event with loop. + for { + select { + case one := <-watcher.EventChan: + task, exists := w.taskMap[one.TaskID] + if !exists { + blog.Warnf("loop watch task %s not exists, event: %+v", one.TaskID, *one) + continue + } + task.EventChan <- one + w.setTaskLastTokenInfo(one.TaskID, one.Token.Data) + + if blog.V(4) { + blog.Infof("%s job, received %s event, detail: %s, op-time: %s, rid: %s", w.uuid, + w.streamWatch.DBName, one.String(), one.ClusterTime.String(), one.ID()) + } + + // calculate event count, try to get more event for a batch + batchEventsLen++ + if first == nil { + first = one + } + if batchEventsLen < batchSize { + // continue to get more events + continue + } + case <-ticker.C: + // handle with batch event. + if batchEventsLen == 0 { + // ticks, but no events received, loop next round to get events. + return + } + case <-w.stopNotifier: + ticker.Stop() + blog.Warnf("received stop %s loop watch job notify, stopping now.", w.uuid) + return + } + + // break the for loop to handle event for now. + break + } + + blog.Infof("%s job, received %s batch %d events, first op-time: %s, rid: %s.", w.uuid, w.streamWatch.DBName, + batchEventsLen, first.ClusterTime.String(), first.ID()) +} + +func (w *dbWatcher) notifyRetry() { + select { + case w.retryChan <- struct{}{}: + default: + } +} diff --git a/src/storage/stream/stream.go b/src/storage/stream/stream.go index e2ebf148c4..dab6127360 100644 --- a/src/storage/stream/stream.go +++ b/src/storage/stream/stream.go @@ -18,10 +18,8 @@ import ( "fmt" "time" - "configcenter/src/apimachinery/discovery" "configcenter/src/storage/dal/mongo/local" "configcenter/src/storage/stream/event" - "configcenter/src/storage/stream/loop" "configcenter/src/storage/stream/types" "go.mongodb.org/mongo-driver/mongo" @@ -73,24 +71,3 @@ func newEvent(conf local.MongoConf) (*event.Event, error) { } return event, nil } - -// LoopInterface is the interface for event loop stream. 
-type LoopInterface interface { - WithOne(opts *types.LoopOneOptions) error - WithBatch(opts *types.LoopBatchOptions) error -} - -// NewLoopStream create a new event loop stream. -func NewLoopStream(conf local.MongoConf, isMaster discovery.ServiceManageInterface) (LoopInterface, error) { - event, err := newEvent(conf) - if err != nil { - return nil, err - } - - loop, err := loop.NewLoopWatch(event, isMaster) - if err != nil { - return nil, err - } - - return loop, nil -} diff --git a/src/storage/stream/task/watch_task.go b/src/storage/stream/task/db_task.go similarity index 71% rename from src/storage/stream/task/watch_task.go rename to src/storage/stream/task/db_task.go index 0caf7379a6..f21f84e89f 100644 --- a/src/storage/stream/task/watch_task.go +++ b/src/storage/stream/task/db_task.go @@ -25,66 +25,52 @@ import ( "configcenter/src/storage/stream/types" ) -// watchTask is the resource watch task -type watchTask struct { - // name is the watch task name that uniquely identifies the watch task - name string - // collOptions is the watch collection options - collOptions *types.WatchCollOptions - // eventHandler is the batch event handler - eventHandler *types.TaskBatchHandler - // tokenHandler is the token handler - tokenHandler types.TaskTokenHandler - // needList defines whether to list all data before watch - needList bool - - retryOptions *types.RetryOptions - batchSize int -} - -type dbWatchTask struct { - *watchTask +// DBWatchTask is the resource watch task for one db +type DBWatchTask struct { + *Task dbInfo *types.DBInfo - eventChan chan *types.Event - listChan chan *types.Event - lastToken *types.TokenInfo - tokenChan chan struct{} + EventChan chan *types.Event + ListChan chan *types.Event + LastToken *types.TokenInfo + // TokenChan is used to notify the token handler that the task token has changed + TokenChan chan struct{} } // maxUnhandledEventLimit if the number of unhandled events exceeds this value, block the event watch process const maxUnhandledEventLimit = 2000 -func newDBWatchTask(task *watchTask, dbInfo *types.DBInfo) (*dbWatchTask, error) { +// NewDBWatchTask generate a new db watch task +func NewDBWatchTask(task *Task, dbInfo *types.DBInfo) (*DBWatchTask, error) { lastToken, err := task.tokenHandler.GetStartWatchToken(context.Background(), dbInfo.UUID, dbInfo.WatchDB) if err != nil { - blog.Errorf("get task %s db %s last watch token failed, err: %v", task.name, dbInfo.UUID, err) + blog.Errorf("get task %s db %s last watch token failed, err: %v", task.Name, dbInfo.UUID, err) return nil, err } - return &dbWatchTask{ - watchTask: task, + return &DBWatchTask{ + Task: task, dbInfo: dbInfo, - eventChan: make(chan *types.Event, maxUnhandledEventLimit+task.batchSize), - listChan: make(chan *types.Event, task.batchSize), - lastToken: lastToken, + EventChan: make(chan *types.Event, maxUnhandledEventLimit+task.BatchSize), + ListChan: make(chan *types.Event, task.BatchSize), + LastToken: lastToken, }, nil } -// start execute watch task -func (t *dbWatchTask) start(stopNotifier <-chan struct{}) { +// Start execute watch task +func (t *DBWatchTask) Start(stopNotifier <-chan struct{}) { go func() { // list all data before watch if this task is a list watch task - if t.needList { - t.lastToken = &types.TokenInfo{ + if t.NeedList { + t.LastToken = &types.TokenInfo{ StartAtTime: &types.TimeStamp{ Sec: uint32(time.Now().Unix()), }, } events := make([]*types.Event, 0) - for event := range t.listChan { + for event := range t.ListChan { events = append(events, event) - if len(events) == 
t.batchSize { + if len(events) == t.BatchSize { t.eventHandler.DoBatch(t.dbInfo, events) } if event.OperationType == types.ListDone { @@ -102,14 +88,14 @@ func (t *dbWatchTask) start(stopNotifier <-chan struct{}) { events := make([]*types.Event, 0) for { select { - case one := <-t.eventChan: + case one := <-t.EventChan: // skip previous event with smaller token - if !compareToken(one, t.lastToken) { - blog.V(4).Infof("%s-%s job, skip previous event(%s)", t.name, t.dbInfo.UUID, one.String()) + if !compareToken(one, t.LastToken) { + blog.V(4).Infof("%s-%s job, skip previous event(%s)", t.Name, t.dbInfo.UUID, one.String()) continue } events = append(events, one) - if len(events) < t.batchSize { + if len(events) < t.BatchSize { continue } case <-ticker.C: @@ -129,11 +115,11 @@ func (t *dbWatchTask) start(stopNotifier <-chan struct{}) { }() } -func (t *dbWatchTask) handleEvents(events []*types.Event) { +func (t *DBWatchTask) handleEvents(events []*types.Event) { ctx := context.Background() first, last := events[0], events[len(events)-1] rid := first.ID() - blog.Infof("%s-%s job, received %d events, first op-time: %s, fist token: %s, rid: %s", t.name, t.dbInfo.UUID, + blog.Infof("%s-%s job, received %d events, first op-time: %s, first token: %s, rid: %s", t.Name, t.dbInfo.UUID, len(events), first.ClusterTime.String(), first.Token.Data, rid) needRetry := false @@ -144,11 +130,11 @@ func (t *dbWatchTask) handleEvents(events []*types.Event) { time.Sleep(t.retryOptions.RetryDuration) lastToken, err := t.tokenHandler.GetStartWatchToken(ctx, t.dbInfo.UUID, t.dbInfo.WatchDB) if err != nil { - blog.Errorf("get task %s db %s token failed, err: %v, rid: %s", t.name, t.dbInfo.UUID, err, rid) + blog.Errorf("get task %s db %s token failed, err: %v, rid: %s", t.Name, t.dbInfo.UUID, err, rid) time.Sleep(t.retryOptions.RetryDuration) continue } - t.lastToken = lastToken + t.LastToken = lastToken // if current token is greater than last token, return if !compareToken(last, lastToken) { @@ -183,14 +169,14 @@ func (t *dbWatchTask) handleEvents(events []*types.Event) { StartAtTime: &last.ClusterTime, } if err := t.tokenHandler.SetLastWatchToken(ctx, t.dbInfo.UUID, t.dbInfo.WatchDB, lastToken); err != nil { - blog.Errorf("set task %s db %s last watch token(%+v) failed, err: %v, rid: %s", t.name, t.dbInfo.UUID, + blog.Errorf("set task %s db %s last watch token(%+v) failed, err: %v, rid: %s", t.Name, t.dbInfo.UUID, *lastToken, err, rid) needRetry = true continue } - t.lastToken = lastToken + t.LastToken = lastToken select { - case t.tokenChan <- struct{}{}: + case t.TokenChan <- struct{}{}: default: } return diff --git a/src/storage/stream/task/task.go b/src/storage/stream/task/task.go index ba3c7f3b98..81d75c8f40 100644 --- a/src/storage/stream/task/task.go +++ b/src/storage/stream/task/task.go @@ -19,135 +19,39 @@ package task import ( - "context" - "fmt" "time" - "configcenter/src/apimachinery/discovery" - "configcenter/src/common" "configcenter/src/common/blog" - "configcenter/src/common/util" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/local" - "configcenter/src/storage/stream/event" - "configcenter/src/storage/stream/loop" "configcenter/src/storage/stream/types" ) -// Task is the event watch task that contains all resource watch tasks +// Task is the resource watch task type Task struct { - // eventMap is the db uuid to event instance map - eventMap map[string]*event.Event - // loopWatch is the db uuid to loop watch instance map - loopWatch map[string]*loop.LoopsWatch - // dbClients is 
the db uuid to db client map - dbClients map[string]local.DB - // watchClients is the db uuid to watch client map - watchClients map[string]*local.Mongo - // watchTasks is the task name to watch task map - watchTasks map[string]*watchTask - - // these options are used to generate loop watch options - majorityCommitted *bool - maxAwaitTime *time.Duration - - // stopNotifier is used when user need to stop loop events and release related resources. - // It's a optional option. when it's not set(as is nil), then the loop will not exit forever. - // Otherwise, user can use it to stop loop events. - // When a user want to stop the loop, the only thing that a user need to do is to just - // **close** this stop notifier channel. - // Attention: - // Close this notifier channel is the only way to stop loop correctly. - // Do not send data to this channel. - stopNotifier <-chan struct{} + // Name is the watch task name that uniquely identifies the watch task + Name string + // CollOptions is the watch collection options + CollOptions *types.WatchCollOptions + // eventHandler is the batch event handler + eventHandler *types.TaskBatchHandler + // tokenHandler is the token handler + tokenHandler types.TaskTokenHandler + // retryOptions is the watch task retry options + retryOptions *types.RetryOptions + // NeedList defines whether to list all data before watch + NeedList bool + // BatchSize is the batch event size for one loop + BatchSize int + MajorityCommitted *bool + MaxAwaitTime *time.Duration } -// New create a new watch task instance -func New(db, watchDB dal.Dal, isMaster discovery.ServiceManageInterface, opts *types.NewTaskOptions) (*Task, error) { +// NewLoopOneTask create a loop watch task that handles one event at one time +func NewLoopOneTask(opts *types.LoopOneTaskOptions) (*Task, error) { if err := opts.Validate(); err != nil { - blog.Errorf("validate new task options(%+v) failed, err: %v", opts, err) - return nil, err - } - - t := &Task{ - eventMap: make(map[string]*event.Event), - loopWatch: make(map[string]*loop.LoopsWatch), - dbClients: make(map[string]local.DB), - watchClients: make(map[string]*local.Mongo), - watchTasks: make(map[string]*watchTask), - stopNotifier: opts.StopNotifier, - } - - watchDBRelation, err := genWatchDBRelationMap(watchDB) - if err != nil { - return nil, err - } - - // generate watch db uuid to watch db client map - watchDBClientMap := make(map[string]*local.Mongo) - err = watchDB.ExecForAllDB(func(db local.DB) error { - dbClient, ok := db.(*local.Mongo) - if !ok { - return fmt.Errorf("watch db is not an instance of local mongo") - } - watchDBClientMap[dbClient.GetMongoClient().UUID()] = dbClient - return nil - }) - if err != nil { - blog.Errorf("get all watch db client failed, err: %v", err) - return nil, err - } - - // generate db uuid to db client & watch db client & loop watch instance map - err = db.ExecForAllDB(func(db local.DB) error { - dbClient, ok := db.(*local.Mongo) - if !ok { - return fmt.Errorf("db to be watched is not an instance of local mongo") - } - mongoClient := dbClient.GetMongoClient() - uuid := mongoClient.UUID() - - watchDBUUID, exists := watchDBRelation[uuid] - if !exists { - blog.Warnf("db %s has no watch db", uuid) - return nil - } - - watchClient, exists := watchDBClientMap[watchDBUUID] - if !exists { - return fmt.Errorf("db %s related watch db %s is invalid", uuid, watchDBUUID) - } - t.watchClients[uuid] = watchClient - t.dbClients[uuid] = dbClient - - eventInst, err := event.NewEvent(mongoClient.Client(), mongoClient.DBName(), 
uuid) - if err != nil { - return fmt.Errorf("new event for db %s failed, err: %v", uuid, err) - } - t.eventMap[uuid] = eventInst - - loopWatch, err := loop.NewLoopWatch(eventInst, isMaster) - if err != nil { - return fmt.Errorf("new loop watch for db %s failed, err: %v", uuid, err) - } - t.loopWatch[uuid] = loopWatch - return nil - }) - if err != nil { - blog.Errorf("generate db uuid related map failed, err: %v", err) + blog.Errorf("validate loop one task options(%s) failed, err: %v", opts.Name, err) return nil, err } - return t, nil -} - -// AddLoopOneTask add a loop watch task that handles one event at one time -func (t *Task) AddLoopOneTask(opts *types.LoopOneTaskOptions) error { - if err := opts.Validate(); err != nil { - blog.Errorf("validate loop batch task options(%s) failed, err: %v", opts.Name, err) - return err - } - batchOpts := &types.LoopBatchTaskOptions{ WatchTaskOptions: opts.WatchTaskOptions, BatchSize: 1, @@ -175,190 +79,40 @@ func (t *Task) AddLoopOneTask(opts *types.LoopOneTaskOptions) error { }, } - return t.addWatchTask(batchOpts, false) + return newTask(batchOpts, false), nil } -// AddLoopBatchTask add a loop watch task that handles batch events -func (t *Task) AddLoopBatchTask(opts *types.LoopBatchTaskOptions) error { +// NewLoopBatchTask add a loop watch task that handles batch events +func NewLoopBatchTask(opts *types.LoopBatchTaskOptions) (*Task, error) { if err := opts.Validate(); err != nil { blog.Errorf("validate loop batch task options(%s) failed, err: %v", opts.Name, err) - return err + return nil, err } - return t.addWatchTask(opts, false) + + return newTask(opts, false), nil } -// AddListWatchTask add a list watch task -func (t *Task) AddListWatchTask(opts *types.LoopBatchTaskOptions) error { +// NewListWatchTask add a list watch task +func NewListWatchTask(opts *types.LoopBatchTaskOptions) (*Task, error) { if err := opts.Validate(); err != nil { blog.Errorf("validate list watch task options(%s) failed, err: %v", opts.Name, err) - return err - } - return t.addWatchTask(opts, true) -} - -func (t *Task) addWatchTask(opts *types.LoopBatchTaskOptions, needList bool) error { - _, exists := t.watchTasks[opts.Name] - if exists { - return fmt.Errorf("loop watch task %s already exists", opts.Name) - } - - if opts.MajorityCommitted != nil && *opts.MajorityCommitted { - t.majorityCommitted = opts.MajorityCommitted - } - if opts.MaxAwaitTime != nil && (t.maxAwaitTime == nil || *opts.MaxAwaitTime > *t.maxAwaitTime) { - t.maxAwaitTime = opts.MaxAwaitTime - } - - t.watchTasks[opts.Name] = &watchTask{ - name: opts.Name, - collOptions: opts.CollOpts, - eventHandler: opts.EventHandler, - tokenHandler: opts.TokenHandler, - needList: needList, - retryOptions: opts.RetryOptions, - batchSize: opts.BatchSize, - } - - return nil -} - -// Start execute all watch tasks -func (t *Task) Start() error { - if len(t.watchTasks) == 0 { - return nil - } - - // generate task name to collection options map and db uuid to task name to db watch tasks map by watch task info - collOptions := make(map[string]types.WatchCollOptions) - listCollOptions := make(map[string]types.CollectionOptions) - dbWatchTasks := make(map[string]map[string]*dbWatchTask) - var batchSize int - for taskName, task := range t.watchTasks { - collOptions[taskName] = *task.collOptions - if task.needList { - listCollOptions[taskName] = task.collOptions.CollectionOptions - } - if task.batchSize > batchSize { - batchSize = task.batchSize - } - for uuid, dbClient := range t.dbClients { - dbTask, err := newDBWatchTask(task, 
&types.DBInfo{ - UUID: uuid, - WatchDB: t.watchClients[uuid], - CcDB: dbClient, - }) - if err != nil { - return err - } - if _, exists := dbWatchTasks[uuid]; !exists { - dbWatchTasks[uuid] = make(map[string]*dbWatchTask) - } - dbWatchTasks[uuid][taskName] = dbTask - } - } - - // list data for all list watch tasks - if len(listCollOptions) > 0 { - err := t.startList(listCollOptions, batchSize, dbWatchTasks) - if err != nil { - return err - } - } - - // loop watch all db events for all tasks - err := t.startLoopWatch(collOptions, dbWatchTasks, batchSize) - if err != nil { - return err - } - - // run watch tasks for all dbs - for _, dbTaskMap := range dbWatchTasks { - for _, dbTask := range dbTaskMap { - dbTask.start(t.stopNotifier) - } + return nil, err } - return nil -} - -func (t *Task) startList(listCollOptions map[string]types.CollectionOptions, batchSize int, - dbWatchTasks map[string]map[string]*dbWatchTask) error { - - for uuid, eventInst := range t.eventMap { - ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) - opt := &types.ListOptions{ - CollOpts: listCollOptions, - PageSize: &batchSize, - WithRetry: true, - } - listCh, err := eventInst.List(ctx, opt) - if err != nil { - blog.Errorf("list db %s failed, err: %v, options: %+v", uuid, err, *opt) - return err - } - - go func(uuid string) { - for e := range listCh { - task, exists := dbWatchTasks[uuid][e.TaskID] - if !exists { - blog.Warnf("loop watch task %s not exists, event: %+v", e.TaskID, *e) - continue - } - task.listChan <- e - } - }(uuid) - } - return nil + return newTask(opts, true), nil } -func (t *Task) startLoopWatch(collOptions map[string]types.WatchCollOptions, - dbWatchTasks map[string]map[string]*dbWatchTask, batchSize int) error { - - for uuid, loopWatch := range t.loopWatch { - uuid := uuid - tokenHandler, err := newDBTokenHandler(uuid, t.watchClients[uuid], dbWatchTasks[uuid]) - if err != nil { - return err - } - opts := &types.LoopBatchOptions{ - LoopOptions: types.LoopOptions{ - Name: uuid, - WatchOpt: &types.WatchOptions{ - Options: types.Options{ - MajorityCommitted: t.majorityCommitted, - MaxAwaitTime: t.maxAwaitTime, - CollOpts: collOptions, - }, - }, - TokenHandler: tokenHandler, - RetryOptions: &types.RetryOptions{ - MaxRetryCount: types.DefaultRetryCount, - RetryDuration: types.DefaultRetryDuration, - }, - StopNotifier: t.stopNotifier, - }, - EventHandler: &types.BatchHandler{DoBatch: func(es []*types.Event) (retry bool) { - taskLastTokenMap := make(map[string]string) - for _, e := range es { - task, exists := dbWatchTasks[uuid][e.TaskID] - if !exists { - blog.Warnf("loop watch task %s not exists, event: %+v", e.TaskID, *e) - continue - } - task.eventChan <- e - taskLastTokenMap[e.TaskID] = e.Token.Data - } - tokenHandler.setTaskLastTokenInfo(taskLastTokenMap) - return false - }}, - BatchSize: batchSize, - } - - err = loopWatch.WithBatch(opts) - if err != nil { - blog.Errorf("start loop watch for db failed, err: %v", err) - return err - } +// newTask generate a new watch task +func newTask(opts *types.LoopBatchTaskOptions, needList bool) *Task { + return &Task{ + Name: opts.Name, + CollOptions: opts.CollOpts, + eventHandler: opts.EventHandler, + tokenHandler: opts.TokenHandler, + retryOptions: opts.RetryOptions, + NeedList: needList, + BatchSize: opts.BatchSize, + MajorityCommitted: opts.MajorityCommitted, + MaxAwaitTime: opts.MaxAwaitTime, } - return nil } diff --git a/src/storage/stream/task/util.go b/src/storage/stream/task/util.go index b9cd098d6b..353e85d0ec 100644 --- 
a/src/storage/stream/task/util.go +++ b/src/storage/stream/task/util.go @@ -18,32 +18,9 @@ package task import ( - "context" - "fmt" - - "configcenter/src/common" - "configcenter/src/storage/dal" - "configcenter/src/storage/dal/mongo/sharding" "configcenter/src/storage/stream/types" ) -// genWatchDBRelationMap generate db uuid to watch db uuid map -func genWatchDBRelationMap(db dal.Dal) (map[string]string, error) { - ctx := context.Background() - masterDB := db.Shard(sharding.NewShardOpts().WithIgnoreTenant()) - - relations := make([]sharding.WatchDBRelation, 0) - if err := masterDB.Table(common.BKTableNameWatchDBRelation).Find(nil).All(ctx, &relations); err != nil { - return nil, fmt.Errorf("get db and watch db relation failed, err: %v", err) - } - - watchDBRelation := make(map[string]string) - for _, relation := range relations { - watchDBRelation[relation.DB] = relation.WatchDB - } - return watchDBRelation, nil -} - // compareToken compare event with token, returns if event is greater than the token func compareToken(event *types.Event, token *types.TokenInfo) bool { if token == nil { diff --git a/src/storage/stream/types/task.go b/src/storage/stream/types/task.go index ea82375dc9..8639d370cd 100644 --- a/src/storage/stream/types/task.go +++ b/src/storage/stream/types/task.go @@ -25,20 +25,6 @@ import ( "configcenter/src/storage/dal/mongo/local" ) -// NewTaskOptions is the new task options -type NewTaskOptions struct { - StopNotifier <-chan struct{} -} - -// Validate NewTaskOptions -func (o *NewTaskOptions) Validate() error { - if o.StopNotifier == nil { - // if not set, then set never stop loop as default - o.StopNotifier = make(<-chan struct{}) - } - return nil -} - // TaskTokenHandler is the token handler for db watch task type TaskTokenHandler interface { SetLastWatchToken(ctx context.Context, uuid string, watchDB local.DB, token *TokenInfo) error diff --git a/src/storage/stream/types/types.go b/src/storage/stream/types/types.go index 81bf311204..252a46f723 100644 --- a/src/storage/stream/types/types.go +++ b/src/storage/stream/types/types.go @@ -14,7 +14,6 @@ package types import ( - "context" "errors" "fmt" "reflect" @@ -457,13 +456,6 @@ func GetEventDetail(detailStr *string) *string { return &detail } -// TokenHandler is the token handler interface -type TokenHandler interface { - SetLastWatchToken(ctx context.Context, token *TokenInfo) error - GetStartWatchToken(ctx context.Context) (token *TokenInfo, err error) - ResetWatchToken(startAtTime TimeStamp) error -} - // TokenInfo is the watch token info type TokenInfo struct { Token string `bson:"token"` @@ -472,139 +464,11 @@ type TokenInfo struct { TenantID string `bson:"tenant_id"` } -// LoopOptions TODO -type LoopOptions struct { - // name of this loop watch - Name string - WatchOpt *WatchOptions - TokenHandler TokenHandler - RetryOptions *RetryOptions - - // StopNotifier is used when user need to stop loop events and release related resources. - // It's a optional option. when it's not set(as is nil), then the loop will not exit forever. - // Otherwise, user can use it to stop loop events. - // When a user want to stop the loop, the only thing that a user need to do is to just - // **close** this stop notifier channel. - // Attention: - // Close this notifier channel is the only way to stop loop correctly. - // Do not send data to this channel. 
- StopNotifier <-chan struct{} -} - -// LoopOneOptions TODO -type LoopOneOptions struct { - LoopOptions - EventHandler *OneHandler -} - -// Validate TODO -func (lo *LoopOneOptions) Validate() error { - if len(lo.Name) == 0 { - return errors.New("loop watch should have a name") - } - - if lo.TokenHandler == nil { - return errors.New("token handler is nil") - } - - if lo.EventHandler == nil { - return errors.New("event handler is nil") - } - - if lo.EventHandler.DoAdd == nil || lo.EventHandler.DoUpdate == nil || lo.EventHandler.DoDelete == nil { - return errors.New("invalid event handler options with add, update or delete is nil") - } - - if lo.RetryOptions != nil { - if lo.RetryOptions.MaxRetryCount <= 0 { - lo.RetryOptions.MaxRetryCount = DefaultRetryCount - } - - if lo.RetryOptions.RetryDuration == 0 { - lo.RetryOptions.RetryDuration = DefaultRetryDuration - } - - if lo.RetryOptions.RetryDuration < 500*time.Millisecond { - return errors.New("invalid retry duration, can not less than 500ms") - } - } else { - lo.RetryOptions = &RetryOptions{ - MaxRetryCount: DefaultRetryCount, - RetryDuration: DefaultRetryDuration, - } - } - - if lo.LoopOptions.StopNotifier == nil { - // if not set, then set never stop loop as default - lo.LoopOptions.StopNotifier = make(<-chan struct{}) - } - - return nil -} - -// LoopBatchOptions TODO -type LoopBatchOptions struct { - LoopOptions - EventHandler *BatchHandler - // describe how many events in a batch. - BatchSize int -} - const ( - defaultBatchSize = 200 DefaultRetryCount = 10 DefaultRetryDuration = 1 * time.Second ) -// Validate TODO -func (lo *LoopBatchOptions) Validate() error { - if len(lo.Name) == 0 { - return errors.New("loop watch should have a name") - } - - if lo.TokenHandler == nil { - return errors.New("token handler is nil") - } - - if lo.EventHandler == nil { - return errors.New("event handler is nil") - } - - if lo.EventHandler.DoBatch == nil { - return errors.New("batch handler is nil") - } - - if lo.BatchSize == 0 { - lo.BatchSize = defaultBatchSize - } - - if lo.RetryOptions != nil { - if lo.RetryOptions.MaxRetryCount <= 0 { - lo.RetryOptions.MaxRetryCount = DefaultRetryCount - } - - if lo.RetryOptions.RetryDuration == 0 { - lo.RetryOptions.RetryDuration = DefaultRetryDuration - } - - if lo.RetryOptions.RetryDuration < 200*time.Millisecond { - return errors.New("invalid retry duration, can not less than 200ms") - } - } else { - lo.RetryOptions = &RetryOptions{ - MaxRetryCount: DefaultRetryCount, - RetryDuration: DefaultRetryDuration, - } - } - - if lo.LoopOptions.StopNotifier == nil { - // if not set, then set never stop loop as default - lo.LoopOptions.StopNotifier = make(<-chan struct{}) - } - - return nil -} - // RetryOptions TODO type RetryOptions struct { // the maximum count to retry, when a event is handled failed. 
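The Validate methods removed above were also where the retry defaults got applied before the loop options were folded into the task options. A minimal, self-contained sketch of that defaulting and floor check, assuming the new task-level validation keeps the same behavior (the `normalize` helper and the `main` driver are illustrative only and not part of this patch; the 200ms floor is the one the removed batch variant used):

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

const (
	// DefaultRetryCount and DefaultRetryDuration mirror the constants kept by this patch.
	DefaultRetryCount    = 10
	DefaultRetryDuration = 1 * time.Second
)

// RetryOptions mirrors the struct kept by this patch.
type RetryOptions struct {
	// MaxRetryCount is the maximum count to retry when handling an event failed.
	MaxRetryCount int
	// RetryDuration is the duration between retries.
	RetryDuration time.Duration
}

// normalize fills zero values with the defaults and rejects an unreasonably small retry duration.
func (r *RetryOptions) normalize() error {
	if r.MaxRetryCount <= 0 {
		r.MaxRetryCount = DefaultRetryCount
	}
	if r.RetryDuration == 0 {
		r.RetryDuration = DefaultRetryDuration
	}
	if r.RetryDuration < 200*time.Millisecond {
		return errors.New("invalid retry duration, can not less than 200ms")
	}
	return nil
}

func main() {
	opts := &RetryOptions{}
	if err := opts.normalize(); err != nil {
		fmt.Println("invalid retry options:", err)
		return
	}
	fmt.Printf("retry %d times, every %s\n", opts.MaxRetryCount, opts.RetryDuration)
}
```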
@@ -614,19 +478,3 @@ type RetryOptions struct { // default RetryDuration time.Duration } - -// OneHandler TODO -type OneHandler struct { - // retry decide whether event(s) is required to retry after - // a event is handled failed - DoAdd func(event *Event) (retry bool) - DoUpdate func(event *Event) (retry bool) - DoDelete func(event *Event) (retry bool) -} - -// BatchHandler TODO -type BatchHandler struct { - // DoBatch means handle the event with batch, - // when this is enabled, then DoAdd, DoUpdate, DoDelete will be ignored - DoBatch func(es []*Event) (retry bool) -} diff --git a/src/test/test.go b/src/test/test.go index 126484c1df..3e27d03411 100644 --- a/src/test/test.go +++ b/src/test/test.go @@ -174,6 +174,7 @@ func ClearDatabase() { err := db.Shard(sharding.NewShardOpts().WithIgnoreTenant()).Table(tableName).Delete(context.Background(), mapstr.MapStr{common.BKFieldDBID: mapstr.MapStr{common.BKDBNE: common.ShardingDBConfID}}) Expect(err).Should(BeNil()) + case common.BKTableNameWatchDBRelation: default: err := db.Shard(sharding.NewShardOpts().WithIgnoreTenant()).DropTable(context.Background(), tableName) Expect(err).Should(BeNil()) From cace75be24805d365cf1a3bec2a6231db8e33a3a Mon Sep 17 00:00:00 2001 From: wcy00000000000000 <2269766985@qq.com> Date: Wed, 7 May 2025 11:29:26 +0800 Subject: [PATCH 10/10] chore: ci & review bugfix --story=120702860 --- pkg/tenant/event.go | 26 +++-- pkg/tenant/tenant.go | 2 +- src/apimachinery/refresh/api.go | 4 +- src/common/tablenames.go | 5 +- .../admin_server/logics/tenant.go | 21 ++-- .../admin_server/service/migrate.go | 30 ++--- .../admin_server/service/sharding.go | 4 +- .../admin_server/service/tenant.go | 90 +++++---------- .../task_server/service/service.go | 19 ---- .../cache/biz-topo/watch/brief.go | 4 +- .../cacheservice/cache/general/watch/watch.go | 4 +- .../cacheservice/event/flow/event.go | 70 ++++++++++++ .../cacheservice/event/flow/flow.go | 17 +-- .../cacheservice/event/flow/inst_asst_flow.go | 18 --- .../cacheservice/event/flow/instance_flow.go | 39 +------ .../cacheservice/event/loop/task.go | 2 +- .../cacheservice/event/watch/watch.go | 50 ++++----- .../cacheservice/service/service.go | 20 ++-- .../cacheservice/service/tenant.go | 44 ++++++++ .../coreservice/core/hostapplyrule/plan.go | 5 +- .../coreservice/service/tenant.go | 13 --- src/storage/dal/mongo/sharding/mongo.go | 103 +++++++++++++++--- src/storage/stream/event/list.go | 2 +- src/storage/stream/event/utils.go | 2 +- src/storage/stream/event/watch.go | 61 +++++++---- src/storage/stream/scheduler/scheduler.go | 34 +++--- src/storage/stream/scheduler/watch.go | 7 +- src/storage/stream/task/util.go | 2 +- src/storage/stream/types/task.go | 2 +- src/storage/stream/types/types.go | 12 +- src/test/test.go | 4 - 31 files changed, 400 insertions(+), 316 deletions(-) create mode 100644 src/source_controller/cacheservice/service/tenant.go diff --git a/pkg/tenant/event.go b/pkg/tenant/event.go index 2fcd5e4232..f41d4a074d 100644 --- a/pkg/tenant/event.go +++ b/pkg/tenant/event.go @@ -26,13 +26,13 @@ import ( var ( prevTenantInfo = make(map[string]types.Tenant) tenantEventChannels = make(map[string]chan TenantEvent) - tenantEventChLock sync.RWMutex + tenantEventChLock sync.Mutex ) // TenantEvent is the tenant event info type TenantEvent struct { EventType EventType - TenantID string + Tenant types.Tenant } // EventType is the tenant event type @@ -54,14 +54,14 @@ func NewTenantEventChan(name string) <-chan TenantEvent { return ch } - eventChan := make(chan TenantEvent, 1) + 
eventChan := make(chan TenantEvent) tenantEventChannels[name] = eventChan go func() { for _, tenant := range allTenants { if tenant.Status == types.EnabledStatus { eventChan <- TenantEvent{ EventType: Create, - TenantID: tenant.TenantID, + Tenant: tenant, } } } @@ -85,8 +85,8 @@ func RemoveTenantEventChan(name string) { // generateAndPushTenantEvent compare the tenant with the previous tenant info to generate and push event func generateAndPushTenantEvent(tenants []types.Tenant) { - tenantEventChLock.RLock() - defer tenantEventChLock.RUnlock() + tenantEventChLock.Lock() + defer tenantEventChLock.Unlock() prevTenantMap := make(map[string]types.Tenant) @@ -95,11 +95,15 @@ func generateAndPushTenantEvent(tenants []types.Tenant) { prevTenantMap[tenantID] = tenant prevTenant, exists := prevTenantInfo[tenantID] - if !exists && tenant.Status == types.EnabledStatus { + if !exists { + if tenant.Status != types.EnabledStatus { + continue + } + for _, eventChan := range tenantEventChannels { eventChan <- TenantEvent{ EventType: Create, - TenantID: tenantID, + Tenant: tenant, } } continue @@ -113,7 +117,7 @@ func generateAndPushTenantEvent(tenants []types.Tenant) { for _, eventChan := range tenantEventChannels { eventChan <- TenantEvent{ EventType: eventType, - TenantID: tenantID, + Tenant: tenant, } } } @@ -121,11 +125,11 @@ func generateAndPushTenantEvent(tenants []types.Tenant) { delete(prevTenantInfo, tenantID) } - for tenantID := range prevTenantInfo { + for _, tenant := range prevTenantInfo { for _, eventChan := range tenantEventChannels { eventChan <- TenantEvent{ EventType: Delete, - TenantID: tenantID, + Tenant: tenant, } } } diff --git a/pkg/tenant/tenant.go b/pkg/tenant/tenant.go index 13e75ae79b..f4cfe93741 100644 --- a/pkg/tenant/tenant.go +++ b/pkg/tenant/tenant.go @@ -99,8 +99,8 @@ func SetTenant(tenant []types.Tenant) { for _, t := range allTenants { tenantMap[t.TenantID] = &t } - generateAndPushTenantEvent(allTenants) lock.Unlock() + generateAndPushTenantEvent(allTenants) } func refreshTenantInfo() error { diff --git a/src/apimachinery/refresh/api.go b/src/apimachinery/refresh/api.go index 4ce95e6319..2e9a1ccf55 100644 --- a/src/apimachinery/refresh/api.go +++ b/src/apimachinery/refresh/api.go @@ -34,10 +34,10 @@ func (r *refresh) RefreshTenant(moduleName string) ([]types.Tenant, error) { case commontypes.CC_MODULE_APISERVER: r.capability.Discover = r.disc.ApiServer() - case commontypes.CC_MODULE_TASK: r.capability.Discover = r.disc.TaskServer() - + case commontypes.CC_MODULE_CACHESERVICE: + r.capability.Discover = r.disc.CacheService() default: return nil, fmt.Errorf("unsupported refresh module: %s", moduleName) } diff --git a/src/common/tablenames.go b/src/common/tablenames.go index b73f773947..21f3cff75b 100644 --- a/src/common/tablenames.go +++ b/src/common/tablenames.go @@ -63,7 +63,6 @@ const ( BKTableNameHistory = "History" BKTableNameHostFavorite = "HostFavourite" BKTableNameAuditLog = "AuditLog" - BKTableNamePlatformAuditLog = "PlatformAuditLog" BKTableNameUserAPI = "UserAPI" BKTableNameDynamicGroup = "DynamicGroup" BKTableNameUserCustom = "UserCustom" @@ -288,12 +287,14 @@ var platformTableMap = map[string]struct{}{ BKTableNameIDgenerator: {}, BKTableNameTenant: {}, BKTableNameTenantTemplate: {}, - BKTableNamePlatformAuditLog: {}, BKTableNameWatchToken: {}, BKTableNameAPITask: {}, BKTableNameAPITaskSyncHistory: {}, BKTableNameWatchDBRelation: {}, BKTableNameFullSyncCond: {}, + BKTableNameCacheWatchToken: {}, + "SrcSyncDataToken": {}, + "SrcSyncDataCursor": {}, } // 
IsPlatformTable returns if the target table is a platform table diff --git a/src/scene_server/admin_server/logics/tenant.go b/src/scene_server/admin_server/logics/tenant.go index 5951fa2783..e9811131bf 100644 --- a/src/scene_server/admin_server/logics/tenant.go +++ b/src/scene_server/admin_server/logics/tenant.go @@ -18,15 +18,17 @@ package logics import ( + "context" "fmt" "configcenter/pkg/tenant" - "configcenter/pkg/tenant/types" "configcenter/src/apimachinery" "configcenter/src/common/blog" "configcenter/src/common/http/rest" commontypes "configcenter/src/common/types" + "configcenter/src/storage/dal" "configcenter/src/storage/dal/mongo/local" + "configcenter/src/storage/dal/mongo/sharding" ) // NewTenantInterface get new tenant cli interface @@ -52,19 +54,24 @@ func GetNewTenantCli(kit *rest.Kit, cli interface{}) (local.DB, string, error) { } // RefreshTenants refresh tenant info, skip tenant verify for apiserver -func RefreshTenants(coreAPI apimachinery.ClientSetInterface) error { +func RefreshTenants(coreAPI apimachinery.ClientSetInterface, db dal.Dal) error { + tenants, err := tenant.GetAllTenantsFromDB(context.Background(), + db.Shard(sharding.NewShardOpts().WithIgnoreTenant())) + if err != nil { + blog.Errorf("get all tenants failed, err: %v", err) + return err + } + tenant.SetTenant(tenants) - var tenants []types.Tenant - var err error - needRefreshServer := []string{commontypes.CC_MODULE_APISERVER, commontypes.CC_MODULE_TASK} + needRefreshServer := []string{commontypes.CC_MODULE_APISERVER, commontypes.CC_MODULE_TASK, + commontypes.CC_MODULE_CACHESERVICE} for _, module := range needRefreshServer { - tenants, err = coreAPI.Refresh().RefreshTenant(module) + _, err = coreAPI.Refresh().RefreshTenant(module) if err != nil { blog.Errorf("refresh tenant info failed, module: %s, err: %v", module, err) return err } } - tenant.SetTenant(tenants) return nil } diff --git a/src/scene_server/admin_server/service/migrate.go b/src/scene_server/admin_server/service/migrate.go index d73714c0ec..7a0268e73f 100644 --- a/src/scene_server/admin_server/service/migrate.go +++ b/src/scene_server/admin_server/service/migrate.go @@ -86,6 +86,11 @@ func (s *Service) migrateDatabase(req *restful.Request, resp *restful.Response) return } + // refresh tenants, ignore refresh tenants error + if err = logics.RefreshTenants(s.CoreAPI, s.db); err != nil { + blog.Errorf("refresh tenant failed, err: %v", err) + } + if err = s.createWatchDBChainCollections(kit); err != nil { blog.Errorf("create watch db chain collections failed, err: %v", err) resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ @@ -94,11 +99,6 @@ func (s *Service) migrateDatabase(req *restful.Request, resp *restful.Response) return } - // refresh tenants, ignore refresh tenants error - if err = logics.RefreshTenants(s.CoreAPI); err != nil { - blog.Errorf("refresh tenant failed, err: %v", err) - } - resp.WriteEntity(metadata.NewSuccessResp(result)) } @@ -126,14 +126,12 @@ func (s *Service) createWatchDBChainCollections(kit *rest.Kit) error { } err = tenant.ExecForAllTenants(func(tenantID string) error { - // TODO 在新增租户初始化时同时增加watch相关表,并刷新cache的tenant return s.addTenantWatchToken(kit.NewKit().WithTenant(tenantID), cursorType, key) }) if err != nil { return err } - // TODO 在新增DB时同时增加db relation和token数据 err = s.createWatchTokenForEventKey(kit, key, watchDBToDBRelation) if err != nil { return err @@ -321,16 +319,20 @@ func (s *Service) createWatchIndexes(kit *rest.Kit, cursorType watch.CursorType, existIdxMap[index.Name] = true } + 
createIndexes := make([]daltypes.Index, 0) for _, index := range indexes { - if _, exist := existIdxMap[index.Name]; exist { - continue + if _, exist := existIdxMap[index.Name]; !exist { + createIndexes = append(createIndexes, index) } + } + if len(createIndexes) == 0 { + return nil + } - err = s.watchDB.Shard(kit.ShardOpts()).Table(key.ChainCollection()).CreateIndex(s.ctx, index) - if err != nil && !mongodb.IsDuplicatedError(err) { - blog.Errorf("create indexes for table %s failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid) - return err - } + err = s.watchDB.Shard(kit.ShardOpts()).Table(key.ChainCollection()).BatchCreateIndexes(s.ctx, createIndexes) + if err != nil && !mongodb.IsDuplicatedError(err) { + blog.Errorf("create indexes for table %s failed, err: %v, rid: %s", key.ChainCollection(), err, kit.Rid) + return err } return nil } diff --git a/src/scene_server/admin_server/service/sharding.go b/src/scene_server/admin_server/service/sharding.go index 2217525c8d..fb4efcdc9a 100644 --- a/src/scene_server/admin_server/service/sharding.go +++ b/src/scene_server/admin_server/service/sharding.go @@ -262,7 +262,7 @@ func (s *Service) genUpdatedShardingDBConf(kit *rest.Kit, dbConf *sharding.Shard return dbConf, nil } - blog.Errorf("add new tenant db %s is invalid, rid: %s", conf.ForNewData, kit.Rid) + blog.Errorf("for new data db %s is invalid, rid: %s", conf.ForNewData, kit.Rid) return nil, kit.CCError.CCErrorf(common.CCErrCommParamsInvalid, "for_new_data") } return dbConf, nil @@ -292,7 +292,7 @@ func (s *Service) genDBSlaveConf(kit *rest.Kit, name string, disabled bool, conf } func (s *Service) saveUpdateShardingDBAudit(kit *rest.Kit, preConf, curConf *sharding.ShardingDBConf) error { - id, err := s.db.Shard(kit.SysShardOpts()).NextSequence(kit.Ctx, common.BKTableNamePlatformAuditLog) + id, err := s.db.Shard(kit.SysShardOpts()).NextSequence(kit.Ctx, common.BKTableNameAuditLog) if err != nil { blog.Errorf("generate next audit log id failed, err: %v, rid: %s", err, kit.Rid) return err diff --git a/src/scene_server/admin_server/service/tenant.go b/src/scene_server/admin_server/service/tenant.go index e664883b0a..c5e864dbb1 100644 --- a/src/scene_server/admin_server/service/tenant.go +++ b/src/scene_server/admin_server/service/tenant.go @@ -27,7 +27,6 @@ import ( tenanttmp "configcenter/pkg/types/tenant-template" "configcenter/src/common" "configcenter/src/common/blog" - httpheader "configcenter/src/common/http/header" "configcenter/src/common/http/rest" "configcenter/src/common/index" "configcenter/src/common/mapstr" @@ -45,28 +44,22 @@ import ( ) func (s *Service) addTenant(req *restful.Request, resp *restful.Response) { - rHeader := req.Request.Header - defErr := s.CCErr.CreateDefaultCCErrorIf(httpheader.GetLanguage(rHeader)) - kit := rest.NewKitFromHeader(rHeader, s.CCErr) + kit := rest.NewKitFromHeader(req.Request.Header, s.CCErr) if !s.Config.EnableMultiTenantMode { blog.Errorf("multi-tenant mode is not enabled, cannot add tenant, rid: %s", kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, - fmt.Errorf("multi-tenant mode is not enabled, cannot add tenant")), - } - resp.WriteError(http.StatusInternalServerError, result) + resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ + Msg: kit.CCError.New(common.CCErrCommAddTenantErr, "multi-tenant mode is not enabled, cannot add tenant")}) return } _, exist := tenant.GetTenant(kit.TenantID) if exist { - // add watch token for new tenant - // TODO 
如果租户已经存在的情况下也调一下,防止之前新增租户了但是这个失败了 + // add watch token for new tenant in case tenant is created without watch tokens if err := s.addWatchTokenForNewTenant(kit); err != nil { blog.Errorf("add watch token for new tenant %s failed, err: %v, rid: %s", kit.TenantID, err, kit.Rid) - result := &metadata.RespError{Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error())} - resp.WriteError(http.StatusInternalServerError, result) + resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ + Msg: kit.CCError.New(common.CCErrCommAddTenantErr, err.Error())}) return } resp.WriteEntity(metadata.NewSuccessResp("tenant exist")) @@ -78,10 +71,8 @@ func (s *Service) addTenant(req *restful.Request, resp *restful.Response) { tenants, err := apigwcli.Client().User().GetTenants(kit.Ctx, kit.Header) if err != nil { blog.Errorf("get tenants from bk-user failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, fmt.Errorf("get tenants from bk-user failed")), - } - resp.WriteError(http.StatusInternalServerError, result) + resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ + Msg: kit.CCError.New(common.CCErrCommAddTenantErr, "get tenants from bk-user failed")}) } tenantMap := make(map[string]user.Status) @@ -91,60 +82,47 @@ func (s *Service) addTenant(req *restful.Request, resp *restful.Response) { if status, ok := tenantMap[kit.TenantID]; !ok || status != user.EnabledStatus { blog.Errorf("tenant %s invalid, rid: %s", kit.TenantID, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, - fmt.Errorf("tenant %s invalid", kit.TenantID)), - } - resp.WriteError(http.StatusInternalServerError, result) + resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ + Msg: kit.CCError.Errorf(common.CCErrCommAddTenantErr, fmt.Sprintf("tenant %s invalid", kit.TenantID))}) return } } + if err := s.addTenantData(kit); err != nil { + resp.WriteError(http.StatusInternalServerError, &metadata.RespError{ + Msg: kit.CCError.New(common.CCErrCommAddTenantErr, err.Error())}) + return + } + + resp.WriteEntity(metadata.NewSuccessResp("add tenant success")) +} + +func (s *Service) addTenantData(kit *rest.Kit) error { cli, dbUUID, err := logics.GetNewTenantCli(kit, mongodb.Dal()) if err != nil { blog.Errorf("get new tenant db failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, fmt.Errorf("get new tenant db failed")), - } - resp.WriteError(http.StatusInternalServerError, result) - return + return err } if err = addTableIndexes(kit, cli); err != nil { blog.Errorf("create table and indexes for tenant %s failed, err: %v, rid: %s", kit.TenantID, err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error()), - } - resp.WriteError(http.StatusInternalServerError, result) - return + return err } // add default area if err = addDefaultArea(kit, cli); err != nil { blog.Errorf("add default area failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error()), - } - resp.WriteError(http.StatusInternalServerError, result) - return + return err } if err = addDataFromTemplate(kit, cli); err != nil { blog.Errorf("create init data for tenant %s failed, err: %v", kit.TenantID, err) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error()), - } - 
resp.WriteError(http.StatusInternalServerError, result) - return + return err } if err = addResPool(kit, cli); err != nil { - blog.Errorf("add default resouce pool failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error()), - } - resp.WriteError(http.StatusInternalServerError, result) - return + blog.Errorf("add default resource pool failed, err: %v, rid: %s", err, kit.Rid) + return err } // add tenant db relation @@ -156,28 +134,20 @@ func (s *Service) addTenant(req *restful.Request, resp *restful.Response) { err = mongodb.Shard(kit.SysShardOpts()).Table(common.BKTableNameTenant).Insert(kit.Ctx, data) if err != nil { blog.Errorf("add tenant db relations failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error()), - } - resp.WriteError(http.StatusInternalServerError, result) - return + return err } // refresh tenants, ignore refresh tenants error - if err = logics.RefreshTenants(s.CoreAPI); err != nil { + if err = logics.RefreshTenants(s.CoreAPI, s.db); err != nil { blog.Errorf("refresh tenants failed, err: %v, rid: %s", err, kit.Rid) } // add watch token for new tenant - // TODO 如果租户已经存在的情况下也调一下,防止之前新增租户了但是这个失败了 if err = s.addWatchTokenForNewTenant(kit); err != nil { blog.Errorf("add watch token for new tenant %s failed, err: %v, rid: %s", kit.TenantID, err, kit.Rid) - result := &metadata.RespError{Msg: defErr.Errorf(common.CCErrCommAddTenantErr, err.Error())} - resp.WriteError(http.StatusInternalServerError, result) - return + return err } - - resp.WriteEntity(metadata.NewSuccessResp("add tenant success")) + return nil } func (s *Service) addWatchTokenForNewTenant(kit *rest.Kit) error { diff --git a/src/scene_server/task_server/service/service.go b/src/scene_server/task_server/service/service.go index 1003a9c4cd..04c108d394 100644 --- a/src/scene_server/task_server/service/service.go +++ b/src/scene_server/task_server/service/service.go @@ -35,7 +35,6 @@ import ( "configcenter/src/common/webservice/restfulservice" "configcenter/src/scene_server/task_server/app/options" "configcenter/src/scene_server/task_server/logics" - "configcenter/src/storage/dal/mongo/sharding" "configcenter/src/storage/dal/redis" "configcenter/src/storage/driver/mongodb" "configcenter/src/thirdparty/logplatform/opentelemetry" @@ -175,24 +174,6 @@ func (s *Service) RefreshTenants(req *restful.Request, resp *restful.Response) { } tenant.SetTenant(tenants) - // refresh tenant db map - shardingMongoManager, ok := mongodb.Dal().(*sharding.ShardingMongoManager) - if !ok { - blog.Errorf("convert to ShardingMongoManager failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommRefreshTenantErr, fmt.Errorf("get sharding mongo manager failed")), - } - resp.WriteError(http.StatusInternalServerError, result) - } - - if err = shardingMongoManager.RefreshTenantDBMap(); err != nil { - blog.Errorf("refresh tenant db map failed, err: %v, rid: %s", err, kit.Rid) - result := &metadata.RespError{ - Msg: defErr.Errorf(common.CCErrCommRefreshTenantErr, fmt.Errorf("get sharding mongo manager failed")), - } - resp.WriteError(http.StatusInternalServerError, result) - return - } resp.WriteEntity(metadata.NewSuccessResp(tenants)) } diff --git a/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go b/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go index dd41aa69b9..f6297b1a3f 100644 --- 
a/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go +++ b/src/source_controller/cacheservice/cache/biz-topo/watch/brief.go @@ -73,7 +73,7 @@ func (w *briefWatcher) watchEvents(cursorType watch.CursorType) error { case tenant.Create: loopEventChan <- loop.TenantEvent{ EventType: watch.Create, - TenantID: e.TenantID, + TenantID: e.Tenant.TenantID, WatchOpts: &watch.WatchEventOptions{ EventTypes: []watch.EventType{watch.Create, watch.Delete}, Fields: []string{common.BKAppIDField}, @@ -83,7 +83,7 @@ func (w *briefWatcher) watchEvents(cursorType watch.CursorType) error { case tenant.Delete: loopEventChan <- loop.TenantEvent{ EventType: watch.Delete, - TenantID: e.TenantID, + TenantID: e.Tenant.TenantID, } } } diff --git a/src/source_controller/cacheservice/cache/general/watch/watch.go b/src/source_controller/cacheservice/cache/general/watch/watch.go index 3f58118033..85810a4339 100644 --- a/src/source_controller/cacheservice/cache/general/watch/watch.go +++ b/src/source_controller/cacheservice/cache/general/watch/watch.go @@ -141,13 +141,13 @@ func (w *Watcher) watchCacheChange(cursorType watch.CursorType, name string, loo case tenant.Create: loopEventChan <- loop.TenantEvent{ EventType: watch.Create, - TenantID: e.TenantID, + TenantID: e.Tenant.TenantID, WatchOpts: &watch.WatchEventOptions{Resource: cursorType}, } case tenant.Delete: loopEventChan <- loop.TenantEvent{ EventType: watch.Delete, - TenantID: e.TenantID, + TenantID: e.Tenant.TenantID, } } } diff --git a/src/source_controller/cacheservice/event/flow/event.go b/src/source_controller/cacheservice/event/flow/event.go index 6dc2052850..f276bbd5bd 100644 --- a/src/source_controller/cacheservice/event/flow/event.go +++ b/src/source_controller/cacheservice/event/flow/event.go @@ -15,6 +15,7 @@ package flow import ( "context" + "configcenter/pkg/tenant" "configcenter/src/common/blog" "configcenter/src/common/metadata" "configcenter/src/source_controller/cacheservice/event" @@ -205,3 +206,72 @@ func (e *Event) addProjectTask() error { return e.addFlowTask(opts, parseEvent) } + +func (e *Event) addFlowTask(opts flowOptions, parseEvent parseEventFunc) error { + flow, err := NewFlow(opts, parseEvent) + if err != nil { + return err + } + + flowTask, err := flow.GenWatchTask() + if err != nil { + return err + } + + e.tasks = append(e.tasks, flowTask) + return nil +} + +func (e *Event) addInstanceFlowTask(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { + flow, err := NewFlow(opts, parseEvent) + if err != nil { + return err + } + instFlow := InstanceFlow{ + Flow: flow, + mainlineObjectMap: &mainlineObjectMap{ + data: make(map[string]map[string]struct{}), + }, + } + + err = tenant.ExecForAllTenants(func(tenantID string) error { + mainlineObjMap, err := instFlow.getMainlineObjectMap(ctx, tenantID) + if err != nil { + blog.Errorf("run object instance watch, but get tenant %s mainline objects failed, err: %v", tenantID, err) + return err + } + instFlow.mainlineObjectMap.Set(tenantID, mainlineObjMap) + + go instFlow.syncMainlineObjectMap(tenantID) + return nil + }) + if err != nil { + return err + } + + flowTask, err := instFlow.GenWatchTask() + if err != nil { + return err + } + + e.tasks = append(e.tasks, flowTask) + return nil +} + +func (e *Event) addInstAsstFlowTask(opts flowOptions, parseEvent parseEventFunc) error { + flow, err := NewFlow(opts, parseEvent) + if err != nil { + return err + } + instAsstFlow := InstAsstFlow{ + Flow: flow, + } + + flowTask, err := instAsstFlow.GenWatchTask() + if err != nil { 
+ return err + } + + e.tasks = append(e.tasks, flowTask) + return nil +} diff --git a/src/source_controller/cacheservice/event/flow/flow.go b/src/source_controller/cacheservice/event/flow/flow.go index 6b7ed08084..544b34bd69 100644 --- a/src/source_controller/cacheservice/event/flow/flow.go +++ b/src/source_controller/cacheservice/event/flow/flow.go @@ -38,21 +38,6 @@ type flowOptions struct { EventStruct interface{} } -func (e *Event) addFlowTask(opts flowOptions, parseEvent parseEventFunc) error { - flow, err := NewFlow(opts, parseEvent) - if err != nil { - return err - } - - flowTask, err := flow.GenWatchTask() - if err != nil { - return err - } - - e.tasks = append(e.tasks, flowTask) - return nil -} - // NewFlow create a new event watch flow func NewFlow(opts flowOptions, parseEvent parseEventFunc) (Flow, error) { if parseEvent == nil { @@ -249,7 +234,7 @@ func (f *Flow) parseEvents(dbInfo *types.DBInfo, es []*types.Event, rid string) // collect event's basic metrics f.metrics.CollectBasic(e) - tenant, chainNode, detail, retry, err := f.parseEvent(dbInfo.CcDB, f.key, e, ids[index], rid) + tenant, chainNode, detail, retry, err := f.parseEvent(dbInfo.DB, f.key, e, ids[index], rid) if err != nil { if retry { return nil, nil, nil, false, err diff --git a/src/source_controller/cacheservice/event/flow/inst_asst_flow.go b/src/source_controller/cacheservice/event/flow/inst_asst_flow.go index 19741b2b78..341388f660 100644 --- a/src/source_controller/cacheservice/event/flow/inst_asst_flow.go +++ b/src/source_controller/cacheservice/event/flow/inst_asst_flow.go @@ -22,24 +22,6 @@ import ( "configcenter/src/storage/stream/types" ) -func (e *Event) addInstAsstFlowTask(opts flowOptions, parseEvent parseEventFunc) error { - flow, err := NewFlow(opts, parseEvent) - if err != nil { - return err - } - instAsstFlow := InstAsstFlow{ - Flow: flow, - } - - flowTask, err := instAsstFlow.GenWatchTask() - if err != nil { - return err - } - - e.tasks = append(e.tasks, flowTask) - return nil -} - // InstAsstFlow instance association event watch flow type InstAsstFlow struct { Flow diff --git a/src/source_controller/cacheservice/event/flow/instance_flow.go b/src/source_controller/cacheservice/event/flow/instance_flow.go index 0a4d75d3e8..19e62bcbf9 100644 --- a/src/source_controller/cacheservice/event/flow/instance_flow.go +++ b/src/source_controller/cacheservice/event/flow/instance_flow.go @@ -20,7 +20,6 @@ import ( "sync" "time" - "configcenter/pkg/tenant" "configcenter/src/common" "configcenter/src/common/blog" "configcenter/src/common/mapstr" @@ -41,42 +40,6 @@ import ( "go.mongodb.org/mongo-driver/mongo" ) -func (e *Event) addInstanceFlowTask(ctx context.Context, opts flowOptions, parseEvent parseEventFunc) error { - flow, err := NewFlow(opts, parseEvent) - if err != nil { - return err - } - instFlow := InstanceFlow{ - Flow: flow, - mainlineObjectMap: &mainlineObjectMap{ - data: make(map[string]map[string]struct{}), - }, - } - - err = tenant.ExecForAllTenants(func(tenantID string) error { - mainlineObjMap, err := instFlow.getMainlineObjectMap(ctx, tenantID) - if err != nil { - blog.Errorf("run object instance watch, but get tenant %s mainline objects failed, err: %v", tenantID, err) - return err - } - instFlow.mainlineObjectMap.Set(tenantID, mainlineObjMap) - - go instFlow.syncMainlineObjectMap(tenantID) - return nil - }) - if err != nil { - return err - } - - flowTask, err := instFlow.GenWatchTask() - if err != nil { - return err - } - - e.tasks = append(e.tasks, flowTask) - return nil -} - // 
syncMainlineObjectMap refresh mainline object ID map every 5 minutes func (f *InstanceFlow) syncMainlineObjectMap(tenantID string) { for { @@ -255,7 +218,7 @@ func (f *InstanceFlow) parseEvents(dbInfo *types.DBInfo, ids []uint64, eventMap f.metrics.CollectBasic(e) idIdx := oidIndexMap[e.Oid+e.Collection] - tenantID, chainNode, detail, retry, err := f.parseEvent(dbInfo.CcDB, key, e, ids[idIdx], rid) + tenantID, chainNode, detail, retry, err := f.parseEvent(dbInfo.DB, key, e, ids[idIdx], rid) if err != nil { if retry { return nil, nil, nil, err diff --git a/src/source_controller/cacheservice/event/loop/task.go b/src/source_controller/cacheservice/event/loop/task.go index 8427b21fbf..5d647191b8 100644 --- a/src/source_controller/cacheservice/event/loop/task.go +++ b/src/source_controller/cacheservice/event/loop/task.go @@ -172,7 +172,7 @@ func (t *loopWatchTask) doWatch(kit *rest.Kit, opts *watch.WatchEventOptions) er return err } - blog.Errorf("watch event failed, re-watch from now, err: %v, opt: %+v, rid: %s", err, + blog.Errorf("watch event failed, re-watch from now, err: %v, opt: %+v, rid: %s", ccErr, *opts, kit.Rid) return ccErr } diff --git a/src/source_controller/cacheservice/event/watch/watch.go b/src/source_controller/cacheservice/event/watch/watch.go index bd0c1c948d..c7a639f7f3 100644 --- a/src/source_controller/cacheservice/event/watch/watch.go +++ b/src/source_controller/cacheservice/event/watch/watch.go @@ -619,34 +619,32 @@ func (c *Client) WatchWithCursor(kit *rest.Kit, key event.Key, opts *watch.Watch // has already looped for timeout seconds, and we still got no event. // return with NoEventCursor and empty detail opts.Cursor = watch.NoEventCursor - return []*watch.WatchEventDetail{{ - Cursor: watch.NoEventCursor, - Resource: opts.Resource, - EventType: "", - Detail: nil, - }}, nil - } else { - // 如果最后一个事件存在,则重新拉取匹配watch条件(type和sub resource)的事件,防止最后一个事件正好在超时之后但是 - // 拉取之前产生的情况下丢失从超时起到最后一个事件之间的事件。如果从起始cursor到最后一个事件之间没有匹配事件的话, - // 返回最后一个事件,以免下次拉取时需要从起始cursor再重新拉取一遍不匹配的事件 - searchOpt.id = nodeID - nodes, err = c.searchFollowingEventChainNodesByID(kit, searchOpt) - if err != nil { - blog.Errorf("watch event from cursor: %s failed, err: %v, rid: %s", opts.Cursor, err, kit.Rid) - return nil, err - } - if len(nodes) != 0 { - return c.getEventDetailsWithNodes(kit, opts, nodes, key) - } + return []*watch.WatchEventDetail{{Cursor: watch.NoEventCursor, Resource: opts.Resource}}, nil + } - resp := &watch.WatchEventDetail{ - Cursor: lastNode.Cursor, - Resource: opts.Resource, - Detail: nil, - } - // at least the tail node should be scanned, so something goes wrong. - return []*watch.WatchEventDetail{resp}, nil + // 如果最后一个事件存在,则重新拉取匹配watch条件(type和sub resource)的事件,防止最后一个事件正好在超时之后但是 + // 拉取之前产生的情况下丢失从超时起到最后一个事件之间的事件。如果从起始cursor到最后一个事件之间没有匹配事件的话, + // 返回最后一个事件,以免下次拉取时需要从起始cursor再重新拉取一遍不匹配的事件 + searchOpt.id = nodeID + nodes, err = c.searchFollowingEventChainNodesByID(kit, searchOpt) + if err != nil { + blog.Errorf("watch event from cursor: %s failed, err: %v, rid: %s", opts.Cursor, err, kit.Rid) + return nil, err } + if len(nodes) != 0 { + return c.getEventDetailsWithNodes(kit, opts, nodes, key) + } + // at least the tail node should be scanned, so something goes wrong. 
+ return []*watch.WatchEventDetail{{Cursor: lastNode.Cursor, Resource: opts.Resource}}, nil + } + + if !exists { + exists, nodes, nodeID, err = c.searchFollowingEventChainNodes(kit, searchOpt) + if err != nil { + blog.Errorf("search nodes after cursor %s failed, err: %v, rid: %s", opts.Cursor, err, kit.Rid) + return nil, err + } + continue } searchOpt.id = nodeID diff --git a/src/source_controller/cacheservice/service/service.go b/src/source_controller/cacheservice/service/service.go index 16f6939771..1645342417 100644 --- a/src/source_controller/cacheservice/service/service.go +++ b/src/source_controller/cacheservice/service/service.go @@ -37,7 +37,6 @@ import ( "configcenter/src/source_controller/coreservice/core" "configcenter/src/storage/driver/mongodb" "configcenter/src/storage/stream/scheduler" - "configcenter/src/storage/stream/task" "configcenter/src/thirdparty/logplatform/opentelemetry" "github.com/emicklei/go-restful/v3" @@ -101,7 +100,6 @@ func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, er return err } s.scheduler = taskScheduler - watchTasks := make([]*task.Task, 0) c, cacheErr := cacheop.NewCache(engine.ServiceManageInterface) if cacheErr != nil { @@ -109,31 +107,34 @@ func (s *cacheService) SetConfig(cfg options.Config, engine *backbone.Engine, er return cacheErr } s.cacheSet = c - watchTasks = append(watchTasks, c.GetWatchTasks()...) + if err = taskScheduler.AddTasks(c.GetWatchTasks()...); err != nil { + return err + } flowEvent, flowErr := flow.NewEvent() if flowErr != nil { blog.Errorf("new watch event failed, err: %v", flowErr) return flowErr } - watchTasks = append(watchTasks, flowEvent.GetWatchTasks()...) + if err = taskScheduler.AddTasks(flowEvent.GetWatchTasks()...); err != nil { + return err + } hostIdentity, err := identifier.NewIdentity() if err != nil { blog.Errorf("new host identity event failed, err: %v", err) return err } - watchTasks = append(watchTasks, hostIdentity.GetWatchTasks()...) + if err = taskScheduler.AddTasks(hostIdentity.GetWatchTasks()...); err != nil { + return err + } bsRelation, err := bsrelation.NewBizSetRelation() if err != nil { blog.Errorf("new biz set relation event failed, err: %v", err) return err } - watchTasks = append(watchTasks, bsRelation.GetWatchTasks()...) - - if err = taskScheduler.AddTasks(watchTasks...); err != nil { - blog.Errorf("add event watch tasks failed, err: %v", err) + if err = taskScheduler.AddTasks(bsRelation.GetWatchTasks()...); err != nil { return err } @@ -166,6 +167,7 @@ func (s *cacheService) WebService() *restful.Container { commonAPI := new(restful.WebService).Produces(restful.MIME_JSON) commonAPI.Route(commonAPI.GET("/healthz").To(s.Healthz)) commonAPI.Route(commonAPI.GET("/version").To(restfulservice.Version)) + commonAPI.Route(commonAPI.POST("/refresh/tenants").To(s.RefreshTenant)) container.Add(commonAPI) return container diff --git a/src/source_controller/cacheservice/service/tenant.go b/src/source_controller/cacheservice/service/tenant.go new file mode 100644 index 0000000000..8002029efd --- /dev/null +++ b/src/source_controller/cacheservice/service/tenant.go @@ -0,0 +1,44 @@ +/* + * Tencent is pleased to support the open source community by making + * 蓝鲸智云 - 配置平台 (BlueKing - Configuration System) available. + * Copyright (C) 2017 THL A29 Limited, + * a Tencent company. All rights reserved. + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at http://opensource.org/licenses/MIT + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * We undertake not to change the open source license (MIT license) applicable + * to the current version of the project delivered to anyone in the future. + */ + +package service + +import ( + "net/http" + + "configcenter/pkg/tenant" + "configcenter/src/common/blog" + "configcenter/src/common/http/rest" + "configcenter/src/common/metadata" + "configcenter/src/storage/driver/mongodb" + + "github.com/emicklei/go-restful/v3" +) + +// RefreshTenant refresh tenant info +func (s *cacheService) RefreshTenant(req *restful.Request, resp *restful.Response) { + kit := rest.NewKitFromHeader(req.Request.Header, s.err) + tenants, err := tenant.GetAllTenantsFromDB(kit.Ctx, mongodb.Shard(kit.SysShardOpts())) + if err != nil { + blog.Errorf("refresh tenant info but get all tenants failed, err: %v, rid: %s", err) + resp.WriteError(http.StatusOK, &metadata.RespError{Msg: err}) + return + } + + tenant.SetTenant(tenants) + resp.WriteEntity(metadata.NewSuccessResp(tenants)) +} diff --git a/src/source_controller/coreservice/core/hostapplyrule/plan.go b/src/source_controller/coreservice/core/hostapplyrule/plan.go index f5d4397fd5..7077e6b3fa 100644 --- a/src/source_controller/coreservice/core/hostapplyrule/plan.go +++ b/src/source_controller/coreservice/core/hostapplyrule/plan.go @@ -16,6 +16,7 @@ import ( "fmt" "sort" "strings" + "time" "configcenter/src/common" "configcenter/src/common/blog" @@ -272,11 +273,11 @@ func isRuleEqualOrNot(pType string, expectValue interface{}, propertyValue inter } case common.FieldTypeTime: - expectVal, ok := expectValue.(primitive.DateTime) + var ok bool + expectValue, ok = expectValue.(time.Time) if !ok { return false, errors.New(common.CCErrCommUnexpectedFieldType, "expect value type error") } - expectValue = expectVal.Time() propertyValue, err = metadata.ParseTime(propertyValue) if err != nil { diff --git a/src/source_controller/coreservice/service/tenant.go b/src/source_controller/coreservice/service/tenant.go index 95f5f93a12..b0482cb5cc 100644 --- a/src/source_controller/coreservice/service/tenant.go +++ b/src/source_controller/coreservice/service/tenant.go @@ -24,7 +24,6 @@ import ( "configcenter/src/common/blog" "configcenter/src/common/http/rest" "configcenter/src/common/mapstr" - "configcenter/src/storage/dal/mongo/sharding" "configcenter/src/storage/driver/mongodb" ) @@ -49,17 +48,5 @@ func (s *coreService) RefreshAllTenants(ctx *rest.Contexts) { } tenant.SetTenant(tenants) - // refresh tenant db map - shardingMongoManager, ok := mongodb.Dal().(*sharding.ShardingMongoManager) - if !ok { - blog.Errorf("convert to ShardingMongoManager failed, err: %v, rid: %s", err, ctx.Kit.Rid) - ctx.RespAutoError(err) - return - } - if err = shardingMongoManager.RefreshTenantDBMap(); err != nil { - blog.Errorf("refresh tenant db map failed, err: %v, rid: %s", err, ctx.Kit.Rid) - ctx.RespAutoError(err) - return - } ctx.RespEntity(tenants) } diff --git a/src/storage/dal/mongo/sharding/mongo.go b/src/storage/dal/mongo/sharding/mongo.go index a4c3c096a9..5f3012d67b 100644 --- a/src/storage/dal/mongo/sharding/mongo.go +++ b/src/storage/dal/mongo/sharding/mongo.go @@ -21,6 +21,7 @@ import ( 
"context" "errors" "fmt" + "sync" "time" "configcenter/pkg/tenant" @@ -62,7 +63,7 @@ func NewShardingMongo(config local.MongoConf, timeout time.Duration, crypto cryp return nil, err } - if err = sharding.RefreshTenantDBMap(); err != nil { + if err = sharding.refreshTenantDBMap(); err != nil { return nil, err } @@ -70,13 +71,30 @@ func NewShardingMongo(config local.MongoConf, timeout time.Duration, crypto cryp go func() { for { time.Sleep(time.Minute) - if err = sharding.RefreshTenantDBMap(); err != nil { + if err = sharding.refreshTenantDBMap(); err != nil { blog.Errorf("refresh tenant to db relation failed, err: %v", err) continue } } }() + tenantChan := tenant.NewTenantEventChan(fmt.Sprintf("sharding_db_%s", clientInfo.masterCli.UUID())) + go func() { + for e := range tenantChan { + switch e.EventType { + case tenant.Create: + client, exists := sharding.dbClientMap[e.Tenant.Database] + if !exists { + blog.Errorf("tenant %s related db %s config not found", e.Tenant.TenantID, e.Tenant.Database) + continue + } + sharding.tenantCli.set(e.Tenant.TenantID, client) + case tenant.Delete: + sharding.tenantCli.delete(e.Tenant.TenantID) + } + } + }() + return sharding, nil } @@ -87,13 +105,37 @@ type shardingMongoClient struct { // newDataCli is the client for mongodb that new data without specified db will be stored into newDataCli *local.MongoClient // tenantCli is the tenant id to mongodb client map - tenantCli map[string]*local.MongoClient + tenantCli *tenantMongoCliMap // dbClientMap is the db uuid to mongodb client map dbClientMap map[string]*local.MongoClient // tm is the transaction manager tm *local.ShardingTxnManager } +type tenantMongoCliMap struct { + tenantCli map[string]*local.MongoClient + sync.RWMutex +} + +func (m *tenantMongoCliMap) get(tenantID string) (*local.MongoClient, bool) { + m.RLock() + cli, exists := m.tenantCli[tenantID] + m.RUnlock() + return cli, exists +} + +func (m *tenantMongoCliMap) set(tenantID string, cli *local.MongoClient) { + m.Lock() + m.tenantCli[tenantID] = cli + m.Unlock() +} + +func (m *tenantMongoCliMap) delete(tenantID string) { + m.Lock() + delete(m.tenantCli, tenantID) + m.Unlock() +} + func newShardingMongoClient(config local.MongoConf, timeout time.Duration, crypto cryptor.Cryptor) ( *shardingMongoClient, *local.Mongo, error) { @@ -104,9 +146,11 @@ func newShardingMongoClient(config local.MongoConf, timeout time.Duration, crypt } clientInfo := &shardingMongoClient{ - masterCli: masterCli, - newDataCli: nil, - tenantCli: make(map[string]*local.MongoClient), + masterCli: masterCli, + newDataCli: nil, + tenantCli: &tenantMongoCliMap{ + tenantCli: make(map[string]*local.MongoClient), + }, dbClientMap: nil, tm: new(local.ShardingTxnManager), } @@ -150,14 +194,14 @@ func newShardingMongoClient(config local.MongoConf, timeout time.Duration, crypt } // newTenantDB new db client for tenant -func (c *shardingMongoClient) newTenantDB(tenant string, conf *local.MongoCliConf) local.DB { - if tenant == "" { +func (c *shardingMongoClient) newTenantDB(tenantID string, conf *local.MongoCliConf) local.DB { + if tenantID == "" { return local.NewErrDB(errors.New("tenant is not set")) } - client, exists := c.tenantCli[tenant] + client, exists := c.tenantCli.get(tenantID) if !exists { - return local.NewErrDB(fmt.Errorf("tenant %s not exists", tenant)) + return local.NewErrDB(fmt.Errorf("tenant %s not exists", tenantID)) } if client.Disabled() { @@ -169,7 +213,7 @@ func (c *shardingMongoClient) newTenantDB(tenant string, conf *local.MongoCliCon return local.NewErrDB(err) 
} - db, err := local.NewMongo(client, txnManager, conf, &local.MongoOptions{Tenant: tenant}) + db, err := local.NewMongo(client, txnManager, conf, &local.MongoOptions{Tenant: tenantID}) if err != nil { return local.NewErrDB(err) } @@ -249,8 +293,8 @@ func getShardingDBConfig(ctx context.Context, c *local.Mongo) (*ShardingDBConf, return conf, nil } -// RefreshTenantDBMap refresh tenant to db relation -func (m *ShardingMongoManager) RefreshTenantDBMap() error { +// refreshTenantDBMap refresh tenant to db relation +func (m *ShardingMongoManager) refreshTenantDBMap() error { tenantDBMap := make(map[string]string) for _, relation := range tenant.GetAllTenants() { tenantDBMap[relation.TenantID] = relation.Database @@ -265,7 +309,7 @@ func (m *ShardingMongoManager) RefreshTenantDBMap() error { tenantCli[tenant] = client } - m.tenantCli = tenantCli + m.tenantCli.tenantCli = tenantCli return nil } @@ -352,6 +396,31 @@ func NewWatchMongo(config local.MongoConf, timeout time.Duration, crypto cryptor return nil, err } + tenantChan := tenant.NewTenantEventChan(fmt.Sprintf("watch_sharding_db_%s", clientInfo.masterCli.UUID())) + go func() { + for e := range tenantChan { + switch e.EventType { + case tenant.Create: + watchDBUUID, exists := sharding.dbWatchDBMap[e.Tenant.Database] + if !exists { + blog.Errorf("tenant %s db %s watch db config not found, use default watch db: %s", + e.Tenant.TenantID, e.Tenant.Database, clientInfo.newDataCli.UUID()) + + sharding.tenantCli.set(e.Tenant.TenantID, clientInfo.newDataCli) + continue + } + client, exists := sharding.dbClientMap[watchDBUUID] + if !exists { + blog.Errorf("tenant %s related watch db %s config not found", e.Tenant.TenantID, watchDBUUID) + continue + } + sharding.tenantCli.set(e.Tenant.TenantID, client) + case tenant.Delete: + sharding.tenantCli.delete(e.Tenant.TenantID) + } + } + }() + go func() { for { time.Sleep(time.Minute) @@ -371,6 +440,10 @@ func (m *WatchMongo) refreshTenantDBMap() error { watchDBUUID, exists := m.dbWatchDBMap[relation.Database] if exists { tenantDBMap[relation.TenantID] = watchDBUUID + } else { + blog.Warnf("tenant %s related db %s watch db not found, use default watch db %s", relation.TenantID, + relation.Database, m.newDataCli.UUID()) + tenantDBMap[relation.TenantID] = m.newDataCli.UUID() } } @@ -383,7 +456,7 @@ func (m *WatchMongo) refreshTenantDBMap() error { tenantCli[tenant] = client } - m.tenantCli = tenantCli + m.tenantCli.tenantCli = tenantCli return nil } diff --git a/src/storage/stream/event/list.go b/src/storage/stream/event/list.go index 08042852b9..60b612f8c0 100644 --- a/src/storage/stream/event/list.go +++ b/src/storage/stream/event/list.go @@ -125,7 +125,7 @@ func (e *Event) lister(ctx context.Context, withRetry bool, opts *listOptions, c } } -// get collection related task ids, find options and filters +// parseCollListOpts get collection related task ids, find options and filters func (e *Event) parseCollListOpts(collection string, opts *listOptions) ([]string, *options.FindOptions, mapstr.MapStr, bool, error) { taskIDs, fields, filters := make([]string, 0), make([]string, 0), make([]filter.RuleFactory, 0) diff --git a/src/storage/stream/event/utils.go b/src/storage/stream/event/utils.go index 583aaa848b..950a930cd7 100644 --- a/src/storage/stream/event/utils.go +++ b/src/storage/stream/event/utils.go @@ -34,7 +34,7 @@ const fullDocPrefix = "fullDocument." 
var eventFields = []string{"_id", "operationType", "clusterTime", "ns", "documentKey", "updateDescription"} func generateOptions(opts *types.Options) (mongo.Pipeline, *options.ChangeStreamOptions, *parsedCollOptsInfo) { - collOptsInfo := parseCollOpts(opts.CollOpts) + collOptsInfo := parseCollOpts(opts.TaskCollOptsMap) allFilters := genWatchFilter(collOptsInfo.collCondMap, collOptsInfo.collOpTypeMap) diff --git a/src/storage/stream/event/watch.go b/src/storage/stream/event/watch.go index fa7d3d97c1..b1eada2d5a 100644 --- a/src/storage/stream/event/watch.go +++ b/src/storage/stream/event/watch.go @@ -15,12 +15,16 @@ package event import ( "context" "errors" + "fmt" "strings" "sync" "time" "configcenter/src/common/blog" + types2 "configcenter/src/common/types" "configcenter/src/storage/stream/types" + "configcenter/src/thirdparty/monitor" + "configcenter/src/thirdparty/monitor/meta" "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" @@ -74,7 +78,11 @@ func (e *Event) watch(ctx context.Context, pipeline mongo.Pipeline, streamOption Watch(ctx, pipeline, streamOptions) if err != nil && isFatalError(err) { - // TODO: send alarm immediately. + monitor.Collect(&meta.Alarm{ + Type: meta.FlowFatalError, + Detail: fmt.Sprintf("watch db: %s got a fatal error: %v, skip resume token and retry", err, e.DBName), + Module: types2.CC_MODULE_CACHESERVICE, + }) blog.Errorf("mongodb watch db: %s got a fatal error, skip resume token and retry, err: %v", e.DBName, err) // reset the resume token, because we can not use the former resume token to watch success for now. streamOptions.StartAfter = nil @@ -108,24 +116,31 @@ func (e *Event) watch(ctx context.Context, pipeline mongo.Pipeline, streamOption } type loopWatchOpts struct { + // Options is the cmdb watch options *types.Options + // streamOptions is the mongodb change stream options streamOptions *options.ChangeStreamOptions - stream *mongo.ChangeStream - pipeline mongo.Pipeline - eventChan chan *types.Event - currentToken types.EventToken - collOptsInfo *parsedCollOptsInfo + // stream is the mongodb change stream + stream *mongo.ChangeStream + // pipeline is the mongodb change stream aggregation pipeline which is used to filter events + pipeline mongo.Pipeline + // eventChan is the event channel that receives mongodb events + eventChan chan *types.Event + // currentToken is the current change stream token + currentToken types.EventToken + // collOptsInfo is the parsed watch task and collection info + collOptsInfo *parsedCollOptsInfo + // collTasksMap is the collection to task ids map + collTasksMap map[string][]string } func (e *Event) loopWatch(ctx context.Context, opts *loopWatchOpts) { retry := false opts.currentToken = types.EventToken{Data: ""} + opts.collTasksMap = make(map[string][]string) e.setCleaner(ctx, opts.eventChan) - // init collection to task ids map - collTasksMap := make(map[string][]string) - for { // no events, try cancel watch here. select { @@ -159,7 +174,7 @@ func (e *Event) loopWatch(ctx context.Context, opts *loopWatchOpts) { default: } - opts, retry = e.handleStreamEvent(ctx, opts, collTasksMap) + opts, retry = e.handleStreamEvent(ctx, opts) if retry { break } @@ -224,9 +239,12 @@ func (e *Event) retryWatch(ctx context.Context, opts *loopWatchOpts) (*loopWatch Watch(ctx, opts.pipeline, streamOptions) if err != nil { if isFatalError(err) { - // TODO: send alarm immediately. 
-			blog.Errorf("mongodb watch db: %s got a fatal error, skip resume token and retry, err: %v",
-				e.DBName, err)
+			monitor.Collect(&meta.Alarm{
+				Type:   meta.FlowFatalError,
+				Detail: fmt.Sprintf("watch db: %s got a fatal error: %v, skip resume token and retry", e.DBName, err),
+				Module: types2.CC_MODULE_CACHESERVICE,
+			})
+			blog.Errorf("mongodb watch db: %s got a fatal error, skip resume token and retry, err: %v", e.DBName, err)
 			// reset the resume token, because we can not use the former resume token to watch success for now.
 			streamOptions.StartAfter = nil
 			opts.StartAfterToken = nil
@@ -259,9 +277,7 @@ func (e *Event) retryWatch(ctx context.Context, opts *loopWatchOpts) (*loopWatch
 	return opts, false
 }
 
-func (e *Event) handleStreamEvent(ctx context.Context, opts *loopWatchOpts, collTasksMap map[string][]string) (
-	*loopWatchOpts, bool) {
-
+func (e *Event) handleStreamEvent(ctx context.Context, opts *loopWatchOpts) (*loopWatchOpts, bool) {
 	event := new(types.RawEvent)
 	if err := opts.stream.Decode(event); err != nil {
 		blog.Errorf("watch db %s, but decode to raw event struct failed, err: %v", e.DBName, err)
@@ -296,23 +312,23 @@ func (e *Event) handleStreamEvent(ctx context.Context, opts *loopWatchOpts, coll
 
 	opts.currentToken.Data = event.EventStream.Token.Data
 
-	e.parseEvent(event, opts.eventChan, opts.collOptsInfo, collTasksMap)
+	opts.collTasksMap = e.parseEvent(event, opts.eventChan, opts.collOptsInfo, opts.collTasksMap)
 
 	return opts, false
 }
 
 func (e *Event) parseEvent(event *types.RawEvent, eventChan chan *types.Event, collOptsInfo *parsedCollOptsInfo,
-	collTasksMap map[string][]string) {
+	collTasksMap map[string][]string) map[string][]string {
 
 	base := event.EventStream
 
 	collInfo, err := parseCollInfo(base.Namespace.Collection)
 	if err != nil {
 		blog.Errorf("parse event(%+v) collection info failed, err: %v", base, err)
-		return
+		return collTasksMap
 	}
 
-	// get the event task ids matching the collection name
+	// get the event task ids matching the collection name, cache the task ids info in collTasksMap
 	taskIDs, exists := collTasksMap[base.Namespace.Collection]
 	if !exists {
 		for collRegex, regex := range collOptsInfo.collRegexMap {
@@ -325,7 +341,7 @@ func (e *Event) parseEvent(event *types.RawEvent, eventChan chan *types.Event, c
 
 	if len(taskIDs) == 0 {
 		blog.Errorf("watch db %s, but get invalid event not matching any task, base: %+v", e.DBName, base)
-		return
+		return collTasksMap
 	}
 
 	// decode the event data to the event data struct, use pre data for delete event
@@ -336,7 +352,7 @@ func (e *Event) parseEvent(event *types.RawEvent, eventChan chan *types.Event, c
 
 	if rawDoc == nil {
 		blog.Errorf("watch db %s, but get invalid event with no detail, base: %+v", e.DBName, base)
-		return
+		return collTasksMap
 	}
 
 	var wg sync.WaitGroup
@@ -367,6 +383,7 @@ func (e *Event) parseEvent(event *types.RawEvent, eventChan chan *types.Event, c
 		}(taskID)
 	}
 	wg.Wait()
+	return collTasksMap
 }
 
 // isFatalError if watch encountered a fatal error, we should watch without resume token, which means from now.
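Side note on the parseEvent change above: the collection-to-task-ids lookup is now cached in collTasksMap and handed back through the return value, so the regex matching runs at most once per collection and the cache survives across retries via loopWatchOpts.collTasksMap. A minimal self-contained sketch of that cache-on-first-match pattern follows; the regex map, task ids and collection names are illustrative stand-ins, not the real cmdb task configuration:

package main

import (
	"fmt"
	"regexp"
)

// matchTasks returns the task ids whose collection regex matches coll, caching the
// result (including the empty result) so each collection name is only matched once.
func matchTasks(coll string, regexTaskIDs map[string][]string, cache map[string][]string) []string {
	if ids, ok := cache[coll]; ok {
		return ids
	}
	ids := make([]string, 0)
	for pattern, taskIDs := range regexTaskIDs {
		if regexp.MustCompile(pattern).MatchString(coll) {
			ids = append(ids, taskIDs...)
		}
	}
	cache[coll] = ids
	return ids
}

func main() {
	regexTaskIDs := map[string][]string{
		`^cc_ObjectBase_.*`: {"inst_watch_task"},
		`^cc_HostBase$`:     {"host_watch_task"},
	}
	cache := make(map[string][]string)
	fmt.Println(matchTasks("cc_ObjectBase_0_pub_switch", regexTaskIDs, cache)) // [inst_watch_task]
	fmt.Println(matchTasks("cc_HostBase", regexTaskIDs, cache))                // [host_watch_task]
}

Returning the cache instead of mutating a locally captured map is what lets the new collTasksMap field on loopWatchOpts keep its contents across handleStreamEvent and retryWatch iterations.
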
diff --git a/src/storage/stream/scheduler/scheduler.go b/src/storage/stream/scheduler/scheduler.go index 2b4fd8d44e..4e45b66584 100644 --- a/src/storage/stream/scheduler/scheduler.go +++ b/src/storage/stream/scheduler/scheduler.go @@ -136,6 +136,7 @@ func (s *Scheduler) AddTasks(tasks ...*task.Task) error { for _, t := range tasks { _, exists := s.watchTasks[t.Name] if exists { + blog.Errorf("add watch task %s to scheduler failed, task already exists", t.Name) return fmt.Errorf("loop watch task %s already exists", t.Name) } @@ -154,19 +155,19 @@ func (s *Scheduler) AddTasks(tasks ...*task.Task) error { // Start execute all watch tasks func (s *Scheduler) Start() error { if len(s.watchTasks) == 0 { - blog.Warnf("no watch task to start") - return nil + blog.Errorf("no watch task to start") + return fmt.Errorf("no watch task to start") } // generate task name to collection options map and db uuid to task name to db watch tasks map by watch task info - collOptions := make(map[string]types.WatchCollOptions) - listCollOptions := make(map[string]types.CollectionOptions) + taskCollOptsMap := make(map[string]types.WatchCollOptions) + taskListCollOptsMap := make(map[string]types.CollectionOptions) dbWatchTasks := make(map[string]map[string]*task.DBWatchTask) var batchSize int for taskName, watchTask := range s.watchTasks { - collOptions[taskName] = *watchTask.CollOptions + taskCollOptsMap[taskName] = *watchTask.CollOptions if watchTask.NeedList { - listCollOptions[taskName] = watchTask.CollOptions.CollectionOptions + taskListCollOptsMap[taskName] = watchTask.CollOptions.CollectionOptions } if watchTask.BatchSize > batchSize { batchSize = watchTask.BatchSize @@ -175,7 +176,7 @@ func (s *Scheduler) Start() error { dbTask, err := task.NewDBWatchTask(watchTask, &types.DBInfo{ UUID: uuid, WatchDB: s.watchClients[uuid], - CcDB: dbClient, + DB: dbClient, }) if err != nil { return err @@ -188,15 +189,15 @@ func (s *Scheduler) Start() error { } // list data for all list watch tasks - if len(listCollOptions) > 0 { - err := s.startList(listCollOptions, batchSize, dbWatchTasks) + if len(taskListCollOptsMap) > 0 { + err := s.startList(taskListCollOptsMap, batchSize, dbWatchTasks) if err != nil { return err } } // loop watch all db events for all tasks - err := s.startLoopWatch(collOptions, dbWatchTasks, batchSize) + err := s.startLoopWatch(taskCollOptsMap, dbWatchTasks, batchSize) if err != nil { return err } @@ -211,13 +212,13 @@ func (s *Scheduler) Start() error { return nil } -func (s *Scheduler) startList(listCollOptions map[string]types.CollectionOptions, batchSize int, +func (s *Scheduler) startList(taskListCollOptsMap map[string]types.CollectionOptions, batchSize int, dbWatchTasks map[string]map[string]*task.DBWatchTask) error { for uuid, eventInst := range s.eventMap { ctx := util.SetDBReadPreference(context.Background(), common.SecondaryPreferredMode) opt := &types.ListOptions{ - CollOpts: listCollOptions, + CollOpts: taskListCollOptsMap, PageSize: &batchSize, WithRetry: true, } @@ -241,7 +242,7 @@ func (s *Scheduler) startList(listCollOptions map[string]types.CollectionOptions return nil } -func (s *Scheduler) startLoopWatch(collOptions map[string]types.WatchCollOptions, +func (s *Scheduler) startLoopWatch(taskCollOptsMap map[string]types.WatchCollOptions, dbWatchTasks map[string]map[string]*task.DBWatchTask, batchSize int) error { for uuid, dbTaskMap := range dbWatchTasks { @@ -253,9 +254,10 @@ func (s *Scheduler) startLoopWatch(collOptions map[string]types.WatchCollOptions opts := 
&types.WatchOptions{
 			Options: types.Options{
-				MajorityCommitted: s.majorityCommitted,
-				MaxAwaitTime:      s.maxAwaitTime,
-				CollOpts:          collOptions,
+				MajorityCommitted:       s.majorityCommitted,
+				MaxAwaitTime:            s.maxAwaitTime,
+				TaskCollOptsMap:         taskCollOptsMap,
+				WatchFatalErrorCallback: watcher.resetWatchToken,
 			},
 		}
 		err = watcher.loopWatch(opts, batchSize)
diff --git a/src/storage/stream/scheduler/watch.go b/src/storage/stream/scheduler/watch.go
index c658855162..71c5ac1b83 100644
--- a/src/storage/stream/scheduler/watch.go
+++ b/src/storage/stream/scheduler/watch.go
@@ -89,8 +89,6 @@ func (s *Scheduler) newDBWatcher(uuid string, taskMap map[string]*task.DBWatchTa
 }
 
 func (w *dbWatcher) loopWatch(watchOpt *types.WatchOptions, batchSize int) error {
-	watchOpt.WatchFatalErrorCallback = w.resetWatchToken
-
 	ctx, cancel := context.WithCancel(context.Background())
 
 	watcher, watchOpt, err := w.watch(ctx, watchOpt)
@@ -127,6 +125,7 @@ func (w *dbWatcher) loopWatch(watchOpt *types.WatchOptions, batchSize int) error
 			watcher, watchOpt, err = w.watch(ctx, watchOpt)
 			if err != nil {
 				// notify retry signal, exit loop
+				blog.Errorf("watch db %s with opt(%+v) failed, err: %v, retry again", w.uuid, *watchOpt, err)
 				w.notifyRetry()
 				continue
 			}
@@ -173,7 +172,7 @@ func (w *dbWatcher) watch(ctx context.Context, watchOpt *types.WatchOptions) (*t
 	if err != nil {
 		blog.Errorf("%s job, loop watch db %s, but get start watch token failed, err: %v", w.uuid,
 			w.streamWatch.DBName, err)
-		return nil, nil, err
+		return nil, watchOpt, err
 	}
 	w.lastToken = startToken
@@ -188,7 +187,7 @@ func (w *dbWatcher) watch(ctx context.Context, watchOpt *types.WatchOptions) (*t
 	watcher, err := w.streamWatch.Watch(ctx, watchOpt)
 	if err != nil {
 		blog.Errorf("%s job, run loop, but watch failed, err: %v", w.uuid, err)
-		return nil, nil, err
+		return nil, watchOpt, err
 	}
 
 	return watcher, watchOpt, nil
diff --git a/src/storage/stream/task/util.go b/src/storage/stream/task/util.go
index 353e85d0ec..9e38bd7fd6 100644
--- a/src/storage/stream/task/util.go
+++ b/src/storage/stream/task/util.go
@@ -21,7 +21,7 @@ import (
 	"configcenter/src/storage/stream/types"
 )
 
-// compareToken compare event with token, returns if event is greater than the token
+// compareToken compare event with token, returns true if event is greater than the token
 func compareToken(event *types.Event, token *types.TokenInfo) bool {
 	if token == nil {
 		return true
diff --git a/src/storage/stream/types/task.go b/src/storage/stream/types/task.go
index 8639d370cd..d23dc480db 100644
--- a/src/storage/stream/types/task.go
+++ b/src/storage/stream/types/task.go
@@ -159,5 +159,5 @@ type DBInfo struct {
 	// UUID is the cc db uuid
 	UUID    string
 	WatchDB *local.Mongo
-	CcDB    local.DB
+	DB      local.DB
 }
diff --git a/src/storage/stream/types/types.go b/src/storage/stream/types/types.go
index 252a46f723..c981381965 100644
--- a/src/storage/stream/types/types.go
+++ b/src/storage/stream/types/types.go
@@ -95,7 +95,7 @@ func (opts *ListOptions) CheckSetDefault() error {
 	}
 
 	if opts.PageSize != nil {
-		if *opts.PageSize < 0 || *opts.PageSize > 2000 {
+		if *opts.PageSize < 200 || *opts.PageSize > 2000 {
 			return fmt.Errorf("invalid page size, range is [200,2000]")
 		}
 	} else {
@@ -190,8 +190,8 @@ type Options struct {
 	// default value is 1000ms
 	MaxAwaitTime *time.Duration
 
-	// CollOpts is the watch task id to watch options for different collections
-	CollOpts map[string]WatchCollOptions
+	// TaskCollOptsMap is the watch task id to watch options for different collections
+	TaskCollOptsMap map[string]WatchCollOptions
 
 	// 
StartAfterToken describe where you want to watch the event. // Note: the returned event doesn't contains the token represented, @@ -211,13 +211,13 @@ var defaultMaxAwaitTime = time.Second // CheckSetDefault check the legal of each option, and set the default value func (opts *Options) CheckSetDefault() error { - if len(opts.CollOpts) == 0 { + if len(opts.TaskCollOptsMap) == 0 { return errors.New("invalid Namespace field, database and collection can not be empty") } - for i, opt := range opts.CollOpts { + for taskID, opt := range opts.TaskCollOptsMap { if err := opt.Validate(); err != nil { - return fmt.Errorf("collection options[%s] is invalid, err: %v", i, err) + return fmt.Errorf("task %s collection options is invalid, err: %v", taskID, err) } } diff --git a/src/test/test.go b/src/test/test.go index f8796ee382..0091a7333e 100644 --- a/src/test/test.go +++ b/src/test/test.go @@ -462,8 +462,4 @@ func refreshTenant() { Expect(err).Should(BeNil()) tenant.SetTenant(allTenants) - shardingMongoManager, ok := db.(*sharding.ShardingMongoManager) - Expect(ok).Should(Equal(true)) - err = shardingMongoManager.RefreshTenantDBMap() - Expect(err).Should(BeNil()) }
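
For reviewers, a compact model of the tenant-to-watch-db resolution this patch introduces in NewWatchMongo and refreshTenantDBMap: tenant -> cc db (tenant relation), cc db -> watch db (WatchDBRelation), with a fallback to the default "for new data" watch db when no relation exists. This is a simplified sketch; the string UUIDs are placeholders, not real configuration or client objects:

package main

import "fmt"

// resolveWatchDB is a simplified model of how a tenant's watch db is chosen: the
// tenant's cc db is looked up in the db -> watch db relation map, and when no
// relation is configured the default "for new data" watch db is returned instead.
func resolveWatchDB(tenantDB string, dbWatchDBMap map[string]string, defaultWatchDB string) string {
	if watchDB, ok := dbWatchDBMap[tenantDB]; ok {
		return watchDB
	}
	// no watch db is configured for this cc db, fall back to the default watch db
	return defaultWatchDB
}

func main() {
	dbWatchDBMap := map[string]string{"ccdb-uuid-1": "watchdb-uuid-1"}
	fmt.Println(resolveWatchDB("ccdb-uuid-1", dbWatchDBMap, "watchdb-default")) // watchdb-uuid-1
	fmt.Println(resolveWatchDB("ccdb-uuid-2", dbWatchDBMap, "watchdb-default")) // watchdb-default
}

The fallback branch corresponds to the blog.Warnf/blog.Errorf paths in the sharding mongo changes above, which log the missing relation before defaulting to newDataCli.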