Skip to content

Commit 57590d0

Browse files
committed
feat(backend): 支持日志采集实时下发配置 #10313
# Reviewed, transaction id: 41316
1 parent 173b78d commit 57590d0

File tree

13 files changed

+167
-24
lines changed

13 files changed

+167
-24
lines changed

.gtmproject.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ github:
66
repo_name: "blueking-dbm"
77

88
# 指定里程碑ID,
9-
milestone_id: "12"
9+
milestone_id: "13"
1010

1111
project:
1212
# 主分支

dbm-ui/backend/components/bklog/client.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,18 @@ def __init__(self):
4343
description=_("创建采集项的前置检查"),
4444
)
4545
self.list_collectors = self.generate_data_api(
46-
method="GET",
47-
url="databus_collectors/",
48-
description=_("获取采集项列表"),
46+
method="GET", url="databus_collectors/", description=_("获取采集项列表"), cache_time=60 * 60
4947
)
5048
self.search_index_set = self.generate_data_api(
5149
method="GET",
5250
url="search_index_set/",
5351
description=_("查询索引集列表"),
5452
)
53+
self.run_databus_collectors = self.generate_data_api(
54+
method="POST",
55+
url="databus_collectors/{collector_config_id}/run/",
56+
description=_("日志平台-订阅IP下发"),
57+
)
5558

5659

5760
BKLogApi = _BKLogApi()

dbm-ui/backend/db_meta/models/cluster_monitor.py

+18
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,24 @@
114114
# mysql和tendbcluster采集器一致
115115
INSTANCE_MONITOR_PLUGINS[DBType.TenDBCluster] = INSTANCE_MONITOR_PLUGINS[DBType.MySQL]
116116

117+
INSTANCE_BKLOG_PLUGINS = {
118+
DBType.Redis: {
119+
MachineType.PREDIXY: {"name": "predixy", "plugin_id": "redis_slowlog"},
120+
MachineType.TWEMPROXY: {"name": "twemproxy", "plugin_id": "redis_slowlog"},
121+
MachineType.REDIS: {"name": "redis", "plugin_id": "redis_slowlog"},
122+
MachineType.TENDISCACHE: {"name": "tendiscache", "plugin_id": "redis_slowlog"},
123+
MachineType.TENDISSSD: {"name": "tendisssd", "plugin_id": "redis_slowlog"},
124+
MachineType.TENDISPLUS: {"name": "tendisplus", "plugin_id": "redis_slowlog"},
125+
},
126+
DBType.MySQL: {
127+
MachineType.BACKEND: {"name": "mysql", "plugin_id": "mysql_slowlog"},
128+
},
129+
DBType.TenDBCluster: {
130+
MachineType.SPIDER: {"name": "spider", "plugin_id": "mysql_slowlog"},
131+
MachineType.REMOTE: {"name": "mysql", "plugin_id": "mysql_slowlog"},
132+
},
133+
}
134+
117135
SET_NAME_TEMPLATE = "db.{db_type}.{monitor_plugin_name}"
118136

119137
SHORT_NAMES = list(

dbm-ui/backend/db_periodic_task/local_tasks/db_meta/constants.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
99
specific language governing permissions and limitations under the License.
1010
"""
11-
11+
from backend.configuration.constants import DBType
1212
from backend.db_meta.enums import ClusterType
1313

1414
UNIFY_QUERY_PARAMS = {
@@ -33,6 +33,15 @@
3333
"type": "instant",
3434
}
3535

36+
EXPORTER_UP_QUERY_TEMPLATE = {
37+
DBType.Redis: {
38+
"range": 5,
39+
"dbm_redis_exporter": """count by (cluster_domain) (
40+
bkmonitor:exporter_dbm_redis_exporter:redis_up{instance_role='redis_master'}
41+
)""",
42+
}
43+
}
44+
3645
QUERY_TEMPLATE = {
3746
ClusterType.TendisTwemproxyRedisInstance: {
3847
"range": 5,

dbm-ui/backend/db_periodic_task/local_tasks/db_meta/sync_cluster_stat.py

+25
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import datetime
1313
import json
1414
import logging
15+
import time
1516
from collections import defaultdict
1617

1718
from celery import current_app
@@ -26,6 +27,7 @@
2627
from backend.db_meta.models import Cluster
2728
from backend.db_periodic_task.local_tasks import register_periodic_task, start_new_span
2829
from backend.db_periodic_task.local_tasks.db_meta.constants import (
30+
EXPORTER_UP_QUERY_TEMPLATE,
2931
QUERY_TEMPLATE,
3032
SAME_QUERY_TEMPLATE_CLUSTER_TYPE_MAP,
3133
UNIFY_QUERY_PARAMS,
@@ -35,6 +37,29 @@
3537
logger = logging.getLogger("celery")
3638

3739

40+
def query_cluster_exporter_up(db_type, exporter):
41+
"""查询某类集群的 exporter 是否正常"""
42+
# 获取查询模板
43+
query_template = EXPORTER_UP_QUERY_TEMPLATE.get(db_type)
44+
if not query_template:
45+
logger.error("No query template for cluster type: %s and exporter: %s", db_type, exporter)
46+
return {}
47+
48+
# 查询业务固定为DBA,查询时间取模板range
49+
params = copy.deepcopy(UNIFY_QUERY_PARAMS)
50+
params["bk_biz_id"] = env.DBA_APP_BK_BIZ_ID
51+
params["end_time"] = int(time.time())
52+
params["start_time"] = params["end_time"] - int(query_template["range"]) * 60
53+
params["query_configs"][0]["promql"] = query_template[exporter]
54+
55+
# 查询exporter up指标
56+
series = BKMonitorV3Api.unify_query(params)["series"]
57+
cluster_exporter_up_map = {
58+
data["dimensions"]["cluster_domain"]: data["datapoints"][0][0] for data in series if data["datapoints"]
59+
}
60+
return cluster_exporter_up_map
61+
62+
3863
def query_cap(bk_biz_id, cluster_type, cap_key="used"):
3964
"""查询某类集群的某种容量: used/total"""
4065

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available.
4+
Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved.
5+
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at https://opensource.org/licenses/MIT
7+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
8+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
9+
specific language governing permissions and limitations under the License.
10+
"""
11+
12+
import logging
13+
14+
from django.db.models import Count
15+
from django.utils.translation import gettext as _
16+
from rest_framework.decorators import action
17+
from rest_framework.response import Response
18+
19+
from backend.bk_web.swagger import common_swagger_auto_schema
20+
from backend.bk_web.viewsets import SystemViewSet
21+
from backend.configuration.constants import DBType
22+
from backend.db_meta.enums import InstanceRole
23+
from backend.db_meta.models import StorageInstance
24+
from backend.db_report.enums import SWAGGER_TAG
25+
26+
logger = logging.getLogger("root")
27+
28+
29+
class ClusterExporterUpViewSet(SystemViewSet):
30+
@common_swagger_auto_schema(
31+
operation_summary=_("获取redis集群的exporter数与分片数不一致的报表"),
32+
tags=[SWAGGER_TAG],
33+
)
34+
@action(methods=["POST"], detail=False)
35+
def get_redis_exporter_mismatch(self, request):
36+
"""获取redis集群的exporter数与分片数不一致的报表"""
37+
# 获取exporter数与集群的映射
38+
from backend.db_periodic_task.local_tasks.db_meta.sync_cluster_stat import query_cluster_exporter_up
39+
40+
exporter_map = query_cluster_exporter_up(DBType.Redis, "dbm_redis_exporter")
41+
# 获取元数据的集群分片映射
42+
redis_masters = StorageInstance.objects.filter(instance_role=InstanceRole.REDIS_MASTER)
43+
shard_map = {
44+
item["cluster__immute_domain"]: item["total"]
45+
for item in redis_masters.values("cluster__immute_domain").annotate(total=Count("id"))
46+
}
47+
# 过滤exporter与元数据不一致的集群
48+
mismatch_clusters = [
49+
{"domain": domain, "shard": shard, "exporter_up": exporter_map.get(domain, 0)}
50+
for domain, shard in shard_map.items()
51+
if shard != exporter_map.get(domain, 0)
52+
]
53+
return Response(mismatch_clusters)

dbm-ui/backend/db_services/redis/toolbox/handlers.py

+6
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from backend.db_services.redis.resources.redis_cluster.query import RedisListRetrieveResource
2727
from backend.exceptions import ApiResultError
2828
from backend.flow.utils.base.payload_handler import PayloadHandler
29+
from backend.flow.utils.redis import redis_util
2930
from backend.flow.utils.redis.redis_proxy_util import (
3031
get_cluster_proxy_version,
3132
get_cluster_proxy_version_for_upgrade,
@@ -138,6 +139,11 @@ def get_update_cluster_versions(cls, cluster_id: int, node_type: str):
138139
else:
139140
return get_cluster_proxy_version_for_upgrade(cluster_id)
140141

142+
@classmethod
143+
def list_cluster_big_version(cls, cluster_id: int):
144+
"""查询集群可更新的大版本"""
145+
return redis_util.get_cluster_update_version(cluster_id)
146+
141147
@classmethod
142148
def webconsole_rpc(cls, cluster_id: int, cmd: str, db_num: int = 0, raw: bool = True, **kwargs):
143149
"""

dbm-ui/backend/db_services/redis/toolbox/serializers.py

+4
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ class GetClusterVersionSerializer(serializers.Serializer):
220220
type = serializers.ChoiceField(help_text=_("请求版本类型"), choices=RedisVersionQueryType.get_choices())
221221

222222

223+
class ListClusterBigVersionSerializer(serializers.Serializer):
224+
cluster_id = serializers.IntegerField(help_text=_("集群ID"))
225+
226+
223227
class GetClusterCapacityInfoSerializer(serializers.Serializer):
224228
cluster_id = serializers.IntegerField(help_text=_("集群ID"))
225229
new_storage_version = serializers.CharField(help_text=_("存储版本"))

dbm-ui/backend/db_services/redis/toolbox/views.py

+11
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
GetClusterCapacityInfoSerializer,
2626
GetClusterModuleInfoSerializer,
2727
GetClusterVersionSerializer,
28+
ListClusterBigVersionSerializer,
2829
QueryByOneClusterSerializer,
2930
QueryClusterIpsSerializer,
3031
)
@@ -85,6 +86,16 @@ def get_cluster_versions(self, request, bk_biz_id, **kwargs):
8586
else:
8687
return Response(ToolboxHandler.get_update_cluster_versions(cluster_id, node_type))
8788

89+
@common_swagger_auto_schema(
90+
operation_summary=_("查询集群可更新大版本"),
91+
query_serializer=ListClusterBigVersionSerializer(),
92+
tags=[SWAGGER_TAG],
93+
)
94+
@action(methods=["GET"], detail=False, serializer_class=ListClusterBigVersionSerializer, pagination_class=None)
95+
def list_cluster_big_version(self, request, bk_biz_id, **kwargs):
96+
data = self.params_validate(self.get_serializer_class())
97+
return Response(ToolboxHandler.list_cluster_big_version(data["cluster_id"]))
98+
8899
@common_swagger_auto_schema(
89100
operation_summary=_("获取集群容量变更所需信息"),
90101
query_serializer=GetClusterCapacityInfoSerializer(),

dbm-ui/backend/env/bklog.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
BKLOG_CLUSTER_SUPPORT_HOT_COLD = get_type_env(key="BKLOG_CLUSTER_SUPPORT_HOT_COLD", _type=bool, default=False)
2020

2121
# 日志默认存储天数
22-
BKLOG_DEFAULT_RETENTION = get_type_env(key="BKLOG_DEFAULT_RETENTION", _type=int, default=7)
22+
BKLOG_DEFAULT_RETENTION = get_type_env(key="BKLOG_DEFAULT_RETENTION", _type=int, default=30)
2323

2424
# 自定义日志保留天数
2525
BKLOG_MYSQL_DBBACKUP_RESULT_RETENTION = get_type_env(key="BKLOG_MYSQL_DBBACKUP_RESULT_RETENTION", _type=int)

dbm-ui/backend/flow/consts.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ class StateType(str, StructuredEnum):
202202

203203

204204
class OperateCollectorActionEnum(str, StructuredEnum):
205-
INSTALL = EnumField("install", _("安装"))
205+
INSTALL = EnumField("INSTALL", _("安装"))
206206
UNINSTALL = EnumField("UNINSTALL", _("卸载"))
207207

208208

dbm-ui/backend/flow/plugins/components/collections/common/base_service.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def __log__(
275275
return JobApi.get_job_instance_ip_log({**payload, **ip_dict}, raw=True)
276276

277277
@staticmethod
278-
def __url__(job_instance_id: int, link: bool = True):
278+
def __url__(job_instance_id: int, link: bool = False):
279279
"""
280280
获取job任务链接
281281
"""

dbm-ui/backend/flow/utils/cc_manage.py

+30-16
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
from django.db import transaction
1818

1919
from backend import env
20-
from backend.components import BKMonitorV3Api, CCApi
20+
from backend.components import BKLogApi, BKMonitorV3Api, CCApi
2121
from backend.configuration.models import BizSettings, DBAdministrator
2222
from backend.db_meta.enums import ClusterType, ClusterTypeMachineTypeDefine
2323
from backend.db_meta.models import AppMonitorTopo, Cluster, ClusterMonitorTopo, Machine, ProxyInstance, StorageInstance
24-
from backend.db_meta.models.cluster_monitor import INSTANCE_MONITOR_PLUGINS, SET_NAME_TEMPLATE
24+
from backend.db_meta.models.cluster_monitor import INSTANCE_BKLOG_PLUGINS, INSTANCE_MONITOR_PLUGINS, SET_NAME_TEMPLATE
2525
from backend.db_monitor.models import CollectInstance, MonitorPolicy
2626
from backend.db_monitor.utils import create_bklog_collector
2727
from backend.db_services.cmdb.biz import get_or_create_cmdb_module_with_name, get_or_create_set_with_name
@@ -581,12 +581,17 @@ def operate_collector(bk_biz_id: int, db_type: str, machine_type: str, bk_instan
581581
if not bk_instance_ids:
582582
return
583583

584-
logger.error(
584+
logger.info(
585585
"operate_collector: {db_type} {machine_type} {bk_instance_ids} {action}".format(
586586
db_type=db_type, machine_type=machine_type, bk_instance_ids=bk_instance_ids, action=action
587587
)
588588
)
589589

590+
# 获取下发的实例和采集范围
591+
nodes = [{"id": bk_instance_id, "bk_biz_id": bk_biz_id} for bk_instance_id in bk_instance_ids]
592+
scope = {"bk_biz_id": bk_biz_id, "object_type": "SERVICE", "node_type": "INSTANCE", "nodes": nodes}
593+
594+
# --- 下发监控采集器 ---
590595
plugin_id = INSTANCE_MONITOR_PLUGINS[db_type][machine_type]["plugin_id"]
591596
collect_instances = CollectInstance.objects.filter(db_type=db_type, plugin_id=plugin_id)
592597
for collect_ins in collect_instances:
@@ -595,22 +600,31 @@ def operate_collector(bk_biz_id: int, db_type: str, machine_type: str, bk_instan
595600
continue
596601
# 下发采集器
597602
try:
598-
nodes = [{"id": bk_instance_id, "bk_biz_id": bk_biz_id} for bk_instance_id in bk_instance_ids]
599603
BKMonitorV3Api.run_collect_config(
600-
{
601-
"bk_biz_id": env.DBA_APP_BK_BIZ_ID,
602-
"id": collect_ins.collect_id,
603-
"scope": {
604-
"bk_biz_id": bk_biz_id,
605-
"object_type": "SERVICE",
606-
"node_type": "INSTANCE",
607-
"nodes": nodes,
608-
},
609-
"action": action,
610-
}
604+
{"bk_biz_id": env.DBA_APP_BK_BIZ_ID, "id": collect_ins.collect_id, "scope": scope, "action": action},
605+
use_admin=True,
611606
)
612607
except ApiError as err:
613-
logger.error(f"[run_collect_config] id:{collect_ins.collect_id} error: {err}")
608+
logger.error(f"[monitor] id:{collect_ins.collect_id} error: {err}")
609+
610+
# --- 下发日志采集器 ---
611+
plugin_id = INSTANCE_BKLOG_PLUGINS[db_type][machine_type]["plugin_id"]
612+
# 获取当前采集项的列表
613+
data = BKLogApi.list_collectors({"bk_biz_id": env.DBA_APP_BK_BIZ_ID, "pagesize": 500, "page": 1}, use_admin=True)
614+
collectors_name__info_map = {collector["collector_config_name_en"]: collector for collector in data["list"]}
615+
collect = collectors_name__info_map.get(plugin_id)
616+
# 忽略不存在的采集项
617+
if not collect:
618+
return
619+
# 下发采集器
620+
collect_id = collect["collector_config_id"]
621+
try:
622+
BKLogApi.run_databus_collectors(
623+
{"bk_biz_id": env.DBA_APP_BK_BIZ_ID, "collector_config_id": collect_id, "scope": scope, "action": action},
624+
use_admin=True,
625+
)
626+
except ApiError as err:
627+
logger.error(f"[bklog] id:{collect_id} error: {err}")
614628

615629

616630
def trigger_operate_collector(

0 commit comments

Comments
 (0)