Skip to content

Commit 24620f6

Browse files
authored
Feat/2.3.0 (#1884)
2 parents 1ceb48e + 4b57314 commit 24620f6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+211
-36
lines changed

src/backend/bisheng/llm/domain/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ def parse_token_usage(result: Any) -> tuple[int, int, int, int]:
6262
output_token += tmp2
6363
cache_token += tmp3
6464
total_token += tmp4
65+
elif isinstance(result, ChatGenerationChunk):
66+
token_usage = result.generation_info.get('token_usage', {})
67+
input_token, output_token, cache_token, total_token = get_token_from_usage(token_usage)
68+
else:
69+
logger.warning(f'unknown result type: {type(result)}')
6570
return input_token, output_token, cache_token, total_token
6671

6772

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
from bisheng.core.search.elasticsearch.manager import get_statistics_es_connection_sync
2+
3+
INDEX_MAPPING = {
4+
"mappings": { # Defining the indexed Mapping
5+
"properties": {
6+
"event_id": {"type": "keyword"},
7+
"event_type": {"type": "keyword"},
8+
"trace_id": {"type": "keyword"},
9+
"timestamp": {"type": "date", "format": "strict_date_optional_time||epoch_second"},
10+
"user_context": {
11+
"type": "object",
12+
"properties": {
13+
"user_id": {"type": "integer"},
14+
"user_name": {"type": "keyword"},
15+
"user_group_infos": {
16+
"type": "object",
17+
"properties": {
18+
"user_group_id": {"type": "integer"},
19+
"user_group_name": {"type": "keyword"}
20+
}
21+
},
22+
"user_role_infos": {
23+
"type": "object",
24+
"properties": {
25+
"role_id": {"type": "integer"},
26+
"role_name": {"type": "keyword"},
27+
"group_id": {"type": "integer"},
28+
}
29+
}
30+
}
31+
},
32+
"event_data": {
33+
"type": "object",
34+
"dynamic": True
35+
}
36+
}
37+
}
38+
}
39+
40+
import time
41+
42+
43+
def wait_for_task(
44+
es,
45+
task_id: str,
46+
poll_interval: int = 10,
47+
timeout: int = 3600,
48+
):
49+
"""
50+
轮询 ES task 状态,直到完成或超时
51+
"""
52+
start_time = time.time()
53+
54+
while True:
55+
56+
task_info = es.tasks.get(task_id=task_id)
57+
completed = task_info.get("completed", False)
58+
59+
if completed:
60+
response = task_info.get("response", {})
61+
failures = response.get("failures", [])
62+
total = response.get("total", 0)
63+
created = response.get("created", 0)
64+
updated = response.get("updated", 0)
65+
66+
print(
67+
f"[REINDEX DONE] total={total}, created={created}, updated={updated}"
68+
)
69+
70+
if failures:
71+
raise RuntimeError(f"Reindex failures: {failures}")
72+
73+
return response
74+
75+
if time.time() - start_time > timeout:
76+
raise TimeoutError(f"Reindex task timeout: {task_id}")
77+
78+
status = task_info.get("task", {}).get("status", {})
79+
print(
80+
f"[REINDEX RUNNING] "
81+
f"total={status.get('total', 0)} "
82+
f"created={status.get('created', 0)} "
83+
f"updated={status.get('updated', 0)}"
84+
)
85+
86+
time.sleep(poll_interval)
87+
88+
89+
def count_docs(es, index):
90+
return es.count(index=index)["count"]
91+
92+
93+
if __name__ == '__main__':
94+
es_conn = get_statistics_es_connection_sync()
95+
temp_index_name = "base_telemetry_events_v1"
96+
original_index_name = "base_telemetry_events"
97+
98+
# 1. 记录原始数据量
99+
try:
100+
source_count = count_docs(es_conn, original_index_name)
101+
print(f"Original doc count: {source_count}")
102+
except:
103+
source_count = 0
104+
print("Original index might not exist.")
105+
106+
# 创建临时索引
107+
if not es_conn.indices.exists(index=temp_index_name):
108+
es_conn.indices.create(index=temp_index_name, body=INDEX_MAPPING)
109+
print(f"Created temporary index: {temp_index_name}")
110+
111+
# 使用Elasticsearch的_reindex API进行数据迁移
112+
reindex_body = {
113+
"source": {
114+
"index": original_index_name
115+
},
116+
"dest": {
117+
"index": temp_index_name
118+
}
119+
}
120+
121+
resp = es_conn.options(request_timeout=3600).reindex(
122+
body=reindex_body,
123+
wait_for_completion=False
124+
)
125+
126+
task_id = resp["task"]
127+
print(f"Reindex started, task_id={task_id}")
128+
129+
wait_for_task(
130+
es_conn,
131+
task_id=task_id,
132+
poll_interval=5,
133+
timeout=3600
134+
)
135+
136+
# 删除原始索引
137+
es_conn.indices.delete(index=original_index_name)
138+
139+
# 将临时索引重命名为原始索引名
140+
es_conn.indices.put_alias(index=temp_index_name, name=original_index_name)
141+
142+
143+
print(f"Reindexed data to {original_index_name} successfully.")
144+
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
3+
export PYTHONPATH="./"
4+
echo "Reindexing telemetry events..."
5+
python bisheng/script/base_telemetry_events_reindex.py
6+
echo "Reindexing completed."
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)