Skip to content

Commit 512ece9

Browse files
z275748353 and 张龙彬 authored
Csghub wl jlp (#1352)
* add dataflow
* add dataflow
* remove antv/x6/lib/registry package
* Fix bugs
* Fix bugs
* add package
* add package
* add zhHantOps
* 1. Adjust the image path for operator management 2. Add permission judgment on whether to display the menu in operator management
* Operator Management: Modification of dataflow/operator/ interface
* 1. Add the cancellation of execution for internationalization and status verification 2. If a user has no organization or no authorized operators, all public operators will be queried by default
* 1. Add the cancellation of execution for internationalization and status verification 2. If a user has no organization or no authorized operators, all public operators will be queried by default
* 1. Add the cancellation of execution for internationalization and status verification 2. If a user has no organization or no authorized operators, all public operators will be queried by default
* Add newly developed operators, internationalize tools, and supplement the internationalization of statistics.
* Add newly developed operators, internationalize tools, and supplement the internationalization of statistics.
---------
Co-authored-by: 张龙彬 <[email protected]>
1 parent 9363e20 commit 512ece9

File tree

10 files changed

+488
-29
lines changed

10 files changed

+488
-29
lines changed

frontend/src/components/dataflow_config/algTemplate/operatorManagement/index.vue

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -431,6 +431,7 @@ import {
431431
// 引入国际化文件
432432
import zhOps from "../../../../locales/zh_js/operator_zh.json";
433433
import enOps from "../../../../locales/en_js/operator_en.json";
434+
import zhHantOps from '../../../../locales/zh_hant_js/operator_zhHant.json'
434435
435436
const { t, locale } = useI18n();
436437
const origin = window.location.origin + "/";
@@ -439,6 +440,7 @@ const origin = window.location.origin + "/";
439440
const operatorI18n = {
440441
zh: zhOps,
441442
en: enOps,
443+
zhHant: zhHantOps
442444
};
443445
444446
// 基础状态管理

frontend/src/components/dataflow_config/dataflow/workflowEditor.vue

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,11 +205,13 @@
205205
import jsYaml from 'js-yaml';
206206
import zhOps from "../../../locales/zh_js/operator_zh.json";
207207
import enOps from "../../../locales/en_js/operator_en.json";
208+
import zhHantOps from '../../../locales/zh_hant_js/operator_zhHant.json'
208209
import { useI18n } from "vue-i18n";
209210
const { t, locale } = useI18n();
210211
const operatorI18n = {
211212
zh: zhOps,
212213
en: enOps,
214+
zhHant: zhHantOps
213215
};
214216
const userStore = useUserStore();
215217
const origin = window.location.origin + '/';

frontend/src/components/dataflow_config/systemDashboard/celeryNodeService.vue

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
</p>
99
</div>
1010
<p class="text-gray-900 text-2xl font-medium">
11-
数据源
11+
{{ t("dataPipelines.data_source") }}
1212
</p>
1313

1414
<div>
@@ -18,7 +18,7 @@
1818
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
1919
>
2020
<div class="flex justify-between items-center">
21-
<span class="text-sm text-gray-600">等待中</span>
21+
<span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
2222
<div
2323
class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
2424
>
@@ -34,7 +34,7 @@
3434
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
3535
>
3636
<div class="flex justify-between items-center">
37-
<span class="text-sm text-gray-600">执行中</span>
37+
<span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
3838
<div
3939
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
4040
>
@@ -49,7 +49,7 @@
4949
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
5050
>
5151
<div class="flex justify-between items-center">
52-
<span class="text-sm text-gray-600">执行结束(正常)</span>
52+
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
5353
<div
5454
class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
5555
>
@@ -64,7 +64,7 @@
6464
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
6565
>
6666
<div class="flex justify-between items-center">
67-
<span class="text-sm text-gray-600">执行结束(错误)</span>
67+
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
6868
<div
6969
class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
7070
>
@@ -77,7 +77,7 @@
7777
</div>
7878

7979
<p class="text-gray-900 text-2xl font-medium mt-[16px]">
80-
格式转换
80+
{{ t("dataPipelines.formatConversion") }}
8181
</p>
8282

8383
<div>
@@ -87,7 +87,7 @@
8787
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
8888
>
8989
<div class="flex justify-between items-center">
90-
<span class="text-sm text-gray-600">等待中</span>
90+
<span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
9191
<div
9292
class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
9393
>
@@ -102,7 +102,7 @@
102102
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
103103
>
104104
<div class="flex justify-between items-center">
105-
<span class="text-sm text-gray-600">执行中</span>
105+
<span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
106106
<div
107107
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
108108
>
@@ -117,7 +117,7 @@
117117
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
118118
>
119119
<div class="flex justify-between items-center">
120-
<span class="text-sm text-gray-600">已停止</span>
120+
<span class="text-sm text-gray-600">{{ t("dataPipelines.stopped") }}</span>
121121
<div
122122
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
123123
>
@@ -132,7 +132,7 @@
132132
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
133133
>
134134
<div class="flex justify-between items-center">
135-
<span class="text-sm text-gray-600">执行结束(正常)</span>
135+
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
136136
<div
137137
class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
138138
>
@@ -147,7 +147,7 @@
147147
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
148148
>
149149
<div class="flex justify-between items-center">
150-
<span class="text-sm text-gray-600">执行结束(错误)</span>
150+
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
151151
<div
152152
class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
153153
>
@@ -160,7 +160,7 @@
160160
</div>
161161

162162
<p class="text-gray-900 text-2xl font-medium mt-[16px]">
163-
数据处理
163+
{{ t("dataPipelines.dataProcessing") }}
164164
</p>
165165

166166
<div>
@@ -170,7 +170,7 @@
170170
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
171171
>
172172
<div class="flex justify-between items-center">
173-
<span class="text-sm text-gray-600">等待中</span>
173+
<span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
174174
<div
175175
class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
176176
>
@@ -186,7 +186,7 @@
186186
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
187187
>
188188
<div class="flex justify-between items-center">
189-
<span class="text-sm text-gray-600">执行中</span>
189+
<span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
190190
<div
191191
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
192192
>
@@ -201,7 +201,7 @@
201201
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
202202
>
203203
<div class="flex justify-between items-center">
204-
<span class="text-sm text-gray-600">执行结束(正常)</span>
204+
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
205205
<div
206206
class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
207207
>
@@ -216,7 +216,7 @@
216216
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
217217
>
218218
<div class="flex justify-between items-center">
219-
<span class="text-sm text-gray-600">执行结束(错误)</span>
219+
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
220220
<div
221221
class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
222222
>
@@ -260,7 +260,7 @@
260260
<div class="borderBox mt-8">
261261
<div class="py-5 px-6">
262262
<p class="text-lg text-gray-900 font-medium">
263-
Celery 节点服务列表
263+
{{ t("dataPipelines.celery_node_service_list") }}
264264
</p>
265265
</div>
266266
<div class="flex flex-col">
@@ -278,18 +278,18 @@
278278
/>
279279
<el-table-column
280280
prop="current_ip"
281-
label="IP地址"
281+
:label="t('dataPipelines.ip_address')"
282282
min-width="180"
283283
/>
284284
<el-table-column
285285
prop="task_count"
286-
label="当前任务数"
287-
min-width="160"
286+
:label="t('dataPipelines.current_number_tasks')"
287+
min-width="200"
288288
/>
289289

290290
<el-table-column
291291
prop="status"
292-
label="节点状态"
292+
:label="t('dataPipelines.node_status')"
293293
min-width="160"
294294
>
295295
<template #default="scope">
@@ -306,7 +306,7 @@
306306

307307
<el-table-column
308308
prop="ack_time"
309-
label="心跳时间"
309+
:label="t('dataPipelines.heartbeat_time')"
310310
min-width="280"
311311
>
312312
<template #default="scope">

frontend/src/locales/en_js/datapipelines.js

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ export const dataPipelines = {
1313
"targetFormat": "Target Format",
1414
"dataFlowBranch": "Data Flow Branch",
1515
"startExecution": "Start Execution",
16+
"inProgress": "In Progress",
1617
"searchTaskName": "Search Task Name",
1718
"confirmTermination": "Confirm Termination",
1819
"terminate": "Terminate",
@@ -39,6 +40,7 @@ export const dataPipelines = {
3940
}
4041
},
4142
"testingConnection": "Testing connection",
43+
"submitting": "Submitting",
4244
"pleaseSelectAnExecutionTime": "Please select an execution time",
4345
"deletingTask": "Deleting task",
4446
"terminatingTask": "Terminating task",
@@ -183,14 +185,24 @@ export const dataPipelines = {
183185
"deduplicate": "Deduplicate",
184186
"remove": "Remove",
185187
"data_refine": "Data Refinement",
188+
"Internal": "Internal",
186189
"data_generation": "Data Generation",
187190
"data_enhancement": "Data Enhancement",
188-
"Internal": "Internal",
191+
192+
"data_source": "Data Source",
193+
"execution_completed_normally": "Execution completed (normal)",
194+
"execution_end_error": "Execution ended (error)",
195+
"stopped": "Stopped",
196+
"celery_node_service_list": "Celery Node Service List",
197+
"ip_address": "IP Address",
198+
"current_number_tasks": "Current Number of Tasks",
199+
"node_status": "Node Status",
200+
"heartbeat_time": "Heartbeat Time",
201+
189202
"taskType": "Task Type",
190203
"dataCleaning": "Data Cleaning",
191204
"processingStatus": "Processing Status",
192205
"processingText": "Processing Text",
193-
"inProgress": "In Progress",
194206
"completed": "Completed",
195207
"dataSource": "Data Source",
196208
"dataSourceBranch": "Data Source Branch",
@@ -244,7 +256,6 @@ export const dataPipelines = {
244256
"uploadFailedTips2": "The icon size cannot exceed 10MB.",
245257
"uploadFailedTips3": "Upload failed, please try again",
246258
"networkError": "Network error, please check the connection and try again",
247-
"submitting": "Submitting",
248259
"algorithmTemplateDescription": "The algorithm template allows users to build workflows using various model operators, enabling tasks such as data cleaning, automated data augmentation, and analysis.",
249260
"taskTemplate": "Task Template",
250261
"searchTemplate": "Search Template",
@@ -362,6 +373,9 @@ export const dataPipelines = {
362373

363374
"opencsg_data_extraction_preprocess_internal": "opencsg data extraction preprocess",
364375
"opencsg_scrape_url_data_preprocess_internal": "opencsg scrape url data preprocess",
376+
"fineweb_edu_chinese_common_internal": "fineweb edu chinese common",
377+
"smoltalk_chinese_common_internal": "smoltalk chinese common",
378+
"cosmopedia_chinese_preprocess_internal": "cosmopedia chinese preprocess",
365379

366380

367381
"analysis_common_internal_dec": "This analyzer class is used to analyze specific datasets. It calculates statistics for all filtering operations in the configuration file, applies various analyses (such as overall analysis, column-by-column analysis, etc.) to these statistics, and generates analysis results (statistical tables, distribution charts, etc.) to help users better understand the input dataset.",
@@ -379,4 +393,7 @@ export const dataPipelines = {
379393
"quality_classifier_common_internal_dec": "This quality classifier class is used to predict the scores of documents in the dataset. It will calculate scores for all rows and provide two columns for each row: score and should_keep, to help users decide which row should be deleted. By default, if the score is higher than 0.9, the row will be marked as should_keep=1.",
380394
"opencsg_data_extraction_preprocess_internal_dec": "A high-quality tool for converting PDF to Markdown and JSON",
381395
"opencsg_scrape_url_data_preprocess_internal_dec": "A large language model-based data scraping tool for websites and local documents (XML, HTML, JSON, etc.)",
396+
"fineweb_edu_chinese_common_internal_dec": "Users can define their own scoring criteria, score the data from the data source based on these criteria, and filter the data. The maximum score is 5.",
397+
"smoltalk_chinese_common_internal_dec": "Use a fixed system_prompt to generate relevant multi-round dialogues with a large model and score them. Filter the data based on the score specified by the user, and only retain the one with the highest score.",
398+
"cosmopedia_chinese_preprocess_internal_dec": "A detailed tutorial on converting raw text to WikiHow style using the MakeCosmopediaMapper operator. This tool invokes large language models to generate structured tutorial content based on the input seed text.",
382399
}

frontend/src/locales/en_js/operator_en.json

Lines changed: 101 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1670,5 +1670,106 @@
16701670
"after": "The dataset adds embedding, nn_indices, and nn_scores fields containing vector representations of text and nearest neighbor information"
16711671
},
16721672
"params": []
1673+
},
1674+
"gather_generated_data_filter": {
1675+
"name": "gather_generated_data_filter",
1676+
"description": "Filter for collecting and processing generated data.",
1677+
"type": "Filter",
1678+
"group": "",
1679+
"samples": {
1680+
"before": "Based on the results of the previous step, remove the | | and < | im_end | > characters and filter to get the empty content data.",
1681+
"after": ""
1682+
},
1683+
"params": []
1684+
},
1685+
"annotate_edu_train_bert_scorer_mapper": {
1686+
"name": "annotate_edu_train_bert_scorer_mapper",
1687+
"description": "Annotate Edu Train BERT Scorer",
1688+
"type": "Filter",
1689+
"group": "",
1690+
"samples": {
1691+
"before": "Score a field and add a _score field for the result.",
1692+
"after": ""
1693+
},
1694+
"params": [
1695+
{
1696+
"name": "auth_token",
1697+
"type": "LIST",
1698+
"option_values": null,
1699+
"value": ""
1700+
},
1701+
{
1702+
"name": "model_name",
1703+
"type": "LIST",
1704+
"option_values": null,
1705+
"value": "text-embedding-v4"
1706+
},
1707+
{
1708+
"name": "dimensions",
1709+
"type": "PositiveFloat",
1710+
"option_values": null,
1711+
"value": "1024"
1712+
},
1713+
{
1714+
"name": "model_url",
1715+
"type": "LIST",
1716+
"option_values": null,
1717+
"value": "https://dashscope.aliyuncs.com/compatible-mode/v1"
1718+
},
1719+
{
1720+
"name": "query_text",
1721+
"type": "LIST",
1722+
"option_values": null,
1723+
"value": "What is Deep Learning?"
1724+
}
1725+
]
1726+
},
1727+
"dedup_and_save_deduplicator": {
1728+
"name": "dedup_and_save_deduplicator",
1729+
"description": "A deduplicator based on graph connectivity. It constructs a similarity graph by connecting samples with similarity scores above the threshold, then keeps only one sample (with minimum index) from each connected component. Suitable for datasets with pre-computed nearest neighbor similarity information.",
1730+
"type": "Deduplicator",
1731+
"group": "",
1732+
"samples": {
1733+
"before": "",
1734+
"after": ""
1735+
},
1736+
"params": [
1737+
{
1738+
"name": "similarity_threshold",
1739+
"type": "PositiveFloat",
1740+
"option_values": null,
1741+
"value": 0.5
1742+
}
1743+
]
1744+
},
1745+
"pipeline_magpie_zh_mapper": {
1746+
"name": "pipeline_magpie_zh_mapper",
1747+
"description": "Using the deepseek-v2.5 or qwen2.5 model, generate multi-round dialogue data based on the manually designed system_prompt corresponding to multiple tasks",
1748+
"type": "Mapper",
1749+
"group": "",
1750+
"samples": {
1751+
"before": "",
1752+
"after": ""
1753+
},
1754+
"params": [
1755+
{
1756+
"name": "model_name",
1757+
"type": "LIST",
1758+
"option_values": null,
1759+
"value": "qwen-plus"
1760+
},
1761+
{
1762+
"name": "auth_token",
1763+
"type": "LIST",
1764+
"option_values": null,
1765+
"value": ""
1766+
},
1767+
{
1768+
"name": "model_url",
1769+
"type": "LIST",
1770+
"option_values": null,
1771+
"value": "https://dashscope.aliyuncs.com/compatible-mode/v1"
1772+
}
1773+
]
16731774
}
16741775
}

0 commit comments

Comments
 (0)