Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README-zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,15 @@ make install-mineru
```

### 部署DeerFlow服务
1. 修改runtime/deer-flow/.env.example,添加SEARCH_API_KEY和EMBEDDING模型配置
2. 修改runtime/deer-flow/.conf.yaml.example,添加基础模型服务配置
3. 执行`make install-deer-flow`
```bash
make install-deer-flow
```

### 本地开发部署
本地代码修改后,请执行以下命令构建镜像并使用本地镜像部署
```bash
make build
make install REGISTRY=""
make install dev=true
```

## 🤝 贡献指南
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ public void deleteTask(String taskId) {
}

public void executeTask(String taskId) {
List<CleaningResultDto> failed = cleaningResultRepo.findByInstanceId(taskId, "FAILED");
Set<String> failedSet = failed.stream().map(CleaningResultDto::getSrcFileId).collect(Collectors.toSet());
List<CleaningResultDto> succeed = cleaningResultRepo.findByInstanceId(taskId, "COMPLETED");
Set<String> succeedSet = succeed.stream().map(CleaningResultDto::getSrcFileId).collect(Collectors.toSet());
CleaningTaskDto task = cleaningTaskRepo.findTaskById(taskId);
scanDataset(taskId, task.getSrcDatasetId(), failedSet);
scanDataset(taskId, task.getSrcDatasetId(), succeedSet);
cleaningResultRepo.deleteByInstanceId(taskId, "FAILED");
taskScheduler.executeTask(taskId);
}
Expand Down Expand Up @@ -232,7 +232,7 @@ private void scanDataset(String taskId, String srcDatasetId) {
} while (pageNumber < datasetFiles.getTotalPages());
}

private void scanDataset(String taskId, String srcDatasetId, Set<String> failedFiles) {
private void scanDataset(String taskId, String srcDatasetId, Set<String> succeedFiles) {
int pageNumber = 0;
int pageSize = 500;
PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize);
Expand All @@ -243,7 +243,7 @@ private void scanDataset(String taskId, String srcDatasetId, Set<String> failedF
break;
}
List<Map<String, Object>> files = datasetFiles.getContent().stream()
.filter(content -> failedFiles.contains(content.getId()))
.filter(content -> !succeedFiles.contains(content.getId()))
.map(content -> Map.of("fileName", (Object) content.getFileName(),
"fileSize", content.getFileSize(),
"filePath", content.getFilePath(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,6 @@ const OperatorFlow: React.FC<OperatorFlowProps> = ({
{operator?.categories?.map((categoryId) => {
return <Tag color="default">{categoryMap[categoryId].name}</Tag>
})}
{/* 参数状态指示 */}
{Object.values(operator.configs).some(
(param: any) =>
(param.type === "input" && !param.value) ||
(param.type === "checkbox" &&
Array.isArray(param.value) &&
param.value.length === 0)
) && <Tag color="red">待配置</Tag>}
{/* 操作按钮 */}
<span
className="cursor-pointer text-red-500"
Expand Down
13 changes: 9 additions & 4 deletions runtime/python-executor/datamate/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from datamate.core.constant import Fields
from datamate.core.base_op import OPERATORS, BaseOp

from core.base_op import Filter as RELATIVE_Filter, Mapper as RELATIVE_Mapper, Slicer as RELATIVE_Slicer

rd.DataContext.get_current().enable_progress_bars = False


Expand Down Expand Up @@ -136,7 +138,10 @@ def load_ops_module(self, op_name):
parent_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ops")
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
registry_content = OPERATORS.modules[op_name]
registry_content = OPERATORS.modules.get(op_name)
if registry_content is None:
from core.base_op import OPERATORS as RELATIVE_OPERATORS
registry_content = RELATIVE_OPERATORS.modules.get(op_name)
if isinstance(registry_content, str):
# registry_content是module的路径
submodule = importlib.import_module(registry_content)
Expand Down Expand Up @@ -171,23 +176,23 @@ def _run_single_op(self, operators_cls, init_kwargs, **kwargs):

kwargs.update({"ext_params": {}, "failed_reason": {}, "target_type": None})
try:
if issubclass(operators_cls, Mapper):
if issubclass(operators_cls, (Mapper, RELATIVE_Mapper)):
self.data = self.data.map(operators_cls,
fn_constructor_kwargs=init_kwargs,
fn_kwargs=kwargs,
resources=resources,
num_cpus=0.05,
concurrency=(1, 1 if operators_cls.use_model else int(max_actor_nums)))

elif issubclass(operators_cls, Slicer):
elif issubclass(operators_cls, (Slicer, RELATIVE_Slicer)):
self.data = self.data.flat_map(operators_cls,
fn_constructor_kwargs=init_kwargs,
fn_kwargs=kwargs,
resources=resources,
num_cpus=0.05,
concurrency=(1, int(max_actor_nums)))

elif issubclass(operators_cls, Filter):
elif issubclass(operators_cls, (Filter, RELATIVE_Filter)):
self.data = self.data.filter(operators_cls,
fn_constructor_kwargs=init_kwargs,
fn_kwargs=kwargs,
Expand Down
2 changes: 1 addition & 1 deletion scripts/images/deer-flow-backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ RUN --mount=type=cache,target=/root/.cache/uv \
EXPOSE 8000

# Run the application.
CMD ["uv", "run", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]
CMD ["uv", "run", "--no-sync", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]