labring · FinleyGe · Sep 15, 2025 · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/bun.lock b/bun.lock
diff --git a/modules/tool/packages/mineru/children/parseLocal/config.ts b/modules/tool/packages/mineru/children/parseLocal/config.ts
@@ -0,0 +1,117 @@
+import { defineTool } from '@tool/type';
+import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt';
+import { ToolTypeEnum } from '@tool/type/tool';
+
+/**
+ * Tool definition for the "parse with a locally deployed MinerU api v2" child tool.
+ *
+ * Declares the localized name and description, a documentation link, and a
+ * single version (0.1.0) with its workflow inputs and one `result` output.
+ */
+export default defineTool({
+  type: ToolTypeEnum.tools,
+  name: {
+    'zh-CN': '本地部署解析',
+    en: 'Parse file by local MinerU api v2'
+  },
+  description: {
+    'zh-CN': '使用本地部署的 MinerU api v2 解析文件，支持 pdf、png、jpg、jpeg 等多种格式',
+    en: 'Parse the file using the local MinerU api v2, supports pdf, png, jpg, jpeg and other formats'
+  },
+  courseUrl: 'https://github.com/opendatalab/MinerU/blob/master/mineru/cli/fast_api.py#L63',
+  versionList: [
+    {
+      value: '0.1.0',
+      description: 'Default version',
+      inputs: [
+        // Files to parse; selectable through the file picker or a workflow reference.
+        {
+          key: 'files',
+          label: 'files',
+          renderTypeList: [FlowNodeInputTypeEnum.fileSelect, FlowNodeInputTypeEnum.reference],
+          valueType: WorkflowIOValueTypeEnum.arrayString,
+          required: true,
+          description: '需要解析的文件（支持.pdf、.png、.jpg、.jpeg 多种格式）',
+          canSelectFile: true,
+          canSelectImg: true
+        },
+        // Parse method: one of auto / ocr / txt, defaulting to auto.
+        {
+          key: 'parse_method',
+          label: '解析方法',
+          renderTypeList: [FlowNodeInputTypeEnum.select],
+          valueType: WorkflowIOValueTypeEnum.string,
+          list: [
+            { label: 'auto', value: 'auto' },
+            { label: 'ocr', value: 'ocr' },
+            { label: 'txt', value: 'txt' }
+          ],
+          required: false,
+          description: '解析方法，默认 auto',
+          defaultValue: 'auto'
+        },
+        // Formula recognition switch, on by default.
+        {
+          key: 'formula_enable',
+          label: '开启公式识别',
+          renderTypeList: [FlowNodeInputTypeEnum.switch],
+          valueType: WorkflowIOValueTypeEnum.boolean,
+          required: false,
+          description: '是否启动公式识别功能，默认 true',
+          defaultValue: true
+        },
+        // Table recognition switch, on by default.
+        {
+          key: 'table_enable',
+          label: '开启表格识别',
+          renderTypeList: [FlowNodeInputTypeEnum.switch],
+          valueType: WorkflowIOValueTypeEnum.boolean,
+          required: false,
+          description: '是否启动表格识别功能，默认 true',
+          defaultValue: true
+        },
+        // Whether the structured JSON content list is returned, off by default.
+        {
+          key: 'return_content_list',
+          label: '返回结构化 json',
+          renderTypeList: [FlowNodeInputTypeEnum.switch],
+          valueType: WorkflowIOValueTypeEnum.boolean,
+          required: false,
+          description: '是否返回结构化 json，默认 false',
+          defaultValue: false
+        },
+        // Comma-separated document languages, defaulting to "ch".
+        {
+          key: 'lang_list',
+          label: '文档语言',
+          renderTypeList: [FlowNodeInputTypeEnum.textarea],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: false,
+          description:
+            '指定文档语言，默认 ch，长度跟文件数量一致，否则取第一个，按逗号分隔，其他可选值列表详见：https://www.paddleocr.ai/latest/en/version3.x/algorithm/PP-OCRv5/PP-OCRv5_multi_languages.html#4-supported-languages-and-abbreviations',
+          defaultValue: 'ch'
+        },
+        // MinerU parsing backend, defaulting to "pipeline".
+        {
+          key: 'backend',
+          label: '解析后端',
+          renderTypeList: [FlowNodeInputTypeEnum.select],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: false,
+          description: 'mineru 解析后端，默认pipeline。',
+          list: [
+            { label: 'pipeline', value: 'pipeline' },
+            { label: 'vlm-transformers', value: 'vlm-transformers' },
+            { label: 'vlm-sglang-engine', value: 'vlm-sglang-engine' },
+            { label: 'vlm-sglang-client', value: 'vlm-sglang-client' }
+          ],
+          defaultValue: 'pipeline'
+        },
+        // sglang server address; per the description, needed for the vlm-sglang-client backend.
+        {
+          key: 'sglang_server_url',
+          label: 'sglang 服务地址',
+          renderTypeList: [FlowNodeInputTypeEnum.input],
+          valueType: WorkflowIOValueTypeEnum.string,
+          required: false,
+          description: 'sglang 服务地址，当 backend 为 vlm-sglang-client 时必填。',
+          defaultValue: ''
+        }
+      ],
+      outputs: [
+        // NOTE(review): declared as arrayObject, while the OutputType schema in
+        // src/index.ts validates `result` as a record keyed by file name — confirm
+        // which shape is intended.
+        {
+          valueType: WorkflowIOValueTypeEnum.arrayObject,
+          key: 'result',
+          label: '解析结果',
+          description: '解析后的数据'
+        }
+      ]
+    }
+  ]
+});
diff --git a/modules/tool/packages/mineru/children/parseLocal/index.ts b/modules/tool/packages/mineru/children/parseLocal/index.ts
@@ -0,0 +1,10 @@
+import config from './config';
+import { InputType, OutputType, tool as toolCb } from './src';
+import { exportTool } from '@tool/utils/tool';
+
+// Bundle the tool config, the input/output schemas and the tool callback into
+// the module's default export.
+export default exportTool({
+  config,
+  InputType,
+  OutputType,
+  toolCb
+});
diff --git a/modules/tool/packages/mineru/children/parseLocal/src/index.ts b/modules/tool/packages/mineru/children/parseLocal/src/index.ts
@@ -0,0 +1,172 @@
+import { uploadFile } from '@tool/utils/uploadFile';
+import { z } from 'zod';
+import path from 'path';
+
+/** Builds an optional string field that falls back to `fallback` when omitted. */
+const stringWithDefault = (fallback: string) => z.string().optional().default(fallback);
+
+/** Builds an optional boolean field that falls back to `fallback` when omitted. */
+const booleanWithDefault = (fallback: boolean) => z.boolean().optional().default(fallback);
+
+/**
+ * Runtime schema of the tool input.
+ *
+ * `base_url` and `files` are mandatory; every other field is optional and is
+ * filled with the default given here when missing.
+ */
+export const InputType = z.object({
+  base_url: z.string(),
+  token: stringWithDefault(''),
+  files: z.array(z.string()),
+  parse_method: stringWithDefault('auto'),
+  formula_enable: booleanWithDefault(true),
+  table_enable: booleanWithDefault(true),
+  return_md: booleanWithDefault(true),
+  return_content_list: booleanWithDefault(false),
+  lang_list: stringWithDefault('ch'),
+  backend: stringWithDefault('pipeline'),
+  sglang_server_url: stringWithDefault('')
+});
+
+/** Validated tool input combined with the HTTP headers to send to MinerU. */
+type InnerPropsType = z.infer<typeof InputType> & {
+  headers: Record<string, string>;
+};
+
+/** One per-file entry of the `/file_parse` response body, as consumed by `tool`. */
+type ParsedResultItemType = {
+  /** Image name mapped to the image content that is re-uploaded as base64. */
+  images: Record<string, string>;
+  /** JSON-encoded content list; `tool` runs it through `JSON.parse`. */
+  content_list?: string;
+  /** Markdown text of the parsed file. */
+  md_content: string;
+};
+
+/** Body of the `/file_parse` response: parsed entries keyed by file name. */
+type ParsedResultType = {
+  results: Record<string, ParsedResultItemType>;
+};
+
+/** One parsed file as returned by the tool. */
+type ResultItemType = {
+  /** File name key taken from the MinerU response. */
+  filename: string;
+  /** Access URLs of the re-uploaded images. */
+  images?: string[];
+  /** Content list decoded from its JSON string form. */
+  content_list?: any[];
+  /** Markdown with image paths swapped for the uploaded URLs. */
+  md_content?: string;
+};
+
+/** Schema of a single parsed-file entry in the tool output. */
+const ResultItemSchema = z.object({
+  filename: z.string(),
+  images: z.array(z.string()).optional(),
+  content_list: z.array(z.any()).optional(),
+  md_content: z.string().optional()
+});
+
+/**
+ * Runtime schema of the tool output: `result` is a record whose values are
+ * arrays of parsed-file entries.
+ */
+export const OutputType = z.object({
+  result: z.record(z.array(ResultItemSchema))
+});
+
+/**
+ * Builds the request headers for the MinerU call.
+ *
+ * @param token Optional bearer token.
+ * @returns An object with an `Authorization: Bearer <token>` entry when the
+ *   token is non-empty, otherwise an empty object.
+ */
+function buildHeaders(token?: string) {
+  return token ? { Authorization: `Bearer ${token}` } : {};
+}
+
+/**
+ * Uploads one image through `uploadFile` and resolves to its access URL.
+ *
+ * @param filename Passed to `uploadFile` as `defaultFilename`.
+ * @param content Passed to `uploadFile` as `base64`.
+ * @returns The `accessUrl` reported by `uploadFile`.
+ */
+async function uploadBase64Image(filename: string, content: string) {
+  const uploaded = await uploadFile({
+    base64: content,
+    defaultFilename: filename
+  });
+  return uploaded.accessUrl;
+}
+
+/**
+ * Replaces every `images/<key>` reference in `content` with the URL mapped to that key.
+ *
+ * Keys and URLs are treated as literal text: regular-expression metacharacters
+ * in a key (such as the `.` of a file extension) match only themselves, and `$`
+ * sequences in a URL are inserted verbatim.
+ *
+ * @param content Text (for example Markdown) containing `images/<key>` references.
+ * @param images Map from image key to its replacement URL.
+ * @returns The text with all known image references replaced.
+ */
+function replaceImageUrl(content: string, images: Record<string, string>): string {
+  let replaced = content;
+  for (const [key, value] of Object.entries(images)) {
+    // split/join is a literal global replace: no regex built from the key, and
+    // no special handling of "$" patterns in the replacement value.
+    replaced = replaced.split(`images/${key}`).join(value);
+  }
+  return replaced;
+}
+
+/**
+ * Sends the given files to the MinerU `/file_parse` endpoint in one multipart
+ * request and returns the parsed result of every file.
+ *
+ * Each input file is downloaded from its URL and attached under `files`, then
+ * the form is posted to `<base_url>/file_parse`. Images in the response are
+ * re-uploaded through `uploadFile`, and the `images/<name>` references in the
+ * returned Markdown are rewritten to the uploaded URLs.
+ *
+ * @param props Validated tool input (see `InputType`).
+ * @returns `{ result }`, mapping each file name in the response to its parsed entries.
+ * @throws Rejects with a string message when `base_url` is empty, a file cannot
+ *   be downloaded, the endpoint does not answer with status 200, or the response
+ *   has no `results`. Throws an `Error` when a returned `content_list` is not
+ *   valid JSON.
+ */
+export async function tool(props: z.infer<typeof InputType>): Promise<z.infer<typeof OutputType>> {
+  const { base_url, token, lang_list, files } = props;
+
+  if (!base_url) {
+    return Promise.reject('MinerU base url is required');
+  }
+
+  // Strip trailing slashes so "http://host/" does not produce "//file_parse".
+  const url = `${base_url.replace(/\/+$/, '')}/file_parse`;
+
+  // lang_list arrives as a comma-separated string; fall back to "ch" when it
+  // contains no usable entry.
+  const parsedLangs = lang_list
+    .split(',')
+    .map((lang) => lang.trim())
+    .filter((lang) => lang.length > 0);
+  const langList = parsedLangs.length > 0 ? parsedLangs : ['ch'];
+
+  // Build a single multipart form carrying all files.
+  const formData = new FormData();
+  for (const filePath of files) {
+    const fileRes = await fetch(filePath);
+    if (!fileRes.ok) {
+      // Do not forward an HTTP error page to the parser as if it were the document.
+      return Promise.reject(
+        `Download file failed: ${fileRes.status} ${fileRes.statusText} ${filePath}`
+      );
+    }
+    const fileBlob = await fileRes.blob();
+    // Drop the query string before taking the file name from the URL.
+    const baseName = path.basename(filePath.split('?')[0]);
+    formData.append('files', fileBlob, baseName);
+  }
+  formData.append('server_url', props.sglang_server_url);
+  // One form field per language so the server receives a list rather than a
+  // single "a,b" value.
+  for (const lang of langList) {
+    formData.append('lang_list', lang);
+  }
+  formData.append('backend', props.backend);
+  formData.append('parse_method', props.parse_method);
+  formData.append('formula_enable', props.formula_enable.toString());
+  formData.append('table_enable', props.table_enable.toString());
+  formData.append('return_md', props.return_md.toString());
+  formData.append('return_content_list', props.return_content_list.toString());
+  // Images are always requested so Markdown image links can be rewritten below.
+  formData.append('return_images', true.toString());
+
+  // No Content-Type header is set here: only the optional Authorization header
+  // is sent alongside the FormData body.
+  const headers = buildHeaders(token) as Record<string, string>;
+
+  const res = await fetch(url, {
+    method: 'POST',
+    headers,
+    body: formData
+  });
+
+  if (res.status !== 200) {
+    return Promise.reject(`Parse failed: ${res.status} ${res.statusText} ${await res.text()}`);
+  }
+
+  const data: ParsedResultType = await res.json();
+
+  if (!data.results) {
+    return Promise.reject('Parsed result is empty');
+  }
+
+  const result: Record<string, ResultItemType[]> = {};
+
+  for (const [parsedFilename, resultItem] of Object.entries(data.results)) {
+    const item: ResultItemType = {
+      filename: parsedFilename
+    };
+    // Image key -> uploaded access URL, used to rewrite the Markdown links.
+    const uploadedImages: Record<string, string> = {};
+
+    if (resultItem.images) {
+      item.images = [];
+      for (const [key, value] of Object.entries(resultItem.images)) {
+        const accessUrl = await uploadBase64Image(key, value);
+        item.images.push(accessUrl);
+        uploadedImages[key] = accessUrl;
+      }
+    }
+
+    if (resultItem.content_list) {
+      try {
+        item.content_list = JSON.parse(resultItem.content_list);
+      } catch (error) {
+        throw new Error('content_list is not a valid JSON string');
+      }
+    }
+
+    if (resultItem.md_content) {
+      item.md_content = replaceImageUrl(resultItem.md_content, uploadedImages);
+    }
+
+    if (!result[parsedFilename]) {
+      result[parsedFilename] = [];
+    }
+    result[parsedFilename].push(item);
+  }
+
+  return {
+    result
+  };
+}