diff --git a/packages/shared/__tests__/file-processing.test.ts b/packages/shared/__tests__/file-processing.test.ts new file mode 100644 index 00000000000..3dcdb20bd9f --- /dev/null +++ b/packages/shared/__tests__/file-processing.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it } from 'vitest' + +import { + FeatureCapabilitySchema, + FileProcessorOverrideSchema, + FileProcessorTemplateSchema, + FileProcessorTemplatesSchema, + PRESETS_FILE_PROCESSORS +} from '../data/presets/file-processing' +import { FILE_TYPE } from '../data/types/file' + +describe('FeatureCapabilitySchema', () => { + it('supports multiple input types for a single capability', () => { + const result = FeatureCapabilitySchema.safeParse({ + feature: 'text_extraction', + inputs: [FILE_TYPE.IMAGE, FILE_TYPE.DOCUMENT], + output: FILE_TYPE.TEXT + }) + + expect(result.success).toBe(true) + }) +}) + +describe('FileProcessorTemplatesSchema', () => { + it('validates built-in presets', () => { + expect(() => FileProcessorTemplatesSchema.parse(PRESETS_FILE_PROCESSORS)).not.toThrow() + }) + + it('rejects processor-level metadata', () => { + const result = FileProcessorTemplateSchema.safeParse({ + id: 'paddleocr', + type: 'api', + metadata: {}, + capabilities: [ + { + feature: 'text_extraction', + inputs: [FILE_TYPE.IMAGE], + output: FILE_TYPE.TEXT + } + ] + }) + + expect(result.success).toBe(false) + }) + + it('rejects duplicate features in a single processor template', () => { + const result = FileProcessorTemplateSchema.safeParse({ + id: 'paddleocr', + type: 'api', + capabilities: [ + { + feature: 'text_extraction', + inputs: [FILE_TYPE.IMAGE], + output: FILE_TYPE.TEXT + }, + { + feature: 'text_extraction', + inputs: [FILE_TYPE.DOCUMENT], + output: FILE_TYPE.TEXT + } + ] + }) + + expect(result.success).toBe(false) + }) +}) + +describe('FileProcessorOverrideSchema', () => { + it('accepts valid overrides', () => { + const result = FileProcessorOverrideSchema.safeParse({ + apiKeys: ['test-key'], + 
capabilities: { + text_extraction: { + apiHost: 'https://example.com', + modelId: 'model-1' + } + }, + options: { + langs: ['eng', 'chi_sim'] + } + }) + + expect(result.success).toBe(true) + }) + + it('rejects invalid urls', () => { + const result = FileProcessorOverrideSchema.safeParse({ + capabilities: { + markdown_conversion: { + apiHost: 'not-a-url' + } + } + }) + + expect(result.success).toBe(false) + }) + + it('rejects unknown feature overrides', () => { + const result = FileProcessorOverrideSchema.safeParse({ + capabilities: { + vision: { + apiHost: 'https://example.com' + } + } + }) + + expect(result.success).toBe(false) + }) +}) diff --git a/packages/shared/__tests__/file-type.test.ts b/packages/shared/__tests__/file-type.test.ts new file mode 100644 index 00000000000..cabd1758bc3 --- /dev/null +++ b/packages/shared/__tests__/file-type.test.ts @@ -0,0 +1,15 @@ +import { describe, expect, it } from 'vitest' + +import { FILE_TYPE, FileTypeSchema } from '../data/types/file' + +describe('FileTypeSchema', () => { + it('accepts canonical file types', () => { + expect(FileTypeSchema.safeParse(FILE_TYPE.IMAGE).success).toBe(true) + expect(FileTypeSchema.safeParse(FILE_TYPE.DOCUMENT).success).toBe(true) + expect(FileTypeSchema.safeParse(FILE_TYPE.TEXT).success).toBe(true) + }) + + it('rejects unknown file types', () => { + expect(FileTypeSchema.safeParse('markdown').success).toBe(false) + }) +}) diff --git a/packages/shared/data/api/schemas/fileProcessing.ts b/packages/shared/data/api/schemas/fileProcessing.ts new file mode 100644 index 00000000000..0ece3faa9a4 --- /dev/null +++ b/packages/shared/data/api/schemas/fileProcessing.ts @@ -0,0 +1,49 @@ +/** + * File Processing API Schema definitions + * + * Contains file processing endpoints for: + * - Listing available processors + * - Reading and updating processor configuration + */ + +import type { FileProcessorId, FileProcessorOverride } from '@shared/data/preference/preferenceTypes' +import type { 
FileProcessorMerged } from '@shared/data/presets/file-processing' + +// ============================================================================ +// API Schema Definitions +// ============================================================================ + +/** + * File Processing API Schema definitions + */ +export interface FileProcessingSchemas { + /** + * List available processors + * @example GET /file-processing/processors + */ + '/file-processing/processors': { + /** Get list of available processors */ + GET: { + response: FileProcessorMerged[] + } + } + + /** + * Get or update processor configuration + * @example GET /file-processing/processors/tesseract + * @example PATCH /file-processing/processors/tesseract { "apiKeys": ["xxx"] } + */ + '/file-processing/processors/:id': { + /** Get processor configuration */ + GET: { + params: { id: FileProcessorId } + response: FileProcessorMerged + } + /** Update processor configuration */ + PATCH: { + params: { id: FileProcessorId } + body: FileProcessorOverride + response: FileProcessorMerged + } + } +} diff --git a/packages/shared/data/api/schemas/index.ts b/packages/shared/data/api/schemas/index.ts index 703b92ff247..ec6f4cf81fd 100644 --- a/packages/shared/data/api/schemas/index.ts +++ b/packages/shared/data/api/schemas/index.ts @@ -20,6 +20,7 @@ */ import type { AssertValidSchemas } from '../apiTypes' +import type { FileProcessingSchemas } from './fileProcessing' import type { MessageSchemas } from './messages' import type { TestSchemas } from './test' import type { TopicSchemas } from './topics' @@ -36,4 +37,4 @@ import type { TopicSchemas } from './topics' * 1. Create the schema file (e.g., topic.ts) * 2. 
Import and add to intersection below */ -export type ApiSchemas = AssertValidSchemas +export type ApiSchemas = AssertValidSchemas diff --git a/packages/shared/data/preference/preferenceSchemas.ts b/packages/shared/data/preference/preferenceSchemas.ts index 59eb8d5cbb3..79f48cc4917 100644 --- a/packages/shared/data/preference/preferenceSchemas.ts +++ b/packages/shared/data/preference/preferenceSchemas.ts @@ -404,6 +404,12 @@ export interface PreferenceSchemas { 'feature.translate.model_prompt': string // redux/settings/targetLanguage 'feature.translate.target_language': string + // redux/preprocess/defaultProvider + 'file_processing.default.markdown_conversion': PreferenceTypes.FileProcessorId + // redux/ocr/imageProviderId + 'file_processing.default.text_extraction': PreferenceTypes.FileProcessorId | null + // target-key-definitions/complex/complex + 'file_processing.overrides': PreferenceTypes.FileProcessorOverrides // redux/shortcuts/shortcuts.exit_fullscreen 'shortcut.app.exit_fullscreen': Record // redux/shortcuts/shortcuts.search_message @@ -683,6 +689,9 @@ export const DefaultPreferences: PreferenceSchemas = { 'feature.selection.trigger_mode': PreferenceTypes.SelectionTriggerMode.Selected, 'feature.translate.model_prompt': TRANSLATE_PROMPT, 'feature.translate.target_language': 'en-us', + 'file_processing.default.markdown_conversion': 'mineru', + 'file_processing.default.text_extraction': null, + 'file_processing.overrides': {} as PreferenceTypes.FileProcessorOverrides, 'shortcut.app.exit_fullscreen': { editable: false, enabled: true, key: ['Escape'], system: true }, 'shortcut.app.search_message': { editable: true, @@ -751,9 +760,9 @@ export const DefaultPreferences: PreferenceSchemas = { /** * 生成统计: - * - 总配置项: 215 + * - 总配置项: 218 * - electronStore项: 1 - * - redux项: 203 + * - redux项: 205 * - localStorage项: 0 * - dexieSettings项: 0 */ diff --git a/packages/shared/data/preference/preferenceTypes.ts b/packages/shared/data/preference/preferenceTypes.ts index 
82abbd2c654..e75ba4dfe53 100644 --- a/packages/shared/data/preference/preferenceTypes.ts +++ b/packages/shared/data/preference/preferenceTypes.ts @@ -105,6 +105,44 @@ export type MultiModelMessageStyle = 'horizontal' | 'vertical' | 'fold' | 'grid' export type MultiModelGridPopoverTrigger = 'hover' | 'click' +export const FILE_PROCESSOR_TYPES = ['api', 'builtin'] as const + +export type FileProcessorType = (typeof FILE_PROCESSOR_TYPES)[number] + +export const FILE_PROCESSOR_FEATURES = ['text_extraction', 'markdown_conversion'] as const + +export type FileProcessorFeature = (typeof FILE_PROCESSOR_FEATURES)[number] + +export const FILE_PROCESSOR_IDS = [ + 'tesseract', + 'system', + 'paddleocr', + 'ovocr', + 'mineru', + 'doc2x', + 'mistral', + 'open-mineru' +] as const + +export type FileProcessorId = (typeof FILE_PROCESSOR_IDS)[number] + +export type FileProcessorOptions = Record<string, unknown> + +export type CapabilityOverride = { + apiHost?: string + modelId?: string + metadata?: Record<string, unknown> +} + +export type FileProcessorCapabilityOverrides = Partial<Record<FileProcessorFeature, CapabilityOverride>> + +export type FileProcessorOverride = { + apiKeys?: string[] + capabilities?: FileProcessorCapabilityOverrides + options?: FileProcessorOptions +} + +export type FileProcessorOverrides = Partial<Record<FileProcessorId, FileProcessorOverride>> // ============================================================================ // WebSearch Types // ============================================================================ diff --git a/packages/shared/data/presets/file-processing.ts b/packages/shared/data/presets/file-processing.ts new file mode 100644 index 00000000000..dea92647076 --- /dev/null +++ b/packages/shared/data/presets/file-processing.ts @@ -0,0 +1,342 @@ +import * as z from 'zod' + +import { + type CapabilityOverride, + FILE_PROCESSOR_FEATURES, + FILE_PROCESSOR_IDS, + FILE_PROCESSOR_TYPES, + type FileProcessorCapabilityOverrides, + type FileProcessorFeature, + type FileProcessorOptions, + type FileProcessorOverride, + type FileProcessorOverrides +} from
'../preference/preferenceTypes' +import { FILE_TYPE, FileTypeSchema } from '../types/file' + +/** + * File Processing Presets + * + * Templates are read-only metadata about processors. + * User overrides are stored separately in preferences. + * + * i18n: Display names use `processor.${id}.name` + */ + +// ============================================================================ +// Type Definitions +// ============================================================================ + +/** + * Processor service type + */ +export const FileProcessorTypeSchema = z.enum(FILE_PROCESSOR_TYPES) + +/** + * Feature type + */ +export const FileProcessorFeatureSchema = z.enum(FILE_PROCESSOR_FEATURES) + +export const FileProcessorIdSchema = z.enum(FILE_PROCESSOR_IDS) + +/** + * Input file type schema + * Reuses the canonical file type definitions shared across the app. + */ +export const FileProcessorInputSchema = FileTypeSchema.extract([FILE_TYPE.IMAGE, FILE_TYPE.DOCUMENT]) + +const FileProcessorTextOutputSchema = FileTypeSchema.extract([FILE_TYPE.TEXT]) + +/** + * Output content format schema + * `text` reuses the canonical file type, while `markdown` remains a processing-specific format. + */ +export const FileProcessorOutputSchema = z.union([FileProcessorTextOutputSchema, z.literal('markdown')]) + +/** + * Feature capability definition + * + * Each capability binds a feature with its supported inputs, output, and optional API settings. 
+ */ +export const CapabilityMetadataSchema = z.record(z.string(), z.unknown()) +export type CapabilityMetadata = z.infer<typeof CapabilityMetadataSchema> + +export const TextExtractionCapabilitySchema = z + .object({ + feature: z.literal('text_extraction'), + inputs: z.array(FileProcessorInputSchema).min(1), + output: z.literal('text'), + apiHost: z.url().optional(), + modelId: z.string().min(1).optional(), + metadata: CapabilityMetadataSchema.optional() + // supportedFormats?: string[] // Whitelist: only these formats supported (uncomment when needed) + // excludedFormats?: string[] // Blacklist: all formats except these (uncomment when needed) + }) + .strict() +export type TextExtractionCapability = z.infer<typeof TextExtractionCapabilitySchema> + +export const MarkdownConversionCapabilitySchema = z + .object({ + feature: z.literal('markdown_conversion'), + inputs: z.array(z.literal('document')).min(1), + output: z.literal('markdown'), + apiHost: z.url().optional(), + modelId: z.string().min(1).optional(), + metadata: CapabilityMetadataSchema.optional() + // supportedFormats?: string[] // Whitelist: only these formats supported (uncomment when needed) + // excludedFormats?: string[] // Blacklist: all formats except these (uncomment when needed) + }) + .strict() +export type MarkdownConversionCapability = z.infer<typeof MarkdownConversionCapabilitySchema> + +export const FeatureCapabilitySchema = z.discriminatedUnion('feature', [ + TextExtractionCapabilitySchema, + MarkdownConversionCapabilitySchema +]) +export type FeatureCapability = z.infer<typeof FeatureCapabilitySchema> + +/** + * Input type (category) + * Derived from FeatureCapability to keep definitions in sync. + */ +export type FileProcessorInput = FeatureCapability['inputs'][number] + +/** + * Output type + * Derived from FeatureCapability to keep definitions in sync.
+ */ +export type FileProcessorOutput = FeatureCapability['output'] + +/** + * Processor template (read-only metadata) + * + * Note: Display name is retrieved via i18n key `processor.${id}.name` + */ +export const FileProcessorTemplateSchema = z + .object({ + id: FileProcessorIdSchema, + type: FileProcessorTypeSchema, + capabilities: z.array(FeatureCapabilitySchema).min(1) + }) + .strict() + .superRefine((template, ctx) => { + const seenFeatures = new Set() + + template.capabilities.forEach((capability, index) => { + if (seenFeatures.has(capability.feature)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ['capabilities', index, 'feature'], + message: `Duplicate capability feature '${capability.feature}' is not allowed. Use 'inputs' to model multiple input types.` + }) + return + } + + seenFeatures.add(capability.feature) + }) + }) +export type FileProcessorTemplate = z.infer +export const FileProcessorTemplatesSchema = z.array(FileProcessorTemplateSchema) + +// ============================================================================ +// Override Types (for user customization) +// ============================================================================ + +/** + * Processor-specific configuration + * + * Uses a generic Record type without predefined structure. + * Each processor's configuration is interpreted by UI components based on processor.id. + * + * Known options fields: + * - Tesseract: { langs: string[] } // Array of enabled language codes + * + * Examples: + * - { langs: ['chi_sim', 'eng'] } // Tesseract language config + * - { quality: 'high', timeout: 30000 } // Other processor config + */ +export const FileProcessorOptionsSchema: z.ZodType = z.record(z.string(), z.unknown()) + +/** + * Capability override (user customization for a specific feature) + * + * Stored as Record in FileProcessorOverride. 
+ */ +export const CapabilityOverrideSchema: z.ZodType<CapabilityOverride> = z + .object({ + apiHost: z.url().optional(), + modelId: z.string().min(1).optional(), + metadata: CapabilityMetadataSchema.optional() + }) + .strict() + +export const FileProcessorCapabilityOverridesSchema: z.ZodType<FileProcessorCapabilityOverrides> = z + .object({ + markdown_conversion: CapabilityOverrideSchema.optional(), + text_extraction: CapabilityOverrideSchema.optional() + }) + .strict() + +/** + * User-configured processor override (stored in Preference) + * + * Design principles: + * - Only stores user-modified fields + * - apiKey is shared across all features (processor-level) + * - apiHost/modelId are per-feature (in capabilities Record) + * - Field names use camelCase (consistent with TypeScript conventions) + */ +export const FileProcessorOverrideSchema: z.ZodType<FileProcessorOverride> = z + .object({ + apiKeys: z.array(z.string().min(1)).optional(), + capabilities: FileProcessorCapabilityOverridesSchema.optional(), + options: FileProcessorOptionsSchema.optional() + }) + .strict() +export const FileProcessorOverridesSchema: z.ZodType<FileProcessorOverrides> = z.partialRecord( + FileProcessorIdSchema, + FileProcessorOverrideSchema +) + +/** + * Merged processor configuration (template + user override) + * + * Used by both Renderer (UI display/editing) and Main (execution). + * Combines the read-only template with user-configured overrides. + * + * Note: capabilities is an array (from template) with overrides merged in, + * NOT a Record like in FileProcessorOverride.
+ */ +export const FileProcessorMergedSchema = FileProcessorTemplateSchema.extend({ + apiKeys: z.array(z.string().min(1)).optional(), + options: FileProcessorOptionsSchema.optional() +}) +export type FileProcessorMerged = z.infer + +// ============================================================================ +// Processor Presets +// ============================================================================ + +/** + * Built-in processor presets + */ +export const PRESETS_FILE_PROCESSORS = [ + // === Image Processors (former OCR) === + { + id: 'tesseract', + type: 'builtin', + capabilities: [ + { + feature: 'text_extraction', + inputs: ['image'], + output: 'text' + } + ] + }, + { + id: 'system', + type: 'builtin', + capabilities: [{ feature: 'text_extraction', inputs: ['image'], output: 'text' }] + }, + { + id: 'paddleocr', + type: 'api', + capabilities: [ + { + feature: 'text_extraction', + inputs: ['image'], + output: 'text', + apiHost: 'https://paddleocr.aistudio-app.com/', + modelId: 'PP-OCRv5', + metadata: { + optionalPayload: { + useDocOrientationClassify: false, + useDocUnwarping: false + } + } + }, + { + feature: 'markdown_conversion', + inputs: ['document'], + output: 'markdown', + apiHost: 'https://paddleocr.aistudio-app.com/', + modelId: 'PaddleOCR-VL-1.5', + metadata: { + optionalPayload: { + useDocOrientationClassify: false, + useDocUnwarping: false + } + } + } + ] + }, + { + id: 'ovocr', + type: 'builtin', + capabilities: [{ feature: 'text_extraction', inputs: ['image'], output: 'text' }] + }, + + // === Document Processors (former Preprocess) === + { + id: 'mineru', + type: 'api', + capabilities: [ + { + feature: 'markdown_conversion', + inputs: ['document'], + output: 'markdown', + apiHost: 'https://mineru.net', + metadata: { + optionalPayload: { + enable_formula: true, + enable_table: true, + is_ocr: true + } + } + } + ] + }, + { + id: 'doc2x', + type: 'api', + capabilities: [ + { + feature: 'markdown_conversion', + inputs: ['document'], + 
output: 'markdown', + apiHost: 'https://v2.doc2x.noedgeai.com' + } + ] + }, + { + id: 'mistral', + type: 'api', + capabilities: [ + { + feature: 'text_extraction', + inputs: ['image'], + output: 'text', + apiHost: 'https://api.mistral.ai', + modelId: 'mistral-ocr-latest' + }, + { + feature: 'markdown_conversion', + inputs: ['document'], + output: 'markdown', + apiHost: 'https://api.mistral.ai', + modelId: 'mistral-ocr-latest' + } + ] + }, + { + id: 'open-mineru', + type: 'api', + capabilities: [ + { + feature: 'markdown_conversion', + inputs: ['document'], + output: 'markdown', + apiHost: 'http://127.0.0.1:8000' + } + ] + } +] as const satisfies readonly FileProcessorTemplate[] diff --git a/packages/shared/data/types/file.ts b/packages/shared/data/types/file.ts new file mode 100644 index 00000000000..cda56edb335 --- /dev/null +++ b/packages/shared/data/types/file.ts @@ -0,0 +1,21 @@ +import * as z from 'zod' + +export const FILE_TYPE = { + IMAGE: 'image', + VIDEO: 'video', + AUDIO: 'audio', + TEXT: 'text', + DOCUMENT: 'document', + OTHER: 'other' +} as const + +export const FileTypeSchema = z.enum([ + FILE_TYPE.IMAGE, + FILE_TYPE.VIDEO, + FILE_TYPE.AUDIO, + FILE_TYPE.TEXT, + FILE_TYPE.DOCUMENT, + FILE_TYPE.OTHER +]) + +export type FileType = z.infer diff --git a/src/main/data/api/handlers/fileProcessing.ts b/src/main/data/api/handlers/fileProcessing.ts new file mode 100644 index 00000000000..3cdb9dff8fe --- /dev/null +++ b/src/main/data/api/handlers/fileProcessing.ts @@ -0,0 +1,30 @@ +import { fileProcessingService } from '@data/services/FileProcessingService' +import type { ApiHandler, ApiMethods } from '@shared/data/api/apiTypes' +import type { FileProcessingSchemas } from '@shared/data/api/schemas/fileProcessing' + +type FileProcessingHandler> = ApiHandler< + Path, + Method +> + +export const fileProcessingHandlers: { + [Path in keyof FileProcessingSchemas]: { + [Method in keyof FileProcessingSchemas[Path]]: FileProcessingHandler> + } +} = { + 
'/file-processing/processors': { + GET: async () => { + return await fileProcessingService.getProcessors() + } + }, + + '/file-processing/processors/:id': { + GET: async ({ params }) => { + return await fileProcessingService.getProcessorById(params.id) + }, + + PATCH: async ({ params, body }) => { + return await fileProcessingService.updateProcessor(params.id, body) + } + } +} diff --git a/src/main/data/api/handlers/index.ts b/src/main/data/api/handlers/index.ts index 87072fdfc00..7932c6fbe0e 100644 --- a/src/main/data/api/handlers/index.ts +++ b/src/main/data/api/handlers/index.ts @@ -12,6 +12,7 @@ import type { ApiImplementation } from '@shared/data/api/apiTypes' +import { fileProcessingHandlers } from './fileProcessing' import { messageHandlers } from './messages' import { testHandlers } from './test' import { topicHandlers } from './topics' @@ -24,6 +25,7 @@ import { topicHandlers } from './topics' * TypeScript ensures exhaustive coverage - missing handlers cause compile errors. */ export const apiHandlers: ApiImplementation = { + ...fileProcessingHandlers, ...testHandlers, ...topicHandlers, ...messageHandlers diff --git a/src/main/data/migration/v2/migrators/mappings/ComplexPreferenceMappings.ts b/src/main/data/migration/v2/migrators/mappings/ComplexPreferenceMappings.ts index 4fd5f398bfc..fd339814711 100644 --- a/src/main/data/migration/v2/migrators/mappings/ComplexPreferenceMappings.ts +++ b/src/main/data/migration/v2/migrators/mappings/ComplexPreferenceMappings.ts @@ -10,7 +10,7 @@ * 4. Conditional mapping: Target keys determined by source values * * Usage: - * 1. Define transformation function in PreferenceTransformers.ts + * 1. Define transformation function in a colocated mapping file under `mappings/` * 2. Add mapping configuration to COMPLEX_PREFERENCE_MAPPINGS below * 3. 
Add target key definitions in target-key-definitions.json * @@ -19,6 +19,7 @@ */ import { flattenCompressionConfig, migrateWebSearchProviders } from '../transformers/PreferenceTransformers' +import { mergeFileProcessingOverrides } from './FileProcessingOverrideMappings' // ============================================================================ // Type Definitions @@ -108,36 +109,18 @@ export const COMPLEX_PREFERENCE_MAPPINGS: ComplexMapping[] = [ 'chat.web_search.compression.rag_rerank_model_id' ], transform: flattenCompressionConfig + }, + // File processing overrides merging + { + id: 'file_processing_overrides_merge', + description: 'Merge legacy OCR and preprocess providers into file processing overrides', + sources: { + preprocessProviders: { source: 'redux', category: 'preprocess', key: 'providers' }, + ocrProviders: { source: 'redux', category: 'ocr', key: 'providers' } + }, + targetKeys: ['file_processing.overrides'], + transform: mergeFileProcessingOverrides } - - // Example mappings (commented out - uncomment when needed): - // - // { - // id: 'window_bounds_split', - // description: 'Split windowBounds object into separate position and size keys', - // sources: { - // windowBounds: { source: 'electronStore', key: 'windowBounds' } - // }, - // targetKeys: [ - // 'app.window.position.x', - // 'app.window.position.y', - // 'app.window.size.width', - // 'app.window.size.height' - // ], - // transform: splitWindowBounds - // }, - // - // { - // id: 'proxy_config_merge', - // description: 'Merge proxy configuration from multiple sources', - // sources: { - // proxyEnabled: { source: 'redux', category: 'settings', key: 'proxyEnabled' }, - // proxyHost: { source: 'redux', category: 'settings', key: 'proxyHost' }, - // proxyPort: { source: 'electronStore', key: 'ProxyPort' } - // }, - // targetKeys: ['network.proxy.enabled', 'network.proxy.host', 'network.proxy.port'], - // transform: mergeProxyConfig - // } ] // 
============================================================================ diff --git a/src/main/data/migration/v2/migrators/mappings/FileProcessingOverrideMappings.ts b/src/main/data/migration/v2/migrators/mappings/FileProcessingOverrideMappings.ts new file mode 100644 index 00000000000..9364699c832 --- /dev/null +++ b/src/main/data/migration/v2/migrators/mappings/FileProcessingOverrideMappings.ts @@ -0,0 +1,240 @@ +import { + type CapabilityOverride, + FILE_PROCESSOR_IDS, + type FileProcessorFeature, + type FileProcessorId, + type FileProcessorOverride, + type FileProcessorOverrides +} from '@shared/data/preference/preferenceTypes' +import { PRESETS_FILE_PROCESSORS } from '@shared/data/presets/file-processing' + +import type { TransformResult } from './ComplexPreferenceMappings' + +function isNonEmptyString(value: unknown): value is string { + return typeof value === 'string' && value.length > 0 +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value) +} + +function isFileProcessorId(value: unknown): value is FileProcessorId { + return typeof value === 'string' && FILE_PROCESSOR_IDS.includes(value as FileProcessorId) +} + +function ensureOverride(overrides: FileProcessorOverrides, id: FileProcessorId): FileProcessorOverride { + overrides[id] ??= {} + return overrides[id] +} + +function ensureCapability(override: FileProcessorOverride, feature: FileProcessorFeature): CapabilityOverride { + override.capabilities ??= {} + + const existingCapability = override.capabilities[feature] + if (existingCapability) { + return existingCapability + } + + const nextCapability: CapabilityOverride = {} + override.capabilities[feature] = nextCapability + return nextCapability +} + +function mergeOptions(override: FileProcessorOverride, options: Record) { + if (Object.keys(options).length === 0) { + return + } + + override.options = { + ...(isRecord(override.options) ? 
override.options : {}), + ...options + } +} + +function addApiKey(override: FileProcessorOverride, apiKey: unknown) { + if (!isNonEmptyString(apiKey)) { + return + } + + override.apiKeys ??= [] + if (!override.apiKeys.includes(apiKey)) { + override.apiKeys.push(apiKey) + } +} + +function getPresetCapability(processorId: FileProcessorId, feature: FileProcessorFeature) { + const processor = PRESETS_FILE_PROCESSORS.find((preset) => preset.id === processorId) + const capability = processor?.capabilities.find((item) => item.feature === feature) + + return { + apiHost: capability && 'apiHost' in capability ? capability.apiHost : undefined, + modelId: capability && 'modelId' in capability ? capability.modelId : undefined + } +} + +function setCapabilityApiHost( + override: FileProcessorOverride, + processorId: FileProcessorId, + feature: FileProcessorFeature, + apiHost: unknown +) { + if (!isNonEmptyString(apiHost)) { + return + } + + const presetApiHost = getPresetCapability(processorId, feature).apiHost + if (apiHost === presetApiHost) { + return + } + + ensureCapability(override, feature).apiHost = apiHost +} + +function setCapabilityModelId( + override: FileProcessorOverride, + processorId: FileProcessorId, + feature: FileProcessorFeature, + modelId: unknown +) { + if (!isNonEmptyString(modelId)) { + return + } + + const presetModelId = getPresetCapability(processorId, feature).modelId + if (modelId === presetModelId) { + return + } + + ensureCapability(override, feature).modelId = modelId +} + +function normalizeLangs(value: unknown): string[] { + if (Array.isArray(value)) { + return value.filter(isNonEmptyString) + } + + if (!isRecord(value)) { + return [] + } + + return Object.entries(value) + .filter(([, enabled]) => enabled === true) + .map(([lang]) => lang) +} + +function pruneEmptyOverrides(overrides: FileProcessorOverrides) { + for (const [processorId, override] of Object.entries(overrides)) { + if (override.apiKeys?.length === 0) { + delete override.apiKeys + 
} + + if (override.capabilities) { + for (const feature of Object.keys(override.capabilities) as FileProcessorFeature[]) { + const capability = override.capabilities[feature] + if (!capability || Object.keys(capability).length === 0) { + delete override.capabilities[feature] + } + } + + if (Object.keys(override.capabilities).length === 0) { + delete override.capabilities + } + } + + if (isRecord(override.options) && Object.keys(override.options).length === 0) { + delete override.options + } + + if (Object.keys(override).length === 0) { + delete overrides[processorId as FileProcessorId] + } + } +} + +function mergePreprocessProvider(overrides: FileProcessorOverrides, provider: unknown) { + if (!isRecord(provider)) { + return + } + + const providerId = provider.id + if (!isFileProcessorId(providerId)) { + return + } + + const override = ensureOverride(overrides, providerId) + const features: FileProcessorFeature[] = + providerId === 'mistral' ? ['markdown_conversion', 'text_extraction'] : ['markdown_conversion'] + + addApiKey(override, provider.apiKey) + + if (providerId !== 'paddleocr') { + features.forEach((feature) => { + setCapabilityApiHost(override, providerId, feature, provider.apiHost) + setCapabilityModelId(override, providerId, feature, provider.model) + }) + } + + if (isRecord(provider.options)) { + mergeOptions(override, provider.options) + } +} + +function mergeOcrProvider(overrides: FileProcessorOverrides, provider: unknown) { + if (!isRecord(provider)) { + return + } + + const providerId = provider.id + if (!isFileProcessorId(providerId)) { + return + } + + const config = isRecord(provider.config) ? 
provider.config : undefined + if (!config) { + return + } + + const override = ensureOverride(overrides, providerId) + + addApiKey(override, config.accessToken) + if (providerId !== 'paddleocr') { + setCapabilityApiHost(override, providerId, 'text_extraction', config.apiUrl) + } + + const langs = normalizeLangs(config.langs) + if (langs.length > 0) { + mergeOptions(override, { langs }) + } + + if (isRecord(config.api)) { + addApiKey(override, config.api.apiKey) + if (providerId !== 'paddleocr') { + setCapabilityApiHost(override, providerId, 'text_extraction', config.api.apiHost) + } + + if (isNonEmptyString(config.api.apiVersion)) { + mergeOptions(override, { apiVersion: config.api.apiVersion }) + } + } +} + +export function mergeFileProcessingOverrides(sources: { + preprocessProviders?: unknown + ocrProviders?: unknown +}): TransformResult { + const overrides: FileProcessorOverrides = {} + + if (Array.isArray(sources.preprocessProviders)) { + sources.preprocessProviders.forEach((provider) => mergePreprocessProvider(overrides, provider)) + } + + if (Array.isArray(sources.ocrProviders)) { + sources.ocrProviders.forEach((provider) => mergeOcrProvider(overrides, provider)) + } + + pruneEmptyOverrides(overrides) + + return { + 'file_processing.overrides': overrides + } +} diff --git a/src/main/data/migration/v2/migrators/mappings/PreferencesMappings.ts b/src/main/data/migration/v2/migrators/mappings/PreferencesMappings.ts index 73a99252fab..6dfb3d670c0 100644 --- a/src/main/data/migration/v2/migrators/mappings/PreferencesMappings.ts +++ b/src/main/data/migration/v2/migrators/mappings/PreferencesMappings.ts @@ -745,6 +745,12 @@ export const REDUX_STORE_MAPPINGS = { targetKey: 'data.backup.nutstore.max_backups' } ], + preprocess: [ + { + originalKey: 'defaultProvider', + targetKey: 'file_processing.default.markdown_conversion' + } + ], shortcuts: [ { originalKey: 'shortcuts.zoom_in', @@ -825,6 +831,12 @@ export const REDUX_STORE_MAPPINGS = { targetKey: 
'chat.web_search.exclude_domains' } ], + ocr: [ + { + originalKey: 'imageProviderId', + targetKey: 'file_processing.default.text_extraction' + } + ], note: [ { originalKey: 'settings.isFullWidth', diff --git a/src/main/data/migration/v2/migrators/mappings/__tests__/ComplexPreferenceMappings.test.ts b/src/main/data/migration/v2/migrators/mappings/__tests__/ComplexPreferenceMappings.test.ts index 5baf7cc2141..69c7f9d9664 100644 --- a/src/main/data/migration/v2/migrators/mappings/__tests__/ComplexPreferenceMappings.test.ts +++ b/src/main/data/migration/v2/migrators/mappings/__tests__/ComplexPreferenceMappings.test.ts @@ -64,6 +64,15 @@ describe('ComplexPreferenceMappings', () => { expect(Array.isArray(COMPLEX_PREFERENCE_MAPPINGS)).toBe(true) }) + it('should include file processing overrides merge mapping', () => { + const fileProcessingMapping = COMPLEX_PREFERENCE_MAPPINGS.find((m) => m.id === 'file_processing_overrides_merge') + + expect(fileProcessingMapping).toBeDefined() + expect(fileProcessingMapping).toMatchObject({ + id: 'file_processing_overrides_merge', + targetKeys: ['file_processing.overrides'] + }) + }) it('should contain websearch compression flatten mapping', () => { const websearchMapping = COMPLEX_PREFERENCE_MAPPINGS.find((m) => m.id === 'websearch_compression_flatten') expect(websearchMapping).toBeDefined() @@ -79,11 +88,16 @@ describe('ComplexPreferenceMappings', () => { }) describe('getComplexMappingTargetKeys', () => { + it('should return target keys from configured mappings', () => { + const keys = getComplexMappingTargetKeys() + expect(keys).toContain('file_processing.overrides') + }) it('should return target keys from all mappings', () => { const keys = getComplexMappingTargetKeys() expect(keys).toContain('chat.web_search.compression.method') expect(keys).toContain('chat.web_search.provider_overrides') - expect(keys.length).toBe(8) // 7 websearch compression keys + 1 overrides key + expect(keys).toContain('file_processing.overrides') + 
expect(keys.length).toBe(9) // 7 websearch compression keys + 1 provider overrides key + 1 file processing overrides key }) it('should flatten target keys from all mappings', () => { @@ -113,6 +127,11 @@ describe('ComplexPreferenceMappings', () => { }) describe('getComplexMappingById', () => { + it('should return the configured mapping by id', () => { + const mapping = getComplexMappingById('file_processing_overrides_merge') + expect(mapping).toBeDefined() + expect(mapping?.targetKeys).toEqual(['file_processing.overrides']) + }) it('should return mapping by id', () => { const mapping = getComplexMappingById('websearch_compression_flatten') expect(mapping).toBeDefined() diff --git a/src/main/data/migration/v2/migrators/mappings/__tests__/FileProcessingOverrideMappings.test.ts b/src/main/data/migration/v2/migrators/mappings/__tests__/FileProcessingOverrideMappings.test.ts new file mode 100644 index 00000000000..0e3de67427a --- /dev/null +++ b/src/main/data/migration/v2/migrators/mappings/__tests__/FileProcessingOverrideMappings.test.ts @@ -0,0 +1,194 @@ +import { describe, expect, it } from 'vitest' + +import { mergeFileProcessingOverrides } from '../FileProcessingOverrideMappings' + +describe('FileProcessingOverrideMappings', () => { + describe('mergeFileProcessingOverrides', () => { + it('should merge preprocess and ocr providers into file processing overrides', () => { + const result = mergeFileProcessingOverrides({ + preprocessProviders: [ + { + id: 'mineru', + name: 'MinerU', + apiKey: 'mineru-key', + apiHost: 'https://mineru-proxy.example.com', + options: { enable_formula: false } + }, + { + id: 'mistral', + name: 'Mistral', + apiKey: 'mistral-key', + apiHost: 'https://mistral-proxy.example.com', + model: 'mistral-ocr-custom' + }, + { + id: 'paddleocr', + name: 'PaddleOCR', + apiKey: 'paddle-doc-key', + apiHost: 'https://paddle-doc.example.com' + } + ], + ocrProviders: [ + { + id: 'paddleocr', + name: 'PaddleOCR', + capabilities: { image: true }, + config: { + 
apiUrl: 'https://paddle-ocr.example.com', + accessToken: 'paddle-ocr-token' + } + }, + { + id: 'tesseract', + name: 'Tesseract', + capabilities: { image: true }, + config: { + langs: { + eng: true, + chi_sim: false, + fra: true + } + } + }, + { + id: 'system', + name: 'System', + capabilities: { image: true }, + config: { + langs: ['en-us', 'zh-cn'] + } + } + ] + }) + + expect(result).toEqual({ + 'file_processing.overrides': { + mineru: { + apiKeys: ['mineru-key'], + capabilities: { + markdown_conversion: { + apiHost: 'https://mineru-proxy.example.com' + } + }, + options: { enable_formula: false } + }, + mistral: { + apiKeys: ['mistral-key'], + capabilities: { + markdown_conversion: { + apiHost: 'https://mistral-proxy.example.com', + modelId: 'mistral-ocr-custom' + }, + text_extraction: { + apiHost: 'https://mistral-proxy.example.com', + modelId: 'mistral-ocr-custom' + } + } + }, + paddleocr: { + apiKeys: ['paddle-doc-key', 'paddle-ocr-token'] + }, + system: { + options: { + langs: ['en-us', 'zh-cn'] + } + }, + tesseract: { + options: { + langs: ['eng', 'fra'] + } + } + } + }) + }) + + it('should not migrate paddleocr api hosts and only keep keys', () => { + const result = mergeFileProcessingOverrides({ + preprocessProviders: [ + { + id: 'paddleocr', + name: 'PaddleOCR', + apiKey: 'paddle-doc-key', + apiHost: 'https://paddle-doc.example.com' + } + ], + ocrProviders: [ + { + id: 'paddleocr', + name: 'PaddleOCR', + capabilities: { image: true }, + config: { + apiUrl: 'https://paddle-ocr.example.com', + accessToken: 'paddle-ocr-token' + } + } + ] + }) + + expect(result).toEqual({ + 'file_processing.overrides': { + paddleocr: { + apiKeys: ['paddle-doc-key', 'paddle-ocr-token'] + } + } + }) + }) + + it('should apply mistral preprocess credentials to both markdown and text extraction', () => { + const result = mergeFileProcessingOverrides({ + preprocessProviders: [ + { + id: 'mistral', + name: 'Mistral', + apiKey: 'mistral-key', + apiHost: 
'https://mistral-proxy.example.com', + model: 'mistral-ocr-custom' + } + ], + ocrProviders: [] + }) + + expect(result).toEqual({ + 'file_processing.overrides': { + mistral: { + apiKeys: ['mistral-key'], + capabilities: { + markdown_conversion: { + apiHost: 'https://mistral-proxy.example.com', + modelId: 'mistral-ocr-custom' + }, + text_extraction: { + apiHost: 'https://mistral-proxy.example.com', + modelId: 'mistral-ocr-custom' + } + } + } + } + }) + }) + + it('should skip empty values and preset defaults', () => { + const result = mergeFileProcessingOverrides({ + preprocessProviders: [ + { + id: 'doc2x', + name: 'Doc2x', + apiKey: '', + apiHost: 'https://v2.doc2x.noedgeai.com' + }, + { + id: 'open-mineru', + name: 'Open MinerU', + apiKey: '', + apiHost: '' + } + ], + ocrProviders: [] + }) + + expect(result).toEqual({ + 'file_processing.overrides': {} + }) + }) + }) +}) diff --git a/src/main/data/services/FileProcessingService.ts b/src/main/data/services/FileProcessingService.ts new file mode 100644 index 00000000000..2a0b2bb20ba --- /dev/null +++ b/src/main/data/services/FileProcessingService.ts @@ -0,0 +1,148 @@ +import { preferenceService } from '@data/PreferenceService' +import { loggerService } from '@logger' +import { DataApiErrorFactory } from '@shared/data/api' +import type { + CapabilityOverride, + FileProcessorFeature, + FileProcessorId, + FileProcessorOverride, + FileProcessorOverrides +} from '@shared/data/preference/preferenceTypes' +import { type FileProcessorMerged, PRESETS_FILE_PROCESSORS } from '@shared/data/presets/file-processing' + +const logger = loggerService.withContext('DataApi:FileProcessingService') + +function mergeCapabilityOverrides( + current?: Partial>, + updates?: Partial> +): Partial> | undefined { + if (!current && !updates) { + return undefined + } + + const merged: Partial> = { ...current } + + for (const feature of Object.keys(updates ?? 
{}) as FileProcessorFeature[]) { + merged[feature] = { + ...current?.[feature], + ...updates?.[feature] + } + } + + return merged +} + +function mergeProcessorOverrides( + current?: FileProcessorOverride, + updates?: FileProcessorOverride +): FileProcessorOverride { + const currentRest: Partial = current ? { ...current } : {} + const updateRest: Partial = updates ? { ...updates } : {} + const mergedCapabilities = mergeCapabilityOverrides(current?.capabilities, updates?.capabilities) + const mergedOptions = + current?.options || updates?.options + ? { + ...current?.options, + ...updates?.options + } + : undefined + + delete currentRest.capabilities + delete currentRest.options + delete updateRest.capabilities + delete updateRest.options + + return { + ...currentRest, + ...updateRest, + ...(mergedCapabilities && Object.keys(mergedCapabilities).length > 0 ? { capabilities: mergedCapabilities } : {}), + ...(mergedOptions && Object.keys(mergedOptions).length > 0 ? { options: mergedOptions } : {}) + } +} + +function mergeCapabilityConfig }>( + capability: T, + override?: CapabilityOverride +): T { + return { + ...capability, + ...(override?.apiHost !== undefined ? { apiHost: override.apiHost } : {}), + ...(override?.modelId !== undefined ? { modelId: override.modelId } : {}), + ...(override?.metadata !== undefined ? 
{ metadata: override.metadata } : {}) + } +} + +export class FileProcessingService { + private static instance: FileProcessingService + + private constructor() {} + + public static getInstance(): FileProcessingService { + if (!FileProcessingService.instance) { + FileProcessingService.instance = new FileProcessingService() + } + + return FileProcessingService.instance + } + + public async getProcessors(): Promise { + const overrides = this.getOverrides() + + return PRESETS_FILE_PROCESSORS.map((preset) => this.mergeProcessorConfig(preset.id, overrides)) + } + + public async getProcessorById(id: FileProcessorId): Promise { + return this.mergeProcessorConfig(id, this.getOverrides()) + } + + public async updateProcessor(id: FileProcessorId, updates: FileProcessorOverride): Promise { + this.getPresetById(id) + + const overrides = this.getOverrides() + const nextOverrides: FileProcessorOverrides = { + ...overrides, + [id]: mergeProcessorOverrides(overrides[id], updates) + } + + await preferenceService.set('file_processing.overrides', nextOverrides) + + logger.info('Updated file processor overrides', { + processorId: id, + hasApiKeys: Boolean(nextOverrides[id]?.apiKeys?.length), + capabilityCount: Object.keys(nextOverrides[id]?.capabilities || {}).length + }) + + return this.mergeProcessorConfig(id, nextOverrides) + } + + private getOverrides(): FileProcessorOverrides { + return preferenceService.get('file_processing.overrides') ?? 
{} + } + + private getPresetById(processorId: FileProcessorId) { + const preset = PRESETS_FILE_PROCESSORS.find((item) => item.id === processorId) + + if (!preset) { + throw DataApiErrorFactory.notFound('File processor', processorId) + } + + return preset + } + + private mergeProcessorConfig(processorId: FileProcessorId, overrides: FileProcessorOverrides): FileProcessorMerged { + const preset = this.getPresetById(processorId) + const override = overrides[processorId] + + return { + id: preset.id, + type: preset.type, + capabilities: preset.capabilities.map((capability) => + mergeCapabilityConfig(capability, override?.capabilities?.[capability.feature]) + ), + apiKeys: override?.apiKeys, + options: override?.options + } + } +} + +export const fileProcessingService = FileProcessingService.getInstance() diff --git a/src/main/data/services/__tests__/FileProcessingService.test.ts b/src/main/data/services/__tests__/FileProcessingService.test.ts new file mode 100644 index 00000000000..f3bc2973a4e --- /dev/null +++ b/src/main/data/services/__tests__/FileProcessingService.test.ts @@ -0,0 +1,176 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +vi.mock('@data/PreferenceService', async () => { + const { MockMainPreferenceServiceExport } = await import('@test-mocks/main/PreferenceService') + return MockMainPreferenceServiceExport +}) + +import { FileProcessorMergedSchema, PRESETS_FILE_PROCESSORS } from '@shared/data/presets/file-processing' +import { MockMainPreferenceServiceUtils } from '@test-mocks/main/PreferenceService' + +import { fileProcessingService } from '../FileProcessingService' + +describe('FileProcessingService', () => { + beforeEach(() => { + vi.clearAllMocks() + MockMainPreferenceServiceUtils.resetMocks() + }) + + describe('getProcessors', () => { + it('should return all processors with merged overrides', async () => { + MockMainPreferenceServiceUtils.setPreferenceValue('file_processing.overrides', { + paddleocr: { + apiKeys: ['test-key'], + 
options: { + concurrency: 2 + }, + capabilities: { + markdown_conversion: { + modelId: 'custom-model' + } + } + } + }) + + const processors = await fileProcessingService.getProcessors() + const processor = processors.find((item) => item.id === 'paddleocr') + + expect(processors).toHaveLength(PRESETS_FILE_PROCESSORS.length) + expect(processor).toMatchObject({ + id: 'paddleocr', + apiKeys: ['test-key'], + options: { + concurrency: 2 + } + }) + expect(processor?.capabilities).toContainEqual( + expect.objectContaining({ + feature: 'markdown_conversion', + modelId: 'custom-model' + }) + ) + }) + }) + + describe('getProcessorById', () => { + it('should throw when processor does not exist', async () => { + await expect(fileProcessingService.getProcessorById('missing-processor' as never)).rejects.toThrow( + "File processor with id 'missing-processor' not found" + ) + }) + }) + + describe('updateProcessor', () => { + it('should merge processor overrides and preserve existing feature-specific capability fields', async () => { + MockMainPreferenceServiceUtils.setPreferenceValue('file_processing.overrides', { + paddleocr: { + capabilities: { + markdown_conversion: { + apiHost: 'https://old.example.com' + } + }, + options: { + existing: true + } + } + }) + + const updated = await fileProcessingService.updateProcessor('paddleocr', { + capabilities: { + markdown_conversion: { + modelId: 'new-model' + } + }, + options: { + timeout: 30000 + } + }) + + expect(updated.capabilities).toContainEqual( + expect.objectContaining({ + feature: 'markdown_conversion', + apiHost: 'https://old.example.com', + modelId: 'new-model' + }) + ) + expect(updated.options).toMatchObject({ + existing: true, + timeout: 30000 + }) + + expect(MockMainPreferenceServiceUtils.getPreferenceValue('file_processing.overrides')).toMatchObject({ + paddleocr: { + capabilities: { + markdown_conversion: { + apiHost: 'https://old.example.com', + modelId: 'new-model' + } + }, + options: { + existing: true, + timeout: 
30000 + } + } + }) + }) + + it('should not persist overrides when processor does not exist', async () => { + const existingOverrides = { + paddleocr: { + apiKeys: ['existing-key'], + capabilities: { + markdown_conversion: { + modelId: 'existing-model' + } + } + } + } + + MockMainPreferenceServiceUtils.setPreferenceValue('file_processing.overrides', existingOverrides) + + await expect( + fileProcessingService.updateProcessor('missing-processor' as never, { + apiKeys: ['invalid-key'] + }) + ).rejects.toThrow("File processor with id 'missing-processor' not found") + + expect(MockMainPreferenceServiceUtils.getPreferenceValue('file_processing.overrides')).toEqual(existingOverrides) + }) + + it('should not persist empty options when updates do not include options', async () => { + await fileProcessingService.updateProcessor('paddleocr', { + apiKeys: ['new-key'] + }) + + const storedOverrides = MockMainPreferenceServiceUtils.getPreferenceValue('file_processing.overrides') + + expect(storedOverrides.paddleocr).toMatchObject({ + apiKeys: ['new-key'] + }) + expect(storedOverrides.paddleocr).not.toHaveProperty('options') + }) + + it('should ignore unknown capability override fields in merged configs', async () => { + MockMainPreferenceServiceUtils.setPreferenceValue('file_processing.overrides', { + paddleocr: { + capabilities: { + text_extraction: { + apiHost: 'https://override.example.com', + futureField: true + } + } + } + } as never) + + const processor = await fileProcessingService.getProcessorById('paddleocr') + const textExtraction = processor.capabilities.find((capability) => capability.feature === 'text_extraction') + + expect(textExtraction).toMatchObject({ + feature: 'text_extraction', + apiHost: 'https://override.example.com' + }) + expect(textExtraction).not.toHaveProperty('futureField') + expect(FileProcessorMergedSchema.safeParse(processor).success).toBe(true) + }) + }) +}) diff --git a/v2-refactor-temp/tools/data-classify/data/classification.json 
b/v2-refactor-temp/tools/data-classify/data/classification.json index 7c425dcea57..22d3feb0a6a 100644 --- a/v2-refactor-temp/tools/data-classify/data/classification.json +++ b/v2-refactor-temp/tools/data-classify/data/classification.json @@ -641,17 +641,17 @@ "originalKey": "providers", "type": "array", "defaultValue": "[ { id: 'mineru', name: 'MinerU', apiKey: '', apiHost: 'https://mineru.net' }", - "status": "pending", - "category": null, + "status": "classified", + "category": "preferences", "targetKey": null }, { "originalKey": "defaultProvider", - "type": "string", - "defaultValue": null, - "status": "pending", - "category": null, - "targetKey": null + "type": "PreferenceTypes.FileProcessorId", + "defaultValue": "mineru", + "status": "classified", + "category": "preferences", + "targetKey": "file_processing.default.markdown_conversion" } ], "runtime": [ @@ -3032,17 +3032,17 @@ "originalKey": "providers", "type": "array", "defaultValue": "BUILTIN_OCR_PROVIDERS", - "status": "pending", - "category": null, + "status": "classified", + "category": "preferences", "targetKey": null }, { "originalKey": "imageProviderId", - "type": "string", - "defaultValue": null, - "status": "pending", - "category": null, - "targetKey": null + "type": "PreferenceTypes.FileProcessorId", + "defaultValue": "VALUE: null", + "status": "classified", + "category": "preferences", + "targetKey": "file_processing.default.text_extraction" } ], "note": [ diff --git a/v2-refactor-temp/tools/data-classify/data/target-key-definitions.json b/v2-refactor-temp/tools/data-classify/data/target-key-definitions.json index e3a99ec8f4e..02fced0254d 100644 --- a/v2-refactor-temp/tools/data-classify/data/target-key-definitions.json +++ b/v2-refactor-temp/tools/data-classify/data/target-key-definitions.json @@ -33,6 +33,13 @@ "status": "pending", "description": "Window height (from complex mapping) - Example, not active" }, + { + "targetKey": "file_processing.overrides", + "type": 
"PreferenceTypes.FileProcessorOverrides", + "defaultValue": "VALUE: {} as PreferenceTypes.FileProcessorOverrides", + "status": "classified", + "description": "复杂迁移:合并 redux/preprocess.providers 与 redux/ocr.providers 为 file processing overrides" + }, { "targetKey": "chat.web_search.compression.cutoff_limit", "type": "number",