Skip to content

Commit bbd8413

Browse files
[Automatic Import] Add date field mapping (elastic#261580)
## Summary Fixes custom date fields being incorrectly mapped as `keyword` in generated `fields.yml` for automatic import integrations. The review agent produces `field_mappings` with accurate types (including `date`), but these were discarded after the agent completed. Instead, `generateFieldMappings` inferred types purely from JavaScript value types in pipeline output documents — and since dates are strings in JSON, they were always classified as `keyword`. This PR threads the agent's `field_mappings` through to `generateFieldMappings` so that agent-determined types (especially `date`) are preserved in the data stream saved object and subsequently in the built integration package's `fields.yml`. ### Changes - **`fields.ts`**: Added an optional `agentFieldMappings` parameter to `generateFieldMappings`. When provided, agent types override heuristic inference for non-ECS fields. ECS types retain highest precedence. - **`task_manager_service.ts`**: After the agent completes, extracts `result.field_mappings` from the agent state and passes it to `generateFieldMappings`. ### How it flows end-to-end 1. Review agent calls `submit_review` with `field_mappings` (e.g. `[{name: "my_app.created_at", type: "date"}]`) 2. Agent state includes these in `result.field_mappings` 3. Task manager passes them to `generateFieldMappings` as type hints 4. `generateFieldMappings` uses agent types for non-ECS fields, falling back to heuristic inference 5. Correct types are stored in the data stream saved object's `result.field_mapping` 6. `buildIntegrationPackage` reads `field_mapping` from the saved object and generates `fields.yml` with correct `date` types Note: The manual pipeline edit path (`updateDataStreamPipeline`) is unaffected — it does not have agent field mappings available, so it continues to use heuristic inference only. ## Test plan - [x] `fields.test.ts`: 4 new tests covering agent override, ECS precedence, fallback, and empty array - [x] `task_manager_service.test.ts`: 2 new tests verifying agent field mappings are passed through (present and absent cases) - [x] Verify end-to-end: create an integration with a data source containing date fields, confirm `fields.yml` in the downloaded package uses `type: date` instead of `type: keyword` --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
1 parent 941c8f2 commit bbd8413

4 files changed

Lines changed: 137 additions & 3 deletions

File tree

x-pack/platform/plugins/shared/automatic_import/server/services/build_integration/fields.test.ts

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,91 @@ describe('fields', () => {
268268
expect(fieldNames).not.toContain('__proto__.bad');
269269
});
270270

271+
it('uses agent field mappings to override inferred types for non-ECS fields', async () => {
272+
const docs = [
273+
{
274+
my_app: {
275+
created_at: '2024-01-15T10:30:00.000Z',
276+
updated_at: '2024-01-16T12:00:00.000Z',
277+
name: 'test',
278+
count: 5,
279+
},
280+
},
281+
];
282+
const agentFieldMappings = [
283+
{ name: 'my_app.created_at', type: 'date' as const },
284+
{ name: 'my_app.updated_at', type: 'date' as const },
285+
{ name: 'my_app.name', type: 'keyword' as const },
286+
];
287+
const fields = await generateFieldMappings(docs, fieldsMetadataClient, agentFieldMappings);
288+
289+
expect(fields).toContainEqual({
290+
name: 'my_app.created_at',
291+
type: 'date',
292+
is_ecs: false,
293+
});
294+
expect(fields).toContainEqual({
295+
name: 'my_app.updated_at',
296+
type: 'date',
297+
is_ecs: false,
298+
});
299+
expect(fields).toContainEqual({
300+
name: 'my_app.name',
301+
type: 'keyword',
302+
is_ecs: false,
303+
});
304+
expect(fields).toContainEqual({
305+
name: 'my_app.count',
306+
type: 'long',
307+
is_ecs: false,
308+
});
309+
});
310+
311+
it('ECS types take precedence over agent field mappings', async () => {
312+
const docs = [
313+
{
314+
'@timestamp': '2024-01-01T00:00:00.000Z',
315+
source: { ip: '10.0.0.1' },
316+
},
317+
];
318+
const agentFieldMappings = [
319+
{ name: '@timestamp', type: 'keyword' as const },
320+
{ name: 'source.ip', type: 'keyword' as const },
321+
];
322+
const fields = await generateFieldMappings(docs, fieldsMetadataClient, agentFieldMappings);
323+
324+
expect(fields).toContainEqual({ name: '@timestamp', type: 'date', is_ecs: true });
325+
expect(fields).toContainEqual({ name: 'source.ip', type: 'ip', is_ecs: true });
326+
});
327+
328+
it('falls back to inferred type when agent field mappings are not provided', async () => {
329+
const docs = [
330+
{
331+
my_app: {
332+
timestamp: '2024-01-01T00:00:00.000Z',
333+
},
334+
},
335+
];
336+
const fields = await generateFieldMappings(docs, fieldsMetadataClient);
337+
338+
expect(fields).toContainEqual({
339+
name: 'my_app.timestamp',
340+
type: 'keyword',
341+
is_ecs: false,
342+
});
343+
});
344+
345+
it('handles empty agent field mappings array', async () => {
346+
const docs = [{ my_app: { value: 'test' } }];
347+
const fields = await generateFieldMappings(docs, fieldsMetadataClient, []);
348+
349+
expect(fields).toContainEqual({
350+
name: 'my_app.value',
351+
type: 'keyword',
352+
is_ecs: false,
353+
});
354+
});
355+
271356
it('produces correct types for a realistic pipeline output', async () => {
272357
const docs = [
273358
{

x-pack/platform/plugins/shared/automatic_import/server/services/build_integration/fields.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77

88
import type { IFieldsMetadataClient } from '@kbn/fields-metadata-plugin/server/services/fields_metadata/types';
9+
import type { FieldMapping } from '../../agents/state';
910
import type { FieldMappingEntry } from '../saved_objects/saved_objects_service';
1011

1112
interface SampleObj {
@@ -111,7 +112,8 @@ const collectFields = (obj: unknown, parentPath: string): RawField[] => {
111112
*/
112113
export const generateFieldMappings = async (
113114
pipelineDocs: Array<Record<string, unknown>>,
114-
fieldsMetadataClient: IFieldsMetadataClient
115+
fieldsMetadataClient: IFieldsMetadataClient,
116+
agentFieldMappings?: FieldMapping[]
115117
): Promise<FieldMappingEntry[]> => {
116118
if (pipelineDocs.length === 0) return [];
117119

@@ -126,6 +128,13 @@ export const generateFieldMappings = async (
126128

127129
if (rawFields.length === 0) return [];
128130

131+
const agentTypeMap = new Map<string, string>();
132+
if (agentFieldMappings) {
133+
for (const mapping of agentFieldMappings) {
134+
agentTypeMap.set(mapping.name, mapping.type);
135+
}
136+
}
137+
129138
const allFieldNames = rawFields.map((f) => f.name);
130139
const ecsFieldsDict = await fieldsMetadataClient.find({
131140
fieldNames: allFieldNames,
@@ -136,9 +145,10 @@ export const generateFieldMappings = async (
136145
return rawFields.map((f) => {
137146
const ecsEntry = ecsFieldsMap[f.name];
138147
const isEcs = !!ecsEntry;
148+
const agentType = agentTypeMap.get(f.name);
139149
return {
140150
...f,
141-
type: isEcs && ecsEntry.type ? ecsEntry.type : f.type,
151+
type: isEcs && ecsEntry.type ? ecsEntry.type : agentType ?? f.type,
142152
is_ecs: isEcs,
143153
};
144154
});

x-pack/platform/plugins/shared/automatic_import/server/services/task_manager/task_manager_service.test.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ describe('runTask abort handling', () => {
139139
invokeAutomaticImportAgent: jest.fn().mockResolvedValue({
140140
current_pipeline: { name: 'test', processors: [] },
141141
pipeline_generation_results: [{ _source: { answer: 42 } }],
142+
field_mappings: [
143+
{ name: 'my_app.created_at', type: 'date' },
144+
{ name: 'my_app.status', type: 'keyword' },
145+
],
142146
}),
143147
}));
144148

@@ -178,6 +182,18 @@ describe('runTask abort handling', () => {
178182
);
179183
});
180184

185+
it('passes agent field_mappings to generateFieldMappings', async () => {
186+
const abortController = new AbortController();
187+
const runner = createRunner(abortController);
188+
189+
await runner.run();
190+
191+
expect(generateFieldMappings).toHaveBeenCalledWith(expect.any(Array), expect.anything(), [
192+
{ name: 'my_app.created_at', type: 'date' },
193+
{ name: 'my_app.status', type: 'keyword' },
194+
]);
195+
});
196+
181197
it('marks data stream as cancelled when aborted before agent invocation returns', async () => {
182198
const abortController = new AbortController();
183199

@@ -885,6 +901,26 @@ describe('runTask error edge cases', () => {
885901
expect(abortController.signal.aborted).toBe(false);
886902
});
887903

904+
it('passes undefined agent field_mappings when agent does not provide them', async () => {
905+
AgentService.mockImplementation(() => ({
906+
invokeAutomaticImportAgent: jest.fn().mockResolvedValue({
907+
current_pipeline: { name: 'test', processors: [] },
908+
pipeline_generation_results: [{ _source: { value: 'test' } }],
909+
}),
910+
}));
911+
912+
buildService();
913+
const abortController = new AbortController();
914+
const runner = createRunner(abortController);
915+
await runner.run();
916+
917+
expect(generateFieldMappings).toHaveBeenCalledWith(
918+
expect.any(Array),
919+
expect.anything(),
920+
undefined
921+
);
922+
});
923+
888924
it('passes validation warnings through without failing the task', async () => {
889925
validateFieldMappings.mockResolvedValue({
890926
valid: false,

x-pack/platform/plugins/shared/automatic_import/server/services/task_manager/task_manager_service.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import { AgentService } from '../agents/agent_service';
2828
import type { AutomaticImportSamplesIndexService } from '../samples_index/index_service';
2929
import { generateFieldMappings } from '../build_integration/fields';
3030
import { validateFieldMappings } from '../build_integration/validate_fields';
31+
import type { FieldMapping } from '../../agents/state';
3132
import type { LangSmithOptions } from '../../routes/types';
3233
import type { AutomaticImportPluginStartDependencies } from '../../types';
3334
import type { AutomaticImportSavedObjectService } from '../saved_objects/saved_objects_service';
@@ -277,9 +278,11 @@ export class TaskManagerService {
277278
`Pipeline generation results objects: ${JSON.stringify(result.pipeline_generation_results)}`
278279
);
279280

281+
const agentFieldMappings = (result.field_mappings as FieldMapping[] | undefined) ?? undefined;
280282
const fieldMapping = await generateFieldMappings(
281283
(pipelineGenerationResultsObjects ?? []) as Array<Record<string, unknown>>,
282-
fieldsMetadataClient
284+
fieldsMetadataClient,
285+
agentFieldMappings
283286
);
284287
this.logger.debug(`Generated field mappings: ${JSON.stringify(fieldMapping)}`);
285288
this.throwIfAborted(abortSignal);

0 commit comments

Comments
 (0)