Skip to content

Commit c110f35

Browse files
laskevychclaude
andauthored
fix(backend): exclude disconnected fields from AI Helper sample query (#1343)
* fix(backend): exclude disconnected fields from AI Helper sample query AI Helper alias/description generation failed with a storage error (BigQuery 'Unrecognized name: <field>') when a data mart's Output Schema contained a Disconnected field. The orchestrator built the 30-row sample query from every schema field, including ones present in the schema but absent from the underlying table/view, so the storage rejected the whole query. Filter out DISCONNECTED fields before building the sample query, and skip the sample fetch when no connected fields remain (which would otherwise emit an empty SELECT). Hidden-for-reporting fields are kept on purpose, since they are real source columns and may still need an AI-generated alias/description. Add unit tests for disconnected exclusion, hidden-field retention, and the all-disconnected skip path. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * refactor(backend): extract isConnected schema-field helper Centralize the 'field exists in the source' check (status !== DISCONNECTED) in a shared isConnected() helper in data-mart-schema.utils.ts, per review feedback, so a future status that also means 'gone from the source' only needs updating in one place. Used by the AI Helper sample query and collectSchemaFieldPathTypes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * refactor(backend): use isConnected in report header generators Route the flat and BigQuery report header generators through the shared isConnected() helper instead of inline status !== DISCONNECTED checks, so every 'field exists in the source' check resolves to one place. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 94afa91 commit c110f35

7 files changed

Lines changed: 210 additions & 21 deletions
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
'owox': minor
3+
---
4+
5+
# Skip disconnected fields in AI Helper sample-data query
6+
7+
Fix AI Helper alias/description generation failing with `Unrecognized name: <field>` when a data mart's Output Schema contains a field marked Disconnected ("Field is not connected to the data source"). The 30-row sample query no longer selects disconnected fields — they exist in the schema but not in the underlying table/view, so the storage rejected the whole query — and the sample fetch is skipped entirely when no connected fields remain. Hidden-for-reporting fields are intentionally kept, since the AI may still generate an alias or description for them.
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import { GenerateDataMartMetadataOrchestratorService } from './generate-data-mart-metadata.orchestrator.service';
2+
import { DataMartMetadataScope, GenerateDataMartMetadataRequest } from './ai-insights-types';
3+
import { DataMartDefinitionType } from '../enums/data-mart-definition-type.enum';
4+
import { DataMartSchemaFieldStatus } from '../data-storage-types/enums/data-mart-schema-field-status.enum';
5+
import { DataMart } from '../entities/data-mart.entity';
6+
7+
describe('GenerateDataMartMetadataOrchestratorService', () => {
8+
const METADATA_SAMPLE_ROW_LIMIT = 30;
9+
10+
const buildSchema = (
11+
fields: Array<{
12+
name: string;
13+
status: DataMartSchemaFieldStatus;
14+
isHiddenForReporting?: boolean;
15+
}>
16+
) => ({
17+
type: 'bigquery-data-mart-schema',
18+
fields: fields.map(f => ({
19+
name: f.name,
20+
type: 'STRING',
21+
status: f.status,
22+
isHiddenForReporting: f.isHiddenForReporting ?? false,
23+
})),
24+
});
25+
26+
const createService = (schema: unknown) => {
27+
const dataMart = {
28+
id: 'dm-1',
29+
projectId: 'proj-1',
30+
definitionType: DataMartDefinitionType.SQL,
31+
title: 'Test Mart',
32+
description: 'desc',
33+
schema,
34+
} as unknown as DataMart;
35+
36+
const dataMartService = {
37+
actualizeSchemaIfExpired: jest.fn().mockResolvedValue(dataMart),
38+
};
39+
const dataMartSampleDataService = {
40+
sampleColumns: jest.fn().mockResolvedValue({ columns: [], rows: [] }),
41+
};
42+
const definitionValidatorFacade = {
43+
checkIsValid: jest.fn().mockResolvedValue(undefined),
44+
};
45+
const generateDataMartMetadataAgent = {
46+
run: jest.fn().mockResolvedValue({ fields: [] }),
47+
};
48+
49+
const service = new GenerateDataMartMetadataOrchestratorService(
50+
dataMartService as never,
51+
dataMartSampleDataService as never,
52+
definitionValidatorFacade as never,
53+
generateDataMartMetadataAgent as never,
54+
{} as never,
55+
{} as never
56+
);
57+
58+
return { service, dataMartSampleDataService, generateDataMartMetadataAgent };
59+
};
60+
61+
const request: GenerateDataMartMetadataRequest = {
62+
dataMartId: 'dm-1',
63+
projectId: 'proj-1',
64+
scope: DataMartMetadataScope.ALL_FIELD_METADATA,
65+
useSample: true,
66+
};
67+
68+
it('excludes DISCONNECTED fields from the sample query', async () => {
69+
const schema = buildSchema([
70+
{ name: 'campaign', status: DataMartSchemaFieldStatus.CONNECTED },
71+
{ name: 'country', status: DataMartSchemaFieldStatus.CONNECTED },
72+
{ name: 'clicks', status: DataMartSchemaFieldStatus.CONNECTED },
73+
{ name: 'impressions', status: DataMartSchemaFieldStatus.DISCONNECTED },
74+
]);
75+
const { service, dataMartSampleDataService } = createService(schema);
76+
77+
await service.run(request);
78+
79+
expect(dataMartSampleDataService.sampleColumns).toHaveBeenCalledWith(
80+
'dm-1',
81+
'proj-1',
82+
['campaign', 'country', 'clicks'],
83+
undefined,
84+
METADATA_SAMPLE_ROW_LIMIT
85+
);
86+
});
87+
88+
it('keeps isHiddenForReporting fields in the sample query (AI may still alias/describe them)', async () => {
89+
const schema = buildSchema([
90+
{ name: 'campaign', status: DataMartSchemaFieldStatus.CONNECTED },
91+
{
92+
name: 'internal_id',
93+
status: DataMartSchemaFieldStatus.CONNECTED,
94+
isHiddenForReporting: true,
95+
},
96+
{ name: 'clicks', status: DataMartSchemaFieldStatus.CONNECTED },
97+
]);
98+
const { service, dataMartSampleDataService } = createService(schema);
99+
100+
await service.run(request);
101+
102+
expect(dataMartSampleDataService.sampleColumns).toHaveBeenCalledWith(
103+
'dm-1',
104+
'proj-1',
105+
['campaign', 'internal_id', 'clicks'],
106+
undefined,
107+
METADATA_SAMPLE_ROW_LIMIT
108+
);
109+
});
110+
111+
it('keeps CONNECTED_WITH_DEFINITION_MISMATCH fields in the sample query', async () => {
112+
const schema = buildSchema([
113+
{ name: 'campaign', status: DataMartSchemaFieldStatus.CONNECTED },
114+
{ name: 'clicks', status: DataMartSchemaFieldStatus.CONNECTED_WITH_DEFINITION_MISMATCH },
115+
]);
116+
const { service, dataMartSampleDataService } = createService(schema);
117+
118+
await service.run(request);
119+
120+
expect(dataMartSampleDataService.sampleColumns).toHaveBeenCalledWith(
121+
'dm-1',
122+
'proj-1',
123+
['campaign', 'clicks'],
124+
undefined,
125+
METADATA_SAMPLE_ROW_LIMIT
126+
);
127+
});
128+
129+
it('skips the sample fetch when every field is DISCONNECTED', async () => {
130+
const schema = buildSchema([
131+
{ name: 'campaign', status: DataMartSchemaFieldStatus.DISCONNECTED },
132+
{ name: 'impressions', status: DataMartSchemaFieldStatus.DISCONNECTED },
133+
]);
134+
const { service, dataMartSampleDataService, generateDataMartMetadataAgent } =
135+
createService(schema);
136+
137+
await service.run(request);
138+
139+
expect(dataMartSampleDataService.sampleColumns).not.toHaveBeenCalled();
140+
expect(generateDataMartMetadataAgent.run).toHaveBeenCalledWith(
141+
expect.objectContaining({ sampleColumns: null, sampleRows: null }),
142+
expect.anything()
143+
);
144+
});
145+
});

apps/backend/src/data-marts/ai-insights/generate-data-mart-metadata.orchestrator.service.ts

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { ToolRegistry } from '../../common/ai-insights/agent/tool-registry';
55
import { BusinessViolationException } from '../../common/exceptions/business-violation.exception';
66
import { DataMartDefinitionValidatorFacade } from '../data-storage-types/facades/data-mart-definition-validator-facade.service';
77
import type { DataMartSchema } from '../data-storage-types/data-mart-schema.type';
8+
import { isConnected } from '../data-storage-types/data-mart-schema.utils';
89
import { DataMartDefinitionType } from '../enums/data-mart-definition-type.enum';
910
import { DataMartService } from '../services/data-mart.service';
1011
import { DataMartSampleDataService } from '../services/data-mart-sample-data.service';
@@ -73,19 +74,26 @@ export class GenerateDataMartMetadataOrchestratorService {
7374
let sampleColumns: string[] | null = null;
7475
let sampleRows: QueryRow[] | null = null;
7576
if (request.useSample) {
76-
const schemaColumns = schema.fields.map(f => f.name);
77-
const sample = await this.dataMartSampleDataService.sampleColumns(
78-
request.dataMartId,
79-
request.projectId,
80-
schemaColumns,
81-
undefined,
82-
METADATA_SAMPLE_ROW_LIMIT
83-
);
84-
sampleColumns = sample.columns;
85-
sampleRows = sample.rows.map(row => this.toQueryRow(sample.columns, row));
86-
this.logger.log(
87-
`Fetched ${sample.rows.length} sample row(s) across ${sample.columns.length} column(s) for data mart ${request.dataMartId}`
88-
);
77+
// Disconnected fields aren't in the underlying table/view, so selecting them fails the query.
78+
const schemaColumns = schema.fields.filter(isConnected).map(f => f.name);
79+
if (schemaColumns.length === 0) {
80+
this.logger.warn(
81+
`Skipping sample fetch for data mart ${request.dataMartId}: no connected schema fields`
82+
);
83+
} else {
84+
const sample = await this.dataMartSampleDataService.sampleColumns(
85+
request.dataMartId,
86+
request.projectId,
87+
schemaColumns,
88+
undefined,
89+
METADATA_SAMPLE_ROW_LIMIT
90+
);
91+
sampleColumns = sample.columns;
92+
sampleRows = sample.rows.map(row => this.toQueryRow(sample.columns, row));
93+
this.logger.log(
94+
`Fetched ${sample.rows.length} sample row(s) across ${sample.columns.length} column(s) for data mart ${request.dataMartId}`
95+
);
96+
}
8997
}
9098

9199
const sharedContext: SharedAgentContext = {

apps/backend/src/data-marts/data-storage-types/bigquery/services/bigquery-report-headers-generator.service.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { Injectable } from '@nestjs/common';
2-
import { DataMartSchemaFieldStatus } from '../../enums/data-mart-schema-field-status.enum';
2+
import { isConnected } from '../../data-mart-schema.utils';
33
import { DataStorageType } from '../../enums/data-storage-type.enum';
44
import { ReportHeadersGenerator } from '../../interfaces/report-headers-generator.interface';
55
import { DataMartSchema } from '../../data-mart-schema.type';
@@ -41,7 +41,7 @@ export class BigQueryReportHeadersGenerator implements ReportHeadersGenerator {
4141
const fieldHeaders: ReportDataHeader[] = [];
4242
const fullPath = parentPath ? `${parentPath}.${field.name}` : field.name;
4343

44-
if (field.status === DataMartSchemaFieldStatus.DISCONNECTED) {
44+
if (!isConnected(field)) {
4545
return fieldHeaders;
4646
}
4747

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { isConnected } from './data-mart-schema.utils';
2+
import { DataMartSchemaFieldStatus } from './enums/data-mart-schema-field-status.enum';
3+
import type { DataMartSchemaField } from './data-mart-schema.type';
4+
5+
describe('isConnected', () => {
6+
const field = (status: DataMartSchemaFieldStatus): DataMartSchemaField =>
7+
({ name: 'f', type: 'STRING', status }) as unknown as DataMartSchemaField;
8+
9+
it('treats CONNECTED as present in the source', () => {
10+
expect(isConnected(field(DataMartSchemaFieldStatus.CONNECTED))).toBe(true);
11+
});
12+
13+
it('treats CONNECTED_WITH_DEFINITION_MISMATCH as present in the source', () => {
14+
expect(isConnected(field(DataMartSchemaFieldStatus.CONNECTED_WITH_DEFINITION_MISMATCH))).toBe(
15+
true
16+
);
17+
});
18+
19+
it('treats DISCONNECTED as gone from the source', () => {
20+
expect(isConnected(field(DataMartSchemaFieldStatus.DISCONNECTED))).toBe(false);
21+
});
22+
});

apps/backend/src/data-marts/data-storage-types/data-mart-schema.utils.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@ import { Injectable } from '@nestjs/common';
77
import { DataStoragePublicCredentialsFactory } from './factories/data-storage-public-credentials.factory';
88
import { DataStorageCredentialsPublic } from '../dto/presentation/data-storage-response-api.dto';
99

10+
/**
11+
* A field is "connected" when it still exists in the data source — i.e. its status is not
12+
* DISCONNECTED (CONNECTED and CONNECTED_WITH_DEFINITION_MISMATCH both mean the column is
13+
* present). Keeping this in one place means a future status that also signals "gone from
14+
* the source" only needs handling here.
15+
*/
16+
export function isConnected(field: DataMartSchemaField): boolean {
17+
return field.status !== DataMartSchemaFieldStatus.DISCONNECTED;
18+
}
19+
1020
// A hidden-for-reporting or DISCONNECTED node prunes its whole subtree; container
1121
// nodes count as referenceable paths alongside their nested `a.b` leaves.
1222
export function collectSchemaFieldPaths(
@@ -23,7 +33,7 @@ export function collectSchemaFieldPathTypes(
2333
const result: { name: string; type: string }[] = [];
2434
for (const field of fields) {
2535
if (field.isHiddenForReporting) continue;
26-
if (field.status === DataMartSchemaFieldStatus.DISCONNECTED) continue;
36+
if (!isConnected(field)) continue;
2737
const fullName = prefix ? `${prefix}.${field.name}` : field.name;
2838
result.push({ name: fullName, type: String(field.type) });
2939
if ('fields' in field && field.fields?.length) {

apps/backend/src/data-marts/data-storage-types/interfaces/flat-report-headers-generator.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { DataMartSchema } from '../data-mart-schema.type';
2-
import { DataMartSchemaFieldStatus } from '../enums/data-mart-schema-field-status.enum';
2+
import { isConnected } from '../data-mart-schema.utils';
33
import { DataStorageType } from '../enums/data-storage-type.enum';
44
import { ReportDataHeader } from '../../dto/domain/report-data-header.dto';
55
import { ReportHeadersGenerator } from './report-headers-generator.interface';
@@ -20,10 +20,7 @@ export abstract class FlatReportHeadersGenerator implements ReportHeadersGenerat
2020
}
2121

2222
return dataMartSchema.fields
23-
.filter(
24-
field =>
25-
field.status !== DataMartSchemaFieldStatus.DISCONNECTED && !field.isHiddenForReporting
26-
)
23+
.filter(field => isConnected(field) && !field.isHiddenForReporting)
2724
.map(field => new ReportDataHeader(field.name, field.alias, field.description, field.type));
2825
}
2926
}

0 commit comments

Comments
 (0)