From 2cc6c8d251b6e7b376a6e35ef8c93f96e9a8c69b Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 2 May 2025 16:35:57 +0300 Subject: [PATCH 1/3] fix(schema-compiler): Reject pre-agg if measure is unmultiplied in query but multiplied in pre-agg --- .../src/adapter/BaseQuery.js | 4 +-- .../src/adapter/PreAggregations.ts | 33 ++++++++++++++++--- .../src/compiler/CubeEvaluator.ts | 1 + 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 60236542389df..729ec09e685a3 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -3883,11 +3883,11 @@ export class BaseQuery { * @returns {BaseQuery} */ // eslint-disable-next-line consistent-return - preAggregationQueryForSqlEvaluation(cube, preAggregation) { + preAggregationQueryForSqlEvaluation(cube, preAggregation, context = {}) { if (preAggregation.type === 'autoRollup') { return this.preAggregations.autoRollupPreAggregationQuery(cube, preAggregation); } else if (preAggregation.type === 'rollup') { - return this.preAggregations.rollupPreAggregationQuery(cube, preAggregation); + return this.preAggregations.rollupPreAggregationQuery(cube, preAggregation, context); } else if (preAggregation.type === 'originalSql') { return this; } diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts index 4dfb1db0f080a..d0353f4855db8 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts @@ -49,6 +49,10 @@ export type PreAggregationForCube = { references: PreAggregationReferences; }; +export type EvaluateReferencesContext = { + inPreAggEvaluation?: boolean; +}; + export type BaseMember = BaseDimension | BaseMeasure | BaseFilter | BaseGroupFilter | BaseSegment; export type CanUsePreAggregationFn = (references: PreAggregationReferences) => boolean; @@ -582,12 +586,15 @@ export class PreAggregations { const dimensionsTrimmed = references .dimensions .map(d => CubeSymbols.joinHintFromPath(d).path); + const multipliedMeasuresTrimmed = references + .multipliedMeasures?.map(m => CubeSymbols.joinHintFromPath(m).path); return { ...references, dimensions: dimensionsTrimmed, measures: measuresTrimmed, timeDimensions: timeDimensionsTrimmed, + multipliedMeasures: multipliedMeasuresTrimmed, }; } @@ -722,6 +729,19 @@ export class PreAggregations { // TODO remove this in favor of matching with join path const referencesTrimmed = trimmedReferences(references); + // Even if there are no multiplied measures in the query (because no multiplier dimensions are requested) + // but the same measures are multiplied in the pre-aggregation, we can't use pre-aggregation + // for such queries. + if (referencesTrimmed.multipliedMeasures) { + const backAliasMultipliedMeasures = backAlias(referencesTrimmed.multipliedMeasures); + + if (transformedQuery.leafMeasures.some(m => referencesTrimmed.multipliedMeasures.includes(m)) || + transformedQuery.measures.some(m => backAliasMultipliedMeasures.includes(m)) + ) { + return false; + } + } + const dimensionsMatch = (dimensions, doBackAlias) => R.all( d => ( doBackAlias ? @@ -1193,11 +1213,11 @@ export class PreAggregations { ); } - public rollupPreAggregationQuery(cube: string, aggregation): BaseQuery { + public rollupPreAggregationQuery(cube: string, aggregation: PreAggregationDefinition, context: EvaluateReferencesContext = {}): BaseQuery { // `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints // to subquery. Otherwise, members in subquery would regenerate new join tree from clean state, // and it can be different from expected by join path in pre-aggregation declaration - const references = this.evaluateAllReferences(cube, aggregation); + const references = this.evaluateAllReferences(cube, aggregation, null, context); const cubeQuery = this.query.newSubQueryForCube(cube, {}); return this.query.newSubQueryForCube(cube, { rowLimit: null, @@ -1225,7 +1245,7 @@ export class PreAggregations { }); } - public autoRollupPreAggregationQuery(cube: string, aggregation): BaseQuery { + public autoRollupPreAggregationQuery(cube: string, aggregation: PreAggregationDefinition): BaseQuery { return this.query.newSubQueryForCube( cube, { @@ -1266,13 +1286,18 @@ export class PreAggregations { .toLowerCase(); } - private evaluateAllReferences(cube: string, aggregation: PreAggregationDefinition, preAggregationName: string | null = null): PreAggregationReferences { + private evaluateAllReferences(cube: string, aggregation: PreAggregationDefinition, preAggregationName: string | null = null, context: EvaluateReferencesOptions = {}): PreAggregationReferences { // TODO build a join tree for all references, so they would always include full join path // Even for preaggregation references without join path // It is necessary to be able to match query and preaggregation based on full join tree const evaluateReferences = () => { const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation); + if (!context.inPreAggEvaluation) { + const preAggQuery = this.query.preAggregationQueryForSqlEvaluation(cube, aggregation, { inPreAggEvaluation: true }); + const aggregateMeasures = preAggQuery?.fullKeyQueryAggregateMeasures({ hasMultipliedForPreAggregation: true }); + references.multipliedMeasures = aggregateMeasures?.multipliedMeasures?.map(m => m.measure); + } if (aggregation.type === 'rollupLambda') { if (references.rollups.length > 0) { const [firstLambdaCube] = this.query.cubeEvaluator.parsePath('preAggregations', references.rollups[0]); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 86a597e74046d..b65f383b4357e 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -121,6 +121,7 @@ export type PreAggregationReferences = { measures: Array, timeDimensions: Array, rollups: Array, + multipliedMeasures?: Array, }; export type PreAggregationInfo = { From 54daeba2e6329d38a0d6e3cbd9381155dae0b8ff Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 2 May 2025 17:31:45 +0300 Subject: [PATCH 2/3] add tests --- .../mssql/mssql-pre-aggregations.test.ts | 6 - .../orders_and_items_multiplied_pre_agg.yml | 69 ++++++++ .../test/unit/pre-aggregations.test.ts | 163 ++++++++++++++++++ 3 files changed, 232 insertions(+), 6 deletions(-) create mode 100644 packages/cubejs-schema-compiler/test/unit/fixtures/orders_and_items_multiplied_pre_agg.yml diff --git a/packages/cubejs-schema-compiler/test/integration/mssql/mssql-pre-aggregations.test.ts b/packages/cubejs-schema-compiler/test/integration/mssql/mssql-pre-aggregations.test.ts index 7a0c6e3af2cd6..c9aeedda541ad 100644 --- a/packages/cubejs-schema-compiler/test/integration/mssql/mssql-pre-aggregations.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/mssql/mssql-pre-aggregations.test.ts @@ -90,12 +90,6 @@ describe('MSSqlPreAggregations', () => { timeDimensionReference: createdAt, granularity: 'day', }, - approx: { - type: 'rollup', - measureReferences: [countDistinctApprox], - timeDimensionReference: createdAt, - granularity: 'day' - }, ratioRollup: { type: 'rollup', measureReferences: [checkinsTotal, uniqueSourceCount], diff --git a/packages/cubejs-schema-compiler/test/unit/fixtures/orders_and_items_multiplied_pre_agg.yml b/packages/cubejs-schema-compiler/test/unit/fixtures/orders_and_items_multiplied_pre_agg.yml new file mode 100644 index 0000000000000..98f5f5611867a --- /dev/null +++ b/packages/cubejs-schema-compiler/test/unit/fixtures/orders_and_items_multiplied_pre_agg.yml @@ -0,0 +1,69 @@ +cubes: + - name: line_items + sql_table: public.line_items + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: product_id + sql: product_id + type: number + + - name: order_id + sql: order_id + type: number + + - name: quantity + sql: quantity + type: number + + - name: price + sql: price + type: number + + - name: orders + sql_table: public.orders + joins: + - name: line_items + sql: "{CUBE}.id = {line_items}.order_id" + relationship: one_to_many + dimensions: + - name: id + sql: id + type: number + primary_key: true + - name: user_id + sql: user_id + type: number + - name: amount + sql: amount + type: number + - name: status + sql: status + type: string + - name: status_new + sql: status || '_new' + type: string + - name: created_at + sql: created_at + type: time + measures: + - name: count + type: count + - name: total_qty + type: sum + sql: amount + + pre_aggregations: + - name: pre_agg_with_multiplied_measures + dimensions: + - orders.status + - line_items.product_id + measures: + - orders.count + - orders.total_qty + timeDimension: orders.created_at + granularity: month diff --git a/packages/cubejs-schema-compiler/test/unit/pre-aggregations.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-aggregations.test.ts index 450f809208206..85f2ec0b251d7 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-aggregations.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-aggregations.test.ts @@ -1,3 +1,5 @@ +import fs from 'fs'; +import path from 'path'; import { prepareJsCompiler, prepareYamlCompiler } from './PrepareCompiler'; import { createECommerceSchema, createSchemaYaml } from './utils'; import { PostgresQuery, queryClass, QueryFactory } from '../../src'; @@ -447,4 +449,165 @@ describe('pre-aggregations', () => { expect(preAggregationsDescription[0].invalidateKeyQueries[0][0].includes('WHERE ((date(created_at) >= $1::timestamptz AND date(created_at) <= $2::timestamptz))')).toBeTruthy(); expect(preAggregationsDescription[0].invalidateKeyQueries[0][1]).toEqual(['__FROM_PARTITION_RANGE', '__TO_PARTITION_RANGE']); }); + + describe('rollup with multiplied measure', () => { + let compiler; + let cubeEvaluator; + let joinGraph; + + beforeAll(async () => { + const modelContent = fs.readFileSync( + path.join(process.cwd(), '/test/unit/fixtures/orders_and_items_multiplied_pre_agg.yml'), + 'utf8' + ); + ({ compiler, cubeEvaluator, joinGraph } = prepareYamlCompiler(modelContent)); + await compiler.compile(); + }); + + it('measure is unmultiplied in query but multiplied in pre-agg', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'orders.total_qty' + ], + dimensions: [], + timeDimensions: [ + { + dimension: 'orders.created_at', + dateRange: [ + '2017-05-01', + '2025-05-01' + ] + } + ] + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + // Pre-agg should not match + expect(preAggregationsDescription).toEqual([]); + }); + + it('measure is unmultiplied in query but multiplied in pre-agg + granularity', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'orders.total_qty' + ], + dimensions: [], + timeDimensions: [ + { + dimension: 'orders.created_at', + dateRange: [ + '2017-05-01', + '2025-05-01' + ], + granularity: 'month' + } + ] + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + // Pre-agg should not match + expect(preAggregationsDescription).toEqual([]); + }); + + it('measure is unmultiplied in query but multiplied in pre-agg + granularity + local dimension', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'orders.total_qty' + ], + dimensions: [ + 'orders.status' + ], + timeDimensions: [ + { + dimension: 'orders.created_at', + dateRange: [ + '2017-05-01', + '2025-05-01' + ], + granularity: 'month' + } + ] + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + // Pre-agg should not match + expect(preAggregationsDescription).toEqual([]); + }); + + it('measure is unmultiplied in query but multiplied in pre-agg + granularity + external dimension', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'orders.total_qty' + ], + dimensions: [ + 'line_items.product_id' + ], + timeDimensions: [ + { + dimension: 'orders.created_at', + dateRange: [ + '2017-05-01', + '2025-05-01' + ], + granularity: 'month' + } + ] + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + // Pre-agg should not match + expect(preAggregationsDescription).toEqual([]); + }); + + it('partial-match of query with pre-agg: 1 measure + all dimensions, no granularity', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'orders.total_qty' + ], + dimensions: [ + 'orders.status', + 'line_items.product_id' + ], + timeDimensions: [ + { + dimension: 'orders.created_at', + dateRange: [ + '2017-05-01', + '2025-05-01' + ] + } + ] + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + // Pre-agg should not match + expect(preAggregationsDescription).toEqual([]); + }); + + it('full-match of query with pre-agg: 1 measure + granularity + all dimensions', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'orders.total_qty' + ], + dimensions: [ + 'orders.status', + 'line_items.product_id' + ], + timeDimensions: [ + { + dimension: 'orders.created_at', + dateRange: [ + '2017-05-01', + '2025-05-01' + ], + granularity: 'month' + } + ] + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + expect(preAggregationsDescription.length).toEqual(1); + expect(preAggregationsDescription[0].preAggregationId).toEqual('orders.pre_agg_with_multiplied_measures'); + }); + }); }); From f1f89ef9daee2e1a1b5138152c27bc9d15db8e5a Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Thu, 8 May 2025 19:48:40 +0300 Subject: [PATCH 3/3] another type fixes --- .../src/adapter/PreAggregations.ts | 21 ++++++++++++------- .../src/compiler/CubeEvaluator.ts | 1 + 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts index d0353f4855db8..b0251bb17042b 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.ts @@ -17,7 +17,13 @@ import { BaseSegment } from './BaseSegment'; export type RollupJoin = any; -export type PreAggregationDefinitionExtended = PreAggregationDefinition & { +export type PartitionTimeDimension = { + dimension: string; + dateRange: [string, string]; + boundaryDateRange: [string, string]; +}; + +export type PreAggregationDefinitionExtended = PreAggregationDefinition & PreAggregationReferences & { unionWithSourceData: boolean; rollupLambdaId: string; lastRollupLambda: boolean; @@ -27,6 +33,7 @@ export type PreAggregationDefinitionExtended = PreAggregationDefinition & { streamOffset: 'earliest' | 'latest'; uniqueKeyColumns: string; sqlAlias?: string; + partitionTimeDimensions?: PartitionTimeDimension[]; }; export type PreAggregationForQuery = { @@ -587,7 +594,7 @@ export class PreAggregations { .dimensions .map(d => CubeSymbols.joinHintFromPath(d).path); const multipliedMeasuresTrimmed = references - .multipliedMeasures?.map(m => CubeSymbols.joinHintFromPath(m).path); + .multipliedMeasures?.map(m => CubeSymbols.joinHintFromPath(m).path) || []; return { ...references, @@ -735,7 +742,7 @@ export class PreAggregations { if (referencesTrimmed.multipliedMeasures) { const backAliasMultipliedMeasures = backAlias(referencesTrimmed.multipliedMeasures); - if (transformedQuery.leafMeasures.some(m => referencesTrimmed.multipliedMeasures.includes(m)) || + if (transformedQuery.leafMeasures.some(m => referencesTrimmed.multipliedMeasures?.includes(m)) || transformedQuery.measures.some(m => backAliasMultipliedMeasures.includes(m)) ) { return false; @@ -1213,7 +1220,7 @@ export class PreAggregations { ); } - public rollupPreAggregationQuery(cube: string, aggregation: PreAggregationDefinition, context: EvaluateReferencesContext = {}): BaseQuery { + public rollupPreAggregationQuery(cube: string, aggregation: PreAggregationDefinitionExtended, context: EvaluateReferencesContext = {}): BaseQuery { // `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints // to subquery. Otherwise, members in subquery would regenerate new join tree from clean state, // and it can be different from expected by join path in pre-aggregation declaration @@ -1245,7 +1252,7 @@ export class PreAggregations { }); } - public autoRollupPreAggregationQuery(cube: string, aggregation: PreAggregationDefinition): BaseQuery { + public autoRollupPreAggregationQuery(cube: string, aggregation: PreAggregationDefinitionExtended): BaseQuery { return this.query.newSubQueryForCube( cube, { @@ -1261,7 +1268,7 @@ export class PreAggregations { ); } - private mergePartitionTimeDimensions(aggregation: PreAggregationReferences, partitionTimeDimensions: BaseTimeDimension[]) { + private mergePartitionTimeDimensions(aggregation: PreAggregationReferences, partitionTimeDimensions?: PartitionTimeDimension[]) { if (!partitionTimeDimensions) { return aggregation.timeDimensions; } @@ -1286,7 +1293,7 @@ export class PreAggregations { .toLowerCase(); } - private evaluateAllReferences(cube: string, aggregation: PreAggregationDefinition, preAggregationName: string | null = null, context: EvaluateReferencesOptions = {}): PreAggregationReferences { + private evaluateAllReferences(cube: string, aggregation: PreAggregationDefinition, preAggregationName: string | null = null, context: EvaluateReferencesContext = {}): PreAggregationReferences { // TODO build a join tree for all references, so they would always include full join path // Even for preaggregation references without join path // It is necessary to be able to match query and preaggregation based on full join tree diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index b65f383b4357e..329bfd0ccdd68 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -94,6 +94,7 @@ export type CubeRefreshKey = export type PreAggregationDefinition = { type: 'autoRollup' | 'originalSql' | 'rollupJoin' | 'rollupLambda' | 'rollup', allowNonStrictDateRangeMatch?: boolean, + useOriginalSqlPreAggregations?: boolean, timeDimensionReference?: () => ToString, granularity: string, timeDimensionReferences: Array<{ dimension: () => ToString, granularity: string }>,