Skip to content

Commit e817458

Browse files
authored
Compute daily stats in BQ (hyperledger-labs#2151)
Signed-off-by: Itai Segall <itai.segall@digitalasset.com>
1 parent b77f046 commit e817458

File tree

4 files changed

+270
-40
lines changed

4 files changed

+270
-40
lines changed

cluster/pulumi/canton-network/bigquery-sql-codegen.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33
import { existsSync, unlinkSync, writeFileSync } from 'fs';
44

5-
import { allFunctions } from './src/bigQuery_functions';
5+
import { allScanFunctions } from './src/bigQuery_functions';
66

77
if (process.argv.length != 6) {
88
console.error(
@@ -19,6 +19,8 @@ const out = process.argv[5];
1919
if (existsSync(out)) {
2020
unlinkSync(out);
2121
}
22-
allFunctions.forEach(f =>
23-
writeFileSync(out, f.toSql(project, functionsDatasetName, scanDatasetName), { flag: 'a' })
22+
// Note that we're currently code-generating only the scan functions, not the dashboard ones, as the latter are not
23+
// tested in integration tests.
24+
allScanFunctions.forEach(f =>
25+
writeFileSync(out, f.toSql(project, functionsDatasetName, scanDatasetName, ''), { flag: 'a' })
2426
);

cluster/pulumi/canton-network/src/bigQuery.ts

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import {
2323
commandScriptPath,
2424
} from '@lfdecentralizedtrust/splice-pulumi-common/src/utils';
2525

26-
import { allFunctions } from './bigQuery_functions';
26+
import { allDashboardFunctions, allScanFunctions } from './bigQuery_functions';
2727

2828
interface ScanBigQueryConfig {
2929
dataset: string;
@@ -188,12 +188,56 @@ function installBigqueryDataset(scanBigQuery: ScanBigQueryConfig): gcp.bigquery.
188188
});
189189
}
190190

191+
function installDashboardsDataset(): gcp.bigquery.Dataset {
192+
const datasetName = 'dashboards';
193+
const dataset = new gcp.bigquery.Dataset(datasetName, {
194+
datasetId: datasetName,
195+
friendlyName: `${datasetName} Dataset`,
196+
location: cloudsdkComputeRegion(),
197+
deleteContentsOnDestroy: true,
198+
labels: {
199+
cluster: CLUSTER_BASENAME,
200+
},
201+
});
202+
203+
const dataTableName = 'dashboards-data';
204+
new gcp.bigquery.Table(
205+
dataTableName,
206+
{
207+
datasetId: dataset.datasetId,
208+
tableId: dataTableName,
209+
deletionProtection: protectCloudSql,
210+
friendlyName: `${dataTableName} Table`,
211+
schema: JSON.stringify([
212+
{ name: 'as_of_record_time', type: 'TIMESTAMP', mode: 'REQUIRED' },
213+
{ name: 'migration_id', type: 'INT64', mode: 'REQUIRED' },
214+
{ name: 'locked', type: 'BIGNUMERIC' },
215+
{ name: 'unlocked', type: 'BIGNUMERIC' },
216+
{ name: 'current_supply_total', type: 'BIGNUMERIC' },
217+
{ name: 'unminted', type: 'BIGNUMERIC' },
218+
{ name: 'minted', type: 'BIGNUMERIC' },
219+
{ name: 'allowed_mint', type: 'BIGNUMERIC' },
220+
{ name: 'burned', type: 'BIGNUMERIC' },
221+
{ name: 'monthly_burn', type: 'BIGNUMERIC' },
222+
{ name: 'num_amulet_holders', type: 'INT64' },
223+
{ name: 'num_active_validators', type: 'INT64' },
224+
{ name: 'average_tps', type: 'FLOAT64' },
225+
{ name: 'peak_tps', type: 'FLOAT64' },
226+
]),
227+
},
228+
{ dependsOn: [dataset] }
229+
);
230+
231+
return dataset;
232+
}
233+
191234
function installFunctions(
192235
scanDataset: gcp.bigquery.Dataset,
236+
dashboardsDataset: gcp.bigquery.Dataset,
193237
dependsOn: pulumi.Resource[]
194238
): gcp.bigquery.Dataset {
195239
const datasetName = 'functions';
196-
const dataset = new gcp.bigquery.Dataset(datasetName, {
240+
const functionsDataset = new gcp.bigquery.Dataset(datasetName, {
197241
datasetId: datasetName,
198242
friendlyName: `${datasetName} Dataset`,
199243
location: cloudsdkComputeRegion(),
@@ -207,17 +251,34 @@ function installFunctions(
207251
// We don't just run allFunctions.map() because we want to sequence the creation, since every function
208252
// might depend on those before it.
209253
let lastResource: gcp.bigquery.Routine | undefined = undefined;
210-
for (const f in allFunctions) {
211-
lastResource = allFunctions[f].toPulumi(
254+
for (const f in allScanFunctions) {
255+
lastResource = allScanFunctions[f].toPulumi(
212256
project,
213-
dataset,
257+
functionsDataset,
258+
functionsDataset,
214259
scanDataset,
215-
lastResource ? [lastResource] : dependsOn
260+
dashboardsDataset,
261+
lastResource
262+
? [lastResource]
263+
: [...dependsOn, functionsDataset, scanDataset, dashboardsDataset]
264+
);
265+
}
266+
267+
for (const f in allDashboardFunctions) {
268+
lastResource = allDashboardFunctions[f].toPulumi(
269+
project,
270+
dashboardsDataset,
271+
functionsDataset,
272+
scanDataset,
273+
dashboardsDataset,
274+
lastResource
275+
? [lastResource]
276+
: [...dependsOn, functionsDataset, scanDataset, dashboardsDataset]
216277
);
217278
}
218279
});
219280

220-
return dataset;
281+
return functionsDataset;
221282
}
222283

223284
/* TODO (DACH-NY/canton-network-internal#341) remove this comment when enabled on all relevant clusters
@@ -445,6 +506,7 @@ export function configureScanBigQuery(
445506
dataset,
446507
pubRepSlots
447508
);
448-
installFunctions(dataset, [stream]);
509+
const dashboardsDataset = installDashboardsDataset();
510+
installFunctions(dataset, dashboardsDataset, [stream]);
449511
return;
450512
}

cluster/pulumi/canton-network/src/bigQuery_functions.ts

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
BQArray,
77
BQColumn,
88
BQFunctionArgument,
9+
BQProcedure,
910
BQScalarFunction,
1011
BQTableFunction,
1112
FLOAT64,
@@ -21,7 +22,7 @@ import {
2122
* We also support codegen of sql statements that create these functions in BigQuery, which is currently used for
2223
* the integration test in ScanTotalSupplyBigQueryIntegrationTest.
2324
*
24-
* Note that the functions are parameterized with $$FUNCTIONS_DATASET$$ and $$SCAN_DATASET$$ placeholders that are replaced
25+
* Note that the functions are parameterized with $$FUNCTIONS_DATASET$$, $$SCAN_DATASET$$ and $$DASHBOARDS_DATASET$$ placeholders that are replaced
2526
* by Pulumi and codegen, to point to the correct datasets. Any reference to a table in the scan dataset must use the
2627
* $$SCAN_DATASET$$ placeholder, e.g. `$$SCAN_DATASET$$.scan_sv_1_update_history_creates`. Similarly, all references to
2728
* another function must use the $$FUNCTIONS_DATASET$$ placeholder, e.g. `$$FUNCTIONS_DATASET$$.daml_record_path`.
@@ -544,16 +545,83 @@ const all_stats = new BQTableFunction(
544545
\`$$FUNCTIONS_DATASET$$.unminted\`(as_of_record_time, migration_id) as unminted,
545546
\`$$FUNCTIONS_DATASET$$.minted\`(as_of_record_time, migration_id) as minted,
546547
\`$$FUNCTIONS_DATASET$$.minted\`(as_of_record_time, migration_id) + \`$$FUNCTIONS_DATASET$$.unminted\`(as_of_record_time, migration_id) as allowed_mint,
547-
\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id) as burned,
548-
\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id) - \`$$FUNCTIONS_DATASET$$.burned\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY), \`$$FUNCTIONS_DATASET$$.migration_id_at_time\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY))) as monthly_burn,
548+
IFNULL(\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id), 0) as burned,
549+
IFNULL(\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id) - \`$$FUNCTIONS_DATASET$$.burned\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY), \`$$FUNCTIONS_DATASET$$.migration_id_at_time\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY))), 0) as monthly_burn,
549550
\`$$FUNCTIONS_DATASET$$.num_amulet_holders\`(as_of_record_time, migration_id) as num_amulet_holders,
550551
\`$$FUNCTIONS_DATASET$$.num_active_validators\`(as_of_record_time, migration_id) as num_active_validators,
551-
\`$$FUNCTIONS_DATASET$$.average_tps\`(as_of_record_time, migration_id) as average_tps,
552-
\`$$FUNCTIONS_DATASET$$.peak_tps\`(as_of_record_time, migration_id) as peak_tps
552+
IFNULL(\`$$FUNCTIONS_DATASET$$.average_tps\`(as_of_record_time, migration_id), 0.0) as average_tps,
553+
IFNULL(\`$$FUNCTIONS_DATASET$$.peak_tps\`(as_of_record_time, migration_id), 0.0) as peak_tps
553554
`
554555
);
555556

556-
export const allFunctions = [
557+
const all_days_since_genesis = new BQTableFunction(
558+
'all_days_since_genesis',
559+
[],
560+
[new BQColumn('as_of_record_time', TIMESTAMP)],
561+
`
562+
-- Generate all days since genesis (first record time in the scan dataset) until today.
563+
SELECT
564+
TIMESTAMP(day) as as_of_record_time
565+
FROM
566+
UNNEST(
567+
GENERATE_DATE_ARRAY(
568+
-- DATE(
569+
-- TIMESTAMP_MICROS((SELECT MIN(record_time) FROM \`$$SCAN_DATASET$$.scan_sv_1_update_history_exercises\`))
570+
--),
571+
-- TODO(DACH-NY/canton-network-internal#1461): for now we compute only the last 30 days until we confirm costs, and will
572+
-- backfill to genesis later.
573+
DATE(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)),
574+
CURRENT_DATE
575+
)
576+
) as day
577+
`
578+
);
579+
580+
const days_with_missing_stats = new BQTableFunction(
581+
'days_with_missing_stats',
582+
[],
583+
[new BQColumn('as_of_record_time', TIMESTAMP)],
584+
`
585+
-- Find all days since genesis for which we do not have a stats entry at all, or it's lacking some fields.
586+
SELECT as_of_record_time
587+
FROM \`$$DASHBOARDS_DATASET$$.all_days_since_genesis\`()
588+
EXCEPT DISTINCT
589+
SELECT
590+
as_of_record_time
591+
FROM \`$$DASHBOARDS_DATASET$$.dashboards-data\`
592+
WHERE
593+
locked IS NOT NULL
594+
AND unlocked IS NOT NULL
595+
AND current_supply_total IS NOT NULL
596+
AND unminted IS NOT NULL
597+
AND minted IS NOT NULL
598+
AND allowed_mint IS NOT NULL
599+
AND burned IS NOT NULL
600+
AND monthly_burn IS NOT NULL
601+
AND num_amulet_holders IS NOT NULL
602+
AND num_active_validators IS NOT NULL
603+
AND average_tps IS NOT NULL
604+
AND peak_tps IS NOT NULL
605+
`
606+
);
607+
608+
const fill_all_stats = new BQProcedure(
609+
'fill_all_stats',
610+
[],
611+
`
612+
FOR t IN
613+
(SELECT * FROM \`$$DASHBOARDS_DATASET$$.days_with_missing_stats\`())
614+
DO
615+
DELETE FROM \`$$DASHBOARDS_DATASET$$.dashboards-data\` WHERE as_of_record_time = t.as_of_record_time;
616+
617+
INSERT INTO \`da-cn-scratchnet.dashboards.dashboards-data\`
618+
SELECT * FROM \`$$FUNCTIONS_DATASET$$.all_stats\`(t.as_of_record_time, 0);
619+
620+
END FOR;
621+
`
622+
);
623+
624+
export const allScanFunctions = [
557625
iso_timestamp,
558626
daml_prim_path,
559627
daml_record_path,
@@ -580,3 +648,9 @@ export const allFunctions = [
580648
peak_tps,
581649
all_stats,
582650
];
651+
652+
export const allDashboardFunctions = [
653+
all_days_since_genesis,
654+
days_with_missing_stats,
655+
fill_all_stats,
656+
];

0 commit comments

Comments
 (0)