Skip to content

Commit e817458

Browse files
authored
Compute daily stats in BQ (hyperledger-labs#2151)
Signed-off-by: Itai Segall <itai.segall@digitalasset.com>
1 parent b77f046 commit e817458

File tree

4 files changed

+270
-40
lines changed

4 files changed

+270
-40
lines changed

cluster/pulumi/canton-network/bigquery-sql-codegen.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33
import { existsSync, unlinkSync, writeFileSync } from 'fs';
44

5-
import { allFunctions } from './src/bigQuery_functions';
5+
import { allScanFunctions } from './src/bigQuery_functions';
66

77
if (process.argv.length != 6) {
88
console.error(
@@ -19,6 +19,8 @@ const out = process.argv[5];
1919
if (existsSync(out)) {
2020
unlinkSync(out);
2121
}
22-
allFunctions.forEach(f =>
23-
writeFileSync(out, f.toSql(project, functionsDatasetName, scanDatasetName), { flag: 'a' })
22+
// Note that we're currently code-generating only the scan functions, not the dashboard ones, as the latter are not
23+
// tested in integration tests.
24+
allScanFunctions.forEach(f =>
25+
writeFileSync(out, f.toSql(project, functionsDatasetName, scanDatasetName, ''), { flag: 'a' })
2426
);

cluster/pulumi/canton-network/src/bigQuery.ts

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import {
2323
commandScriptPath,
2424
} from '@lfdecentralizedtrust/splice-pulumi-common/src/utils';
2525

26-
import { allFunctions } from './bigQuery_functions';
26+
import { allDashboardFunctions, allScanFunctions } from './bigQuery_functions';
2727

2828
interface ScanBigQueryConfig {
2929
dataset: string;
@@ -188,12 +188,56 @@ function installBigqueryDataset(scanBigQuery: ScanBigQueryConfig): gcp.bigquery.
188188
});
189189
}
190190

191+
function installDashboardsDataset(): gcp.bigquery.Dataset {
192+
const datasetName = 'dashboards';
193+
const dataset = new gcp.bigquery.Dataset(datasetName, {
194+
datasetId: datasetName,
195+
friendlyName: `${datasetName} Dataset`,
196+
location: cloudsdkComputeRegion(),
197+
deleteContentsOnDestroy: true,
198+
labels: {
199+
cluster: CLUSTER_BASENAME,
200+
},
201+
});
202+
203+
const dataTableName = 'dashboards-data';
204+
new gcp.bigquery.Table(
205+
dataTableName,
206+
{
207+
datasetId: dataset.datasetId,
208+
tableId: dataTableName,
209+
deletionProtection: protectCloudSql,
210+
friendlyName: `${dataTableName} Table`,
211+
schema: JSON.stringify([
212+
{ name: 'as_of_record_time', type: 'TIMESTAMP', mode: 'REQUIRED' },
213+
{ name: 'migration_id', type: 'INT64', mode: 'REQUIRED' },
214+
{ name: 'locked', type: 'BIGNUMERIC' },
215+
{ name: 'unlocked', type: 'BIGNUMERIC' },
216+
{ name: 'current_supply_total', type: 'BIGNUMERIC' },
217+
{ name: 'unminted', type: 'BIGNUMERIC' },
218+
{ name: 'minted', type: 'BIGNUMERIC' },
219+
{ name: 'allowed_mint', type: 'BIGNUMERIC' },
220+
{ name: 'burned', type: 'BIGNUMERIC' },
221+
{ name: 'monthly_burn', type: 'BIGNUMERIC' },
222+
{ name: 'num_amulet_holders', type: 'INT64' },
223+
{ name: 'num_active_validators', type: 'INT64' },
224+
{ name: 'average_tps', type: 'FLOAT64' },
225+
{ name: 'peak_tps', type: 'FLOAT64' },
226+
]),
227+
},
228+
{ dependsOn: [dataset] }
229+
);
230+
231+
return dataset;
232+
}
233+
191234
function installFunctions(
192235
scanDataset: gcp.bigquery.Dataset,
236+
dashboardsDataset: gcp.bigquery.Dataset,
193237
dependsOn: pulumi.Resource[]
194238
): gcp.bigquery.Dataset {
195239
const datasetName = 'functions';
196-
const dataset = new gcp.bigquery.Dataset(datasetName, {
240+
const functionsDataset = new gcp.bigquery.Dataset(datasetName, {
197241
datasetId: datasetName,
198242
friendlyName: `${datasetName} Dataset`,
199243
location: cloudsdkComputeRegion(),
@@ -207,17 +251,34 @@ function installFunctions(
207251
// We don't just run allFunctions.map() because we want to sequence the creation, since every function
208252
// might depend on those before it.
209253
let lastResource: gcp.bigquery.Routine | undefined = undefined;
210-
for (const f in allFunctions) {
211-
lastResource = allFunctions[f].toPulumi(
254+
for (const f in allScanFunctions) {
255+
lastResource = allScanFunctions[f].toPulumi(
212256
project,
213-
dataset,
257+
functionsDataset,
258+
functionsDataset,
214259
scanDataset,
215-
lastResource ? [lastResource] : dependsOn
260+
dashboardsDataset,
261+
lastResource
262+
? [lastResource]
263+
: [...dependsOn, functionsDataset, scanDataset, dashboardsDataset]
264+
);
265+
}
266+
267+
for (const f in allDashboardFunctions) {
268+
lastResource = allDashboardFunctions[f].toPulumi(
269+
project,
270+
dashboardsDataset,
271+
functionsDataset,
272+
scanDataset,
273+
dashboardsDataset,
274+
lastResource
275+
? [lastResource]
276+
: [...dependsOn, functionsDataset, scanDataset, dashboardsDataset]
216277
);
217278
}
218279
});
219280

220-
return dataset;
281+
return functionsDataset;
221282
}
222283

223284
/* TODO (DACH-NY/canton-network-internal#341) remove this comment when enabled on all relevant clusters
@@ -445,6 +506,7 @@ export function configureScanBigQuery(
445506
dataset,
446507
pubRepSlots
447508
);
448-
installFunctions(dataset, [stream]);
509+
const dashboardsDataset = installDashboardsDataset();
510+
installFunctions(dataset, dashboardsDataset, [stream]);
449511
return;
450512
}

cluster/pulumi/canton-network/src/bigQuery_functions.ts

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
BQArray,
77
BQColumn,
88
BQFunctionArgument,
9+
BQProcedure,
910
BQScalarFunction,
1011
BQTableFunction,
1112
FLOAT64,
@@ -21,7 +22,7 @@ import {
2122
* We also support codegen of sql statements that create these functions in BigQuery, which is currently used for
2223
* the integration test in ScanTotalSupplyBigQueryIntegrationTest.
2324
*
24-
* Note that the functions are parameterized with $$FUNCTIONS_DATASET$$ and $$SCAN_DATASET$$ placeholders that are replaced
25+
* Note that the functions are parameterized with $$FUNCTIONS_DATASET$$, $$SCAN_DATASET$$ and $$DASHBOARDS_DATASET$$ placeholders that are replaced
2526
* by Pulumi and codegen, to point to the correct datasets. Any reference to a table in the scan dataset must use the
2627
* $$SCAN_DATASET$$ placeholder, e.g. `$$SCAN_DATASET$$.scan_sv_1_update_history_creates`. Similarly, all references to
2728
* another function must use the $$FUNCTIONS_DATASET$$ placeholder, e.g. `$$FUNCTIONS_DATASET$$.daml_record_path`.
@@ -544,16 +545,83 @@ const all_stats = new BQTableFunction(
544545
\`$$FUNCTIONS_DATASET$$.unminted\`(as_of_record_time, migration_id) as unminted,
545546
\`$$FUNCTIONS_DATASET$$.minted\`(as_of_record_time, migration_id) as minted,
546547
\`$$FUNCTIONS_DATASET$$.minted\`(as_of_record_time, migration_id) + \`$$FUNCTIONS_DATASET$$.unminted\`(as_of_record_time, migration_id) as allowed_mint,
547-
\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id) as burned,
548-
\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id) - \`$$FUNCTIONS_DATASET$$.burned\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY), \`$$FUNCTIONS_DATASET$$.migration_id_at_time\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY))) as monthly_burn,
548+
IFNULL(\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id), 0) as burned,
549+
IFNULL(\`$$FUNCTIONS_DATASET$$.burned\`(as_of_record_time, migration_id) - \`$$FUNCTIONS_DATASET$$.burned\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY), \`$$FUNCTIONS_DATASET$$.migration_id_at_time\`(TIMESTAMP_SUB(as_of_record_time, INTERVAL 30 DAY))), 0) as monthly_burn,
549550
\`$$FUNCTIONS_DATASET$$.num_amulet_holders\`(as_of_record_time, migration_id) as num_amulet_holders,
550551
\`$$FUNCTIONS_DATASET$$.num_active_validators\`(as_of_record_time, migration_id) as num_active_validators,
551-
\`$$FUNCTIONS_DATASET$$.average_tps\`(as_of_record_time, migration_id) as average_tps,
552-
\`$$FUNCTIONS_DATASET$$.peak_tps\`(as_of_record_time, migration_id) as peak_tps
552+
IFNULL(\`$$FUNCTIONS_DATASET$$.average_tps\`(as_of_record_time, migration_id), 0.0) as average_tps,
553+
IFNULL(\`$$FUNCTIONS_DATASET$$.peak_tps\`(as_of_record_time, migration_id), 0.0) as peak_tps
553554
`
554555
);
555556

556-
export const allFunctions = [
557+
const all_days_since_genesis = new BQTableFunction(
558+
'all_days_since_genesis',
559+
[],
560+
[new BQColumn('as_of_record_time', TIMESTAMP)],
561+
`
562+
-- Generate all days since genesis (first record time in the scan dataset) until today.
563+
SELECT
564+
TIMESTAMP(day) as as_of_record_time
565+
FROM
566+
UNNEST(
567+
GENERATE_DATE_ARRAY(
568+
-- DATE(
569+
-- TIMESTAMP_MICROS((SELECT MIN(record_time) FROM \`$$SCAN_DATASET$$.scan_sv_1_update_history_exercises\`))
570+
--),
571+
-- TODO(DACH-NY/canton-network-internal#1461): for now we compute only the last 30 days until we confirm costs, and will
572+
-- backfill to genesis later.
573+
DATE(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)),
574+
CURRENT_DATE
575+
)
576+
) as day
577+
`
578+
);
579+
580+
const days_with_missing_stats = new BQTableFunction(
581+
'days_with_missing_stats',
582+
[],
583+
[new BQColumn('as_of_record_time', TIMESTAMP)],
584+
`
585+
-- Find all days since genesis for which we do not have a stats entry at all, or it's lacking some fields.
586+
SELECT as_of_record_time
587+
FROM \`$$DASHBOARDS_DATASET$$.all_days_since_genesis\`()
588+
EXCEPT DISTINCT
589+
SELECT
590+
as_of_record_time
591+
FROM \`$$DASHBOARDS_DATASET$$.dashboards-data\`
592+
WHERE
593+
locked IS NOT NULL
594+
AND unlocked IS NOT NULL
595+
AND current_supply_total IS NOT NULL
596+
AND unminted IS NOT NULL
597+
AND minted IS NOT NULL
598+
AND allowed_mint IS NOT NULL
599+
AND burned IS NOT NULL
600+
AND monthly_burn IS NOT NULL
601+
AND num_amulet_holders IS NOT NULL
602+
AND num_active_validators IS NOT NULL
603+
AND average_tps IS NOT NULL
604+
AND peak_tps IS NOT NULL
605+
`
606+
);
607+
608+
const fill_all_stats = new BQProcedure(
609+
'fill_all_stats',
610+
[],
611+
`
612+
FOR t IN
613+
(SELECT * FROM \`$$DASHBOARDS_DATASET$$.days_with_missing_stats\`())
614+
DO
615+
DELETE FROM \`$$DASHBOARDS_DATASET$$.dashboards-data\` WHERE as_of_record_time = t.as_of_record_time;
616+
617+
INSERT INTO \`da-cn-scratchnet.dashboards.dashboards-data\`
618+
SELECT * FROM \`$$FUNCTIONS_DATASET$$.all_stats\`(t.as_of_record_time, 0);
619+
620+
END FOR;
621+
`
622+
);
623+
624+
export const allScanFunctions = [
557625
iso_timestamp,
558626
daml_prim_path,
559627
daml_record_path,
@@ -580,3 +648,9 @@ export const allFunctions = [
580648
peak_tps,
581649
all_stats,
582650
];
651+
652+
export const allDashboardFunctions = [
653+
all_days_since_genesis,
654+
days_with_missing_stats,
655+
fill_all_stats,
656+
];

0 commit comments

Comments
 (0)