Skip to content

feat(cli-vector): analyse mbtiles BM-1270 #3444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
533a9b6
Update the extract cli to create stac files for cache mbtiles
Wentao-Kuang Apr 24, 2025
94373d6
Create mbtiles cli
Wentao-Kuang Apr 28, 2025
b51da1a
Add some to tasks
Wentao-Kuang Apr 28, 2025
ff52099
feat(cli-vector): introduce logic for overriding feature metadata and…
tawera-manaena May 25, 2025
0b9eb47
Remove chunkd package from cli-vector
Wentao-Kuang May 25, 2025
ba271d9
Fix dockerfile
Wentao-Kuang May 25, 2025
602fc5e
Revert the container workflow changes
Wentao-Kuang May 25, 2025
4d22ca8
refactor(cli-vector): refine logic for capturing and writing gen-ndjs…
tawera-manaena May 26, 2025
9f4a44c
feat(cli-vector): new cli to join mbtiles BM-1269 (#3437)
ccbblin May 26, 2025
bd6ad9b
Add TileJoin into the cli-vector
Wentao-Kuang Jun 2, 2025
b0f4350
Tidy up the packages installed.
Wentao-Kuang Jun 3, 2025
11f6259
Remove @types/mapbox__geojson-area
Wentao-Kuang Jun 3, 2025
1a1ccf4
fix(cli-vector): correct VectorGeoFeature type
tawera-manaena Jun 3, 2025
8c0a3c6
fix(cli-vector): restore p-limit dependency
tawera-manaena Jun 3, 2025
1cca742
fix(cli-vector): restore VectorGeoFeature type
tawera-manaena Jun 3, 2025
3471b96
feat(cli-vector): analyse mbtiles
ccbblin May 12, 2025
6c58bdb
remove result from analysis folder
ccbblin May 20, 2025
08f1614
write report to aws
ccbblin May 23, 2025
8e947f5
changes for rebase from feat/etl-create
ccbblin Jun 2, 2025
e4354ed
add mbtiles target
ccbblin Jun 3, 2025
a177657
write output mbtiles for argo task analysis
ccbblin Jun 3, 2025
55d927d
more rebase changes
ccbblin Jun 3, 2025
105c24f
lint fix
ccbblin Jun 3, 2025
acdce6e
rebase fix
ccbblin Jun 4, 2025
682e99d
pull mbtiles locally if on s3
ccbblin Jun 4, 2025
5bcddd3
need to head file for perms
ccbblin Jun 4, 2025
8e0c6f4
add analysis folder with template
ccbblin Jun 4, 2025
ac7ef50
more argo changes
ccbblin Jun 4, 2025
6a32ef2
add some logging
ccbblin Jun 5, 2025
cf0825b
more logging
ccbblin Jun 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/containers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ jobs:
cp -r packages/lambda-tiler/static/ packages/server/
cp -r packages/lambda-tiler/static/ packages/cli/
cp -r packages/cli-vector/schema/ packages/cli/
cp -r packages/cli-vector/analysis/ packages/cli/

- name: Create docker metadata
id: meta
Expand Down
28 changes: 28 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions packages/cli-vector/analysis/template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{{#data}}

# Zoom - {{zoom}}

## Biggest Tile

| X | Y | Z | Size |
| ------------- | ------------- | ------------- | ---------------- |
| {{maxTile.x}} | {{maxTile.y}} | {{maxTile.z}} | {{maxTile.size}} |

## Distributions

| Distribution | Number Of Tiles | Percentage |
| ------------ | --------------- | ---------- |
{{#distributions}}
| {{{distribution}}} | {{{tiles}}} | {{{percentage}}} |
{{/distributions}}

### Layers

| Layer Name | Number Of Features | Total Geometry Sizes | Total Attribute Sizes | Total Sizes |
| ---------- | ------------------ | -------------------- | --------------------- | ----------- |
{{#layers}}
| {{{name}}} | {{{features}}} | {{{totalGeometry}}} | {{{totalAttributes}}} | {{{totalSize}}} |
{{/layers}}

{{/data}}
3 changes: 3 additions & 0 deletions packages/cli-vector/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
},
"devDependencies": {
"@types/geojson": "^7946.0.7",
"@types/mustache": "^4.2.6",
"@types/polylabel": "^1.1.3",
"@types/tar-stream": "^2.2.2"
},
Expand All @@ -56,6 +57,8 @@
"@linzjs/docker-command": "^7.5.0",
"@linzjs/geojson": "^8.0.0",
"cmd-ts": "^0.12.1",
"mustache": "^4.2.0",
"object-sizeof": "^2.6.5",
"p-limit": "^6.2.0",
"polylabel": "^2.0.1",
"stac-ts": "^1.0.0",
Expand Down
190 changes: 190 additions & 0 deletions packages/cli-vector/src/cli/cli.analyse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import sq from 'node:sqlite';

import { fsa, Url } from '@basemaps/shared';
import { CliInfo } from '@basemaps/shared/build/cli/info.js';
import { getLogger, logArguments } from '@basemaps/shared/build/cli/log.js';
import { VectorTile } from '@mapbox/vector-tile';
import { command, option, positional } from 'cmd-ts';
import { readFileSync } from 'fs';
import Mustache from 'mustache';
import sizeof from 'object-sizeof';
import { basename } from 'path';
import Protobuf from 'pbf';
import { gunzip } from 'zlib';

/**
 * Location and size of a single tile; used to report the biggest tile of a zoom level.
 */
interface TileInfo {
/** Tile column, selected from `tile_column` in the mbtiles `tiles` table. */
x: number;
/** Tile row after the query flips it: `(1 << zoom_level) - 1 - tile_row`. */
y: number;
/** Zoom level of the tile. */
z: number;
/** Length of the gunzipped tile buffer (divided by 1000 to print KB). */
max: number;
/** Printable form of `max`, e.g. `12.3KB`; filled in once the zoom level has been scanned. */
size?: string;
}

/**
 * One printable row of the tile-size histogram for a zoom level.
 */
interface Distribution {
/** Size bucket label as returned by `distribution()`, e.g. `0-10KB`. */
distribution: string;
/** Number of tiles that fell into this bucket. */
tiles: number;
/** Share of the zoom level's tiles in this bucket, one decimal place with a `%` suffix. */
percentage: string;
}

/**
 * Numeric running totals for one layer, accumulated across every tile of a zoom level.
 */
interface LayerSum {
/** Total number of features seen in the layer. */
features: number;
/** Meant to hold the summed `object-sizeof` estimate of the features' geometries. */
totalGeometry: number;
/** Meant to hold the summed `object-sizeof` estimate of the features' properties. */
totalAttributes: number;
}

/**
 * Printable per-layer row handed to the report template; sizes are pre-formatted strings.
 */
interface LayerInfo {
/** Layer name as keyed in the vector tile's `layers` map. */
name: string;
/** Total number of features in the layer for the zoom level. */
features: number;
/** Formatted geometry total, e.g. `12.3KB`. */
totalGeometry: string;
/** Formatted attribute total, e.g. `12.3KB`. */
totalAttributes: string;
/** Formatted sum of the geometry and attribute totals. */
totalSize: string;
}

/**
 * Everything the report template renders for a single zoom level.
 */
interface AnalysisData {
/** Zoom level these statistics describe. */
zoom: number;
/** Largest tile found at this zoom (all zeros when no tile was larger than 0 bytes). */
maxTile: TileInfo;
/** Tile-size histogram rows, one per bucket that received at least one tile. */
distributions: Distribution[];
/** Per-layer totals, one entry per layer name encountered. */
layers: LayerInfo[];
}

/**
 * Map a tile size onto the label of the histogram bucket it belongs to.
 *
 * Buckets are checked in ascending order and each upper bound is inclusive,
 * so a size of exactly 10000 is reported as `0-10KB`.
 *
 * @param size Tile size in bytes.
 * @returns The bucket label, or `>500KB` when no bounded bucket matches.
 */
function distribution(size: number): string {
  const buckets: Array<[number, string]> = [
    [10000, '0-10KB'],
    [50000, '10-50KB'],
    [100000, '50-100KB'],
    [200000, '100-200KB'],
    [500000, '200-500KB'],
  ];
  for (const [upperBound, label] of buckets) {
    if (size <= upperBound) return label;
  }
  return '>500KB';
}

/**
 * Command line arguments accepted by the `analyse` command.
 */
export const AnalyseArgs = {
// Shared logging flags from @basemaps/shared.
...logArguments,
// Positional: the mbtiles to analyse; any protocol other than `file:` is downloaded first by the handler.
path: positional({ type: Url, displayName: 'path', description: 'Path to mbtiles' }),
// Mustache template for the report. The handler reads it with `readFileSync`,
// so the relative default is resolved against the process working directory.
template: option({
long: 'template',
defaultValue: () => 'analysis/template.md',
defaultValueIsSerializable: true,
description: 'Path of analysis template',
}),
// Base location of the result; the handler writes `report.md` resolved against this URL.
target: option({
long: 'target',
type: Url,
description: 'Target location for the result file',
}),
};

/**
 * Gunzip a tile payload.
 *
 * Rejects with the zlib error when decompression fails, rather than resolving
 * `undefined` and failing later with an unrelated TypeError.
 */
function gunzipTile(data: Buffer): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    gunzip(data, (err, inflated) => {
      if (err != null) reject(err);
      else resolve(inflated);
    });
  });
}

/**
 * `analyse` command: scan a vector mbtiles file and write a markdown size report.
 *
 * For every zoom level 0-15 it gunzips and parses each tile, then records:
 *  - the biggest tile (by uncompressed size),
 *  - a histogram of uncompressed tile sizes,
 *  - per-layer feature counts plus `object-sizeof` estimates of geometry and attribute sizes.
 *
 * The collected data is rendered through the Mustache template given by `--template`
 * and written as `report.md` relative to `--target`.
 */
export const AnalyseCommand = command({
  name: 'analyse',
  version: CliInfo.version,
  description: 'Analyse vector mbtiles',
  args: AnalyseArgs,
  async handler(args) {
    const logger = getLogger(this, args, 'cli-vector');
    logger.info('AnalyseMbTiles: Start');

    const analysisData: AnalysisData[] = [];

    // node:sqlite needs a local path, so anything that is not already a local file is downloaded first.
    let mbtilesFile = args.path.pathname;
    if (args.path.protocol !== 'file:') {
      logger.info('Download Start');
      const fileName = basename(args.path.pathname);
      const localFile = fsa.toUrl(`tmp/${fileName}`);
      // NOTE(review): the head request looks like a permissions/existence probe before streaming — confirm.
      await fsa.head(args.path);
      const stream = fsa.readStream(args.path);
      await fsa.write(localFile, stream);
      mbtilesFile = localFile.pathname;
      logger.info({ mbtilesFile }, 'Download End');
    }

    logger.info({ mbtilesFile }, 'Read mbtiles');

    const db = new sq.DatabaseSync(mbtilesFile);
    try {
      // Prepared once and re-run per zoom level. `y` is the stored `tile_row` flipped
      // within the zoom's 2^z rows (presumably TMS -> XYZ; confirm against the mbtiles producer).
      const tileQuery = db.prepare(
        'SELECT tile_column as x, ((1 << zoom_level) - 1 - tile_row) as y, zoom_level as z, tile_data FROM tiles WHERE zoom_level=?',
      );

      const MaxZoom = 15;
      for (let zoom = 0; zoom <= MaxZoom; zoom++) {
        const result = tileQuery.all(zoom) as Array<{ x: number; y: number; z: number; tile_data: Buffer }>;

        let maxTile: TileInfo = { x: 0, y: 0, z: 0, max: 0 };
        const layersSum = new Map<string, LayerSum>();
        const distributionSum = new Map<string, number>();

        for (const row of result) {
          const buffer = await gunzipTile(row.tile_data);
          const tile = new VectorTile(new Protobuf(buffer));

          // Sizes are measured on the uncompressed tile.
          if (buffer.length > maxTile.max) maxTile = { x: row.x, y: row.y, z: row.z, max: buffer.length };

          // Prepare distribution
          const bucket = distribution(buffer.length);
          distributionSum.set(bucket, (distributionSum.get(bucket) ?? 0) + 1);

          // Prepare layer information
          for (const [name, layer] of Object.entries(tile.layers)) {
            let features = layer.length;

            // Calculate the total features size
            let totalGeometry = 0;
            let totalAttributes = 0;
            for (let f = 0; f < layer.length; f++) {
              const feature = layer.feature(f);
              // Attributes come from the feature properties, geometry from the decoded geometry.
              totalAttributes += sizeof(feature.properties);
              totalGeometry += sizeof(feature.loadGeometry());
            }

            // Cumulative total features and sizes
            const layerInfo = layersSum.get(name);
            if (layerInfo) {
              features += layerInfo.features;
              totalGeometry += layerInfo.totalGeometry;
              totalAttributes += layerInfo.totalAttributes;
            }
            layersSum.set(name, { features, totalGeometry, totalAttributes });
          }
        }

        // Prepare printable max tile
        maxTile.size = `${(maxTile.max / 1000).toFixed(1)}KB`;

        // Prepare printable distributions (only populated when the zoom has tiles, so no divide-by-zero)
        const distributions: Distribution[] = [];
        for (const [label, tiles] of distributionSum) {
          const percentage = `${((tiles * 100) / result.length).toFixed(1)}%`;
          distributions.push({ distribution: label, tiles, percentage });
        }

        // Prepare printable layers
        const layers: LayerInfo[] = [];
        for (const [name, layerInfo] of layersSum) {
          const features = layerInfo.features;
          const totalGeometry = `${(layerInfo.totalGeometry / 1000).toFixed(1)}KB`;
          const totalAttributes = `${(layerInfo.totalAttributes / 1000).toFixed(1)}KB`;
          const totalSize = `${((layerInfo.totalGeometry + layerInfo.totalAttributes) / 1000).toFixed(1)}KB`;
          layers.push({ name, features, totalGeometry, totalAttributes, totalSize });
        }

        analysisData.push({ zoom, maxTile, distributions, layers });
      }
    } finally {
      // Always release the sqlite handle, even when a tile fails to decode.
      db.close();
    }

    const template = readFileSync(args.template).toString();
    const output = Mustache.render(template, { data: analysisData });
    // `report.md` is resolved relative to the target URL, so the target should end with `/`
    // to be treated as a folder.
    await fsa.write(new URL('report.md', args.target), Buffer.from(output, 'utf8'));
  },
});
4 changes: 4 additions & 0 deletions packages/cli-vector/src/cli/cli.join.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ export const JoinCommand = command({
if (isArgo()) {
const target = new URL(`topographic/${CliId}/${args.filename}.tar.co`, bucketPath);
await fsa.write(fsa.toUrl('/tmp/target'), JSON.stringify([target]));
const mbTilesTarget = new URL(`topographic/${CliId}/${args.filename}.mbtiles`, bucketPath);
await fsa.write(fsa.toUrl('/tmp/mbTilesTarget'), JSON.stringify(mbTilesTarget));
const analyseTarget = new URL(`topographic/${CliId}/`, bucketPath);
await fsa.write(fsa.toUrl('/tmp/analyseTarget'), JSON.stringify(analyseTarget));
}
},
});
2 changes: 2 additions & 0 deletions packages/cli-vector/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ import { subcommands } from 'cmd-ts';
import { ExtractCommand } from './cli/cli.extract.js';
import { CreateCommand } from './cli/cli.create.js';
import { JoinCommand } from './cli/cli.join.js';
import { AnalyseCommand } from './cli/cli.analyse.js';

/**
 * Root `vector` command group; each key under `cmds` is the subcommand name
 * exposed on the command line.
 */
export const VectorCli = subcommands({
name: 'vector',
cmds: {
extract: ExtractCommand,
create: CreateCommand,
join: JoinCommand,
analyse: AnalyseCommand,
},
});
2 changes: 2 additions & 0 deletions packages/cli/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ COPY ./basemaps-smoke*.tgz /app/
COPY ./static/ /app/static/
# Copy the Vector Schema into the image
COPY ./schema/ /app/schema/
# Copy the Vector Analysis into the image
COPY ./analysis/ /app/analysis/

RUN npm install ./basemaps-smoke*.tgz

Expand Down