Skip to content

Commit 01c5975

Browse files
initial geohash implementation
1 parent 9f44292 commit 01c5975

File tree

4 files changed

+104
-7
lines changed

4 files changed

+104
-7
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ tqdm = "^4.65.0"
3030
click-log = "^0.4.0"
3131
rasterio = "^1.3.6"
3232

33+
[tool.poetry.extras]
34+
h3 = ["h3pandas"]
35+
3336
[tool.poetry.group.dev.dependencies]
3437
pytest = "^7.2.2"
3538
black = "*"

raster2dggs/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from raster2dggs import __version__
44
from raster2dggs.h3 import h3
5+
from raster2dggs.geohash import geohash
56

67
# If the program does terminal interaction, make it output a short
78
# notice like this when it starts in an interactive mode:
@@ -19,6 +20,7 @@ def cli():
1920

2021

2122
cli.add_command(h3)
23+
cli.add_command(geohash)
2224

2325

2426
def main():

raster2dggs/geohash.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import logging
2+
import numpy as np
3+
from pathlib import Path
4+
from typing import Union
5+
6+
import click
7+
import click_log
8+
import dask.dataframe as dd
9+
import dask_geopandas as dgpd
10+
import geopandas as gpd
11+
import pandas as pd
12+
import rasterio as rio
13+
from rasterio.vrt import WarpedVRT
14+
15+
from raster2dggs import __version__
16+
17+
LOGGER = logging.getLogger(__name__)
18+
click_log.basic_config(LOGGER)
19+
20+
@click.command(context_settings={"show_default": True})
21+
@click_log.simple_verbosity_option(LOGGER)
22+
@click.argument("raster_input", type=click.Path(), nargs=1)
23+
@click.argument("output_directory", type=click.Path(), nargs=1)
24+
@click.option(
25+
"-p",
26+
"--precision",
27+
required=True,
28+
type=click.IntRange(1,12),
29+
default=5,
30+
help="Geohash precision (character count)"
31+
)
32+
@click.option(
33+
"-s",
34+
"--string",
35+
required=True,
36+
type=bool,
37+
default=True,
38+
help="To use string or integer Geohash"
39+
)
40+
@click.version_option(version=__version__)
41+
def geohash(
42+
raster_input: Union[str, Path],
43+
output_directory: Union[str, Path],
44+
precision: int,
45+
string: bool,
46+
):
47+
"""
48+
Ingest a raster image and index it using Geohash.
49+
50+
RASTER_INPUT is the path to input raster data; prepend with protocol like s3:// or hdfs:// for remote data.
51+
OUTPUT_DIRECTORY should be a directory, not a file, as it will be the write location for an Apache Parquet data store, with partitions equivalent to parent cells of target cells at a fixed offset. However, this can also be remote (use the appropriate prefix, e.g. s3://).
52+
"""
53+
with rio.Env(CHECK_WITH_INVERT_PROJ=True):
54+
with rio.open(raster_input) as src:
55+
with WarpedVRT(
56+
src, src_crs=src.crs, crs=rio.crs.CRS.from_epsg(4326)
57+
) as vrt:
58+
transform = vrt.transform
59+
data = vrt.read(1) # TODO more bands
60+
rows, cols = np.indices(data.shape)
61+
xs, ys = rio.transform.xy(transform, rows, cols)
62+
xs = np.array(xs).flatten()
63+
ys = np.array(ys).flatten()
64+
data = data.flatten()
65+
gdf = gpd.GeoDataFrame({
66+
"geometry": gpd.points_from_xy(xs, ys),
67+
"B01": data
68+
}, crs=vrt.crs)
69+
70+
ddf = dgpd.from_geopandas(gdf, npartitions=4)
71+
LOGGER.debug(ddf)
72+
73+
ddf['geohash'] = ddf.geohash(as_string=string, precision=precision)
74+
75+
aggregated_ddf = ddf.groupby('geohash').B01.agg(['mean', 'sum', 'count']).reset_index()
76+
77+
aggregated_ddf.columns = ['geohash', 'mean', 'sum', 'count']
78+
79+
def enforce_schema(df):
80+
df['mean'] = df['mean'].round(0).astype('int16')
81+
# df['sum'] = df['sum'].astype('int16')
82+
df['count'] = df['count'].astype('int16')
83+
return df
84+
85+
aggregated_ddf = aggregated_ddf.map_partitions(enforce_schema, meta={
86+
'geohash': 'str',
87+
'mean': 'int16',
88+
'sum': 'int16',
89+
'count': 'int16'
90+
})
91+
92+
aggregated_ddf.to_parquet(output_directory, engine="pyarrow")

raster2dggs/h3.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def _get_parent_res(parent_res: Union[None, int], resolution: int) -> int:
6464
Used for intermediate re-partioning.
6565
"""
6666
return (
67-
int(parent_res)
67+
parent_res
6868
if parent_res is not None
6969
else max(MIN_H3, (resolution - DEFAULT_PARENT_OFFSET))
7070
)
@@ -310,14 +310,14 @@ def _address_boundary_issues(
310310
"-r",
311311
"--resolution",
312312
required=True,
313-
type=click.Choice(list(map(str, range(MIN_H3, MAX_H3 + 1)))),
313+
type=click.IntRange(MIN_H3, MAX_H3),
314314
help="H3 resolution to index",
315315
)
316316
@click.option(
317317
"-pr",
318318
"--parent_res",
319319
required=False,
320-
type=click.Choice(list(map(str, range(MIN_H3, MAX_H3 + 1)))),
320+
type=click.IntRange(MIN_H3, MAX_H3),
321321
help="H3 Parent resolution to index and aggregate to. Defaults to resolution - 6",
322322
)
323323
@click.option(
@@ -379,8 +379,8 @@ def _address_boundary_issues(
379379
def h3(
380380
raster_input: Union[str, Path],
381381
output_directory: Union[str, Path],
382-
resolution: str,
383-
parent_res: str,
382+
resolution: int,
383+
parent_res: int,
384384
upscale: int,
385385
compression: str,
386386
threads: int,
@@ -398,7 +398,7 @@ def h3(
398398
OUTPUT_DIRECTORY should be a directory, not a file, as it will be the write location for an Apache Parquet data store, with partitions equivalent to parent cells of target cells at a fixed offset. However, this can also be remote (use the appropriate prefix, e.g. s3://).
399399
"""
400400
tempfile.tempdir = tempdir if tempdir is not None else tempfile.tempdir
401-
if parent_res is not None and not int(parent_res) < int(resolution):
401+
if parent_res is not None and not parent_res < resolution:
402402
raise ParentResolutionException(
403403
"Parent resolution ({pr}) must be less than target resolution ({r})".format(
404404
pr=parent_res, r=resolution
@@ -441,7 +441,7 @@ def h3(
441441
_initial_index(
442442
raster_input,
443443
output_directory,
444-
int(resolution),
444+
resolution,
445445
parent_res,
446446
warp_args,
447447
**kwargs,

0 commit comments

Comments
 (0)