Skip to content

Commit 6df5fb2

Browse files
adds support for ISEA7H and ISEA9R (others supported by DGGAL incoming)
1 parent b9a19d4 commit 6df5fb2

26 files changed

+961
-30
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ conda*
44
.conda*
55
.vscode/
66
.spyproject/
7+
tests/data/input

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ In brief, to get started:
265265
- If you're on Windows, `pip install gdal` may be necessary before running the subsequent commands.
266266
- On Linux, install GDAL 3.6+ according to your platform-specific instructions, including development headers, i.e. `libgdal-dev`.
267267
- Create the virtual environment with `poetry init`. This will install necessary dependencies.
268-
- Subsequently, the virtual environment can be re-activated with `poetry shell`.
268+
- Subsequently, the virtual environment can be re-activated with `poetry env activate`.
269269
270270
If you run `poetry install`, the CLI tool will be aliased so you can simply use `raster2dggs` rather than `poetry run raster2dggs`, which is the alternative if you do not `poetry install`.
271271

poetry.lock

Lines changed: 223 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ maidenhead = "^1.8"
3838
s2sphere = "^0.2"
3939
pya5 = "^0.5"
4040
geoarrow-pyarrow = "^0.2.0"
41+
dggal = "^0.0.6"
4142

4243
[tool.poetry.group.dev.dependencies]
4344
pytest = "^7.2.2"

raster2dggs/cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from raster2dggs.maidenhead import maidenhead
88
from raster2dggs.s2 import s2
99
from raster2dggs.a5 import a5
10+
from raster2dggs.isea9r import isea9r
11+
from raster2dggs.isea7h import isea7h
1012

1113

1214
@click.group()
@@ -21,6 +23,8 @@ def cli():
2123
cli.add_command(maidenhead)
2224
cli.add_command(s2)
2325
cli.add_command(a5)
26+
cli.add_command(isea9r)
27+
cli.add_command(isea7h)
2428

2529

2630
def main():

raster2dggs/common.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def address_boundary_issues(
261261
}
262262
)
263263
out_meta.index = pd.Index([], name=index_col, dtype="object")
264-
264+
print(ddf.compute(), band_cols, out_meta)
265265
with TqdmCallback(desc=f"Aggregating{'/compacting' if kwargs['compact'] else ''}"):
266266
ddf = ddf.map_partitions(
267267
indexer.parent_groupby,
@@ -289,7 +289,11 @@ def address_boundary_issues(
289289

290290
write_tasks = [
291291
dask.delayed(write_partition_as_geoparquet)(
292-
part, geo_serialisation_method, output, partition_col, kwargs["compression"]
292+
part,
293+
geo_serialisation_method,
294+
output,
295+
partition_col,
296+
kwargs["compression"],
293297
)
294298
for part in delayed_parts
295299
]

raster2dggs/constants.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@
77
MIN_MAIDENHEAD, MAX_MAIDENHEAD = 1, 6
88
MIN_S2, MAX_S2 = 0, 30
99
MIN_A5, MAX_A5 = 0, 30
10+
MIN_ISEA9R, MAX_ISEA9R = (
11+
0,
12+
18,
13+
) # NB correct for packing into 64 bit integer? see https://dggal.org/docs/html/dggal/Classes/DGGRS/Virtual%20Methods/getMaxDGGRSZoneLevel.html
14+
MIN_ISEA7H, MAX_ISEA7H = (
15+
0,
16+
18,
17+
) # NB correct?
1018

1119
DEFAULT_NAME: str = "value"
1220

@@ -33,6 +41,12 @@
3341
"maidenhead": lambda resolution: MIN_MAIDENHEAD,
3442
"s2": lambda resolution: max(MIN_S2, (resolution - DEFAULT_PARENT_OFFSET)),
3543
"a5": lambda resolution: max(MIN_A5, (resolution - DEFAULT_PARENT_OFFSET)),
44+
"isea9r": lambda resolution: max(
45+
MIN_ISEA9R, (resolution - 5)
46+
), # NB use 5 for IS/VEA9R, and 10 for IS/VEA3H, and 8 for GNOSIS --- corresponds to ~64K sub-zones
47+
"isea7h": lambda resolution: max(
48+
MIN_ISEA7H, (resolution - DEFAULT_PARENT_OFFSET)
49+
)
3650
}
3751

3852

@@ -44,6 +58,8 @@ def zero_padding(dggs: str) -> int:
4458
"maidenhead": MAX_MAIDENHEAD,
4559
"s2": MAX_S2,
4660
"a5": MAX_A5,
61+
"isea9r": MAX_ISEA9R,
62+
"isea7h": MAX_ISEA7H,
4763
}
4864
max_res = max_res_lookup.get(dggs)
4965
if max_res is None:

raster2dggs/indexerfactory.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import raster2dggs.indexers.maidenheadrasterindexer as maidenheadrasterindexer
1111
import raster2dggs.indexers.s2rasterindexer as s2rasterindexer
1212
import raster2dggs.indexers.a5rasterindexer as a5rasterindexer
13+
import raster2dggs.indexers.dggalrasterindexer as dggalrasterindexer
1314

1415
"""
1516
Match DGGS name to indexer class name
@@ -21,6 +22,8 @@
2122
"maidenhead": maidenheadrasterindexer.MaidenheadRasterIndexer,
2223
"s2": s2rasterindexer.S2RasterIndexer,
2324
"a5": a5rasterindexer.A5RasterIndexer,
25+
"isea9r": dggalrasterindexer.ISEA9RRasterIndexer,
26+
"isea7h": dggalrasterindexer.ISEA7HRasterIndexer,
2427
}
2528

2629

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
"""
2+
@author: ndemaio
3+
"""
4+
5+
from abc import abstractmethod
6+
from functools import reduce
7+
from numbers import Number
8+
from typing import Tuple
9+
10+
import h3pandas # Necessary import despite lack of explicit use
11+
12+
import dggal
13+
import pandas as pd
14+
import pyarrow as pa
15+
import xarray as xr
16+
import numpy as np
17+
import shapely
18+
19+
import raster2dggs.constants as const
20+
21+
from raster2dggs.indexers.rasterindexer import RasterIndexer
22+
23+
24+
# TODO memoisation; needs to use hashable inputs and outputs (int)
25+
def apply_n_reduce(f, n, x):
26+
def step(v):
27+
return f(v)[0]
28+
29+
return reduce(lambda v, _: step(v), range(n), x)
30+
31+
32+
class DGGALRasterIndexer(RasterIndexer):
33+
"""
34+
Provides integration for Uber's H3 DGGS.
35+
"""
36+
37+
@property
38+
@abstractmethod
39+
def dggrs(self) -> dggal.DGGRS:
40+
raise NotImplementedError
41+
42+
@property
43+
@abstractmethod
44+
def refinementRatio(self) -> int:
45+
raise NotImplementedError
46+
47+
def index_func(
48+
self,
49+
sdf: xr.DataArray,
50+
resolution: int,
51+
parent_res: int,
52+
nodata: Number = np.nan,
53+
band_labels: Tuple[str] = None,
54+
) -> pa.Table:
55+
"""
56+
Index a raster window to a DGGRS.
57+
Subsequent steps are necessary to resolve issues at the boundaries of windows.
58+
If windows are very small, or in strips rather than blocks, processing may be slower
59+
than necessary and the recommendation is to write different windows in the source raster.
60+
61+
Implementation of interface function.
62+
"""
63+
sdf: pd.DataFrame = (
64+
sdf.to_dataframe().drop(columns=["spatial_ref"]).reset_index()
65+
)
66+
subset: pd.DataFrame = sdf.dropna()
67+
subset = subset[subset.value != nodata]
68+
subset = pd.pivot_table(
69+
subset, values=const.DEFAULT_NAME, index=["x", "y"], columns=["band"]
70+
).reset_index()
71+
# Primary DGGSRS index
72+
cells = [
73+
self.dggrs.getZoneFromWGS84Centroid(resolution, dggal.GeoPoint(lon, lat))
74+
for lon, lat in zip(subset["y"], subset["x"])
75+
] # Vectorised
76+
dggrs_parent = [
77+
apply_n_reduce(self.dggrs.getZoneParents, resolution - parent_res, zone)
78+
for zone in cells
79+
]
80+
subset = subset.drop(columns=["x", "y"])
81+
index_col = self.index_col(resolution)
82+
subset[index_col] = pd.Series(
83+
map(self.dggrs.getZoneTextID, cells), index=subset.index
84+
)
85+
partition_col = self.partition_col(parent_res)
86+
subset[partition_col] = pd.Series(
87+
map(self.dggrs.getZoneTextID, dggrs_parent), index=subset.index
88+
)
89+
# Rename bands
90+
bands = sdf["band"].unique()
91+
columns = dict(zip(bands, band_labels))
92+
subset = subset.rename(columns=columns)
93+
return pa.Table.from_pandas(subset)
94+
95+
def cell_to_children_size(self, cell: str, desired_resolution: int) -> int:
96+
"""
97+
Determine total number of children at some offset resolution
98+
99+
Implementation of interface function.
100+
"""
101+
current_resolution = self.dggrs.getZoneLevel(self.dggrs.getZoneFromTextID(cell))
102+
n = desired_resolution - current_resolution
103+
return self.refinementRatio**n
104+
105+
@staticmethod
106+
def valid_set(cells: set) -> set[str]:
107+
"""
108+
Implementation of interface function.
109+
"""
110+
return set(filter(lambda c: (not pd.isna(c)), cells))
111+
112+
def parent_cells(self, cells: set, resolution) -> map:
113+
"""
114+
Implementation of interface function.
115+
"""
116+
# TODO appropriately handle potential for multiple parentage in dggal (e.g. ISEAH3)
117+
child_resolution = self.dggrs.getZoneLevel(self.dggrs.getZoneFromTextID(next(iter(cells))))
118+
return map(
119+
lambda zone: self.dggrs.getZoneTextID(apply_n_reduce(
120+
self.dggrs.getZoneParents, child_resolution - resolution, self.dggrs.getZoneFromTextID(zone)
121+
)), cells
122+
)
123+
124+
def expected_count(self, parent: str, resolution: int):
125+
"""
126+
Implementation of interface function.
127+
"""
128+
return self.cell_to_children_size(parent, resolution)
129+
130+
def cell_to_point(self, cell: str) -> shapely.geometry.Point:
131+
geo_point : dggal.GeoPoint = self.dggrs.getZoneWGS84Centroid(self.dggrs.getZoneFromTextID(cell))
132+
return shapely.Point(
133+
geo_point.lon, geo_point.lat
134+
)
135+
136+
def cell_to_polygon(self, cell: str) -> shapely.geometry.Polygon:
137+
geo_points : List[dggal.GeoPoint] = self.dggrs.getZoneWGS84Vertices(self.dggrs.getZoneFromTextID(cell))
138+
return shapely.Polygon(
139+
tuple([(p.lon, p.lat) for p in geo_points])
140+
)
141+
142+
143+
# def ISEA7HRasterIndexer(DGGALRasterIndexer):
144+
# def __init__(self, dggs: str):
145+
# super().__init__(dggs)
146+
# self.dggrs = dggal.ISEA7H()
147+
148+
# NB All zones of GNOSIS Global Grid and ISEA9R have single parents, whereas ISEA3H zones have one parent if they are a centroid child, and three parents otherwise if they are a vertex child. See dggrs.getMaxParents()
149+
150+
151+
class ISEA9RRasterIndexer(DGGALRasterIndexer):
152+
"""
153+
A raster indexer for the ISEA9R DGGS, an axis-aligned and equal-area DGGH based on the Icosahedral Snyder Equal-Area (ISEA) planar projection using rhombuses with a refinement ratio of 9.
154+
"""
155+
156+
def __init__(self, dggs: str):
157+
super().__init__(dggs)
158+
dggal.pydggal_setup(dggal.Application(appGlobals=globals()))
159+
self._dggrs = dggal.ISEA9R()
160+
self._refinementRatio = self.dggrs.getRefinementRatio()
161+
162+
@property
163+
def dggrs(self) -> dggal.DGGRS:
164+
return self._dggrs
165+
166+
@property
167+
def refinementRatio(self) -> int:
168+
return self._refinementRatio
169+
170+
class ISEA7HRasterIndexer(DGGALRasterIndexer):
171+
"""
172+
A raster indexer for the ISEA7H DGGS, an equal-area hexagonal grid with a refinement ratio of 7 defined in the ISEA projection.
173+
"""
174+
175+
def __init__(self, dggs: str):
176+
super().__init__(dggs)
177+
dggal.pydggal_setup(dggal.Application(appGlobals=globals()))
178+
self._dggrs = dggal.ISEA7H()
179+
self._refinementRatio = self.dggrs.getRefinementRatio()
180+
181+
@property
182+
def dggrs(self) -> dggal.DGGRS:
183+
return self._dggrs
184+
185+
@property
186+
def refinementRatio(self) -> int:
187+
return self._refinementRatio

raster2dggs/indexers/rasterindexer.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212

1313
from .. import constants as const
14-
from .. interfaces import IRasterIndexer
14+
from ..interfaces import IRasterIndexer
1515

1616

1717
class RasterIndexer(IRasterIndexer):
@@ -31,6 +31,12 @@ def __init__(self, dggs: str):
3131
"""
3232
self.dggs = dggs
3333

34+
def __dask_tokenize__(self):
35+
"""
36+
Only include stable, immutable fields that define behaviour
37+
"""
38+
return (type(self).__name__, self.dggs)
39+
3440
def index_col(self, resolution):
3541
pad_width = const.zero_padding(self.dggs)
3642
return f"{self.dggs}_{resolution:0{pad_width}d}"
@@ -134,7 +140,21 @@ def compaction(
134140
if not unprocessed_indices:
135141
return df
136142
band_cols = self.band_cols(df)
143+
# partition_col = self.partition_col(parent_res)
137144
compaction_map = {}
145+
146+
# sub = df.loc[list(unprocessed_indices)]
147+
# for parent, group in sub.groupby(partition_col, sort=False, observed=True):
148+
# if parent in compaction_map:
149+
# continue
150+
# expected = self.expected_count(parent, resolution)
151+
152+
# # All band columns identical across the group?
153+
# if len(group) == expected and (group[band_cols].nunique() == 1).all():
154+
# compact_row = group.iloc[0].copy()
155+
# compact_row.name = parent # parent becomes the new index
156+
# compaction_map[parent] = compact_row
157+
# unprocessed_indices -= set(group.index)
138158
for r in range(parent_res, resolution):
139159
parent_cells = self.parent_cells(unprocessed_indices, r)
140160
parent_groups = df.loc[list(unprocessed_indices)].groupby(

0 commit comments

Comments
 (0)