Skip to content

Commit 5763bf4

Browse files
abstract padding width
1 parent 01e1c2d commit 5763bf4

File tree

6 files changed

+34
-15
lines changed

6 files changed

+34
-15
lines changed

raster2dggs/common.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,18 @@ def assemble_kwargs(
107107

108108
return kwargs
109109

110+
def zero_padding(dggs: str) -> int:
    """
    Return the zero-padding width to use when formatting a resolution
    number for the given DGGS.

    The width is the number of characters in the string form of that
    DGGS's maximum resolution constant, so any non-negative resolution up
    to the maximum formats to the same number of digits.

    Raises a ValueError if `dggs` is not one of the supported keys
    ("h3", "rhp", "geohash", "maidenhead", "s2").
    """
    highest_resolution = {
        "h3": const.MAX_H3,
        "rhp": const.MAX_RHP,
        "geohash": const.MAX_GEOHASH,
        "maidenhead": const.MAX_MAIDENHEAD,
        "s2": const.MAX_S2,
    }.get(dggs)
    if highest_resolution is not None:
        return len(str(highest_resolution))
    raise ValueError(f"Unknown DGGS type: {dggs}")
110122

111123
def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int) -> int:
112124
"""
@@ -161,7 +173,9 @@ def address_boundary_issues(
161173
)
162174
with TqdmCallback(desc="Reading window partitions"):
163175
# Set index as parent cell
164-
ddf = dd.read_parquet(pq_input).set_index(f"{dggs}_{parent_res:02}")
176+
pad_width = zero_padding(dggs)
177+
index_col = f"{dggs}_{parent_res:0{pad_width}d}"
178+
ddf = dd.read_parquet(pq_input).set_index(index_col)
165179

166180
with TqdmCallback(desc="Counting parents"):
167181
# Count parents, to get target number of partitions

raster2dggs/geohash.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from raster2dggs import __version__
1818

1919

20+
# Zero-padding width applied to the precision in "geohash_<precision>" column names.
PAD_WIDTH = common.zero_padding("geohash")
21+
2022
def _geohashfunc(
2123
sdf: xr.DataArray,
2224
precision: int,
@@ -44,8 +46,8 @@ def _geohashfunc(
4446
# Secondary (parent) Geohash index, used later for partitioning
4547
geohash_parent = [gh[:parent_precision] for gh in geohash]
4648
subset = subset.drop(columns=["x", "y"])
47-
subset[f"geohash_{precision:02}"] = pd.Series(geohash, index=subset.index)
48-
subset[f"geohash_{parent_precision:02}"] = pd.Series(
49+
subset[f"geohash_{precision:0{PAD_WIDTH}d}"] = pd.Series(geohash, index=subset.index)
50+
subset[f"geohash_{parent_precision:0{PAD_WIDTH}d}"] = pd.Series(
4951
geohash_parent, index=subset.index
5052
)
5153
# Rename bands
@@ -69,10 +71,10 @@ def _geohash_parent_groupby(
6971
high resolution raster at a coarse Geohash precision.
7072
"""
7173
if decimals > 0:
72-
return df.groupby(f"geohash_{precision:02}").agg(aggfunc).round(decimals)
74+
return df.groupby(f"geohash_{precision:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
7375
else:
7476
return (
75-
df.groupby(f"geohash_{precision:02}")
77+
df.groupby(f"geohash_{precision:0{PAD_WIDTH}d}")
7678
.agg(aggfunc)
7779
.round(decimals)
7880
.astype("Int64")

raster2dggs/h3.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import raster2dggs.common as common
1717
from raster2dggs import __version__
1818

19+
# Zero-padding width applied to the resolution in "h3_<resolution>" column names.
PAD_WIDTH = common.zero_padding("h3")
1920

2021
def _h3func(
2122
sdf: xr.DataArray,
@@ -63,10 +64,10 @@ def _h3_parent_groupby(
6364
high resolution raster at a coarser h3 resolution.
6465
"""
6566
if decimals > 0:
66-
return df.groupby(f"h3_{resolution:02}").agg(aggfunc).round(decimals)
67+
return df.groupby(f"h3_{resolution:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
6768
else:
6869
return (
69-
df.groupby(f"h3_{resolution:02}")
70+
df.groupby(f"h3_{resolution:0{PAD_WIDTH}d}")
7071
.agg(aggfunc)
7172
.round(decimals)
7273
.astype("Int64")

raster2dggs/maidenhead.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,10 @@ def _maidenhead_parent_groupby(
6767
pandas .groupby function. This step is to ensure there are no duplicate Maidenhead indices, which will certainly happen when indexing most raster datasets as Maidenhead has low precision.
6868
"""
6969
if decimals > 0:
70-
return df.groupby(f"maidenhead_{precision:02}").agg(aggfunc).round(decimals)
70+
return df.groupby(f"maidenhead_{precision}").agg(aggfunc).round(decimals)
7171
else:
7272
return (
73-
df.groupby(f"maidenhead_{precision:02}")
73+
df.groupby(f"maidenhead_{precision}")
7474
.agg(aggfunc)
7575
.round(decimals)
7676
.astype("Int64")

raster2dggs/rHP.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import raster2dggs.common as common
1717
from raster2dggs import __version__
1818

19+
# Zero-padding width applied to the resolution in "rhp_<resolution>" column names.
# This must be looked up with the "rhp" key (not "h3") so that the column names
# built in this module agree with the index column name that
# common.address_boundary_issues derives from zero_padding(dggs).
PAD_WIDTH = common.zero_padding("rhp")
1920

2021
def _rhpfunc(
2122
sdf: xr.DataArray,
@@ -63,10 +64,10 @@ def _rhp_parent_groupby(
6364
high resolution raster at a coarser resolution.
6465
"""
6566
if decimals > 0:
66-
return df.groupby(f"rhp_{resolution:02}").agg(aggfunc).round(decimals)
67+
return df.groupby(f"rhp_{resolution:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
6768
else:
6869
return (
69-
df.groupby(f"rhp_{resolution:02}")
70+
df.groupby(f"rhp_{resolution:0{PAD_WIDTH}d}")
7071
.agg(aggfunc)
7172
.round(decimals)
7273
.astype("Int64")

raster2dggs/s2.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import raster2dggs.common as common
1717
from raster2dggs import __version__
1818

19+
# Zero-padding width applied to the resolution in "s2_<resolution>" column names.
PAD_WIDTH = common.zero_padding("s2")
1920

2021
def _s2func(
2122
sdf: xr.DataArray,
@@ -44,8 +45,8 @@ def _s2func(
4445
s2 = [cell.parent(resolution).to_token() for cell in cells]
4546
s2_parent = [cell.parent(parent_res).to_token() for cell in cells]
4647
subset = subset.drop(columns=["x", "y"])
47-
subset[f"s2_{resolution:02}"] = pd.Series(s2, index=subset.index)
48-
subset[f"s2_{parent_res:02}"] = pd.Series(s2_parent, index=subset.index)
48+
subset[f"s2_{resolution:0{PAD_WIDTH}d}"] = pd.Series(s2, index=subset.index)
49+
subset[f"s2_{parent_res:0{PAD_WIDTH}d}"] = pd.Series(s2_parent, index=subset.index)
4950
# Renaming columns to actual band labels
5051
bands = sdf["band"].unique()
5152
band_names = dict(zip(bands, map(lambda i: band_labels[i - 1], bands)))
@@ -67,10 +68,10 @@ def _s2_parent_groupby(
6768
high resolution raster at a coarser S2 resolution.
6869
"""
6970
if decimals > 0:
70-
return df.groupby(f"s2_{resolution:02}").agg(aggfunc).round(decimals)
71+
return df.groupby(f"s2_{resolution:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
7172
else:
7273
return (
73-
df.groupby(f"s2_{resolution:02}")
74+
df.groupby(f"s2_{resolution:0{PAD_WIDTH}d}")
7475
.agg(aggfunc)
7576
.round(decimals)
7677
.astype("Int64")

0 commit comments

Comments
 (0)