abstract padding width

alpha-beta-soup · alpha-beta-soup · commit 5763bf48fa94 · 2025-05-01T13:43:12.000+12:00
diff --git a/raster2dggs/common.py b/raster2dggs/common.py
@@ -107,6 +107,18 @@ def assemble_kwargs(
 
     return kwargs
 
+def zero_padding(dggs: str) -> int:
+    """Return the zero-pad width for ``dggs``: the length of ``str(const.MAX_*)``."""
+    max_res = {
+        "h3": const.MAX_H3,
+        "rhp": const.MAX_RHP,
+        "geohash": const.MAX_GEOHASH,
+        "maidenhead": const.MAX_MAIDENHEAD,
+        "s2": const.MAX_S2,
+    }.get(dggs)
+    if max_res is None:  # no entry for this DGGS name
+        raise ValueError(f"Unknown DGGS type: {dggs}")
+    return len(str(max_res))
 
 def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int) -> int:
     """
@@ -161,7 +173,9 @@ def address_boundary_issues(
     )
     with TqdmCallback(desc="Reading window partitions"):
         # Set index as parent cell
-        ddf = dd.read_parquet(pq_input).set_index(f"{dggs}_{parent_res:02}")
+        pad_width = zero_padding(dggs)
+        index_col = f"{dggs}_{parent_res:0{pad_width}d}"
+        ddf = dd.read_parquet(pq_input).set_index(index_col)
 
     with TqdmCallback(desc="Counting parents"):
         # Count parents, to get target number of partitions
diff --git a/raster2dggs/geohash.py b/raster2dggs/geohash.py
@@ -17,6 +17,8 @@
 from raster2dggs import __version__
 
 
+PAD_WIDTH = common.zero_padding('geohash')  # zero-pad width for the precision in geohash_* column names
+
 def _geohashfunc(
     sdf: xr.DataArray,
     precision: int,
@@ -44,8 +46,8 @@ def _geohashfunc(
     # Secondary (parent) Geohash index, used later for partitioning
     geohash_parent = [gh[:parent_precision] for gh in geohash]
     subset = subset.drop(columns=["x", "y"])
-    subset[f"geohash_{precision:02}"] = pd.Series(geohash, index=subset.index)
-    subset[f"geohash_{parent_precision:02}"] = pd.Series(
+    subset[f"geohash_{precision:0{PAD_WIDTH}d}"] = pd.Series(geohash, index=subset.index)
+    subset[f"geohash_{parent_precision:0{PAD_WIDTH}d}"] = pd.Series(
         geohash_parent, index=subset.index
     )
     # Rename bands
@@ -69,10 +71,10 @@ def _geohash_parent_groupby(
     high resolution raster at a coarse Geohash precision.
     """
     if decimals > 0:
-        return df.groupby(f"geohash_{precision:02}").agg(aggfunc).round(decimals)
+        return df.groupby(f"geohash_{precision:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
     else:
         return (
-            df.groupby(f"geohash_{precision:02}")
+            df.groupby(f"geohash_{precision:0{PAD_WIDTH}d}")
             .agg(aggfunc)
             .round(decimals)
             .astype("Int64")
diff --git a/raster2dggs/h3.py b/raster2dggs/h3.py
@@ -16,6 +16,7 @@
 import raster2dggs.common as common
 from raster2dggs import __version__
 
+PAD_WIDTH = common.zero_padding("h3")  # zero-pad width for the resolution in h3_* column names
 
 def _h3func(
     sdf: xr.DataArray,
@@ -63,10 +64,10 @@ def _h3_parent_groupby(
     high resolution raster at a coarser h3 resolution.
     """
     if decimals > 0:
-        return df.groupby(f"h3_{resolution:02}").agg(aggfunc).round(decimals)
+        return df.groupby(f"h3_{resolution:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
     else:
         return (
-            df.groupby(f"h3_{resolution:02}")
+            df.groupby(f"h3_{resolution:0{PAD_WIDTH}d}")
             .agg(aggfunc)
             .round(decimals)
             .astype("Int64")
diff --git a/raster2dggs/maidenhead.py b/raster2dggs/maidenhead.py
@@ -67,10 +67,10 @@ def _maidenhead_parent_groupby(
     pandas .groupby function. This step is to ensure there are no duplicate Maidenhead indices, which will certainly happen when indexing most raster datasets as Maidenhead has low precision.
     """
     if decimals > 0:
-        return df.groupby(f"maidenhead_{precision:02}").agg(aggfunc).round(decimals)
+        return df.groupby(f"maidenhead_{precision}").agg(aggfunc).round(decimals)
     else:
         return (
-            df.groupby(f"maidenhead_{precision:02}")
+            df.groupby(f"maidenhead_{precision}")
             .agg(aggfunc)
             .round(decimals)
             .astype("Int64")
diff --git a/raster2dggs/rHP.py b/raster2dggs/rHP.py
@@ -16,6 +16,7 @@
 import raster2dggs.common as common
 from raster2dggs import __version__
 
+PAD_WIDTH = common.zero_padding("rhp")  # key must match this module's rhp_* column names, not H3's
 
 def _rhpfunc(
     sdf: xr.DataArray,
@@ -63,10 +64,10 @@ def _rhp_parent_groupby(
     high resolution raster at a coarser resolution.
     """
     if decimals > 0:
-        return df.groupby(f"rhp_{resolution:02}").agg(aggfunc).round(decimals)
+        return df.groupby(f"rhp_{resolution:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
     else:
         return (
-            df.groupby(f"rhp_{resolution:02}")
+            df.groupby(f"rhp_{resolution:0{PAD_WIDTH}d}")
             .agg(aggfunc)
             .round(decimals)
             .astype("Int64")
diff --git a/raster2dggs/s2.py b/raster2dggs/s2.py
@@ -16,6 +16,7 @@
 import raster2dggs.common as common
 from raster2dggs import __version__
 
+PAD_WIDTH = common.zero_padding("s2")  # zero-pad width for the resolution in s2_* column names
 
 def _s2func(
     sdf: xr.DataArray,
@@ -44,8 +45,8 @@ def _s2func(
     s2 = [cell.parent(resolution).to_token() for cell in cells]
     s2_parent = [cell.parent(parent_res).to_token() for cell in cells]
     subset = subset.drop(columns=["x", "y"])
-    subset[f"s2_{resolution:02}"] = pd.Series(s2, index=subset.index)
-    subset[f"s2_{parent_res:02}"] = pd.Series(s2_parent, index=subset.index)
+    subset[f"s2_{resolution:0{PAD_WIDTH}d}"] = pd.Series(s2, index=subset.index)
+    subset[f"s2_{parent_res:0{PAD_WIDTH}d}"] = pd.Series(s2_parent, index=subset.index)
     # Renaming columns to actual band labels
     bands = sdf["band"].unique()
     band_names = dict(zip(bands, map(lambda i: band_labels[i - 1], bands)))
@@ -67,10 +68,10 @@ def _s2_parent_groupby(
     high resolution raster at a coarser S2 resolution.
     """
     if decimals > 0:
-        return df.groupby(f"s2_{resolution:02}").agg(aggfunc).round(decimals)
+        return df.groupby(f"s2_{resolution:0{PAD_WIDTH}d}").agg(aggfunc).round(decimals)
     else:
         return (
-            df.groupby(f"s2_{resolution:02}")
+            df.groupby(f"s2_{resolution:0{PAD_WIDTH}d}")
             .agg(aggfunc)
             .round(decimals)
             .astype("Int64")