File tree Expand file tree Collapse file tree 6 files changed +34
-15
lines changed
Expand file tree Collapse file tree 6 files changed +34
-15
lines changed Original file line number Diff line number Diff line change @@ -107,6 +107,18 @@ def assemble_kwargs(
107107
108108 return kwargs
109109
def zero_padding(dggs: str) -> int:
    """
    Return the zero-pad width for resolution suffixes of the given DGGS.

    The width is the number of decimal digits in the maximum resolution
    registered for `dggs`. Callers use it as the field width when formatting
    resolution-suffixed column names (e.g. f"{dggs}_{res:0{width}d}"), so
    every resolution of one DGGS yields a suffix of the same length.

    Args:
        dggs: DGGS identifier; one of "h3", "rhp", "geohash", "maidenhead"
            or "s2".

    Returns:
        Number of characters in the decimal form of the maximum resolution.

    Raises:
        ValueError: if `dggs` has no registered maximum resolution.
    """
    max_res_lookup = {
        "h3": const.MAX_H3,
        "rhp": const.MAX_RHP,
        "geohash": const.MAX_GEOHASH,
        "maidenhead": const.MAX_MAIDENHEAD,
        "s2": const.MAX_S2,
    }
    max_res = max_res_lookup.get(dggs)
    if max_res is None:
        raise ValueError(f"Unknown DGGS type: {dggs}")
    # Digit count of the largest resolution, e.g. a two-digit maximum -> 2.
    return len(str(max_res))
110122
111123def get_parent_res (dggs : str , parent_res : Union [None , int ], resolution : int ) -> int :
112124 """
@@ -161,7 +173,9 @@ def address_boundary_issues(
161173 )
162174 with TqdmCallback (desc = "Reading window partitions" ):
163175 # Set index as parent cell
164- ddf = dd .read_parquet (pq_input ).set_index (f"{ dggs } _{ parent_res :02} " )
176+ pad_width = zero_padding (dggs )
177+ index_col = f"{ dggs } _{ parent_res :0{pad_width }d} "
178+ ddf = dd .read_parquet (pq_input ).set_index (index_col )
165179
166180 with TqdmCallback (desc = "Counting parents" ):
167181 # Count parents, to get target number of partitions
Original file line number Diff line number Diff line change 1717from raster2dggs import __version__
1818
1919
20+ PAD_WIDTH = common .zero_padding ('geohash' )
21+
2022def _geohashfunc (
2123 sdf : xr .DataArray ,
2224 precision : int ,
@@ -44,8 +46,8 @@ def _geohashfunc(
4446 # Secondary (parent) Geohash index, used later for partitioning
4547 geohash_parent = [gh [:parent_precision ] for gh in geohash ]
4648 subset = subset .drop (columns = ["x" , "y" ])
47- subset [f"geohash_{ precision :02 } " ] = pd .Series (geohash , index = subset .index )
48- subset [f"geohash_{ parent_precision :02 } " ] = pd .Series (
49+ subset [f"geohash_{ precision :0{ PAD_WIDTH }d } " ] = pd .Series (geohash , index = subset .index )
50+ subset [f"geohash_{ parent_precision :0{ PAD_WIDTH }d } " ] = pd .Series (
4951 geohash_parent , index = subset .index
5052 )
5153 # Rename bands
@@ -69,10 +71,10 @@ def _geohash_parent_groupby(
6971 high resolution raster at a coarse Geohash precision.
7072 """
7173 if decimals > 0 :
72- return df .groupby (f"geohash_{ precision :02 } " ).agg (aggfunc ).round (decimals )
74+ return df .groupby (f"geohash_{ precision :0{ PAD_WIDTH }d } " ).agg (aggfunc ).round (decimals )
7375 else :
7476 return (
75- df .groupby (f"geohash_{ precision :02 } " )
77+ df .groupby (f"geohash_{ precision :0{ PAD_WIDTH }d } " )
7678 .agg (aggfunc )
7779 .round (decimals )
7880 .astype ("Int64" )
Original file line number Diff line number Diff line change 1616import raster2dggs .common as common
1717from raster2dggs import __version__
1818
19+ PAD_WIDTH = common .zero_padding ("h3" )
1920
2021def _h3func (
2122 sdf : xr .DataArray ,
@@ -63,10 +64,10 @@ def _h3_parent_groupby(
6364 high resolution raster at a coarser h3 resolution.
6465 """
6566 if decimals > 0 :
66- return df .groupby (f"h3_{ resolution :02 } " ).agg (aggfunc ).round (decimals )
67+ return df .groupby (f"h3_{ resolution :0{ PAD_WIDTH }d } " ).agg (aggfunc ).round (decimals )
6768 else :
6869 return (
69- df .groupby (f"h3_{ resolution :02 } " )
70+ df .groupby (f"h3_{ resolution :0{ PAD_WIDTH }d } " )
7071 .agg (aggfunc )
7172 .round (decimals )
7273 .astype ("Int64" )
Original file line number Diff line number Diff line change @@ -67,10 +67,10 @@ def _maidenhead_parent_groupby(
6767 pandas .groupby function. This step is to ensure there are no duplicate Maidenhead indices, which will certainly happen when indexing most raster datasets as Maidenhead has low precision.
6868 """
6969 if decimals > 0 :
70- return df .groupby (f"maidenhead_{ precision :02 } " ).agg (aggfunc ).round (decimals )
70+ return df .groupby (f"maidenhead_{ precision } " ).agg (aggfunc ).round (decimals )
7171 else :
7272 return (
73- df .groupby (f"maidenhead_{ precision :02 } " )
73+ df .groupby (f"maidenhead_{ precision } " )
7474 .agg (aggfunc )
7575 .round (decimals )
7676 .astype ("Int64" )
Original file line number Diff line number Diff line change 1616import raster2dggs .common as common
1717from raster2dggs import __version__
1818
# Pad width for rHEALPix resolution suffixes; must be looked up under "rhp"
# (not "h3") so it follows the rHEALPix maximum resolution.
PAD_WIDTH = common.zero_padding("rhp")
1920
2021def _rhpfunc (
2122 sdf : xr .DataArray ,
@@ -63,10 +64,10 @@ def _rhp_parent_groupby(
6364 high resolution raster at a coarser resolution.
6465 """
6566 if decimals > 0 :
66- return df .groupby (f"rhp_{ resolution :02 } " ).agg (aggfunc ).round (decimals )
67+ return df .groupby (f"rhp_{ resolution :0{ PAD_WIDTH }d } " ).agg (aggfunc ).round (decimals )
6768 else :
6869 return (
69- df .groupby (f"rhp_{ resolution :02 } " )
70+ df .groupby (f"rhp_{ resolution :0{ PAD_WIDTH }d } " )
7071 .agg (aggfunc )
7172 .round (decimals )
7273 .astype ("Int64" )
Original file line number Diff line number Diff line change 1616import raster2dggs .common as common
1717from raster2dggs import __version__
1818
19+ PAD_WIDTH = common .zero_padding ("s2" )
1920
2021def _s2func (
2122 sdf : xr .DataArray ,
@@ -44,8 +45,8 @@ def _s2func(
4445 s2 = [cell .parent (resolution ).to_token () for cell in cells ]
4546 s2_parent = [cell .parent (parent_res ).to_token () for cell in cells ]
4647 subset = subset .drop (columns = ["x" , "y" ])
47- subset [f"s2_{ resolution :02 } " ] = pd .Series (s2 , index = subset .index )
48- subset [f"s2_{ parent_res :02 } " ] = pd .Series (s2_parent , index = subset .index )
48+ subset [f"s2_{ resolution :0{ PAD_WIDTH }d } " ] = pd .Series (s2 , index = subset .index )
49+ subset [f"s2_{ parent_res :0{ PAD_WIDTH }d } " ] = pd .Series (s2_parent , index = subset .index )
4950 # Renaming columns to actual band labels
5051 bands = sdf ["band" ].unique ()
5152 band_names = dict (zip (bands , map (lambda i : band_labels [i - 1 ], bands )))
@@ -67,10 +68,10 @@ def _s2_parent_groupby(
6768 high resolution raster at a coarser S2 resolution.
6869 """
6970 if decimals > 0 :
70- return df .groupby (f"s2_{ resolution :02 } " ).agg (aggfunc ).round (decimals )
71+ return df .groupby (f"s2_{ resolution :0{ PAD_WIDTH }d } " ).agg (aggfunc ).round (decimals )
7172 else :
7273 return (
73- df .groupby (f"s2_{ resolution :02 } " )
74+ df .groupby (f"s2_{ resolution :0{ PAD_WIDTH }d } " )
7475 .agg (aggfunc )
7576 .round (decimals )
7677 .astype ("Int64" )
You can’t perform that action at this time.
0 commit comments