66
77import click
88import click_log
9+ import h3 as h3py
910import h3pandas # Necessary import despite lack of explicit use
1011import pandas as pd
1112import pyarrow as pa
@@ -75,6 +76,52 @@ def _h3_parent_groupby(
7576 )
7677
7778
def h3_cell_to_children_size(cell, desired_resolution: int) -> int:
    """
    Return the number of descendants of ``cell`` at ``desired_resolution``.

    The count is not simply a power of seven: pentagonal cells have fewer
    descendants than hexagonal ones, so the value must come from H3 itself.

    :param cell: H3 cell identifier, in whatever form the ``h3py`` API accepts.
    :param desired_resolution: resolution at which descendants are counted.
    :return: number of descendant cells at ``desired_resolution``.
    """
    # Newer h3-py releases expose the H3 core function cellToChildrenSize,
    # which computes the (pentagon-aware) count arithmetically. Prefer it:
    # enumerating children allocates roughly 7**(resolution offset) cells
    # just to measure the length of the list.
    size_fn = getattr(h3py, "cell_to_children_size", None)
    if size_fn is not None:
        return size_fn(cell, desired_resolution)
    # Fallback for h3-py versions without the binding: enumerate and count.
    return len(h3py.cell_to_children(cell, desired_resolution))
86+
87+
88+ def _h3_compaction (df : pd .DataFrame , resolution : int , parent_res : int ) -> pd .DataFrame :
89+ """
90+ Returns a compacted version of the input dataframe.
91+ Compaction only occurs if all values (i.e. bands) of the input share common values across all sibling cells.
92+ Compaction will not be performed beyond parent_res or resolution.
93+ It assumes and requires that the input has unique DGGS cell values as the index.
94+ """
95+ unprocessed_indices = set (df .index )
96+ compaction_map = {}
97+ for r in range (parent_res , resolution ):
98+ try :
99+ parent_cells = map (lambda x : h3py .cell_to_parent (x , r ), unprocessed_indices )
100+ grouped = df .loc [list (unprocessed_indices )].groupby (list (parent_cells ))
101+ except ValueError as e :
102+ # Indices that aren't DGGS cells; ignore and break
103+ # TODO how is this possible?
104+ break
105+ for parent , group in grouped :
106+ if parent in compaction_map :
107+ continue
108+ expected_count = h3_cell_to_children_size (parent , resolution )
109+ if len (group ) == expected_count and all (group .nunique () == 1 ):
110+ compact_row = group .iloc [0 ]
111+ compact_row .name = parent # Rename the index to the parent cell
112+ compaction_map [parent ] = compact_row
113+ unprocessed_indices -= set (group .index )
114+ else :
115+ # Didn't break
116+ compacted_df = pd .DataFrame (list (compaction_map .values ()))
117+ remaining_df = df .loc [list (unprocessed_indices )]
118+ result_df = pd .concat ([compacted_df , remaining_df ])
119+ result_df = result_df .rename_axis (df .index .name )
120+ return result_df
121+ # Did break
122+ return df
123+
124+
78125@click .command (context_settings = {"show_default" : True })
79126@click_log .simple_verbosity_option (common .LOGGER )
80127@click .argument ("raster_input" , type = click .Path (), nargs = 1 )
@@ -148,6 +195,12 @@ def _h3_parent_groupby(
148195 type = click .Path (),
149196 help = "Temporary data is created during the execution of this program. This parameter allows you to control where this data will be written." ,
150197)
198+ @click .option (
199+ "-co" ,
200+ "--compact" ,
201+ is_flag = True ,
202+ help = "Compact the H3 cells up to the parent resolution. Compaction is not applied for cells without identical values across all bands." ,
203+ )
151204@click .version_option (version = __version__ )
152205def h3 (
153206 raster_input : Union [str , Path ],
@@ -162,6 +215,7 @@ def h3(
162215 overwrite : bool ,
163216 warp_mem_limit : int ,
164217 resampling : str ,
218+ compact : bool ,
165219 tempdir : Union [str , Path ],
166220):
167221 """
@@ -192,6 +246,7 @@ def h3(
192246 "h3" ,
193247 _h3func ,
194248 _h3_parent_groupby ,
249+ _h3_compaction if compact else None ,
195250 raster_input ,
196251 output_directory ,
197252 int (resolution ),
0 commit comments