1313from . import constants as const
1414
1515
16- class RasterIndexer :
16+ class IRasterIndexer :
1717 """
1818 Provides a base class and interface for all indexers integrating a
1919 specific DGGS. It should never be instantiated directly because
20- many methods raise a NotImplementedError by design. The methods
20+ all methods raise a NotImplementedError by design. The methods
2121 should be implemented by the child classes deriving from this
2222 interface instead.
23- If specialised behaviour is required, methods may be
24- re-implemented by derived classes.
2523 """
2624
2725 def __init__ (self , dggs : str ):
@@ -30,34 +28,45 @@ def __init__(self, dggs: str):
3028 """
3129 self .dggs = dggs
3230
33- def index_col (self , resolution ):
34- pad_width = const .zero_padding (self .dggs )
35- return f"{ self .dggs } _{ resolution :0{pad_width }d} "
31+ def index_col (self , resolution : int ) -> str :
32+ """
33+ Returns the primary DGGS index column name, with zero padding so that column
34+ names across a DGGS' full resolution space have the same length.
35+ """
36+ raise NotImplementedError ()
3637
37- def partition_col (self , parent_resolution ):
38- pad_width = const .zero_padding (self .dggs )
39- return f"{ self .dggs } _{ parent_resolution :0{pad_width }d} "
38+ def partition_col (self , parent_resolution : int ) -> str :
39+ """
40+ Returns the partition DGGS index column name, with zero padding so that column
41+ names across a DGGS' full resolution space have the same length.
42+ """
43+ raise NotImplementedError ()
4044
41- def band_cols (self , df : pd .DataFrame ):
42- return [c for c in df .columns if not c .startswith (f"{ self .dggs } _" )]
45+ def band_cols (self , df : pd .DataFrame ) -> list [str ]:
46+ """
47+ Returns the column names representing raster bands from an input image.
48+ """
49+ raise NotImplementedError ()
4350
4451 @staticmethod
4552 def valid_set (cells : set ) -> set :
4653 """
47- Needs to be implemented by child class
54+ Given a set of DGGS cells of the same DGGS, return the subset that are valid cell addresses.
4855 """
4956 raise NotImplementedError ()
5057
5158 @staticmethod
52- def parent_cells (cells : set , resolution ) -> map :
59+ def parent_cells (cells : set , resolution : int ) -> map :
5360 """
54- Needs to be implemented by child class
61+ Given a set of DGGS cells, return an iterable of parent cells at the given resolution.
5562 """
5663 raise NotImplementedError
5764
58- def expected_count (self , parent : str , resolution : int ):
65+ def expected_count (self , parent : str , resolution : int ) -> int :
5966 """
60- Needs to be implemented by child class
67+ Given a DGGS (parent) cell ID, and a target (child) resolution,
68+ return the expected number of child cells that completely represent this
69+ parent cell at the target resolution.
6170 """
6271 raise NotImplementedError
6372
@@ -70,7 +79,7 @@ def index_func(
7079 band_labels : Tuple [str ] = None ,
7180 ) -> pa .Table :
7281 """
73- Needs to be implemented by child class
82+ Function for primary indexation.
7483 """
7584 raise NotImplementedError ()
7685
@@ -89,27 +98,7 @@ def parent_groupby(
8998 cell values, which will happen when indexing a high resolution
9099 raster at a coarser DGGS resolution.
91100 """
92- index_col = self .index_col (resolution )
93- partition_col = self .partition_col (parent_res )
94- df = df .set_index (index_col )
95- if decimals > 0 :
96- gb = (
97- df .groupby ([partition_col , index_col ], sort = False , observed = True )
98- .agg (aggfunc )
99- .round (decimals )
100- )
101- else :
102- gb = (
103- df .groupby ([partition_col , index_col ], sort = False , observed = True )
104- .agg (aggfunc )
105- .round (decimals )
106- .astype ("Int64" )
107- )
108- # Move parent out to a column; keep child as the index
109- # MultiIndex levels are [partition_col, index_col] in that order
110- gb = gb .reset_index (level = 0 ) # parent -> column
111- gb .index .name = index_col # child remains index
112- return gb
101+ raise NotImplementedError
113102
114103 @staticmethod
115104 def cell_to_children_size (cell , desired_resolution : int ) -> int :
@@ -129,31 +118,4 @@ def compaction(
129118 It assumes that the input has unique DGGS cell values
130119 as the index.
131120 """
132- unprocessed_indices = self .valid_set (set (df .index ))
133- if not unprocessed_indices :
134- return df
135- band_cols = self .band_cols (df )
136- compaction_map = {}
137- for r in range (parent_res , resolution ):
138- parent_cells = self .parent_cells (unprocessed_indices , r )
139- parent_groups = df .loc [list (unprocessed_indices )].groupby (
140- list (parent_cells )
141- )
142- for parent , group in parent_groups :
143- if isinstance (parent , tuple ) and len (parent ) == 1 :
144- parent = parent [0 ]
145- if parent in compaction_map :
146- continue
147- expected_count = self .expected_count (parent , resolution )
148- if len (group ) == expected_count and all (
149- group [band_cols ].nunique () == 1
150- ):
151- compact_row = group .iloc [0 ]
152- compact_row .name = parent # Rename the index to the parent cell
153- compaction_map [parent ] = compact_row
154- unprocessed_indices -= set (group .index )
155- compacted_df = pd .DataFrame (list (compaction_map .values ()))
156- remaining_df = df .loc [list (unprocessed_indices )]
157- result_df = pd .concat ([compacted_df , remaining_df ])
158- result_df = result_df .rename_axis (df .index .name )
159- return result_df
121+ raise NotImplementedError ()
0 commit comments