Skip to content

Commit 9e17d9d

Browse files
refactoring: restore interface and partial implementation in indexer/rasterindexer
1 parent cab1ad6 commit 9e17d9d

File tree

10 files changed

+200
-77
lines changed

10 files changed

+200
-77
lines changed

raster2dggs/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
import raster2dggs.constants as const
3333
import raster2dggs.indexerfactory as idxfactory
3434

35-
from raster2dggs.interfaces import RasterIndexer
35+
from raster2dggs.interfaces import IRasterIndexer
3636

3737
LOGGER = logging.getLogger(__name__)
3838
click_log.basic_config(LOGGER)
@@ -144,7 +144,7 @@ def get_parent_res(dggs: str, parent_res: Union[None, int], resolution: int) ->
144144

145145

146146
def address_boundary_issues(
147-
indexer: RasterIndexer,
147+
indexer: IRasterIndexer,
148148
pq_input: tempfile.TemporaryDirectory,
149149
output: Path,
150150
resolution: int,

raster2dggs/indexerfactory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
@author: ndemaio
33
"""
44

5-
from raster2dggs.interfaces import RasterIndexer
5+
from raster2dggs.interfaces import IRasterIndexer
66

77
import raster2dggs.indexers.h3rasterindexer as h3rasterindexer
88
import raster2dggs.indexers.rhprasterindexer as rhprasterindexer
@@ -30,7 +30,7 @@
3030
"""
3131

3232

33-
def indexer_instance(dggs: str) -> RasterIndexer:
33+
def indexer_instance(dggs: str) -> IRasterIndexer:
3434
# Create and return appropriate indexer instance
3535
indexer = indexer_lookup[dggs]
3636
return indexer(dggs)

raster2dggs/indexers/a5rasterindexer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import raster2dggs.constants as const
1515

16-
from raster2dggs.interfaces import RasterIndexer
16+
from raster2dggs.indexers.rasterindexer import RasterIndexer
1717

1818

1919
class A5RasterIndexer(RasterIndexer):

raster2dggs/indexers/geohashrasterindexer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import raster2dggs.constants as const
1515

16-
from raster2dggs.interfaces import RasterIndexer
16+
from raster2dggs.indexers.rasterindexer import RasterIndexer
1717

1818

1919
class GeohashRasterIndexer(RasterIndexer):

raster2dggs/indexers/h3rasterindexer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
import raster2dggs.constants as const
1717

18-
from raster2dggs.interfaces import RasterIndexer
18+
from raster2dggs.indexers.rasterindexer import RasterIndexer
1919

2020

2121
class H3RasterIndexer(RasterIndexer):

raster2dggs/indexers/maidenheadrasterindexer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import raster2dggs.constants as const
1515

16-
from raster2dggs.interfaces import RasterIndexer
16+
from raster2dggs.indexers.rasterindexer import RasterIndexer
1717

1818

1919
class MaidenheadRasterIndexer(RasterIndexer):
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""
2+
@author: ndemaio
3+
"""
4+
5+
from numbers import Number
6+
from typing import Callable, Tuple, Union
7+
8+
import pandas as pd
9+
import pyarrow as pa
10+
import xarray as xr
11+
import numpy as np
12+
13+
from .. import constants as const
14+
from .. interfaces import IRasterIndexer
15+
16+
17+
class RasterIndexer(IRasterIndexer):
    """
    Partial implementation of IRasterIndexer shared by the DGGS-specific
    raster indexers.

    The column-naming helpers, parent_groupby and compaction are
    implemented here. The DGGS-specific primitives (valid_set,
    parent_cells, expected_count, index_func, cell_to_children_size)
    raise NotImplementedError by design and must be provided by the
    derived classes, so this class should never be instantiated directly.
    If specialised behaviour is required, any method may be
    re-implemented by a derived class.
    """

    def __init__(self, dggs: str):
        """
        Value used across all child classes: the DGGS name, which
        prefixes every DGGS column name produced by this indexer.
        """
        self.dggs = dggs

    def _resolution_col(self, resolution: int) -> str:
        """
        Build the column name "<dggs>_<resolution>", zero-padding the
        resolution to the width given by const.zero_padding for this DGGS.
        """
        pad_width = const.zero_padding(self.dggs)
        return f"{self.dggs}_{resolution:0{pad_width}d}"

    def index_col(self, resolution: int) -> str:
        """
        Returns the primary DGGS index column name for the given
        resolution.
        """
        return self._resolution_col(resolution)

    def partition_col(self, parent_resolution: int) -> str:
        """
        Returns the partition (parent) DGGS column name for the given
        parent resolution. It follows the same naming scheme as
        index_col.
        """
        return self._resolution_col(parent_resolution)

    def band_cols(self, df: pd.DataFrame) -> list[str]:
        """
        Returns the columns of df whose names do not start with
        "<dggs>_", i.e. every column that is not a DGGS index or
        partition column.
        """
        dggs_prefix = f"{self.dggs}_"
        return [c for c in df.columns if not c.startswith(dggs_prefix)]

    @staticmethod
    def valid_set(cells: set) -> set:
        """
        Needs to be implemented by child class
        """
        raise NotImplementedError()

    @staticmethod
    def parent_cells(cells: set, resolution: int) -> map:
        """
        Needs to be implemented by child class
        """
        raise NotImplementedError()

    def expected_count(self, parent: str, resolution: int) -> int:
        """
        Needs to be implemented by child class
        """
        raise NotImplementedError()

    def index_func(
        self,
        sdf: xr.DataArray,
        resolution: int,
        parent_res: int,
        nodata: Number = np.nan,
        band_labels: Tuple[str] = None,
    ) -> pa.Table:
        """
        Needs to be implemented by child class
        """
        raise NotImplementedError()

    def parent_groupby(
        self,
        df,
        resolution: int,
        parent_res: int,
        aggfunc: Union[str, Callable],
        decimals: int,
    ) -> pd.DataFrame:
        """
        Function for aggregating the DGGS resolution values per parent
        partition. Each partition will be run through with a pandas
        groupby function. This step is to ensure there are no duplicate
        cell values, which will happen when indexing a high resolution
        raster at a coarser DGGS resolution.

        Aggregated values are rounded to `decimals` places; when
        `decimals` is zero or negative they are additionally cast to the
        nullable "Int64" dtype. The result is indexed by the child cell
        column, with the parent (partition) cell as a regular column.
        """
        index_col = self.index_col(resolution)
        partition_col = self.partition_col(parent_res)
        gb = (
            df.set_index(index_col)
            .groupby([partition_col, index_col], sort=False, observed=True)
            .agg(aggfunc)
            .round(decimals)
        )
        if decimals <= 0:
            # No fractional part requested: store as nullable integers
            gb = gb.astype("Int64")
        # Move parent out to a column; keep child as the index
        # MultiIndex levels are [partition_col, index_col] in that order
        gb = gb.reset_index(level=0)  # parent -> column
        gb.index.name = index_col  # child remains index
        return gb

    @staticmethod
    def cell_to_children_size(cell, desired_resolution: int) -> int:
        """
        Needs to be implemented by child class
        """
        raise NotImplementedError()

    def compaction(
        self, df: pd.DataFrame, resolution: int, parent_res: int
    ) -> pd.DataFrame:
        """
        Returns a compacted version of the input dataframe.
        Compaction only occurs if all values (i.e. bands) of the input
        share common values across all sibling cells.
        Compaction will not be performed beyond parent_res.
        It assumes that the input has unique DGGS cell values
        as the index.
        """
        unprocessed_indices = self.valid_set(set(df.index))
        if not unprocessed_indices:
            return df
        band_cols = self.band_cols(df)
        compaction_map = {}
        # Walk from the coarsest candidate resolution towards the finest so
        # that cells are absorbed by the largest possible ancestor first.
        for r in range(parent_res, resolution):
            # The set is not modified between these two iterations, so the
            # parent labels line up row-for-row with the selected frame.
            parent_cells = self.parent_cells(unprocessed_indices, r)
            parent_groups = df.loc[list(unprocessed_indices)].groupby(
                list(parent_cells)
            )
            for parent, group in parent_groups:
                # Some pandas versions yield 1-tuples as group keys
                if isinstance(parent, tuple) and len(parent) == 1:
                    parent = parent[0]
                if parent in compaction_map:
                    continue
                expected_count = self.expected_count(parent, resolution)
                if len(group) == expected_count and all(
                    group[band_cols].nunique() == 1
                ):
                    compact_row = group.iloc[0]
                    compact_row.name = parent  # Rename the index to the parent cell
                    compaction_map[parent] = compact_row
                    unprocessed_indices -= set(group.index)
        compacted_df = pd.DataFrame(list(compaction_map.values()))
        remaining_df = df.loc[list(unprocessed_indices)]
        result_df = pd.concat([compacted_df, remaining_df])
        result_df = result_df.rename_axis(df.index.name)
        return result_df

raster2dggs/indexers/rhprasterindexer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
import raster2dggs.constants as const
1717

18-
from raster2dggs.interfaces import RasterIndexer
18+
from raster2dggs.indexers.rasterindexer import RasterIndexer
1919

2020

2121
class RHPRasterIndexer(RasterIndexer):

raster2dggs/indexers/s2rasterindexer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313

1414
import raster2dggs.constants as const
1515

16-
from raster2dggs.interfaces import RasterIndexer
16+
from raster2dggs.indexers.rasterindexer import RasterIndexer
17+
1718

1819

1920
class S2RasterIndexer(RasterIndexer):

raster2dggs/interfaces.py

Lines changed: 29 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,13 @@
1313
from . import constants as const
1414

1515

16-
class RasterIndexer:
16+
class IRasterIndexer:
1717
"""
1818
Provides a base class and interface for all indexers integrating a
1919
specific DGGS. It should never be instantiated directly because
20-
many methods raise a NotImplementedError by design. The methods
20+
all methods raise a NotImplementedError by design. The methods
2121
should be implemented by the child classes deriving from this
2222
interface instead.
23-
If specialised behaviour is required, methods may be
24-
re-implemented by derived classes.
2523
"""
2624

2725
def __init__(self, dggs: str):
@@ -30,34 +28,45 @@ def __init__(self, dggs: str):
3028
"""
3129
self.dggs = dggs
3230

33-
def index_col(self, resolution):
34-
pad_width = const.zero_padding(self.dggs)
35-
return f"{self.dggs}_{resolution:0{pad_width}d}"
31+
def index_col(self, resolution : int) -> str:
32+
"""
33+
Returns the primary DGGS index column name, with zero padding so that column
34+
names across a DGGS' full resolution space have the same length.
35+
"""
36+
raise NotImplementedError()
3637

37-
def partition_col(self, parent_resolution):
38-
pad_width = const.zero_padding(self.dggs)
39-
return f"{self.dggs}_{parent_resolution:0{pad_width}d}"
38+
def partition_col(self, parent_resolution : int) -> str:
39+
"""
40+
Returns the partition DGGS index column name, with zero padding so that column
41+
names across a DGGS' full resolution space have the same length.
42+
"""
43+
raise NotImplementedError()
4044

41-
def band_cols(self, df: pd.DataFrame):
42-
return [c for c in df.columns if not c.startswith(f"{self.dggs}_")]
45+
def band_cols(self, df: pd.DataFrame) -> list[str]:
46+
"""
47+
Returns the column names representing raster bands from an input image.
48+
"""
49+
raise NotImplementedError()
4350

4451
@staticmethod
4552
def valid_set(cells: set) -> set:
4653
"""
47-
Needs to be implemented by child class
54+
Given a set of cells belonging to the same DGGS, return the subset that are valid cell addresses.
4855
"""
4956
raise NotImplementedError()
5057

5158
@staticmethod
52-
def parent_cells(cells: set, resolution) -> map:
59+
def parent_cells(cells: set, resolution: int) -> map:
5360
"""
54-
Needs to be implemented by child class
61+
Given a set of DGGS cells, return an iterable of their parent cells at the given resolution.
5562
"""
5663
raise NotImplementedError
5764

58-
def expected_count(self, parent: str, resolution: int):
65+
def expected_count(self, parent: str, resolution: int) -> int:
5966
"""
60-
Needs to be implemented by child class
67+
Given a DGGS (parent) cell ID, and a target (child) resolution,
68+
return the expected number of child cells that completely represent this
69+
parent cell at the target resolution.
6170
"""
6271
raise NotImplementedError
6372

@@ -70,7 +79,7 @@ def index_func(
7079
band_labels: Tuple[str] = None,
7180
) -> pa.Table:
7281
"""
73-
Needs to be implemented by child class
82+
Function for primary indexation.
7483
"""
7584
raise NotImplementedError()
7685

@@ -89,27 +98,7 @@ def parent_groupby(
8998
cell values, which will happen when indexing a high resolution
9099
raster at a coarser DGGS resolution.
91100
"""
92-
index_col = self.index_col(resolution)
93-
partition_col = self.partition_col(parent_res)
94-
df = df.set_index(index_col)
95-
if decimals > 0:
96-
gb = (
97-
df.groupby([partition_col, index_col], sort=False, observed=True)
98-
.agg(aggfunc)
99-
.round(decimals)
100-
)
101-
else:
102-
gb = (
103-
df.groupby([partition_col, index_col], sort=False, observed=True)
104-
.agg(aggfunc)
105-
.round(decimals)
106-
.astype("Int64")
107-
)
108-
# Move parent out to a column; keep child as the index
109-
# MultiIndex levels are [partition_col, index_col] in that order
110-
gb = gb.reset_index(level=0) # parent -> column
111-
gb.index.name = index_col # child remains index
112-
return gb
101+
raise NotImplementedError
113102

114103
@staticmethod
115104
def cell_to_children_size(cell, desired_resolution: int) -> int:
@@ -129,31 +118,4 @@ def compaction(
129118
It assumes that the input has unique DGGS cell values
130119
as the index.
131120
"""
132-
unprocessed_indices = self.valid_set(set(df.index))
133-
if not unprocessed_indices:
134-
return df
135-
band_cols = self.band_cols(df)
136-
compaction_map = {}
137-
for r in range(parent_res, resolution):
138-
parent_cells = self.parent_cells(unprocessed_indices, r)
139-
parent_groups = df.loc[list(unprocessed_indices)].groupby(
140-
list(parent_cells)
141-
)
142-
for parent, group in parent_groups:
143-
if isinstance(parent, tuple) and len(parent) == 1:
144-
parent = parent[0]
145-
if parent in compaction_map:
146-
continue
147-
expected_count = self.expected_count(parent, resolution)
148-
if len(group) == expected_count and all(
149-
group[band_cols].nunique() == 1
150-
):
151-
compact_row = group.iloc[0]
152-
compact_row.name = parent # Rename the index to the parent cell
153-
compaction_map[parent] = compact_row
154-
unprocessed_indices -= set(group.index)
155-
compacted_df = pd.DataFrame(list(compaction_map.values()))
156-
remaining_df = df.loc[list(unprocessed_indices)]
157-
result_df = pd.concat([compacted_df, remaining_df])
158-
result_df = result_df.rename_axis(df.index.name)
159-
return result_df
121+
raise NotImplementedError()

0 commit comments

Comments
 (0)