Skip to content

Commit 1777457

Browse files
committed
Implement ags_to_brgi_db_mapping
1 parent afffca5 commit 1777457

File tree

7 files changed

+153
-39
lines changed

7 files changed

+153
-39
lines changed

examples/hk_kaitak_ags3/hk_kaitak_ags3_to_brgi_geodb.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,36 @@ def _(mo):
225225
return
226226

227227

228+
@app.cell
def _(CRS, zip, zipfile):
    # Purpose: run `ags_to_brgi_db_mapping` on every AGS file inside the `zip`
    # archive. The result is assigned to the same name on every iteration, so only
    # the mapping of the last processed file is still bound when the loop ends,
    # and this cell returns nothing.
    # NOTE(review): `zip` is a value passed into this cell and shadows the
    # builtin `zip` inside it.
    from bedrock_ge.gi.ags_parser import ags_to_brgi_db_mapping

    # EPSG:2326 is the Hong Kong 1980 Grid System and EPSG:5738 the Hong Kong
    # Principal Datum (HKPD) height.
    projected_crs = CRS("EPSG:2326")
    # NOTE(review): `vertrical_crs` looks like a typo of `vertical_crs`. It is left
    # unchanged here because the name may be visible to other notebook cells;
    # rename it together with every usage.
    vertrical_crs = CRS("EPSG:5738")

    with zipfile.ZipFile(zip) as zip_ref:
        # Iterate over files and directories in the .zip archive
        for file_name in zip_ref.namelist():
            # Only process files that have an .ags or .AGS extension
            if file_name.lower().endswith(".ags"):
                print(f"\n🖥️ Processing {file_name} ...")
                with zip_ref.open(file_name) as ags3_file:
                    # Map the content of a single AGS 3 file to the Bedrock GI data model.
                    # The binary file handle is passed as-is; no encoding is given, so
                    # `ags_to_brgi_db_mapping` is left to determine it.
                    ags3_to_brgi_db_mapping = ags_to_brgi_db_mapping(
                        ags3_file, projected_crs, vertrical_crs
                    )

    # Disabled alternative: process one specific file from the archive instead of
    # looping over all of them.
    # with zipfile.ZipFile(zip) as zip_ref:
    #     file_name = "58358/GE201304.18A.ags"
    #     print(f"\n🖥️ Processing {file_name} ...")
    #     with zip_ref.open(file_name) as ags3_file:
    #         # Map the content of a single AGS 3 file to the Bedrock GI data model
    #         ags3_to_brgi_db_mapping = ags_to_brgi_db_mapping(
    #             ags3_file, projected_crs, vertrical_crs
    #         )
    return
257+
228258
@app.cell
229259
def _(CRS, pd, zip, zip_of_ags3s_to_bedrock_gi_database):
230260
brgi_db = zip_of_ags3s_to_bedrock_gi_database(zip, CRS("EPSG:2326"))
52 KB
Binary file not shown.

src/bedrock_ge/gi/ags3.py

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from bedrock_ge.gi.io_utils import coerce_string, open_text_data_source
2020

2121

22-
def ags3_to_db(
22+
def ags3_to_dfs(
2323
source: str | Path | IO[str] | IO[bytes] | bytes, encoding: str
2424
) -> dict[str, pd.DataFrame]:
2525
"""Converts AGS 3 data to a dictionary of pandas DataFrames.
@@ -35,7 +35,7 @@ def ags3_to_db(
3535
a pandas DataFrame containing the data for that group.
3636
"""
3737
# Initialize dictionary and variables used in the AGS 3 read loop
38-
ags3_db = {}
38+
ags3_dfs = {}
3939
line_type = "line_0"
4040
group = ""
4141
headers: list[str] = ["", "", ""]
@@ -50,7 +50,7 @@ def ags3_to_db(
5050
if line.startswith('"**'):
5151
line_type = "group_name"
5252
if group:
53-
ags3_db[group] = pd.DataFrame(group_data, columns=headers)
53+
ags3_dfs[group] = pd.DataFrame(group_data, columns=headers)
5454

5555
group = line.strip(' ,"*')
5656
group_data = []
@@ -86,7 +86,7 @@ def ags3_to_db(
8686
continue
8787
elif len(data_row) != len(headers):
8888
print(
89-
f"\n🚨 CAUTION: The number of columns on line {i + 1} ({len(data_row)}) doesn't match the number of columns of group {group} ({len(headers)})!",
89+
f"\n🚨 CAUTION: The number of columns ({len(data_row)}) on line {i + 1} doesn't match the number of columns ({len(headers)}) of group {group}!",
9090
f"{group} headers: {headers}",
9191
f"Line {i + 1}: {data_row}",
9292
sep="\n",
@@ -113,75 +113,84 @@ def ags3_to_db(
113113
group_data.append(cleaned_data_row)
114114

115115
# Also add the last group's df to the dictionary of AGS dfs
116-
ags3_db[group] = pd.DataFrame(group_data, columns=headers).dropna(axis=1, how="all")
116+
ags3_dfs[group] = pd.DataFrame(group_data, columns=headers).dropna(
117+
axis=1, how="all"
118+
)
117119

118120
if not group:
119121
print(
120122
'🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
121123
)
122124

123-
return ags3_db
125+
return ags3_dfs
124126

125127

126128
# TODO: AGS 3 table validation based on the AGS 3 data dictionary.
127129
def ags3_to_brgi_db_mapping(
128-
ags3_db: dict[str, pd.DataFrame],
130+
source: str | Path | IO[str] | IO[bytes] | bytes,
129131
projected_crs: CRS,
130-
vertical_crs: CRS = CRS(3855),
132+
vertical_crs: CRS,
133+
encoding: str,
131134
) -> BedrockGIDatabaseMapping:
132-
"""Map AGS 3 data to Bedrock GI data model.
135+
"""Map AGS 3 data to the Bedrock GI data model.
133136
134137
Args:
135138
source (str | Path | IO[str] | IO[bytes] | bytes): The AGS 3 file (str or Path)
136139
or a file-like object that represents the AGS 3 file. It is parsed into
137140
a dictionary of pandas DataFrames, with one DataFrame per AGS 3 group.
138141
projected_crs (CRS): Projected coordinate reference system (CRS).
139-
vertical_crs (CRS, optional): Vertical CRS.
140-
Defaults to the Earth Gravitational Model 2008.
142+
vertical_crs (CRS): Vertical CRS. For example EGM2008 height, EPSG:3855,
143+
which measures the orthometric height w.r.t. the Earth Gravitational Model 2008.
144+
encoding (str): Encoding of the text file or bytes stream.
141145
142146
Returns:
143147
BedrockGIDatabaseMapping: Object that maps AGS 3 data to Bedrock GI data model.
144148
"""
145-
check_ags_proj_group(ags3_db["PROJ"])
149+
ags3_dfs = ags3_to_dfs(source, encoding)
150+
151+
check_ags_proj_group(ags3_dfs["PROJ"])
146152
ags3_project = ProjectTableMapping(
147-
data=ags3_db["PROJ"].to_dict(orient="records")[0],
148-
project_uid=ags3_db["PROJ"]["PROJ_ID"][0],
153+
data=ags3_dfs["PROJ"].to_dict(orient="records")[0],
154+
project_uid=ags3_dfs["PROJ"]["PROJ_ID"].iloc[0],
149155
horizontal_crs=projected_crs,
150156
vertical_crs=vertical_crs,
151157
)
152-
del ags3_db["PROJ"]
158+
del ags3_dfs["PROJ"]
153159

154-
Ags3HOLE.validate(ags3_db["HOLE"])
160+
Ags3HOLE.validate(ags3_dfs["HOLE"])
155161
ags3_location = LocationTableMapping(
156-
data=ags3_db["HOLE"],
162+
data=ags3_dfs["HOLE"],
157163
location_id_column="HOLE_ID",
158164
easting_column="HOLE_NATE",
159165
northing_column="HOLE_NATN",
160166
ground_level_elevation_column="HOLE_GL",
161167
depth_to_base_column="HOLE_FDEP",
162168
)
163-
del ags3_db["HOLE"]
169+
del ags3_dfs["HOLE"]
164170

165-
if "SAMP" in ags3_db.keys():
166-
Ags3SAMP.validate(ags3_db["SAMP"])
167-
samp_df = ags3_db["SAMP"]
171+
if "SAMP" in ags3_dfs.keys():
172+
Ags3SAMP.validate(ags3_dfs["SAMP"])
173+
samp_df = ags3_dfs["SAMP"]
168174
samp_df = _add_sample_source_id(samp_df)
169175
ags3_sample = SampleTableMapping(
170176
data=samp_df,
171177
location_id_column="HOLE_ID",
172178
sample_id_column="sample_source_id",
173179
depth_to_top_column="SAMP_TOP",
174180
)
175-
del ags3_db["SAMP"]
181+
del ags3_dfs["SAMP"]
176182
else:
177183
print("Your AGS 3 data doesn't contain a SAMP group, i.e. samples.")
184+
ags3_sample = None
178185

179186
ags3_lab_tests = []
180187
ags3_insitu_tests = []
181188
ags3_other_tables = []
182189

183-
for group, df in ags3_db.items():
184-
if "SAMP_TOP" in df.columns:
190+
for group, df in ags3_dfs.items():
191+
# Non-standard group names contain the "?" prefix.
192+
# => checking that "SAMP_TOP" / "HOLE_ID" is in the columns is too restrictive.
193+
if any("SAMP_TOP" in col for col in df.columns):
185194
df = _add_sample_source_id(df)
186195
ags3_lab_tests.append(
187196
LabTestTableMapping(
@@ -191,7 +200,7 @@ def ags3_to_brgi_db_mapping(
191200
sample_id_column="sample_source_id",
192201
)
193202
)
194-
elif "HOLE_ID" in df.columns:
203+
elif any("HOLE_ID" in col for col in df.columns):
195204
top_depth, base_depth = _get_depth_columns(group, list(df.columns))
196205
ags3_insitu_tests.append(
197206
InSituTestTableMapping(

src/bedrock_ge/gi/ags_parser.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from __future__ import annotations
2+
3+
from pathlib import Path
4+
from typing import IO
5+
6+
import pandas as pd
7+
from pyproj import CRS
8+
9+
from bedrock_ge.gi.ags3 import ags3_to_brgi_db_mapping
10+
from bedrock_ge.gi.brgi_db_mapping import BedrockGIDatabaseMapping
11+
from bedrock_ge.gi.io_utils import detect_encoding, open_text_data_source
12+
13+
14+
def ags_to_brgi_db_mapping(
    source: str | Path | IO[str] | IO[bytes] | bytes,
    projected_crs: CRS,
    vertical_crs: CRS = CRS(3855),
    encoding: str | None = None,
) -> BedrockGIDatabaseMapping:
    """Map AGS 3 or AGS 4 data to the Bedrock GI data model.

    The AGS version is detected from the first non-blank line of the data:
    a line starting with `"**` is treated as AGS 3 and a line starting with
    `"GROUP"` as AGS 4. Only AGS 3 data can currently be mapped.

    Args:
        source (str | Path | IO[str] | IO[bytes] | bytes): The AGS file (str or Path)
            or a file-like object that represents the AGS file.
        projected_crs (CRS): Projected Coordinate Reference System (CRS). For example:
            - OSGB36 / British National Grid: `pyproj.CRS("EPSG:27700")`
            - Hong Kong 1980 Grid System: `pyproj.CRS("EPSG:2326")`
        vertical_crs (CRS, optional): Vertical CRS. Defaults to EGM2008 height, EPSG:3855
            which measures the orthometric height w.r.t. the Earth Gravitational Model 2008.
            - Ordnance Datum Newlyn (ODN) Height: `pyproj.CRS("EPSG:5701")`
            - Hong Kong Principal Datum (HKPD) Height: `pyproj.CRS("EPSG:5738")`
        encoding (str | None, optional): Encoding of the text file or bytes stream.
            Defaults to None. An attempt at detecting the encoding will be made if None.

    Raises:
        ValueError: If the data contains only blank lines, or if its first
            non-blank line does not match the AGS 3 or AGS 4 format.
        NotImplementedError: If the data is AGS 4, which cannot be mapped yet.

    Returns:
        BedrockGIDatabaseMapping: Object that maps AGS 3 or AGS 4 data to Bedrock GI data model.
    """
    if not encoding:
        encoding = detect_encoding(source)

    # Get first non-blank line, None if all lines are blank
    # NOTE(review): `source` is read here and again by the version-specific mapper
    # below; for file-like objects this presumably relies on the io_utils helpers
    # rewinding the stream - confirm.
    with open_text_data_source(source, encoding=encoding) as f:
        first_line = next((line.strip() for line in f if line.strip()), None)

    if not first_line:
        raise ValueError("The file provided has only blank lines")

    if first_line.startswith('"**'):
        ags_version = 3
        brgi_db_mapping = ags3_to_brgi_db_mapping(
            source, projected_crs, vertical_crs, encoding
        )
    elif first_line.startswith('"GROUP"'):
        # TODO: call ags4_to_brgi_db_mapping(source, projected_crs, vertical_crs, encoding)
        # once it exists. Fail explicitly until then: falling through would hit an
        # unbound `brgi_db_mapping` in the logging code below.
        raise NotImplementedError(
            "AGS 4 data was detected, but mapping AGS 4 data to the Bedrock GI data model is not implemented yet."
        )
    else:
        # If first non-empty line doesn't match AGS 3 or AGS 4 format
        raise ValueError("The data provided is not valid AGS 3 or AGS 4 data.")

    # Log information about the mapped AGS data
    project_uid = brgi_db_mapping.Project.project_uid
    n_gi_locations = len(brgi_db_mapping.Location.data)
    # The Sample table mapping is optional; report 0 samples when it is absent.
    n_samples = len(brgi_db_mapping.Sample.data) if brgi_db_mapping.Sample else 0
    print_args = [
        f"AGS {ags_version} data was read for Project {project_uid}",
        f"This GI data contains {n_gi_locations} GI locations, {n_samples} samples and:",
        f"  - In-Situ Tests: {[insitu_test.table_name for insitu_test in brgi_db_mapping.InSitu]}",
    ]
    # Lab tests and other tables are only listed when there are any.
    if brgi_db_mapping.Lab:
        print_args.append(
            f"  - Lab Tests: {[lab_test.table_name for lab_test in brgi_db_mapping.Lab]}"
        )
    if brgi_db_mapping.Other:
        print_args.append(
            f"  - Other Tables: {[other_table.table_name for other_table in brgi_db_mapping.Other]}"
        )
    print(*print_args, sep="\n", end="\n\n")

    return brgi_db_mapping

src/bedrock_ge/gi/ags_schemas.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,6 @@ class BaseSAMP(pa.DataFrameModel):
8181

8282

8383
class Ags3SAMP(BaseSAMP):
84-
sample_id: Series[str] = pa.Field(
85-
# primary_key=True,
86-
unique=True,
87-
coerce=True,
88-
description="Sample unique identifier",
89-
# example="REF_TYPE_TOP_HOLE_ID",
90-
)
9184
HOLE_ID: Series[str] = pa.Field(
9285
# foreign_key="Ags3HOLE.HOLE_ID",
9386
description="Exploratory hole or location equivalent",

src/bedrock_ge/gi/brgi_db_mapping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ class BedrockGIDatabaseMapping(BaseModel):
6868
Location: LocationTableMapping
6969
InSitu: list[InSituTestTableMapping]
7070
Sample: Optional[SampleTableMapping] = None
71-
Lab: Optional[list[LabTestTableMapping]] = None
72-
Other: Optional[list[OtherTable]] = None
71+
Lab: Optional[list[LabTestTableMapping]] = []
72+
Other: Optional[list[OtherTable]] = []
7373

7474

7575
def map_to_brgi_db(brgi_db_mapping: BedrockGIDatabaseMapping) -> BedrockGIDatabase:

src/bedrock_ge/gi/gis_geometry.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
from pyproj.crs import CRS
1010
from shapely.geometry import LineString, Point
1111

12-
# TODO: change function type hints, such that pandera checks the dataframes against the Bedrock schemas
13-
1412

1513
def calculate_gis_geometry(
1614
no_gis_brgi_db: Dict[str, Union[pd.DataFrame, gpd.GeoDataFrame]],
@@ -52,13 +50,13 @@ def calculate_gis_geometry(
5250
print("Calculating GIS geometry for the Bedrock GI database tables...")
5351

5452
# Check if all projects have the same CRS
55-
if not brgi_db["Project"]["crs_wkt"].nunique() == 1:
53+
if not brgi_db["Project"]["horizontal_crs_wkt"].nunique() == 1:
5654
raise ValueError(
5755
"All projects must have the same CRS (Coordinate Reference System).\n"
58-
"Raise an issue on GitHub in case you need to be able to combine GI data that was acquired in multiple different CRS's."
56+
"Raise an issue on GitHub in case you need to be able to combine GI data that was acquired in multiple different CRSes."
5957
)
6058

61-
crs = CRS.from_wkt(brgi_db["Project"]["crs_wkt"].iloc[0])
59+
crs = CRS.from_wkt(brgi_db["Project"]["horizontal_crs_wkt"].iloc[0])
6260

6361
# Calculate GIS geometry for the 'Location' table
6462
if verbose:

0 commit comments

Comments
 (0)