77from pyproj import CRS
88
99from bedrock_ge .gi .ags_schemas import Ags3HOLE , Ags3SAMP , check_ags_proj_group
10- from bedrock_ge .gi .brgi_db_mapping import (
11- BedrockGIDatabaseMapping ,
10+ from bedrock_ge .gi .io_utils import coerce_string , open_text_data_source
11+ from bedrock_ge .gi .mapping_models import (
12+ BedrockGIMapping ,
1213 InSituTestTableMapping ,
1314 LabTestTableMapping ,
1415 LocationTableMapping ,
1516 OtherTable ,
1617 ProjectTableMapping ,
1718 SampleTableMapping ,
1819)
19- from bedrock_ge .gi .io_utils import coerce_string , open_text_data_source
2020
2121
2222def ags3_to_dfs (
2323 source : str | Path | IO [str ] | IO [bytes ] | bytes , encoding : str
2424) -> dict [str , pd .DataFrame ]:
2525 """Converts AGS 3 data to a dictionary of pandas DataFrames.
2626
27+ Also strips '?' from non-standard AGS 3 group and header names, in order to
28+ make the rest of the code more generic.
29+
2730 Args:
2831 source (str | Path | IO[str] | IO[bytes] | bytes): The AGS 3 file (str or Path)
2932 or a file-like object that represents the AGS 3 file.
@@ -52,14 +55,14 @@ def ags3_to_dfs(
5255 if group :
5356 ags3_dfs [group ] = pd .DataFrame (group_data , columns = headers )
5457
55- group = line .strip (' ,"*' )
58+ group = line .strip (' ,"*? ' )
5659 group_data = []
5760
5861 # In AGS 3 header names are prefixed with "*
5962 elif line .startswith ('"*' ):
6063 line_type = "headers"
6164 new_headers = line .split ('","' )
62- new_headers = [h .strip (' ,"*' ) for h in new_headers ]
65+ new_headers = [h .strip (' ,"*? ' ) for h in new_headers ]
6366
6467 # Some groups have so many headers that they span multiple lines.
6568 # Therefore we need to check whether the new headers are
@@ -131,7 +134,7 @@ def ags3_to_brgi_db_mapping(
131134 projected_crs : CRS ,
132135 vertical_crs : CRS ,
133136 encoding : str ,
134- ) -> BedrockGIDatabaseMapping :
137+ ) -> BedrockGIMapping :
135138 """Map AGS 3 data to the Bedrock GI data model.
136139
137140 Args:
@@ -180,7 +183,6 @@ def ags3_to_brgi_db_mapping(
180183 )
181184 del ags3_dfs ["SAMP" ]
182185 else :
183- print ("Your AGS 3 data doesn't contain a SAMP group, i.e. samples." )
184186 ags3_sample = None
185187
186188 ags3_lab_tests = []
@@ -190,7 +192,7 @@ def ags3_to_brgi_db_mapping(
190192 for group , df in ags3_dfs .items ():
191193 # The "?" prefix of non-standard group and header names has already been stripped
192194 # (see ags3_to_dfs) => an exact "SAMP_TOP" / "HOLE_ID" column-name match is sufficient.
193- if any ( "SAMP_TOP" in col for col in df .columns ) :
195+ if "SAMP_TOP" in df .columns :
194196 df = _add_sample_source_id (df )
195197 ags3_lab_tests .append (
196198 LabTestTableMapping (
@@ -200,7 +202,7 @@ def ags3_to_brgi_db_mapping(
200202 sample_id_column = "sample_source_id" ,
201203 )
202204 )
203- elif any ( "HOLE_ID" in col for col in df .columns ) :
205+ elif "HOLE_ID" in df .columns :
204206 top_depth , base_depth = _get_depth_columns (group , list (df .columns ))
205207 ags3_insitu_tests .append (
206208 InSituTestTableMapping (
@@ -214,24 +216,26 @@ def ags3_to_brgi_db_mapping(
214216 else :
215217 ags3_other_tables .append (OtherTable (table_name = group , data = df ))
216218
217- ags3_brgi_db_mapping = BedrockGIDatabaseMapping (
219+ brgi_db_mapping = BedrockGIMapping (
218220 Project = ags3_project ,
219221 Location = ags3_location ,
220222 InSitu = ags3_insitu_tests ,
221223 Sample = ags3_sample ,
222224 Lab = ags3_lab_tests ,
223225 Other = ags3_other_tables ,
224226 )
225- return ags3_brgi_db_mapping
227+ return brgi_db_mapping
226228
227229
228230def _add_sample_source_id (df : pd .DataFrame ) -> pd .DataFrame :
229231 df ["sample_source_id" ] = (
230232 df ["SAMP_REF" ].astype (str )
231- + "_ "
233+ + "- "
232234 + df ["SAMP_TYPE" ].astype (str )
233- + "_ "
235+ + "- "
234236 + df ["SAMP_TOP" ].astype (str )
237+ + "-"
238+ + df ["HOLE_ID" ].astype (str )
235239 )
236240 return df
237241
0 commit comments