1919from bedrock_ge .gi .io_utils import coerce_string , open_text_data_source
2020
2121
22- def ags3_to_db (
22+ def ags3_to_dfs (
2323 source : str | Path | IO [str ] | IO [bytes ] | bytes , encoding : str
2424) -> dict [str , pd .DataFrame ]:
2525 """Converts AGS 3 data to a dictionary of pandas DataFrames.
@@ -35,7 +35,7 @@ def ags3_to_db(
3535 a pandas DataFrame containing the data for that group.
3636 """
3737 # Initialize dictionary and variables used in the AGS 3 read loop
38- ags3_db = {}
38+ ags3_dfs = {}
3939 line_type = "line_0"
4040 group = ""
4141 headers : list [str ] = ["" , "" , "" ]
@@ -50,7 +50,7 @@ def ags3_to_db(
5050 if line .startswith ('"**' ):
5151 line_type = "group_name"
5252 if group :
53- ags3_db [group ] = pd .DataFrame (group_data , columns = headers )
53+ ags3_dfs [group ] = pd .DataFrame (group_data , columns = headers )
5454
5555 group = line .strip (' ,"*' )
5656 group_data = []
@@ -86,7 +86,7 @@ def ags3_to_db(
8686 continue
8787 elif len (data_row ) != len (headers ):
8888 print (
89- f"\n 🚨 CAUTION: The number of columns on line { i + 1 } ( { len ( data_row ) } ) doesn't match the number of columns of group { group } ({ len (headers )} )!" ,
89+ f"\n 🚨 CAUTION: The number of columns ( { len ( data_row ) } ) on line { i + 1 } doesn't match the number of columns ({ len (headers )} ) of group { group } !" ,
9090 f"{ group } headers: { headers } " ,
9191 f"Line { i + 1 } : { data_row } " ,
9292 sep = "\n " ,
@@ -113,75 +113,84 @@ def ags3_to_db(
113113 group_data .append (cleaned_data_row )
114114
115115 # Also add the last group's df to the dictionary of AGS dfs
116- ags3_db [group ] = pd .DataFrame (group_data , columns = headers ).dropna (axis = 1 , how = "all" )
116+ ags3_dfs [group ] = pd .DataFrame (group_data , columns = headers ).dropna (
117+ axis = 1 , how = "all"
118+ )
117119
118120 if not group :
119121 print (
120122 '🚨 ERROR: The provided AGS 3 data does not contain any groups, i.e. lines starting with "**'
121123 )
122124
123- return ags3_db
125+ return ags3_dfs
124126
125127
126128# TODO: AGS 3 table validation based on the AGS 3 data dictionary.
127129def ags3_to_brgi_db_mapping (
128- ags3_db : dict [str , pd . DataFrame ] ,
130+ source : str | Path | IO [str ] | IO [ bytes ] | bytes ,
129131 projected_crs : CRS ,
130- vertical_crs : CRS = CRS (3855 ),
132+ vertical_crs : CRS ,
133+ encoding : str ,
131134) -> BedrockGIDatabaseMapping :
132- """Map AGS 3 data to Bedrock GI data model.
135+ """Map AGS 3 data to the Bedrock GI data model.
133136
134137 Args:
135138 source (str | Path | IO[str] | IO[bytes] | bytes): The AGS 3 data to map, which is
136139 converted with `ags3_to_dfs` into a dictionary where each key is an AGS 3 group
137140 and the corresponding value is a pandas DataFrame containing the data for that group.
138141 projected_crs (CRS): Projected coordinate reference system (CRS).
139- vertical_crs (CRS, optional): Vertical CRS.
140- Defaults to the Earth Gravitational Model 2008.
142+ vertical_crs (CRS): Vertical CRS, e.g. EGM2008 height (EPSG:3855),
143+ which measures the orthometric height w.r.t. the Earth Gravitational Model 2008.
144+ encoding (str): Encoding of the text file or bytes stream.
141145
142146 Returns:
143147 BedrockGIDatabaseMapping: Object that maps AGS 3 data to Bedrock GI data model.
144148 """
145- check_ags_proj_group (ags3_db ["PROJ" ])
149+ ags3_dfs = ags3_to_dfs (source , encoding )
150+
151+ check_ags_proj_group (ags3_dfs ["PROJ" ])
146152 ags3_project = ProjectTableMapping (
147- data = ags3_db ["PROJ" ].to_dict (orient = "records" )[0 ],
148- project_uid = ags3_db ["PROJ" ]["PROJ_ID" ][0 ],
153+ data = ags3_dfs ["PROJ" ].to_dict (orient = "records" )[0 ],
154+ project_uid = ags3_dfs ["PROJ" ]["PROJ_ID" ]. iloc [0 ],
149155 horizontal_crs = projected_crs ,
150156 vertical_crs = vertical_crs ,
151157 )
152- del ags3_db ["PROJ" ]
158+ del ags3_dfs ["PROJ" ]
153159
154- Ags3HOLE .validate (ags3_db ["HOLE" ])
160+ Ags3HOLE .validate (ags3_dfs ["HOLE" ])
155161 ags3_location = LocationTableMapping (
156- data = ags3_db ["HOLE" ],
162+ data = ags3_dfs ["HOLE" ],
157163 location_id_column = "HOLE_ID" ,
158164 easting_column = "HOLE_NATE" ,
159165 northing_column = "HOLE_NATN" ,
160166 ground_level_elevation_column = "HOLE_GL" ,
161167 depth_to_base_column = "HOLE_FDEP" ,
162168 )
163- del ags3_db ["HOLE" ]
169+ del ags3_dfs ["HOLE" ]
164170
165- if "SAMP" in ags3_db .keys ():
166- Ags3SAMP .validate (ags3_db ["SAMP" ])
167- samp_df = ags3_db ["SAMP" ]
171+ if "SAMP" in ags3_dfs .keys ():
172+ Ags3SAMP .validate (ags3_dfs ["SAMP" ])
173+ samp_df = ags3_dfs ["SAMP" ]
168174 samp_df = _add_sample_source_id (samp_df )
169175 ags3_sample = SampleTableMapping (
170176 data = samp_df ,
171177 location_id_column = "HOLE_ID" ,
172178 sample_id_column = "sample_source_id" ,
173179 depth_to_top_column = "SAMP_TOP" ,
174180 )
175- del ags3_db ["SAMP" ]
181+ del ags3_dfs ["SAMP" ]
176182 else :
177183 print ("Your AGS 3 data doesn't contain a SAMP group, i.e. samples." )
184+ ags3_sample = None
178185
179186 ags3_lab_tests = []
180187 ags3_insitu_tests = []
181188 ags3_other_tables = []
182189
183- for group , df in ags3_db .items ():
184- if "SAMP_TOP" in df .columns :
190+ for group , df in ags3_dfs .items ():
191+ # Non-standard group names contain the "?" prefix.
192+ # => checking that "SAMP_TOP" / "HOLE_ID" is in the columns is too restrictive.
193+ if any ("SAMP_TOP" in col for col in df .columns ):
185194 df = _add_sample_source_id (df )
186195 ags3_lab_tests .append (
187196 LabTestTableMapping (
@@ -191,7 +200,7 @@ def ags3_to_brgi_db_mapping(
191200 sample_id_column = "sample_source_id" ,
192201 )
193202 )
194- elif "HOLE_ID" in df .columns :
203+ elif any ( "HOLE_ID" in col for col in df .columns ) :
195204 top_depth , base_depth = _get_depth_columns (group , list (df .columns ))
196205 ags3_insitu_tests .append (
197206 InSituTestTableMapping (
0 commit comments