Skip to content

Commit 730f044

Browse files
committed
Fix marimo dtypes
1 parent 8db1bea commit 730f044

File tree

4 files changed

+31
-25
lines changed

4 files changed

+31
-25
lines changed

examples/hk_kaitak_ags3/hk_kaitak_ags3_to_brgi_geodb.py

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import marimo
1919

20-
__generated_with = "0.13.11"
20+
__generated_with = "0.13.15"
2121
app = marimo.App(
2222
app_title="Kai Tak, HK AGS 3 data to bedrock_ge.gi geodatabase",
2323
)
@@ -249,7 +249,14 @@ def _():
249249

250250

251251
@app.cell
252-
def _(CRS, ags_to_brgi_db_mapping, map_to_brgi_db, zip, zipfile):
252+
def _(
253+
CRS,
254+
ags_to_brgi_db_mapping,
255+
map_to_brgi_db,
256+
merge_databases,
257+
zip,
258+
zipfile,
259+
):
253260
projected_crs = CRS("EPSG:2326")
254261
vertrical_crs = CRS("EPSG:5738")
255262
brgi_dbs = []
@@ -266,32 +273,14 @@ def _(CRS, ags_to_brgi_db_mapping, map_to_brgi_db, zip, zipfile):
266273
)
267274
brgi_dbs.append(map_to_brgi_db(ags3_mapping))
268275

269-
# if not brgi_db_obj:
270-
# brgi_db_obj = brgi_db_from_1_ags3_file
271-
272-
# if brgi_db_obj and brgi_db_from_1_ags3_file:
273-
# brgi_db_obj = merge_databases(brgi_db_obj, brgi_db_from_1_ags3_file)
274-
275-
# print(f"i = {i}: brgi_db = {brgi_db_obj}")
276-
277-
# with zipfile.ZipFile(zip) as zip_ref:
278-
# file_name = "21659/9508008.AGS"
279-
# print(f"\n🖥️ Processing {file_name} ...")
280-
# with zip_ref.open(file_name) as ags3_file:
281-
# # Convert content of a single AGS 3 file to a Dictionary of pandas dataframes (a database)
282-
# ags3_mapping_obj = ags3_to_brgi_db_mapping(
283-
# ags3_file, projected_crs, vertrical_crs, "utf-8"
284-
# )
285-
286-
# brgi_db_obj = map_to_brgi_db(ags3_mapping_obj)
276+
merged_brgi_db = merge_databases(brgi_dbs)
287277

288-
# brgi_db_obj
289-
return (brgi_dbs,)
278+
return (merged_brgi_db,)
290279

291280

292281
@app.cell
293-
def _(brgi_dbs, merge_databases):
294-
merged_brgi_db = merge_databases(brgi_dbs)
282+
def _(merged_brgi_db):
283+
merged_brgi_db.InSituTests["ISPT"]
295284
return
296285

297286

src/bedrock_ge/gi/db_operations.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44

55
import pandas as pd
66

7+
from bedrock_ge.gi.io_utils import convert_dtypes_object_to_string
78
from bedrock_ge.gi.schemas import (
89
BedrockGIDatabase,
910
InSituTestSchema,
11+
LabTestSchema,
1012
LocationSchema,
1113
ProjectSchema,
1214
SampleSchema,
@@ -40,11 +42,13 @@ def merge_databases(
4042
project_dataframes = _filter_dataframes([db.Project for db in dbs])
4143
merged_project = pd.concat(project_dataframes, ignore_index=True)
4244
merged_project = merged_project.drop_duplicates().reset_index(drop=True)
45+
merged_project = convert_dtypes_object_to_string(merged_project.convert_dtypes())
4346
ProjectSchema.validate(merged_project)
4447

4548
location_dataframes = _filter_dataframes([db.Location for db in dbs])
4649
merged_location = pd.concat(location_dataframes, ignore_index=True)
4750
merged_location = merged_location.drop_duplicates().reset_index(drop=True)
51+
merged_location = convert_dtypes_object_to_string(merged_location.convert_dtypes())
4852
LocationSchema.validate(merged_location)
4953
check_foreign_key("project_uid", merged_project, merged_location)
5054

@@ -65,6 +69,7 @@ def merge_databases(
6569
)
6670
insitu_df = pd.concat(insitu_dataframes, ignore_index=True)
6771
insitu_df = insitu_df.drop_duplicates().reset_index(drop=True)
72+
insitu_df = convert_dtypes_object_to_string(insitu_df.convert_dtypes())
6873
InSituTestSchema.validate(insitu_df)
6974
check_foreign_key("project_uid", merged_project, insitu_df)
7075
check_foreign_key("location_uid", merged_location, insitu_df)
@@ -75,6 +80,7 @@ def merge_databases(
7580
if sample_dfs:
7681
merged_sample = pd.concat(sample_dfs, ignore_index=True)
7782
merged_sample = merged_sample.drop_duplicates().reset_index(drop=True)
83+
merged_sample = convert_dtypes_object_to_string(merged_sample.convert_dtypes())
7884
SampleSchema.validate(merged_sample)
7985
check_foreign_key("project_uid", merged_project, merged_sample)
8086

@@ -87,6 +93,8 @@ def merge_databases(
8793
]
8894
lab_df = pd.concat(lab_dfs, ignore_index=True)
8995
lab_df = lab_df.drop_duplicates().reset_index(drop=True)
96+
lab_df = convert_dtypes_object_to_string(lab_df.convert_dtypes())
97+
LabTestSchema.validate(lab_df)
9098
check_foreign_key("project_uid", merged_project, lab_df)
9199
check_foreign_key("sample_uid", merged_sample, lab_df)
92100
merged_lab[lab_table] = lab_df
@@ -100,6 +108,7 @@ def merge_databases(
100108
]
101109
other_df = pd.concat(other_dfs, ignore_index=True)
102110
other_df = other_df.drop_duplicates().reset_index(drop=True)
111+
other_df = convert_dtypes_object_to_string(other_df.convert_dtypes())
103112
check_foreign_key("project_uid", merged_project, other_df)
104113
merged_other[other_table] = other_df
105114

src/bedrock_ge/gi/io_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from typing import IO, ContextManager
1010

1111
import chardet
12+
import pandas as pd
1213

1314
DEFAULT_ENCODING = "utf-8"
1415

@@ -190,3 +191,9 @@ def coerce_string(string: str) -> None | bool | float | str:
190191
return value
191192
except ValueError:
192193
return string
194+
195+
196+
def convert_dtypes_object_to_string(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Cast every ``object``-dtype column of *dataframe* to pandas ``"string"``.

    Columns whose dtype is not ``object`` are left untouched.

    Note:
        The conversion is applied to *dataframe* itself; the returned object
        is the same DataFrame that was passed in, not a copy.

    Args:
        dataframe: The DataFrame whose ``object`` columns should be converted.

    Returns:
        The same DataFrame, with its former ``object`` columns now using the
        ``"string"`` extension dtype.
    """
    # Resolve the affected column labels up front so the set of columns being
    # visited is fixed before any of them is reassigned.
    object_column_names = dataframe.select_dtypes(include=["object"]).columns
    for column_name in object_column_names:
        dataframe[column_name] = dataframe[column_name].astype("string")
    return dataframe

src/bedrock_ge/gi/mapper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def map_to_brgi_db(brgi_db_mapping: BedrockGIMapping) -> BedrockGIDatabase:
107107
insitu_tests[insitu_mapping.table_name] = insitu_df.copy()
108108

109109
# Create the sample table
110+
sample_df = None
110111
if brgi_db_mapping.Sample:
111112
sample_df = pd.DataFrame(
112113
{
@@ -167,7 +168,7 @@ def map_to_brgi_db(brgi_db_mapping: BedrockGIMapping) -> BedrockGIDatabase:
167168
Project=project_df,
168169
Location=location_df,
169170
InSituTests=insitu_tests,
170-
Sample=sample_df if brgi_db_mapping.Sample else None,
171+
Sample=sample_df,
171172
LabTests=lab_tests,
172173
Other=other_tables,
173174
)

0 commit comments

Comments
 (0)