Commit c4bbb43

amyfromandi authored and github-actions[bot] committed
Format code and sort imports
1 parent 31488e4 commit c4bbb43

File tree

4 files changed: +68 −43 lines

map-integration/macrostrat/map_integration/custom_integrations/gems_utils.py
map-staging/macrostrat/map_staging/Arizona Gems Scraping/new_arizona_gdb_scraper.py
services/api-v3/api/schemas.py
services/tileserver/macrostrat/tileserver/map_ingestion/__init__.py

map-integration/macrostrat/map_integration/custom_integrations/gems_utils.py

Lines changed: 16 additions & 3 deletions
@@ -106,7 +106,6 @@ def col_empty(df, col: str) -> bool:
         s = df[col].astype(str).str.strip()
         return s.eq("").all()
 
-
     lithology_candidates = ("generallithology", "geomaterial")
     lith_cols = [c for c in lithology_candidates if c in meta_df]
     if lith_cols:
@@ -409,7 +408,16 @@ def col_empty(df: G.GeoDataFrame, col: str) -> bool:
         s = df[col].astype(str).str.strip()
         return s.eq("").all()
 
-    preferred = ["descrip", "comments", "strike", "dip", "point_type", 'orig_id', 'dip_dir', 'certainty']
+    preferred = [
+        "descrip",
+        "comments",
+        "strike",
+        "dip",
+        "point_type",
+        "orig_id",
+        "dip_dir",
+        "certainty",
+    ]
     if all(col_empty(meta_df, c) for c in preferred):
         comments += (
             "All preferred line fields are missing or empty; "
@@ -419,7 +427,11 @@ def col_empty(df: G.GeoDataFrame, col: str) -> bool:
     elif col_empty(meta_df, "point_type") or col_empty(meta_df, "descrip"):
         comments += "Line required fields are empty or missing (name/type)."
         state = "pending"
-    if col_empty(meta_df, "comments") or col_empty(meta_df, "strike") or col_empty(meta_df, "dip"):
+    if (
+        col_empty(meta_df, "comments")
+        or col_empty(meta_df, "strike")
+        or col_empty(meta_df, "dip")
+    ):
         comments += " Some preferred line fields are missing or empty."
 
     return meta_df, comments, state
@@ -445,6 +457,7 @@ def map_lines_to_preferred_fields(
             actual_rename[col_lower_to_actual[src_lower]] = dst
 
     meta_df = meta_df.rename(columns=actual_rename)
+
     def col_empty(df: G.GeoDataFrame, col: str) -> bool:
         if col not in df.columns:
             return True
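The validation logic above hinges on the col_empty helper: a column counts as empty when it is missing outright or contains only blank strings. A minimal sketch with plain pandas (the real helper takes a GeoDataFrame, typed G.GeoDataFrame in the source):

import pandas as pd


def col_empty(df: pd.DataFrame, col: str) -> bool:
    # Mirrors the helper in the diff: empty means the column is absent,
    # or every value strips down to "". Note that NaN stringifies to
    # "nan" under astype(str), so truly-null cells would not count here.
    if col not in df.columns:
        return True
    s = df[col].astype(str).str.strip()
    return s.eq("").all()


df = pd.DataFrame({"descrip": ["", "  "], "strike": ["45", ""]})
assert col_empty(df, "descrip")     # all-blank column
assert col_empty(df, "dip")         # column not present at all
assert not col_empty(df, "strike")  # has at least one real value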

map-staging/macrostrat/map_staging/Arizona Gems Scraping/new_arizona_gdb_scraper.py

Lines changed: 50 additions & 38 deletions
@@ -4,8 +4,8 @@
 import re
 import time
 import zipfile
-from urllib.parse import unquote, urljoin, urlparse
 from typing import Optional
+from urllib.parse import unquote, urljoin, urlparse
 
 import requests
 from bs4 import BeautifulSoup
@@ -27,17 +27,17 @@
 SAVE_METADATA_PATH = "metadata.csv"
 CSV_HEADERS = [
     "filename_prefix",
-    "url", # the original repository page (item_url)
-    "ref_title", # metadata title
-    "authors", # semicolon-joined list of author names
-    "ref_year", # numeric year or empty string
-    "ref_source", # UA Library handle (or equivalent)
-    "isbn_doi", # DOI or first API link href
-    "license", # license type string
-    "series", # e.g. DGM-209
-    "keywords", # semicolon-joined keyword names
+    "url",  # the original repository page (item_url)
+    "ref_title",  # metadata title
+    "authors",  # semicolon-joined list of author names
+    "ref_year",  # numeric year or empty string
+    "ref_source",  # UA Library handle (or equivalent)
+    "isbn_doi",  # DOI or first API link href
+    "license",  # license type string
+    "series",  # e.g. DGM-209
+    "keywords",  # semicolon-joined keyword names
     "language",  # language
-    "description", # abstract
+    "description",  # abstract
 ]
 
 # inserts header row in csv
@@ -236,9 +236,10 @@ def filename_to_title_param(filename: str) -> str:
     return title_param
 
 
-
 def get_collection_id(title_param: str, filename: str) -> Optional[str]:
-    results = requests.get(f'https://data.azgs.arizona.edu/api/v1/metadata?collection_group=%21ADMM&title={title_param}')
+    results = requests.get(
+        f"https://data.azgs.arizona.edu/api/v1/metadata?collection_group=%21ADMM&title={title_param}"
+    )
     results.raise_for_status()
     results = results.json()
     for collection in results.get("data", []):
@@ -252,7 +253,8 @@ def get_collection_id(title_param: str, filename: str) -> Optional[str]:
 
 def get_collection_metadata(collection_id: str) -> dict:
     results = requests.get(
-        f'https://data.azgs.arizona.edu/api/v1/metadata/{collection_id}')
+        f"https://data.azgs.arizona.edu/api/v1/metadata/{collection_id}"
+    )
     payload = results.json()
 
     coll = payload.get("data", {})
@@ -261,16 +263,8 @@ def get_collection_metadata(collection_id: str) -> dict:
     top_links = coll.get("links", []) or []
     identifiers = meta.get("identifiers", {}) or {}
     license_info = meta.get("license", {}) or {}
-    authors = [
-        a.get("person")
-        for a in meta.get("authors", [])
-        if a.get("person")
-    ]
-    keywords = [
-        k.get("name")
-        for k in meta.get("keywords", [])
-        if k.get("name")
-    ]
+    authors = [a.get("person") for a in meta.get("authors", []) if a.get("person")]
+    keywords = [k.get("name") for k in meta.get("keywords", []) if k.get("name")]
     ref_source = meta_links[0].get("url") if meta_links else None
 
     isbn_doi = identifiers.get("doi")
@@ -292,13 +286,13 @@ def get_collection_metadata(collection_id: str) -> dict:
     # Remove the entire boilerplate paragraph starting with "This geodatabase is part of..."
     # This pattern matches from "This geodatabase" through "U.S. Government."
     description = re.sub(
-        r'\s*This geodatabase is part of a digital republication.*?U\.S\. Government\.',
-        '',
+        r"\s*This geodatabase is part of a digital republication.*?U\.S\. Government\.",
+        "",
         description,
-        flags=re.DOTALL | re.IGNORECASE
+        flags=re.DOTALL | re.IGNORECASE,
     )
-    description = re.sub(r'\n+', ' ', description)
-    description = re.sub(r'\s+', ' ', description).strip()
+    description = re.sub(r"\n+", " ", description)
+    description = re.sub(r"\s+", " ", description).strip()
 
     required_fields = {
         "authors": authors,
@@ -315,7 +309,6 @@ def get_collection_metadata(collection_id: str) -> dict:
     return required_fields
 
 
-
 def download_gdb_zips(item_url: str):
     """
     Download any .gdb.zip files on the page and record metadata in processed_item_urls.csv.
@@ -331,9 +324,9 @@ def download_gdb_zips(item_url: str):
 
     for file_url in gdb_links:
         parsed = urlparse(file_url)
-        filename = unquote(os.path.basename(parsed.path)) # e.g. 'WildcatHill.gdb.zip'
-        title_param = filename_to_title_param(filename) # e.g. 'Wildcat+Hill'
-        filename_prefix = strip_gdb_zip_suffixes(filename) # e.g. 'WildcatHill'
+        filename = unquote(os.path.basename(parsed.path))  # e.g. 'WildcatHill.gdb.zip'
+        title_param = filename_to_title_param(filename)  # e.g. 'Wildcat+Hill'
+        filename_prefix = strip_gdb_zip_suffixes(filename)  # e.g. 'WildcatHill'
         download_ok = False
         if filename in downloaded_filenames:
             print(f"Already scraped this file... skipping: {filename}")
@@ -343,7 +336,7 @@ def download_gdb_zips(item_url: str):
             downloaded_filenames.add(filename)
             download_ok = True
         else:
-            #trying downloading the gdb
+            # trying downloading the gdb
             out_path = os.path.join(OUTPUT_DIR, filename)
             print(f"Downloading: {filename}")
             try:
@@ -363,7 +356,9 @@ def download_gdb_zips(item_url: str):
             download_ok = False
 
         if not download_ok:
-            print(f"Skipping metadata for {filename_prefix} because download failed and file is not present.")
+            print(
+                f"Skipping metadata for {filename_prefix} because download failed and file is not present."
+            )
             continue
         else:
             # Get metadata via API
@@ -372,8 +367,12 @@ def download_gdb_zips(item_url: str):
 
         # Map and write filename + metadata to CSV
         if metadata:
-            authors_str = "; ".join(metadata["authors"]) if metadata["authors"] else ""
-            keywords_str = "; ".join(metadata["keywords"]) if metadata["keywords"] else ""
+            authors_str = (
+                "; ".join(metadata["authors"]) if metadata["authors"] else ""
+            )
+            keywords_str = (
+                "; ".join(metadata["keywords"]) if metadata["keywords"] else ""
+            )
 
             row = [
                 filename_prefix,  # filename_prefix
@@ -390,7 +389,20 @@ def download_gdb_zips(item_url: str):
                 metadata["description"] or "",  # description
             ]
         else:
-            row = [filename_prefix, item_url, "", "", "", "", "", "", "", "", "", ""]
+            row = [
+                filename_prefix,
+                item_url,
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+            ]
 
         with open(SAVE_METADATA_PATH, "a", newline="") as f:
             writer = csv.writer(f)
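The e.g. comments in the download loop pin down the three derived names: the URL basename ('WildcatHill.gdb.zip'), a plus-joined title parameter ('Wildcat+Hill'), and a bare prefix ('WildcatHill'). strip_gdb_zip_suffixes is defined elsewhere in the scraper; the version below is a hypothetical stand-in that just satisfies those examples:

import os
from urllib.parse import unquote, urlparse


def strip_gdb_zip_suffixes(filename: str) -> str:
    # Hypothetical re-implementation: peel the trailing .zip and then .gdb
    # off the basename, e.g. 'WildcatHill.gdb.zip' -> 'WildcatHill'.
    for suffix in (".zip", ".gdb"):
        if filename.endswith(suffix):
            filename = filename[: -len(suffix)]
    return filename


file_url = "https://example.org/downloads/WildcatHill.gdb.zip"  # illustrative URL
filename = unquote(os.path.basename(urlparse(file_url).path))
assert strip_gdb_zip_suffixes(filename) == "WildcatHill"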

services/api-v3/api/schemas.py

Lines changed: 1 addition & 1 deletion
@@ -123,7 +123,7 @@ class Object(Base):
         {"schema": "storage"},
     )
     id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
-    #delete object_group_id
+    # delete object_group_id
     object_group_id: Mapped[int] = mapped_column(
         ForeignKey("storage.object_group.id"), nullable=True
     )
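As an aside, object_group_id is annotated Mapped[int] but declared nullable=True, so the type hint and the column flag disagree about nullability. In SQLAlchemy 2.0 typed mappings, an Optional[] annotation can carry that information by itself; a minimal sketch under that assumption (table and class names abbreviated from the diff):

from typing import Optional

from sqlalchemy import ForeignKey
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class ObjectGroup(Base):
    __tablename__ = "object_group"
    __table_args__ = {"schema": "storage"}

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)


class Object(Base):
    __tablename__ = "object"
    __table_args__ = {"schema": "storage"}

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Optional[int] makes the ORM infer nullable=True, so the explicit
    # flag used in the diff becomes redundant.
    object_group_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("storage.object_group.id")
    )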

services/tileserver/macrostrat/tileserver/map_ingestion/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ async def tilejson(
     url_path = request.url_for(
         "tile", **{"slug": slug, "z": "{z}", "x": "{x}", "y": "{y}"}
     )
-    #TODO url_for resolves to http rather than https. find a better solution
+    # TODO url_for resolves to http rather than https. find a better solution
     tile_endpoint = str(url_path)
     tile_endpoint = tile_endpoint.replace("http://", "https://")
     bounds_query = f"""
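The TODO flags that url_for returns an http URL, which is typical when the app sits behind a TLS-terminating proxy, and the code patches the scheme by string replacement. A standalone sketch of a slightly safer variant that rewrites only the leading scheme (the hostname is illustrative); the more robust fix the TODO asks for is usually to have the ASGI server honor X-Forwarded-Proto, e.g. via uvicorn's ProxyHeadersMiddleware:

def force_https(url: str) -> str:
    # Rewrite only the first occurrence, i.e. the URL scheme itself.
    return url.replace("http://", "https://", 1)


assert (
    force_https("http://tiles.example.org/map/{z}/{x}/{y}")
    == "https://tiles.example.org/map/{z}/{x}/{y}"
)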
