Skip to content

Commit 4c00acd

Browse files
committed
♻️ Move timestamp processing to parent class
1 parent ace20c1 commit 4c00acd

File tree

3 files changed

+158
-152
lines changed

3 files changed

+158
-152
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ All notable changes to this library are documented in this file.
1010
- Drop `summary` database table processing of `max_tmpf_qc`, `min_tmpf_qc`,
1111
`pday_qc`, and `snow_qc`. These are ill-designed and unused.
1212
- Drop poorly designed `iemdb` support within `webutil.iemapp`.
13+
- Internal refactor moving product timestamp processing into the `WMOProduct` parent class.
1314

1415
### New Features
1516

src/pyiem/nws/product.py

Lines changed: 2 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,22 @@
22

33
import re
44
from collections import OrderedDict
5-
from datetime import datetime, timedelta, timezone
5+
from datetime import timedelta, timezone
66
from typing import Optional, Union
7-
from zoneinfo import ZoneInfo
87

98
from shapely.geometry import MultiPolygon, Polygon
109
from shapely.wkt import dumps
1110

1211
from pyiem import reference
1312
from pyiem.exceptions import InvalidPolygon, TextProductException
1413
from pyiem.nws import hvtec, ugc, vtec
15-
from pyiem.util import LOG, ddhhmm2datetime
14+
from pyiem.util import LOG
1615
from pyiem.wmo import WMOProduct
1716

1817
# The AWIPS Product Identifier is supposed to be 6chars as per directive,
1918
# but in practice it is sometimes something between 4 and 6 chars
2019
# We need to be careful this does not match the LDM sequence identifier
2120
AFOSRE = re.compile(r"^([A-Z0-9]{4,6})\s*\t*$", re.M)
22-
TIME_FMT = (
23-
"([0-9:]+) (AM|PM) ([A-Z][A-Z][A-Z]?T) ([A-Z][A-Z][A-Z]) "
24-
"([A-Z][A-Z][A-Z]) ([0-9]+) ([1-2][0-9][0-9][0-9])"
25-
)
26-
TIME_RE = re.compile(f"^{TIME_FMT}$", re.M | re.IGNORECASE)
27-
TIME_UTC_RE = re.compile(
28-
TIME_FMT.replace("(AM|PM) ([A-Z][A-Z][A-Z]?T)", r"(AM|PM)?\s?(UTC)"),
29-
re.M | re.I,
30-
)
31-
# Sometimes products have a duplicated timestamp in another tz
32-
TIME_EXT_RE = re.compile(
33-
rf"^{TIME_FMT}\s?/\s?{TIME_FMT}\s?/$", re.M | re.IGNORECASE
34-
)
35-
# Without the line start and end requirement
36-
TIME_RE_ANYWHERE = re.compile(f"{TIME_FMT}", re.IGNORECASE)
37-
TIME_STARTS_LINE = re.compile(r"^([0-9:]+) (AM|PM)")
3821

3922
TIME_MOT_LOC = re.compile(
4023
r"TIME\.\.\.MOT\.\.\.LOC\s+(?P<ztime>[0-9]{4})Z\s+"
@@ -171,54 +154,6 @@ def str2polygon(strdata):
171154
return Polygon(pts)
172155

173156

174-
def date_tokens2datetime(tokens):
175-
"""Convert tokens from MND regex to a valid time, if possible.
176-
177-
Returns:
178-
z (str): 3-4 char timezone string
179-
tz (datetime.timezone): of this product
180-
utcvalid (datetimetz): of this product
181-
"""
182-
tokens = list(tokens) # ensure mutable
183-
z = tokens[2].upper()
184-
tz = ZoneInfo(reference.name2pytz.get(z, "UTC"))
185-
hhmi = tokens[0]
186-
# False positive from regex
187-
if hhmi[0] == ":":
188-
hhmi = hhmi.replace(":", "")
189-
if hhmi.find(":") > -1:
190-
(hh, mi) = hhmi.split(":")
191-
elif len(hhmi) < 3:
192-
hh = hhmi
193-
mi = 0
194-
else:
195-
hh = hhmi[:-2]
196-
mi = hhmi[-2:]
197-
# Workaround another 24 hour clock issue
198-
if (
199-
tokens[2] in ["UTC", "GMT"]
200-
and tokens[1].upper() == "AM"
201-
and int(hh) == 12
202-
):
203-
hh = 0
204-
# Workaround 24 hour clock abuse
205-
if int(hh) >= 12 and (
206-
tokens[1].upper() == "PM" or tokens[2] in ["UTC", "GMT"]
207-
):
208-
# this is a hack to ensure this is PM when we are in UTC
209-
tokens[1] = "PM"
210-
hh = int(hh) - 12
211-
dstr = (
212-
f"{hh if int(hh) > 0 else 12}:{mi} "
213-
f"{tokens[1] if tokens[1] != '' else 'AM'} "
214-
f"{tokens[4]} {tokens[5]} {tokens[6]}"
215-
)
216-
# Careful here, need to go to UTC time first then come back!
217-
now = datetime.strptime(dstr, "%I:%M %p %b %d %Y")
218-
now += timedelta(hours=reference.offsets.get(z, 0))
219-
return z, tz, now.replace(tzinfo=timezone.utc)
220-
221-
222157
def qc_is_emergency(seg):
223158
"""Belt + Suspenders check that this segment is an emergency."""
224159
ffdt = seg.flood_tags.get("FLASH FLOOD DAMAGE THREAT")
@@ -675,15 +610,10 @@ def __init__(
675610
self.nwsli_provider = nwsli_provider
676611
self.unixtext = self.text.replace("\r", "")
677612
self.sections = self.unixtext.split("\n\n")
678-
# The "truth" timestamp
679-
self.valid = None
680613
self.segments = []
681-
self.z = None
682-
self.tz = None
683614
self.geometry = None
684615

685616
self.parse_afos()
686-
self._parse_valid(utcnow)
687617
if parse_segments:
688618
self.parse_segments()
689619

@@ -856,83 +786,6 @@ def get_product_id(self):
856786
pid += f"-{self.bbb}"
857787
return pid.strip()
858788

859-
def _parse_valid(self, provided_utcnow):
860-
"""Figure out the timestamp of this product.
861-
862-
Args:
863-
provided_utcnow (datetime): What our library was provided for the UTC
864-
timestamp, it could be None
865-
"""
866-
# The MND header hopefully has a full timestamp that is the best
867-
# truth that we can have for this product.
868-
tokens = TIME_RE.findall(self.unixtext)
869-
if not tokens:
870-
tokens = TIME_EXT_RE.findall(self.unixtext)
871-
if not tokens:
872-
tokens = TIME_RE_ANYWHERE.findall(self.unixtext)
873-
if not tokens:
874-
tokens = TIME_UTC_RE.findall(self.unixtext)
875-
if not tokens:
876-
# We are very desperate at this point, evasive action
877-
for line in self.unixtext.split("\n")[:15]:
878-
if TIME_STARTS_LINE.match(line):
879-
# Remove anything inside of () or //
880-
line = re.sub(r" \(.*?\)", "", line)
881-
line = re.sub(r" /.*?/", "", line)
882-
tokens = TIME_RE.findall(line)
883-
break
884-
if provided_utcnow is None and tokens:
885-
try:
886-
z, _tz, valid = date_tokens2datetime(tokens[0])
887-
if z not in reference.offsets:
888-
self.warnings.append(f"product timezone '{z}' unknown")
889-
except ValueError as exp:
890-
msg = (
891-
f"Invalid timestamp [{' '.join(tokens[0])}] found in "
892-
f"product [{self.wmo} {self.source} {self.afos}] header"
893-
)
894-
raise TextProductException(self.source[1:], msg) from exp
895-
896-
# Set the utcnow based on what we found by looking at the header
897-
self.utcnow = valid
898-
899-
# Search out the WMO header, this had better always be there
900-
# We only care about the first hit in the file, searching from top
901-
# Take the first hit, ignore others
902-
self.wmo_valid = ddhhmm2datetime(self.ddhhmm, self.utcnow)
903-
904-
# we can do no better
905-
self.valid = self.wmo_valid
906-
907-
# If we don't find anything, let's default to now, it's the best
908-
if not tokens:
909-
return
910-
self.z, self.tz, self.valid = date_tokens2datetime(tokens[0])
911-
# We want to forgive two easy situations
912-
offset = (self.valid - self.wmo_valid).total_seconds()
913-
# 1. self.valid is off from WMO by approximately 12 hours (am/pm flip)
914-
if 42900 <= offset <= 43800:
915-
LOG.info(
916-
"Auto correcting AM/PM typo, %s -> %s",
917-
self.valid,
918-
self.wmo_valid,
919-
)
920-
self.warnings.append(
921-
"Detected AM/PM flip, adjusting product timestamp - 12 hours"
922-
)
923-
self.valid = self.valid - timedelta(hours=12)
924-
# 2. self.valid is off by approximately 1 year (year typo)
925-
if -367 * 86400 < offset < -364 * 86400:
926-
LOG.info(
927-
"Auto correcting year typo, %s -> %s",
928-
self.valid,
929-
self.wmo_valid,
930-
)
931-
self.warnings.append(
932-
"Detected year typo, adjusting product timestamp + 1 year"
933-
)
934-
self.valid = self.valid.replace(year=self.valid.year + 1)
935-
936789
def get_affected_wfos(self):
937790
"""Based on the ugc_provider, figure out which WFOs are impacted by
938791
this product"""

0 commit comments

Comments
 (0)