Skip to content

Commit 0ea001e

Browse files
committed
Implement UnInt and UnDelta for uncertain date durations
Based on previous experiments with uncertainties and portion libraries and feedback from @taylor-arnold
1 parent e664a81 commit 0ea001e

File tree

5 files changed

+335
-17
lines changed

5 files changed

+335
-17
lines changed

src/undate/converters/calendars/gregorian.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ class GregorianDateConverter(BaseCalendarConverter):
1313
#: calendar
1414
calendar_name: str = "Gregorian"
1515

16-
#: known non-leap year
16+
#: arbitrary known non-leap year
1717
NON_LEAP_YEAR: int = 2022
18+
#: arbitrary known leap year
19+
LEAP_YEAR: int = 2024
1820

1921
def min_month(self) -> int:
2022
"""First month for the Gregorian calendar."""
@@ -38,6 +40,7 @@ def max_day(self, year: int, month: int) -> int:
3840
_, max_day = monthrange(year, month)
3941
else:
4042
# if year and month are unknown, return maximum possible
43+
# TODO: should this return an IntervalRange?
4144
max_day = 31
4245

4346
return max_day

src/undate/date.py

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from enum import IntEnum
2+
from dataclasses import dataclass, replace
3+
import operator
24

35
# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
4-
from typing import Optional, Union
6+
from typing import Optional, Union, Iterable
57

68
import numpy as np
79

@@ -29,6 +31,139 @@ def days(self) -> int:
2931
return int(self.astype("datetime64[D]").astype("int"))
3032

3133

34+
@dataclass
35+
class UnInt:
36+
lower: int
37+
upper: int
38+
39+
def __post_init__(self):
40+
# validate that lower value is less than upper
41+
if not self.lower < self.upper:
42+
raise ValueError(
43+
f"Lower value ({self.lower}) must be less than upper ({self.upper})"
44+
)
45+
46+
def __iter__(self) -> Iterable:
47+
# yield all integers in range from lower to upper, inclusive
48+
yield from range(self.lower, self.upper + 1)
49+
50+
def __gt__(self, other: object) -> bool:
51+
match other:
52+
case int():
53+
return self.upper > other
54+
case UnInt():
55+
return self.upper > other.lower
56+
case _:
57+
return NotImplemented
58+
59+
def __lt__(self, other: object) -> bool:
60+
match other:
61+
case int():
62+
return self.upper < other
63+
case UnInt():
64+
return self.upper < other.lower
65+
case _:
66+
return NotImplemented
67+
68+
def __contains__(self, other: object) -> bool:
69+
match other:
70+
case int():
71+
return other >= self.lower and other <= self.upper
72+
case UnInt():
73+
return other.lower >= self.lower and other.upper <= self.upper
74+
case _:
75+
# unsupported type: return false
76+
return False
77+
78+
def _replace_with(self, other_lower, other_upper, op):
79+
"""Create and return a new instance of UnInt using the specified
80+
operator (e.g. add, subtract) and other values to modify the values in
81+
the current UnInt instance."""
82+
return replace(
83+
self, lower=op(self.lower, other_lower), upper=op(self.upper, other_upper)
84+
)
85+
86+
def __add__(self, other: object) -> bool:
87+
match other:
88+
case int():
89+
# increase both values by the added amount
90+
add_values = (other, other)
91+
case UnInt():
92+
# subtract the upper and lower values by the other lower and upper
93+
# to include the largest range of possible values
94+
# (when calculating with uncertain values, the uncertainty increases)
95+
add_values = (other.lower, other.upper)
96+
case _:
97+
return NotImplemented
98+
99+
return self._replace_with(*add_values, operator.add)
100+
101+
def __sub__(self, other):
102+
match other:
103+
case int():
104+
# decrease both values by the subtracted amount
105+
sub_values = (other, other)
106+
case UnInt():
107+
# to determine the largest range of possible values,
108+
# subtract the other upper value from current lower
109+
# and other lower value from current upper
110+
sub_values = (other.upper, other.lower)
111+
case _:
112+
return NotImplemented
113+
114+
return self._replace_with(*sub_values, operator.sub)
115+
116+
117+
@dataclass
class UnDelta:
    """
    An uncertain timedelta, for durations where the number of days is uncertain.
    Initialize with two or more possible durations in days as integers; the
    smallest and largest are used as the bounds of the duration stored
    in :attr:`days`, an instance of :class:`UnInt`.

    Initialization, string representation, and equality are all defined
    explicitly on this class rather than generated by the dataclass decorator.
    """

    # NOTE: we will probably need other timedelta-like logic here besides days...

    #: possible durations in days, as an instance of :class:`UnInt`
    days: UnInt

    def __init__(self, *days: int):
        # a single value is not uncertain; require at least two possibilities
        if len(days) < 2:
            raise ValueError(
                "Must specify at least two values for an uncertain duration"
            )
        shortest, longest = min(days), max(days)
        self.days = UnInt(shortest, longest)

    def __repr__(self):
        # customize string representation for simpler notation; default
        # specifies full UnInt initialization with upper and lower keywords
        span = self.days
        return f"{self.__class__.__name__}(days=[{span.lower},{span.upper}])"

    # TODO: what does equality for an uncertain range mean?
    # is an uncertain range ever equal to another uncertain range?

    def __eq__(self, other: object) -> bool:
        # an uncertain duration is only considered equal to itself,
        # even when another duration has the same possible values
        return other is self

    def __lt__(self, other: object) -> bool:
        # delegate to comparison of the day counts
        if isinstance(other, (Timedelta, UnDelta)):
            return self.days < other.days
        return NotImplemented

    def __gt__(self, other: object) -> bool:
        # delegate to comparison of the day counts
        if isinstance(other, (Timedelta, UnDelta)):
            return self.days > other.days
        return NotImplemented
165+
166+
32167
#: timedelta for single day
33168
ONE_DAY = Timedelta(1) # ~ equivalent to datetime.timedelta(days=1)
34169
#: timedelta for a single year (non-leap year)

src/undate/undate.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from typing import Dict, Optional, Union
2121

2222
from undate.converters.base import BaseDateConverter
23-
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta
23+
from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta, UnDelta
2424

2525

2626
class Calendar(StrEnum):
@@ -439,13 +439,14 @@ def _get_date_part(self, part: str) -> Optional[str]:
439439
value = self.initial_values.get(part)
440440
return str(value) if value else None
441441

442-
def duration(self) -> Timedelta:
442+
def duration(self) -> Timedelta | UnDelta:
443443
"""What is the duration of this date?
444444
Calculate based on earliest and latest date within range,
445445
taking into account the precision of the date even if not all
446446
parts of the date are known. Note that durations are inclusive
447447
(i.e., a closed interval) and include both the earliest and latest
448-
date rather than the difference between them."""
448+
date rather than the difference between them. Returns a :class:`undate.date.Timedelta` when
449+
possible, and an :class:`undate.date.UnDelta` when the duration is uncertain."""
449450

450451
# if precision is a single day, duration is one day
451452
# no matter when it is or what else is known
@@ -456,20 +457,48 @@ def duration(self) -> Timedelta:
456457
# calculate month duration within a single year (not min/max)
457458
if self.precision == DatePrecision.MONTH:
458459
latest = self.latest
460+
# if year is unknown, calculate month duration in
461+
# leap year and non-leap year, in case length varies
459462
if not self.known_year:
460-
# if year is unknown, calculate month duration in
461-
# a single year
462-
latest = Date(self.earliest.year, self.latest.month, self.latest.day)
463+
# TODO: should leap-year specific logic shift to the calendars,
464+
# since it works differently depending on the calendar?
465+
possible_years = [
466+
self.calendar_converter.LEAP_YEAR,
467+
self.calendar_converter.NON_LEAP_YEAR,
468+
]
469+
# TODO: what about partially known years like 191X ?
470+
else:
471+
# otherwise, get possible durations for all possible months
472+
# for a known year
473+
possible_years = [self.earliest.year]
474+
475+
# for every possible month and year, get max days for that month,
476+
possible_max_days = set()
477+
# appease mypy, which says month values could be None here
478+
if self.earliest.month is not None and self.latest.month is not None:
479+
for possible_month in range(self.earliest.month, self.latest.month + 1):
480+
for year in possible_years:
481+
possible_max_days.add(
482+
self.calendar_converter.max_day(year, possible_month)
483+
)
484+
485+
# if there is more than one possible value for month length,
486+
# whether due to leap year / non-leap year or ambiguous month,
487+
# return an uncertain delta
488+
if len(possible_max_days) > 1:
489+
return UnDelta(*possible_max_days)
490+
491+
# otherwise, calculate timedelta normally
492+
max_day = list(possible_max_days)[0]
493+
latest = Date(self.earliest.year, self.earliest.month, max_day)
463494

464-
# latest = datetime.date(
465-
# self.earliest.year, self.latest.month, self.latest.day
466-
# )
467495
delta = latest - self.earliest + ONE_DAY
468496
# month duration can't ever be more than 31 days
469497
# (could we ever know if it's smaller?)
470498

471499
# if granularity == month but not known month, duration = 31
472500
if delta.astype(int) > 31:
501+
# FIXME: this depends on calendar!
473502
return ONE_MONTH_MAX
474503
return delta
475504

0 commit comments

Comments
 (0)