Skip to content

Commit 3da4e22

Browse files
authored
Merge pull request #145 from dh-tech/bugfix/i143-compare-unknown-years
Update comparison methods to check for unknown years
2 parents 50f297f + b7be886 commit 3da4e22

File tree

3 files changed

+123
-18
lines changed

3 files changed

+123
-18
lines changed

src/undate/undate.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,10 @@ def __eq__(self, other: object) -> bool:
300300
# with this type
301301
return NotImplemented
302302

303+
# if either date has an unknown year, then not equal
304+
if not self.known_year or not other.known_year:
305+
return False
306+
303307
# if both dates are fully known, then earliest/latest check
304308
# is sufficient (and will work across calendars!)
305309

@@ -330,6 +334,14 @@ def __eq__(self, other: object) -> bool:
330334

331335
def __lt__(self, other: object) -> bool:
332336
other = self._comparison_type(other)
337+
if other is NotImplemented:
338+
# return NotImplemented to indicate comparison is not supported
339+
# with this type
340+
return NotImplemented
341+
342+
# if either date has a completely unknown year, then we can't compare
343+
if self.unknown_year or other.unknown_year:
344+
return False
333345

334346
# if this date ends before the other date starts,
335347
# return true (this date is earlier, so it is less)
@@ -366,19 +378,38 @@ def __gt__(self, other: object) -> bool:
366378
# define gt ourselves so we can support > comparison with datetime.date,
367379
# but rely on existing less than implementation.
368380
# strictly greater than must rule out equals
381+
382+
# if either date has a completely unknown year, then we can't compare
383+
# NOTE: this means that gt and lt will both be false when comparing
384+
# with a date with an unknown year...
385+
if self.unknown_year or isinstance(other, Undate) and other.unknown_year:
386+
return False
387+
369388
return not (self < other or self == other)
370389

371390
def __le__(self, other: object) -> bool:
391+
# if either date has a completely unknown year, then we can't compare
392+
if self.unknown_year or isinstance(other, Undate) and other.unknown_year:
393+
return False
394+
372395
return self == other or self < other
373396

374397
def __contains__(self, other: object) -> bool:
375398
# if the two dates are strictly equal, don't consider
376399
# either one as containing the other
377400
other = self._comparison_type(other)
401+
if other is NotImplemented:
402+
# return NotImplemented to indicate comparison is not supported
403+
# with this type
404+
return NotImplemented
378405

379406
if self == other:
380407
return False
381408

409+
# if either date has a completely unknown year, then we can't determine
410+
if self.unknown_year or other.unknown_year:
411+
return False
412+
382413
return all(
383414
[
384415
self.earliest <= other.earliest,
@@ -415,19 +446,30 @@ def to_undate(cls, other: object) -> "Undate":
415446

416447
@property
417448
def known_year(self) -> bool:
449+
"year is fully known"
418450
return self.is_known("year")
419451

452+
@property
453+
def unknown_year(self) -> bool:
454+
"year is completely unknown"
455+
return self.is_unknown("year")
456+
420457
def is_known(self, part: str) -> bool:
421-
"""Check if a part of the date (year, month, day) is known.
458+
"""Check if a part of the date (year, month, day) is fully known.
422459
Returns False if unknown or only partially known."""
423460
# TODO: should we use constants or enum for values?
424461

425462
# if we have an integer, then consider the date known
426463
# if we have a string, then it is only partially known; return false
427464
return isinstance(self.initial_values[part], int)
428465

466+
def is_unknown(self, part: str) -> bool:
467+
"""Check if a part of the date (year, month, day) is completely unknown."""
468+
return self.initial_values.get(part) is None
469+
429470
def is_partially_known(self, part: str) -> bool:
430-
# TODO: should XX / XXXX really be considered partially known? other code seems to assume this, so we'll preserve the behavior
471+
# TODO: should XX / XXXX really be considered partially known?
472+
# other code seems to assume this, so we'll preserve the behavior
431473
return isinstance(self.initial_values[part], str)
432474
# and self.initial_values[part].replace(self.MISSING_DIGIT, "") != ""
433475

@@ -537,8 +579,15 @@ def duration(self) -> Timedelta | UnDelta:
537579
if self.precision == DatePrecision.DAY:
538580
return ONE_DAY
539581

540-
possible_max_days = set()
582+
# if year is known and no values are partially known,
583+
# we can calculate a time delta based on earliest + latest
584+
if self.known_year and not any(
585+
[self.is_partially_known(part) for part in ["year", "month", "day"]]
586+
):
587+
# subtract earliest from latest and add a day to include start day in the count
588+
return self.latest - self.earliest + ONE_DAY
541589

590+
possible_max_days = set()
542591
# if precision is month and year is unknown,
543592
# calculate month duration within a single year (not min/max)
544593
if self.precision == DatePrecision.MONTH:
@@ -584,13 +633,9 @@ def duration(self) -> Timedelta | UnDelta:
584633

585634
# if there is more than one possible value for number of days
586635
# due to range including leap year / non-leap year, return an uncertain delta
587-
if possible_max_days:
588-
if len(possible_max_days) > 1:
589-
return UnDelta(*possible_max_days)
590-
return Timedelta(possible_max_days.pop())
591-
592-
# otherwise, subtract earliest from latest and add a day to include start day in the count
593-
return self.latest - self.earliest + ONE_DAY
636+
if len(possible_max_days) > 1:
637+
return UnDelta(*possible_max_days)
638+
return Timedelta(possible_max_days.pop())
594639

595640
def _missing_digit_minmax(
596641
self, value: str, min_val: int, max_val: int

tests/test_converters/test_iso8601.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ def test_parse_singledate(self):
77
assert ISO8601DateFormat().parse("2002") == Undate(2002)
88
assert ISO8601DateFormat().parse("1991-05") == Undate(1991, 5)
99
assert ISO8601DateFormat().parse("1991-05-03") == Undate(1991, 5, 3)
10-
# missing year but month/day known
11-
assert ISO8601DateFormat().parse("--05-03") == Undate(month=5, day=3)
10+
# missing year but month/day known; compare repr string
11+
assert repr(ISO8601DateFormat().parse("--05-03")) == repr(
12+
Undate(month=5, day=3)
13+
)
1214

1315
def test_parse_singledate_unequal(self):
1416
assert ISO8601DateFormat().parse("2002") != Undate(2003)

tests/test_undate.py

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,12 @@ def test_year_property(self):
189189
# unset year
190190
assert Undate(month=12, day=31).year == "XXXX"
191191

192-
# NOTE: no longer supported to inistalize undate with no date information
192+
# NOTE: no longer supported to initialize undate with no date information
193193
# force method to hit conditional for date precision
194-
# some_century = Undate()
195-
# some_century.precision = DatePrecision.CENTURY
196-
# assert some_century.year is None
194+
some_century = Undate(year="X")
195+
some_century.initial_values["year"] = None
196+
some_century.precision = DatePrecision.CENTURY
197+
assert some_century.year is None
197198

198199
def test_month_property(self):
199200
# one, two digit month
@@ -233,7 +234,8 @@ def test_eq(self):
233234
assert Undate(2022) == Undate(2022)
234235
assert Undate(2022, 10) == Undate(2022, 10)
235236
assert Undate(2022, 10, 1) == Undate(2022, 10, 1)
236-
assert Undate(month=2, day=7) == Undate(month=2, day=7)
237+
# dates without a known year cannot be known to be equal
238+
assert not Undate(month=2, day=7) == Undate(month=2, day=7)
237239

238240
# something we can't convert for comparison should return NotImplemented
239241
assert Undate(2022).__eq__("not a date") == NotImplemented
@@ -259,6 +261,8 @@ def test_not_eq(self):
259261
# partially unknown dates should NOT be considered equal
260262
assert Undate("19XX") != Undate("19XX")
261263
assert Undate(1980, "XX") != Undate(1980, "XX")
264+
# same dates with unknown years should not be considered equal
265+
assert Undate(month=2, day=7) != Undate(month=2, day=7)
262266

263267
testdata_lt_gt = [
264268
# dates to test for gt/lt comparison: earlier date, later date
@@ -307,7 +311,23 @@ def test_lte(self, earlier, later):
307311
assert earlier <= later
308312
assert later >= earlier
309313

314+
def test_gt_lt_unknown_years(self):
315+
# unknown years cannot be compared on either side...
316+
year100 = Undate(100)
317+
some_january = Undate(month=1)
318+
assert not year100 < some_january
319+
assert not year100 <= some_january
320+
assert not year100 > some_january
321+
assert not year100 >= some_january
322+
assert not some_january < year100
323+
assert not some_january <= year100
324+
assert not some_january > year100
325+
assert not some_january >= year100
326+
310327
def test_lt_notimplemented(self):
328+
# unsupported type should bail out and return NotImplemented
329+
assert Undate(2022).__lt__("foo") == NotImplemented
330+
311331
# how to compare mixed precision where dates overlap?
312332
# if the second date falls *within* earliest/latest,
313333
# then it is not clearly less; not implemented?
@@ -340,6 +360,9 @@ def test_lt_notimplemented(self):
340360
def test_contains(self, date1, date2):
341361
assert date1 in date2
342362

363+
# unsupported type should bail out and return NotImplemented
364+
assert Undate(2022).__contains__("foo") == NotImplemented
365+
343366
testdata_not_contains = [
344367
# dates not in range
345368
(Undate(1980), Undate(2020)),
@@ -359,6 +382,9 @@ def test_contains(self, date1, date2):
359382
(Undate(1980, "XX"), Undate(1980, "XX")),
360383
# - partially unknown month to unknown month
361384
(Undate(1801, "1X"), Undate(1801, "XX")),
385+
# fully unknown year
386+
(Undate(month=6, day=1), Undate(2022)),
387+
(Undate(1950), Undate(day=31)),
362388
]
363389

364390
@pytest.mark.parametrize("date1,date2", testdata_not_contains)
@@ -514,6 +540,7 @@ def test_partiallyknownyear_duration(self):
514540
assert Undate("XXX", calendar="Hebrew").duration().days == UnInt(353, 385)
515541

516542
def test_known_year(self):
543+
# year counts as known only when fully known (not unknown or partially known)
517544
assert Undate(2022).known_year is True
518545
assert Undate(month=2, day=5).known_year is False
519546
# partially known year is not known
@@ -535,6 +562,34 @@ def test_is_known_day(self):
535562
assert Undate(month=1, day="X5").is_known("day") is False
536563
assert Undate(month=1, day="XX").is_known("day") is False
537564

565+
def test_unknown_year(self):
566+
# fully unknown year
567+
assert Undate(month=2, day=5).unknown_year is True
568+
# known or partially known years = all false for unknown
569+
assert Undate(2022).unknown_year is False
570+
# partially known year is not unknown
571+
assert Undate("19XX").unknown_year is False
572+
# fully known string year should be known
573+
assert Undate("1900").unknown_year is False
574+
575+
def test_is_unknown_month(self):
576+
# fully unknown month
577+
assert Undate(2022).is_unknown("month") is True
578+
assert Undate(day=10).is_unknown("month") is True
579+
assert Undate(2022, 2).is_unknown("month") is False
580+
assert Undate(2022, "5").is_unknown("month") is False
581+
assert Undate(2022, "1X").is_unknown("month") is False
582+
assert Undate(2022, "XX").is_unknown("month") is False
583+
584+
def test_is_unknown_day(self):
585+
# fully unknown day
586+
assert Undate(1984).is_unknown("day") is True
587+
assert Undate(month=5).is_unknown("day") is True
588+
assert Undate(month=1, day=3).is_unknown("day") is False
589+
assert Undate(month=1, day="5").is_unknown("day") is False
590+
assert Undate(month=1, day="X5").is_unknown("day") is False
591+
assert Undate(month=1, day="XX").is_unknown("day") is False
592+
538593
def test_parse(self):
539594
assert Undate.parse("1984", "EDTF") == Undate(1984)
540595
assert Undate.parse("1984-04", "EDTF") == Undate(1984, 4)
@@ -545,7 +600,10 @@ def test_parse(self):
545600

546601
assert Undate.parse("1984", "ISO8601") == Undate(1984)
547602
assert Undate.parse("1984-04", "ISO8601") == Undate(1984, 4)
548-
assert Undate.parse("--12-31", "ISO8601") == Undate(month=12, day=31)
603+
# dates with unknown year are not equal; compare repr string
604+
assert repr(Undate.parse("--12-31", "ISO8601")) == repr(
605+
Undate(month=12, day=31)
606+
)
549607

550608
# unsupported format
551609
with pytest.raises(ValueError, match="Unsupported format"):

0 commit comments

Comments
 (0)