diff --git a/juriscraper/AbstractSite.py b/juriscraper/AbstractSite.py index a3a1b0b77..7abcd3312 100644 --- a/juriscraper/AbstractSite.py +++ b/juriscraper/AbstractSite.py @@ -1,6 +1,6 @@ import hashlib import json -from datetime import date, datetime +from datetime import date, datetime, timedelta from typing import Dict, List, Tuple import certifi @@ -257,6 +257,7 @@ def _check_sanity(self): prior_case_name = name i += 1 + future_date_count = 0 for index, case_date in enumerate(self.case_dates): if not isinstance(case_date, date): raise InsanityException( @@ -266,24 +267,30 @@ def _check_sanity(self): ) # Sanitize case date, fix typo of current year if present fixed_date = fix_future_year_typo(case_date) + case_name = self.case_names[index] if fixed_date != case_date: logger.info( "Date year typo detected. Converting %s to %s " - "for case '%s' in %s" - % ( - case_date, - fixed_date, - self.case_names[index], - self.court_id, - ) + "for case '%s' in %s", + case_date, + fixed_date, + case_name, + self.court_id, ) case_date = fixed_date self.case_dates[index] = fixed_date - if case_date.year > 2025: - raise InsanityException( - "%s: member of case_dates list is from way in the future, " - "with value %s" % (self.court_id, case_date.year) - ) + + # dates should not be in the future. Tolerate a week + if case_date > (date.today() + timedelta(days=7)): + future_date_count += 1 + error = f"{self.court_id}: {case_date} date is in the future. Case '{case_name}'" + logger.error(error) + + # Interrupt data ingestion if more than 1 record has a bad date + if future_date_count > 1: + raise InsanityException( + f"More than 1 case has a date in the future. Last case: {error}" + ) # Is cookies a dict? if type(self.cookies) != dict: