@@ -26,6 +26,7 @@ def __init__(self, *args, **kwargs):
2626 self .court_id = self .__module__
2727 self .url = f"https://ecf.dcd.uscourts.gov/cgi-bin/Opinions.pl?{ date .today ().year } "
2828 self .status = "Published"
29+ self .make_backscrape_iterable (kwargs )
2930
3031 def _process_html (self ):
3132 """
@@ -72,3 +73,37 @@ def get_docket_document_number_from_url(self, url: str) -> Tuple[str, str]:
7273 doc_number = match .group (6 ) if match else url
7374
7475 return doc_number
76+
def _download_backwards(self, year: int) -> None:
    """Build the opinions URL for the given year and scrape it

    Points `self.url` at the court's opinions listing for `year`,
    stores the result of `self._download()` in `self.html` and then
    calls `self._process_html()`.

    :param year: year to scrape
    :return None
    """
    # The year is the entire query string; nothing (in particular no
    # whitespace) may follow it, or the request targets another query
    self.url = f"https://ecf.dcd.uscourts.gov/cgi-bin/Opinions.pl?{year}"
    self.html = self._download()
    self._process_html()
86+
def make_backscrape_iterable(self, kwargs: dict) -> None:
    """Compute the years to backscrape and store them on the scraper

    Reads the optional "backscrape_start" and "backscrape_end" entries
    (strings in MM/DD/YYYY format) and sets `self.back_scrape_iterable`
    to a `range` of years:

    - the first year is the start date's year, or the current year when
      no start is given, and is never lower than 2005
    - when an end date is given, its year is included in the range
    - when no end date is given, the range stops before the current year

    :param kwargs: passed when initializing the scraper, may or
        may not contain backscrape controlling arguments
    :return None
    """
    date_format = "%m/%d/%Y"
    raw_start = kwargs.get("backscrape_start")
    raw_end = kwargs.get("backscrape_end")

    if raw_start:
        first_year = datetime.strptime(raw_start, date_format).year
    else:
        first_year = date.today().year

    if raw_end:
        # range() excludes its stop value; add 1 to keep the end year
        stop_year = datetime.strptime(raw_end, date_format).year + 1
    else:
        stop_year = date.today().year

    if first_year < 2005:
        first_year = 2005

    self.back_scrape_iterable = range(first_year, stop_year)
0 commit comments