diff --git a/data_collection/gazette/spiders/sp/sp_sumare.py b/data_collection/gazette/spiders/sp/sp_sumare.py index 56811c651..8fb293f0f 100644 --- a/data_collection/gazette/spiders/sp/sp_sumare.py +++ b/data_collection/gazette/spiders/sp/sp_sumare.py @@ -16,16 +16,16 @@ def parse(self, response): gazettes = response.css("li.umDO") for gazette in gazettes: - title = gazette.css("a::attr(title)").get() + title = gazette.css(".file-title::text").get() url = gazette.css("a::attr(href)").get() - str_date = gazette.css(".areaData::text").get() + str_date = gazette.css(".areaMetade::text").get() date = datetime.strptime(str_date, "%d/%m/%Y").date() if not (self.start_date <= date <= self.end_date): continue yield Gazette( - edition_number=re.search(r"\d+", title).group(0), + edition_number=re.search(r"\d+", title.strip()).group(0), date=date, file_urls=[response.urljoin(url)], is_extra_edition="extra" in title.lower(),