From d4d3e42abf9a6c567dd5f17d4b4b1ea6c89cd38f Mon Sep 17 00:00:00 2001 From: "@marcoamarelo" Date: Fri, 5 Jan 2024 18:17:13 -0300 Subject: [PATCH] =?UTF-8?q?Incluindo=20par=C3=A2metro=20de=20start=5Fdate?= =?UTF-8?q?=20e=20end=5Fdate=20no=20spider=20de=20Niter=C3=B3i/RJ.=20fixes?= =?UTF-8?q?=20okfn-brasil/querido-diario#635?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_collection/gazette/spiders/rj/rj_niteroi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/data_collection/gazette/spiders/rj/rj_niteroi.py b/data_collection/gazette/spiders/rj/rj_niteroi.py index 9ae0b0614..b55b5e947 100644 --- a/data_collection/gazette/spiders/rj/rj_niteroi.py +++ b/data_collection/gazette/spiders/rj/rj_niteroi.py @@ -11,8 +11,9 @@ class RjNiteroiSpider(BaseGazetteSpider): name = "rj_niteroi" allowed_domains = ["niteroi.rj.gov.br"] start_urls = ["http://www.niteroi.rj.gov.br"] - download_url = "http://pgm.niteroi.rj.gov.br/downloads/do/{}/{}/{:02d}.pdf" + download_url = "http://www.niteroi.rj.gov.br/wp-content/uploads/do/{}/{}/{:02d}.pdf" start_date = dt.date(2003, 7, 1) + end_date = dt.date.today() month_names = [ "01_Jan", @@ -30,7 +31,8 @@ class RjNiteroiSpider(BaseGazetteSpider): ] def parse(self, response): - parsing_date = dt.date.today() + parsing_date = self.end_date + while parsing_date >= self.start_date: month = self.month_names[parsing_date.month - 1] url = self.download_url.format(parsing_date.year, month, parsing_date.day)