diff --git a/data_collection/gazette/spiders/base/barcodigital.py b/data_collection/gazette/spiders/base/barcodigital.py
new file mode 100644
index 000000000..c59e75c68
--- /dev/null
+++ b/data_collection/gazette/spiders/base/barcodigital.py
@@ -0,0 +1,56 @@
+from datetime import date, datetime
+
+from dateutil.rrule import MONTHLY, rrule
+from scrapy import Request
+
+from gazette.items import Gazette
+from gazette.spiders.base import BaseGazetteSpider
+
+
+class BarcoDigitalSpider(BaseGazetteSpider):
+    """Base spider for gazettes served by the Barco Digital API.
+
+    Subclasses must define ``base_url`` (the API root, without a trailing
+    slash), which is used to build both calendar and file URLs.
+    """
+
+    EDITION_TYPE_NORMAL = 1
+    EDITION_TYPE_EXTRA = 2
+    EDITION_TYPE_SUPPLEMENT = 3
+
+    def start_requests(self):
+        """Yield one calendar request per month from start_date to end_date."""
+        # The calendar endpoint is queried per month, so start from day 1 of
+        # the first month: stepping from start_date's own day could stop
+        # before reaching end_date's month.
+        first_day = date(self.start_date.year, self.start_date.month, 1)
+
+        for period in rrule(freq=MONTHLY, dtstart=first_day, until=self.end_date):
+            url = (
+                f"{self.base_url}/api/publico/diario/calendario"
+                f"?mes={period.month}&ano={period.year}"
+            )
+            yield Request(url)
+
+    def parse(self, response):
+        """Yield a Gazette for every document dated within [start_date, end_date]."""
+        for documents in response.json().values():
+            for document in documents:
+                document_date = datetime.strptime(
+                    document.get("data"), "%Y-%m-%d"
+                ).date()
+
+                # Filter document by document instead of returning early:
+                # nothing here guarantees the response is sorted by date, so
+                # bailing out on the first early date could drop valid items.
+                if not self.start_date <= document_date <= self.end_date:
+                    continue
+
+                yield Gazette(
+                    date=document_date,
+                    edition_number=document.get("edicao"),
+                    is_extra_edition=document.get("tipo_edicao_id")
+                    != self.EDITION_TYPE_NORMAL,
+                    file_urls=[f"{self.base_url}/arquivo/{document.get('url')}"],
+                    power="executive",
+                )
diff --git a/data_collection/gazette/spiders/to/to_lagoa_de_tocantins.py b/data_collection/gazette/spiders/to/to_lagoa_de_tocantins.py
new file mode 100644
index 000000000..2b3895da9
--- /dev/null
+++ b/data_collection/gazette/spiders/to/to_lagoa_de_tocantins.py
@@ -0,0 +1,14 @@
+from datetime import date
+
+from gazette.spiders.base.barcodigital import BarcoDigitalSpider
+
+
+class ToLagoaDeTocatinsSpider(BarcoDigitalSpider):
+    """Barco Digital spider for api-lagoadotocantins.barcodigital.com.br."""
+
+    name = "to_lagoa_de_tocantins"
+    TERRITORY_ID = "1711951"
+    allowed_domains = ["api-lagoadotocantins.barcodigital.com.br"]
+    base_url = "https://api-lagoadotocantins.barcodigital.com.br"
+
+    start_date = date(year=2018, month=5, day=1)
diff --git a/data_collection/gazette/spiders/to/to_recursolandia.py b/data_collection/gazette/spiders/to/to_recursolandia.py
new file mode 100644
index 000000000..a5b2e8494
--- /dev/null
+++ b/data_collection/gazette/spiders/to/to_recursolandia.py
@@ -0,0 +1,14 @@
+from datetime import date
+
+from gazette.spiders.base.barcodigital import BarcoDigitalSpider
+
+
+class ToRecursolandiaSpider(BarcoDigitalSpider):
+    """Barco Digital spider for api-recursolandia.barcodigital.com.br."""
+
+    name = "to_recursolandia"
+    TERRITORY_ID = "1718501"
+    allowed_domains = ["api-recursolandia.barcodigital.com.br"]
+    base_url = "https://api-recursolandia.barcodigital.com.br"
+
+    start_date = date(year=2019, month=11, day=11)