From f4b7ba885ac39d80d725e32d37d603ef568df320 Mon Sep 17 00:00:00 2001 From: Paulo Victor LS <34511811+paulovictorls@users.noreply.github.com> Date: Thu, 16 Dec 2021 17:20:52 -0300 Subject: [PATCH] feat | add the possibility to set a timer between pages --- README.md | 2 ++ zapimoveis_scraper/__init__.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f6e7cfb..ec7873c 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ zap.search(localization="go+goiania++setor-oeste", num_pages=5) * default: 'casas' * dictionaty\_out (boolean): Specifies the method output (list of objects or dictionary) * default: False +* time_to_wait (float): time, in seconds, to wait before the script scrapes the next page + * default: 0 #### Scraped attributes: The objects returned from `search` contain the following attributes: diff --git a/zapimoveis_scraper/__init__.py b/zapimoveis_scraper/__init__.py index 4fbeb5e..b34a97e 100644 --- a/zapimoveis_scraper/__init__.py +++ b/zapimoveis_scraper/__init__.py @@ -30,6 +30,7 @@ from urllib.request import Request, urlopen from bs4 import BeautifulSoup import json +import time from zapimoveis_scraper.enums import ZapAcao, ZapTipo from zapimoveis_scraper.item import ZapItem @@ -110,7 +111,7 @@ def get_ZapItem(listing): return item -def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.aluguel.value, tipo=ZapTipo.casas.value, dictionary_out = False): +def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.aluguel.value, tipo=ZapTipo.apartamentos.value, dictionary_out = False, time_to_wait=0): page = 1 items = [] @@ -125,7 +126,8 @@ def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.a items.append(get_ZapItem(listing)) page += 1 - + time.sleep(time_to_wait) + if dictionary_out: return convert_dict(items)