From 0b690232cd9788514dcc370ce4b5e974e33e0101 Mon Sep 17 00:00:00 2001 From: Paulo Victor LS <34511811+paulovictorls@users.noreply.github.com> Date: Wed, 15 Dec 2021 16:34:33 -0300 Subject: [PATCH 1/3] feat | gets the monthly condo fee --- zapimoveis_scraper/__init__.py | 5 +++-- zapimoveis_scraper/item.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/zapimoveis_scraper/__init__.py b/zapimoveis_scraper/__init__.py index 0629e47..4fbeb5e 100644 --- a/zapimoveis_scraper/__init__.py +++ b/zapimoveis_scraper/__init__.py @@ -76,7 +76,7 @@ def convert_dict(data): #start dictonary dicts = defaultdict(list) #create a list with the keys - keys = ['price','bedrooms','bathrooms','vacancies','total_area_m2','address','description', 'link'] + keys = ['price', 'condo_fee', 'bedrooms','bathrooms','vacancies','total_area_m2','address','description', 'link'] #simple for loops to create the dictionary for i in keys: @@ -99,6 +99,7 @@ def get_ZapItem(listing): item = ZapItem() item.link = listing['link']['href'] item.price = listing['listing']['pricingInfos'][0].get('price', None) if len(listing['listing']['pricingInfos']) > 0 else 0 + item.condo_fee = listing['listing']['pricingInfos'][0].get('monthlyCondoFee', None) if len(listing['listing']['pricingInfos']) > 0 else 0 item.bedrooms = listing['listing']['bedrooms'][0] if len(listing['listing']['bedrooms']) > 0 else 0 item.bathrooms = listing['listing']['bathrooms'][0] if len(listing['listing']['bathrooms']) > 0 else 0 item.vacancies = listing['listing']['parkingSpaces'][0] if len(listing['listing']['parkingSpaces']) > 0 else 0 @@ -128,4 +129,4 @@ def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.a if dictionary_out: return convert_dict(items) - return items + return items \ No newline at end of file diff --git a/zapimoveis_scraper/item.py b/zapimoveis_scraper/item.py index 7af5d31..eee913e 100644 --- a/zapimoveis_scraper/item.py +++ b/zapimoveis_scraper/item.py @@ -2,6 +2,7 @@ class ZapItem: description = None price = None + condo_fee = None bedrooms = None bathrooms = None total_area_m2 = None From eede8751c2052b6a946fdfdc0350d9ebca6542fd Mon Sep 17 00:00:00 2001 From: Paulo Victor Laurindo Santos <34511811+paulovictorls@users.noreply.github.com> Date: Thu, 16 Dec 2021 17:12:02 -0300 Subject: [PATCH 2/3] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 86a6619..f6e7cfb 100644 --- a/README.md +++ b/README.md @@ -36,9 +36,10 @@ zap.search(localization="go+goiania++setor-oeste", num_pages=5) The objects returned from `search` contain the following attributes: * description: property description * price: property price (monthly) +* condo\_fee: property condo fee (monthly) * bedrooms: number of bedrooms on property * bathrooms: number of bathrooms on property * total\_area\_m2: property area (square meters) * vacancies: parking spots available on property * address: property address -* link: link of the property \ No newline at end of file +* link: link of the property From f4b7ba885ac39d80d725e32d37d603ef568df320 Mon Sep 17 00:00:00 2001 From: Paulo Victor LS <34511811+paulovictorls@users.noreply.github.com> Date: Thu, 16 Dec 2021 17:20:52 -0300 Subject: [PATCH 3/3] feat | add the possibility to set a timer between pages --- README.md | 2 ++ zapimoveis_scraper/__init__.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f6e7cfb..ec7873c 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ zap.search(localization="go+goiania++setor-oeste", num_pages=5) * default: 'casas' * dictionaty\_out (boolean): Specifies the method output (list of objects or dictionary) * default: False +* time_to_wait (float): time to wait until the script scrapes the next page + * default: 0 #### Scraped attributes: The objects returned from `search` contain the following attributes: diff --git a/zapimoveis_scraper/__init__.py b/zapimoveis_scraper/__init__.py index 4fbeb5e..b34a97e 100644 --- a/zapimoveis_scraper/__init__.py +++ b/zapimoveis_scraper/__init__.py @@ -30,6 +30,7 @@ from urllib.request import Request, urlopen from bs4 import BeautifulSoup import json +import time from zapimoveis_scraper.enums import ZapAcao, ZapTipo from zapimoveis_scraper.item import ZapItem @@ -110,7 +111,7 @@ def get_ZapItem(listing): return item -def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.aluguel.value, tipo=ZapTipo.casas.value, dictionary_out = False): +def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.aluguel.value, tipo=ZapTipo.apartamentos.value, dictionary_out = False, time_to_wait=0): page = 1 items = [] @@ -125,7 +126,8 @@ def search(localization='go+goiania++setor-marista', num_pages=1, acao=ZapAcao.a items.append(get_ZapItem(listing)) page += 1 - + time.sleep(time_to_wait) + if dictionary_out: return convert_dict(items)