Skip to content

Commit

Permalink
fix: name of item fetched from caasa.it, remove ls from locations sel…
Browse files Browse the repository at this point in the history
…ect, update ttl record on sqlite
  • Loading branch information
pinkynrg committed Aug 15, 2024
1 parent f3a6a5e commit 5c4f42b
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 16 deletions.
10 changes: 10 additions & 0 deletions server/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ class Meta:
database = db
table_name = 'locations'

def get_nome_for(self, source):
    """Return the comune name to use when querying ``source``.

    ``source`` is accepted for symmetry with ``get_provincia_nome_for`` but
    is currently ignored: the stored ``nome`` is returned unchanged.
    """
    comune_name = self.nome
    return comune_name

def get_provincia_nome_for(self, source):
    """Return the province name spelled the way ``source`` expects it.

    Args:
        source: Identifier of the target site (e.g. ``'caasa.it'``).

    Returns:
        The source-specific spelling of the province when one is known,
        otherwise the stored ``provincia_nome``.
    """
    # caasa.it uses the short form of this province name.
    if source == 'caasa.it' and self.provincia_nome == "Reggio nell'Emilia":
        return 'Reggio Emilia'
    # Fall back to the province name, not the comune name: callers use this
    # value as the province segment of the listing URL.
    return self.provincia_nome


class House(Model):
uuid = CharField(primary_key=True)
url = CharField()
Expand Down
4 changes: 2 additions & 2 deletions server/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from peewee import fn

def fetch_homes(location: Location):
# Check if the lowest updated_at of House is > 1 day ago
one_day_ago = datetime.now() - timedelta(days=1)
# Check if the lowest updated_at of House is older than the refresh window (currently 60 seconds)
one_day_ago = datetime.now() - timedelta(seconds=60)
lowest_updated_at = House.select(fn.Min(House.updated_at)).where((House.city == location.nome) & (House.province == location.provincia_nome)).scalar()

if lowest_updated_at is None or lowest_updated_at < one_day_ago:
Expand Down
28 changes: 15 additions & 13 deletions server/services/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@
class ComuniItalia:
HOST = "raw.githubusercontent.com"

def trasform_city_name(name):
if name == "Reggio nell'Emilia":
return "Reggio Emilia"
return name

def fetch():
html_text = get(ComuniItalia.HOST, "matteocontrini/comuni-json/master/comuni.json")
json_string = unidecode(html_text)
Expand All @@ -23,14 +18,14 @@ def fetch():
upsert_record(
Location,
'codice',
nome=ComuniItalia.trasform_city_name(comune.get('nome', '')),
nome=comune.get('nome', ''),
codice=comune.get('codice', ''),
zona_codice=comune['zona']['codice'] if 'zona' in comune else None,
zona_nome=comune['zona']['nome'] if 'zona' in comune else None,
regione_codice=comune['regione']['codice'] if 'regione' in comune else None,
regione_nome=comune['regione']['nome'] if 'regione' in comune else None,
provincia_codice=comune['provincia']['codice'] if 'provincia' in comune else None,
provincia_nome=ComuniItalia.trasform_city_name(comune['provincia']['nome'] if 'provincia' in comune else None),
provincia_nome=comune['provincia']['nome'] if 'provincia' in comune else None,
sigla=comune.get('sigla', ''),
codiceCatastale=comune.get('codiceCatastale', ''),
cap=comune['cap'] if 'cap' in comune else None,
Expand Down Expand Up @@ -251,13 +246,18 @@ def get_main_image(soup):
return None

def get_comment(soup):
return soup.find('div', attrs={"class": "opinion-main-text"}).get_text(strip=True)
return " ".join(soup.find('div', attrs={"class": "opinion-main-text"}).stripped_strings)

def generate_readable_title(url, comune):
    """Build a human-readable listing title from the comune and listing URL.

    The listing ID is the leftmost dash-delimited alphanumeric token in
    ``url`` that contains at least one digit; ``'N/A'`` is used when no such
    token exists.
    """
    found = re.search(r'-([a-zA-Z0-9]*\d+[a-zA-Z0-9]*)-', url)
    if found is None:
        listing_id = 'N/A'
    else:
        listing_id = found.group(1)
    return f"{comune} (Caasa.it ID: {listing_id})"

try:
html_text = get(Caasa.HOST, page_link)
soup = BeautifulSoup(html_text, "html.parser")
price = get_value_from_label(soup, "Prezzo")
title = page_link
title = generate_readable_title(page_link, comune)
location = get_value_from_label(soup, "Zona OMI")
comment = get_comment(soup)
m2_string = get_value_from_label(soup, "Superficie")
Expand Down Expand Up @@ -341,13 +341,15 @@ def get_final_url(page):
max_size = max_size,
)

return "/{province}/{comune}/{types}/in-vendita.html?page={page}&{filters}".format(
province=Caasa.format_name(location.provincia_nome),
comune=Caasa.format_name(location.nome),
url = "/{province}/{comune}/{types}/in-vendita.html?page={page}&{filters}".format(
province=Caasa.format_name(location.get_provincia_nome_for('caasa.it')),
comune=Caasa.format_name(location.get_nome_for('caasa.it')),
types='-o-'.join(Caasa.ELEMENT_TYPES),
filters=filters,
page = page,
)

return url

def get_house_link(item_html):
fav_container = item_html.find("div", attrs={"class": "favorite-add"})
Expand Down Expand Up @@ -410,7 +412,7 @@ def get_house_link(item_html):
# Create a new function that takes both fixed_param and link
get_house_data_with_meta = partial(
Caasa.get_house_data,
location.nome,
location.get_nome_for('caasa.it'),
)

with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
Expand Down
2 changes: 1 addition & 1 deletion src/components/Request/Request.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ interface TreeSelectCity {
const Request = ({
className,
}: RequestProps) => {
const [citiesTree, setCitiesTree] = useLocalStorage<TreeSelectCity[]>('location', [])
const [citiesTree, setCitiesTree] = useState<TreeSelectCity[]>([])
const [request, setRequest] = useLocalStorage<string | null>('requestUUID', null)
const [error, setError] = useState<string | null>(null)
const [codes, setCodes] = useState(null)
Expand Down

0 comments on commit 5c4f42b

Please sign in to comment.