-
Notifications
You must be signed in to change notification settings - Fork 3
/
4-gethosts.py
62 lines (46 loc) · 1.6 KB
/
4-gethosts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import sys
import requests
from bs4 import BeautifulSoup
import time
import json
import random
# Path (relative to the working directory) of the JSON listings file that
# getList() reads and rewrites in place.
filename = "output/airbnb_flat.json"
def getUser(link, timeout=30):
    """Fetch an Airbnb listing page and return its primary host.

    The page is expected to embed its bootstrap data as JSON wrapped in an
    HTML comment inside the ``<script>`` tag whose ``data-hypernova-key``
    attribute is ``p3show_marketplacebundlejs``.

    Args:
        link: URL of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole run.

    Returns:
        ``[hostname, hostid]``, where ``hostid`` is the absolute URL of the
        host's profile page, or ``None`` when the page could not be fetched
        or parsed (the reason is printed).
    """
    try:
        r = requests.get(link, timeout=timeout)
        # Fixed pause after every request to throttle the scraper.
        time.sleep(2)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, "html5lib")
        script = soup.find(
            "script", {'data-hypernova-key': 'p3show_marketplacebundlejs'})
        if script is None:
            print("no bootstrap data found at %s" % link)
            return None
        # The JSON payload sits between '<!--' and '-->' in the script text.
        payload = script.text.split('<!--')[1].split('-->')[0]
        listing = json.loads(payload)["bootstrapData"]["reduxData"][
            "marketplacePdp"]["listingInfo"]["listing"]
        host = listing["primary_host"]
        hostid = "https://www.airbnb.com" + host["profile_path"]
        hostname = host["host_name"]
        return [hostname, hostid]
    except Exception as e:
        # Deliberately best effort: a single bad page must not abort a long
        # scraping run, so report the problem and signal failure with None.
        print(e)
        return None
def _writeListings(path, listings):
    """Serialize ``listings`` as JSON to ``path``, replacing its contents."""
    # Text mode: json produces str, which a binary-mode file rejects on
    # Python 3, while text mode works on both Python 2 and Python 3.
    with open(path, "w") as f:
        json.dump(listings, f)


def getList(filename):
    """Add host name and host profile URL to the listings in a JSON file.

    ``filename`` must hold a JSON array of listing objects, each with an
    ``id`` key. Every listing that has no ``hostname`` yet is looked up with
    ``getUser`` and gains ``hostname`` and ``hostid`` keys; listings that
    already have a ``hostname`` are printed and left alone, so an interrupted
    run can be resumed. The file is rewritten in place.

    Args:
        filename: Path of the JSON file to read and overwrite.

    Returns:
        None.
    """
    # Context manager so the input handle is closed before the file is
    # reopened for writing.
    with open(filename, "r") as f:
        airbnb = json.load(f)

    for i, listing in enumerate(airbnb):
        print("working on %s" % i)
        if listing.get("hostname") is not None:
            print(listing["hostname"])
            continue

        link = "https://www.airbnb.com/rooms/%s" % listing["id"]
        # Random 1-4 second pause between listings.
        time.sleep(random.randint(1, 4))
        host = getUser(link)
        try:
            # Unpack both values first so a malformed result cannot leave
            # the listing half-updated.
            hostname, hostid = host[0], host[1]
        except (TypeError, IndexError):
            # getUser signals failure with None.
            print("skipped")
            continue
        listing["hostname"] = hostname
        listing["hostid"] = hostid
        # Checkpoint after each new host so progress survives a crash.
        # NOTE(review): the original's indentation was lost, so it is
        # unclear whether it saved per listing or only once at the end;
        # saving here plus the final write below covers both readings.
        _writeListings(filename, airbnb)

    _writeListings(filename, airbnb)
# Module-level entry point: there is no __main__ guard, so this runs whenever
# the file is executed or imported, updating the listings file in place.
getList(filename)