-
Notifications
You must be signed in to change notification settings - Fork 5
/
youdao_spider.py
34 lines (28 loc) · 937 Bytes
/
youdao_spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
__author__ = 'hugowen'
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import tornado.httpclient
def is_chinese(uchar):
    """Return True if ``uchar`` contains at least one Chinese character.

    A character counts as Chinese when its code point lies in the
    inclusive range U+4E00..U+9FA5.

    ``uchar`` is expected to be unicode text. It may be a single character
    or a longer string; an empty string yields False.
    """
    # Test character by character. Comparing a whole multi-character string
    # against the one-character bounds is a lexicographic comparison: it
    # effectively looks only at the first character and rejects any longer
    # string that starts with U+9FA5.
    return any(u'\u4e00' <= ch <= u'\u9fa5' for ch in uchar)
if __name__ == "__main__":
    # Script entry point: prompt for a word, fetch its Youdao dictionary
    # page and print the translations found in the first
    # 'trans-container' element.

    # Imported locally because only the script entry point needs it.
    import tornado.escape

    search = raw_input('search: ')
    # Percent-encode the query so spaces, '&', '#' and non-ASCII text
    # cannot corrupt the request URL.
    link = 'http://dict.youdao.com/search?q=' + tornado.escape.url_escape(search)

    cli = tornado.httpclient.HTTPClient()
    try:
        data = cli.fetch(link)
    finally:
        # Release the client's resources even when the fetch raises.
        cli.close()

    body = data.body.decode('utf8')
    soup = BeautifulSoup(body)
    group = soup.find_all(class_='trans-container')

    # The page may contain no translation block at all (e.g. unknown word),
    # so guard every lookup instead of crashing on a missing element.
    listing = group[0].find('ul') if group else None
    if listing is None:
        print('no translation found')
    elif is_chinese(search.decode('utf8')):
        # Chinese query: the translations are read from the first <p>
        # inside the list.
        content = listing.find('p')
        if content is None:
            print('no translation found')
        else:
            spans = content.find_all('span')
            if spans:
                print(spans[0].get_text())
            for ele in content.find_all(class_='contentTitle'):
                anchor = ele.find('a')
                if anchor is not None:
                    print(anchor.get_text())
    else:
        # Non-Chinese query: each <li> holds one translation line.
        for ele in listing.find_all('li'):
            print(ele.get_text())