-
Notifications
You must be signed in to change notification settings - Fork 1
/
test.txt~
executable file
·20 lines (20 loc) · 1.85 KB
/
test.txt~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Run the spiderRicerca spider once per target site, sequentially and in the
# order listed. Each entry is "<allowed_domain> <start_url>" separated by a
# single space; both values are handed to the spider through -a arguments.
# A failing crawl does not stop the remaining ones.
for target in \
    "itiscuneo.gov.it http://www.itiscuneo.gov.it/" \
    "istruzione.it http://www.istruzione.it/" \
    "lastampa.it http://www.lastampa.it/" \
    "repubblica.it http://www.repubblica.it/" \
    "corriere.it http://www.corriere.it/" \
    "studenti.it http://www.studenti.it/" \
    "abctribe.com http://it-it.abctribe.com/" \
    "wikipedia.org https://it.wikipedia.org" \
    "tomshw.it http://www.tomshw.it/" \
    "paginebianche.it http://www.paginebianche.it" \
    "vallauri.edu http://www.vallauri.edu" \
    "facebook.com https://www.facebook.com" \
    "linkedin.com https://it.linkedin.com" \
    "twitter.com https://twitter.com" \
    "youtube.com https://www.youtube.com" \
    "github.com https://github.com/" \
    "polito.it http://www.polito.it/" \
    "unito.it http://www.unito.it/" \
    "paginegialle.it http://www.paginegialle.it/" \
    "amazon.it http://www.amazon.it/"
do
    # Split the pair at its only space: text before it is the domain,
    # text after it is the start URL (kept exactly as written above,
    # including the presence or absence of a trailing slash).
    domain=${target%% *}
    url=${target#* }
    scrapy crawl spiderRicerca -a allowed_domain="$domain" -a start_url="$url"
done