-
Notifications
You must be signed in to change notification settings - Fork 0
/
craigslist-scraper.js
56 lines (48 loc) · 1.54 KB
/
craigslist-scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
var request = require("request");
var cheerio = require("cheerio");
var querystring = require("querystring");
exports.query = function(baseUrl, category, searchString, includeNearby, resultProcessor, callback) {
var self = this;
baseUrl = baseUrl.replace(/\/$/, '');
searchUrl = baseUrl + '/search/' + category + '?' + querystring.stringify({
query: searchString
});
request({
uri: searchUrl
}, function(error, response, body) {
var $ = cheerio.load(body);
// console.log(body);
var count = 0;
// console.log($('#toc_rows h4.nearby').text());
$('#toc_rows p.row').each(function() {
count += 1;
var row = $(this);
var date = row.find('span.date').first().text();
var link = row.find('span.pl > a').first();
var text = link.text();
var href = link.attr('href');
var price = row.find('span.price').first().text();
var loc = row.find('span.pnr small').first().text();
// check if href is relative
// for now, let's not include "Nearby area" results
// TODO: flag if nearby results should be included
if (href.match(/^(\/|\.\.\/)/)) {
href = baseUrl + href;
} else if (!includeNearby) {ƒ
return false;
}
// console.log({date: date, text: text, href: href, price: price, loc: loc});
resultProcessor({
date: date,
text: text,
href: href,
price: price,
loc: loc
});
});
console.log("TOTAL RESULTS: ", count);
if (callback) {
callback();
}
});
}