-
Notifications
You must be signed in to change notification settings - Fork 0
/
example3.js
102 lines (71 loc) · 3.1 KB
/
example3.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// example3.js - Example #3 - Searches http://row52.com/ for vehicles, collects their VINs,
// and prints the details scraped from each vehicle's page
var request = require('request-promise');
var cheerio = require('cheerio');
var async = require('async');
var host = 'http://row52.com/';
var page = 'Search/Index?Page=1&MakeId=145&ModelId=0&Year=&Distance=50&Sort=DateAdded&SortDirection=desc&ZipCode=14850';
var options = {
method: 'get',
uri: host + '/' + page,
json: true,
headers: {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
}
};
var results = Array();
request(options).then(function(response) {
var $ = cheerio.load(response);
var num_of_pages = $('.pagingform > span.count').first().text().match(/(\d+)/g)[0];
num_of_pages = parseInt(num_of_pages);
var pages_to_scrape = Array();
// Commit this sin because I don't know how to do this using async properly and have a deadline
for (var i = 1; i <= num_of_pages; i++) {
// Get an array of page numbers eg. [1, 2, 3]
pages_to_scrape.push(i);
}
async.forEach(pages_to_scrape, function (page_number, callback) {
var page_url = host + '/' + 'Search/Index?Page=' + page_number + '&MakeId=145&ModelId=0&Year=&Distance=50&Sort=DateAdded&SortDirection=desc&ZipCode=14850';
var page_options = options;
page_options.uri = page_url;
request(page_options).then(function(page_result) {
var $ = cheerio.load(page_result);
$('.vin > span[itemprop="name"]').each(function() {
results.push($(this).text());
});
callback();
}).error(function(error) {
callback('Error ' + error.statusCode + ': ' + error.message);
});
}, function(error) {
if (error) {
console.log('Error: ' + error);
}
console.log('Got ' + results.length + ' VINs');
var parsed_results = Array();
async.forEach(results, function(vin, vin_callback) {
var vin_url = host + '/' + 'Vehicle/Vin/' + vin;
var vin_options = options;
vin_options['uri'] = vin_url;
request(vin_options).then(function(vin_result) {
parsed_results[vin] = {};
$ = cheerio.load(vin_result);
$('tbody > tr').each(function() {
var property = $(this).find('td').first().text().toString();
var value = $(this).find('td.subtle').text().toString();
parsed_results[vin][property] = value;
});
vin_callback();
}).error(function(error) {
vin_callback('Error ' + error.statusCode + ': ' + error.message);
});
}, function(error) {
if (error) {
console.log('Error: ' + error);
}
console.dir(parsed_results);
});
});
}).error(function(error) {
return console.error('Error: ' + console.dir(error));
});