forked from frankzl/bahn-price-analyzer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.js
114 lines (88 loc) · 3.04 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
const mkdirp = require('mkdirp');
// Directory that receives the generated CSV file.
const path = '/tmp/db-price-analysis';
// Create the output directory (and any missing parents) before scraping.
// A failure here used to be ignored and only showed up when the CSV was
// written at the very end; report it right away instead, the same way
// request failures are reported elsewhere in this script.
mkdirp(path, function (err) {
    if (err) {
        console.error(err);
        process.exit(1);
    }
});
const prices = require('db-prices')
const moment = require('moment-timezone')
const {inspect} = require('util')
const tz = 'Europe/Berlin'
/*
prices('8000105', '8011160', when)
.then((routes) => {
console.log(inspect(routes, {depth: null}))
})
.catch((err) => {
console.error(err)
process.exit(1)
})
*/
const homedir = require('os').homedir();
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
// Shared writer for `<path>/data.csv`. Every column uses the same string as
// its record key (`id`) and as its header title, so the header is derived
// from a plain list of column names.
const csvWriter = createCsvWriter({
    path: `${path}/data.csv`,
    header: [
        'date',
        'departure_time',
        'arrival_time',
        'stops',
        'price',
        'tickettype',
        'fulltxt',
    ].map((column) => ({id: column, title: column})),
});
/**
 * Writes the given rows through the shared `csvWriter`.
 *
 * The promise returned by `writeRecords` used to be dropped without a
 * rejection handler, so a failed write could go unnoticed. It is now
 * returned to the caller, and a failure is logged and ends the process with
 * exit code 1 (the same policy `getData` applies to failed requests).
 *
 * @param {Object[]} records - Row objects passed to `csvWriter.writeRecords`.
 * @returns {Promise<void>} Resolves once the write has finished.
 */
const writeDict = ( records ) => {
    return csvWriter.writeRecords(records)
        .then(() => {
            //console.log('...Done');
        })
        .catch((err) => {
            console.error(err);
            process.exit(1);
        });
};
/**
 * Packs a date into one integer of the form YYYYMMDD, e.g. 20181118.
 *
 * Year, month and day are read with the local-time getters of `date`.
 *
 * @param {Date} date - The date to encode.
 * @returns {number} year * 10000 + month * 100 + day (NaN for an Invalid Date).
 */
const dateToInt = (date) => {
    // getMonth() is zero-based, hence the + 1.
    const fields = [date.getFullYear(), date.getMonth() + 1, date.getDate()];
    // Shift the running value two decimal places per field and append the next one.
    return fields.reduce((packed, field) => packed * 100 + field, 0);
};
/**
 * Fetches prices one day at a time from `from_date` through `to_date`
 * (inclusive, compared by timestamp) and hands every collected row to
 * `writeDict` once the range is exhausted.
 *
 * A day is requested only after the previous day's request has resolved: the
 * function calls itself from the promise callback with the following day and
 * the rows gathered so far. Nothing is returned. A failed request is logged
 * and terminates the process with exit code 1.
 *
 * @param {*} from_id - Origin station id, passed unchanged to `prices`.
 * @param {*} to_id - Destination station id, passed unchanged to `prices`.
 * @param {Date} from_date - First day to query. Not modified.
 * @param {Date} to_date - Last day to query.
 * @param {Object[]} prev_routes - Rows collected so far; `[]` on the first call.
 * @throws {RangeError} If `from_date` or `to_date` is an Invalid Date.
 */
const getData = function ( from_id, to_id, from_date, to_date, prev_routes){
    // Comparisons against NaN are always false, so with an Invalid Date the
    // stop condition below would never fire and the recursion would keep
    // issuing requests forever. Refuse such input up front.
    if (Number.isNaN(from_date.getTime()) || Number.isNaN(to_date.getTime())) {
        throw new RangeError('getData: from_date and to_date must be valid dates');
    }

    // Past the last requested day: flush everything collected and stop.
    if (to_date.getTime() < from_date.getTime()) {
        writeDict(prev_routes);
        return;
    }

    // Start of the day in `tz`, with the weekday then set to the one
    // `from_date` has in the process's local time zone.
    // NOTE(review): the .day(...) step presumably compensates for a local zone
    // that differs from `tz` - confirm before changing it.
    const when = moment.tz(from_date.getTime(), tz)
        .hour(0).minute(0).second(0)
        .day(from_date.getDay())
        .toDate();

    prices(from_id, to_id, when)
    .then((routes) => {
        const rows = routes
            .map((route) => ({
                date: dateToInt(from_date),
                departure_time: route.legs[0].departure,
                arrival_time: route.legs[route.legs.length - 1].arrival,
                // Number of legs of the route, stored in the `stops` column.
                stops: route.legs.length,
                price: route.price.amount,
                tickettype: route.price.name,
                fulltxt: encodeURI(JSON.stringify(route)),
            }))
            // Drop offers whose ticket name mentions "Bus".
            .filter((row) => !row.tickettype.includes("Bus"));

        // Advance on a copy: the original aliased `from_date` and changed the
        // caller's Date object through setDate().
        const tomorrow = new Date(from_date.getTime());
        tomorrow.setDate(tomorrow.getDate() + 1);
        getData(from_id, to_id, tomorrow, to_date, prev_routes.concat(rows));
    })
    .catch((err) => {
        console.error(err);
        process.exit(1);
    });
};
//getData(8098160, 8000105, new Date("2018-11-18"), new Date("2018-11-19"), [])
// Command-line entry point.
//   -f / -t : origin / destination station id
//   -s / -e : first / last day to scrape, in any format `new Date(...)` parses
const argv = require('minimist')(process.argv.slice(2));
// console.dir(argv);
const startDate = new Date(argv.s);
const endDate = new Date(argv.e);
// A missing or unparseable option used to be passed straight through
// (`new Date(undefined)` is an Invalid Date); reject it with a usage hint.
const hasStations = argv.f !== undefined && argv.t !== undefined;
const hasValidDates = !Number.isNaN(startDate.getTime()) && !Number.isNaN(endDate.getTime());
if (hasStations && hasValidDates) {
    getData(argv.f, argv.t, startDate, endDate, []);
} else {
    console.error('usage: node scraper.js -f <from station id> -t <to station id> -s <start date> -e <end date>');
    process.exit(1);
}
/*
const stations = require('db-stations')
stations()
.on('data', function(data){
console.log("{ name: \"" + data.name + "\", id: "+data.id + "},")
})
.on('error', console.error)
*/