-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex.js
152 lines (122 loc) · 4.15 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
const puppeteer = require('puppeteer-extra');
const ObjectsToCSV = require('objects-to-csv');
const stealthPlugin = require('puppeteer-extra-plugin-stealth');
// Search parameters — change these values to scrape a different city/keyword.
// CITY and KEYWORD are interpolated into the JustDial results URL.
const CITY = 'Delhi';
const KEYWORD = 'Pediatrician';
// Pages scraped are INITIAL_PAGE + 1 through INITIAL_PAGE + NUMBER_OF_PAGES.
const INITIAL_PAGE = 0;
const NUMBER_OF_PAGES = 50;
// Lookup from the icon CSS class found on each phone-number glyph element to
// the character it stands for. Frozen so the shared table cannot be mutated
// by accident.
const NUMBER_CODE_MAP = Object.freeze({
  'icon-acb': '0',
  'icon-yz': '1',
  'icon-wx': '2',
  'icon-vu': '3',
  'icon-ts': '4',
  'icon-rq': '5',
  'icon-po': '6',
  'icon-nm': '7',
  'icon-lk': '8',
  'icon-ji': '9',
  'icon-dc': '+',
  'icon-fe': '(',
  'icon-hg': ')',
  'icon-ba': '-',
});
/**
 * Scroll the given page downwards, step by step, inside the browser context.
 *
 * Every 100 ms the page is scrolled by 100 px. Scrolling stops once the total
 * distance scrolled is at least `document.body.scrollHeight` as read on that
 * same tick, so a page that grows while scrolling keeps being scrolled.
 *
 * @param {object} page - Object exposing `evaluate(fn)`, e.g. a Puppeteer page.
 * @returns {Promise<void>} Resolves when the scrolling loop has finished.
 */
const autoScroll = async (page) => {
  await page.evaluate(() => {
    const STEP_PX = 100;
    const TICK_MS = 100;

    return new Promise((done) => {
      let scrolledSoFar = 0;

      const ticker = setInterval(function step() {
        // Read the height before scrolling, then advance one step.
        const pageHeight = document.body.scrollHeight;
        window.scrollBy(0, STEP_PX);
        scrolledSoFar += STEP_PX;

        if (scrolledSoFar < pageHeight) {
          return;
        }

        clearInterval(ticker);
        done();
      }, TICK_MS);
    });
  });
};
// Remembers whether the stealth plugin was already registered on the shared
// puppeteer-extra instance, so calling parsePage once per result page does not
// register the same plugin again each time.
let isStealthRegistered = false;

/**
 * Scrape one JustDial search-result page and append its listings to
 * `./directory.csv`.
 *
 * Steps: launch a headless browser, open the result page for CITY/KEYWORD,
 * scroll it to the bottom, collect name/url/phone for every listing, then
 * visit each listing URL to collect rating, votes, address, categories and
 * (when present) services. Whatever was collected is written to the CSV even
 * if scraping failed part-way.
 *
 * Error handling:
 * - A failure on a single listing is logged and that listing keeps only its
 *   summary fields; the remaining listings are still visited.
 * - A failure while loading or parsing the result page is logged and the
 *   function still writes what it has.
 * - A failure while writing the CSV (or launching the browser) rejects the
 *   returned promise. The browser is closed in all cases after launch.
 *
 * @param {number} pageNumber - Result page number used in the URL.
 * @returns {Promise<void>}
 */
const parsePage = async (pageNumber) => {
  if (!isStealthRegistered) {
    puppeteer.use(stealthPlugin());
    isStealthRegistered = true;
  }

  let directory = [];
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.goto(`https://www.justdial.com/${CITY}/${KEYWORD}/page-${pageNumber}`);
    await autoScroll(page);

    // Runs inside the page: one summary entry per listing container.
    directory = await page.evaluate((NUMBER_CODE_MAP) => {
      const entries = [];
      const listings = document.getElementsByClassName('cntanr');
      for (const listing of listings) {
        const name = listing.getElementsByClassName('lng_cont_name')[0].textContent;
        // NOTE(review): assumes the second attribute of the listing container
        // holds the detail-page URL — confirm against the live markup.
        const url = listing.attributes[1].value;
        // Each phone glyph is an element whose second class encodes one
        // character; unknown classes map to undefined and join as ''.
        const phoneNumber = Array.from(listing.getElementsByClassName('mobilesv'))
          .map((glyph) => NUMBER_CODE_MAP[glyph.classList[1]])
          .join('');
        entries.push({name, url, phoneNumber});
      }
      return entries;
    }, NUMBER_CODE_MAP);
    console.log('Total Listings Found:', directory.length);

    // Iterate with the index so each result is merged into its own row, even
    // when two listings share a URL, and without a per-listing array search.
    for (const [index, listing] of directory.entries()) {
      console.log('Navigating to Listing:', listing.name);
      try {
        await page.goto(listing.url);

        // Runs inside the listing's detail page.
        const details = await page.evaluate(() => {
          const name = document.getElementsByClassName('rstotle')[0].textContent.trim();
          const rating = document.getElementsByClassName('total-rate')[0].textContent;
          const votes = document.getElementsByClassName('votes')[0].textContent.trim();
          const address = document.getElementById('fulladdress').getElementsByClassName('lng_add')[0].textContent;

          // `removedn` is a global supplied by the page itself (presumably it
          // expands the collapsed category list — confirm). Only call it when
          // the page actually defines it.
          if (typeof removedn === 'function') {
            removedn('showmore');
          }
          const categories = Array.from(document.getElementsByClassName('showmore')[0].children)
            .map((category) => category.textContent.trim())
            .join(', ');

          const result = {
            name,
            rating,
            votes,
            address,
            categories,
          };

          // getElementsByClassName takes a bare class name; a leading dot
          // would never match and services would always be skipped.
          const servicesNode = document.getElementsByClassName('quickinfowrp')[0];
          if (servicesNode) {
            result.services = Array.from(servicesNode.getElementsByClassName('text'))
              .map((service) => service.textContent.trim())
              .join(', ');
          }
          return result;
        });

        directory[index] = {...listing, ...details};
      } catch (e) {
        // Keep the summary row and move on to the next listing.
        console.error('Failed to scrape listing:', listing.name, e);
      }
    }
  } catch (e) {
    console.error(e);
  } finally {
    try {
      const csv = new ObjectsToCSV(directory);
      await csv.toDisk('./directory.csv', {
        allColumns: true,
        append: true,
      });
    } finally {
      // Close the browser even if the CSV write throws.
      await browser.close();
    }
  }
};
/**
 * Scrape NUMBER_OF_PAGES consecutive result pages, one after another,
 * starting at page INITIAL_PAGE + 1.
 *
 * Pages are processed sequentially. If parsePage rejects, the error is
 * logged, the remaining pages are skipped, and the process exit code is set
 * to 1 so callers can tell the run was cut short. The completion message is
 * logged in either case.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  try {
    // Plain counting loop: the page numbers are synchronous values, so no
    // async iteration or intermediate index array is needed.
    for (let offset = 1; offset <= NUMBER_OF_PAGES; offset += 1) {
      const pageNumber = INITIAL_PAGE + offset;
      console.log('Starting with page', pageNumber);
      await parsePage(pageNumber);
    }
  } catch (e) {
    console.error(e);
    process.exitCode = 1;
  }
  console.log('Script Execution Complete');
};
// Script entry point: start the scrape as soon as this file is executed.
main();