-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharticleParser.js
81 lines (65 loc) · 2.26 KB
/
articleParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import makeRequest from './makeRequest.js';
/**
 * Plain record describing one parsed article.
 *
 * Every link/title field starts as `null` and the citation count starts at 0;
 * the parser fills them in afterwards and may attach additional properties.
 */
export class Article {
  constructor() {
    Object.assign(this, {
      title: null,
      scholarUrl: null,
      externalArticleUrl: null,
      pdfLink: null,
      totalCitations: 0,
    });
  }
}
/**
 * Fetches an article page and extracts its metadata into an `Article`.
 *
 * The page is obtained through `makeRequest`, whose result is used here as a
 * document-like object exposing `querySelector` / `querySelectorAll`.
 * Missing elements never abort parsing: the affected field receives
 * `notFoundMsg` instead.
 */
export class ArticleParser {
  constructor() {
    // Document produced by makeRequest(); set by generateArticle().
    this.articleDom = null;
    this.article = new Article();
    // Rows matched by 'div[id*="table"] > div'; set by generateArticle().
    this.contentsNodeList = [];
    // Placeholder stored when an expected element or property is absent.
    this.notFoundMsg = '<Value not found.>';
  }

  /**
   * Downloads the page at `articleUrl` and parses it.
   *
   * Any error raised while fetching or parsing is propagated unchanged as the
   * rejection reason of the returned promise.
   *
   * @param {string} articleUrl - Address of the article page.
   * @returns {Promise<Article>} The populated article.
   */
  async generateArticle(articleUrl) {
    // Start from a clean record so a reused parser neither mutates the
    // article returned by a previous call nor leaks its fields into this one.
    this.article = new Article();
    this.articleDom = await makeRequest(articleUrl);
    this.article.scholarUrl = articleUrl;
    this.parseTitle();
    this.parseArticleLink();
    this.parsePdfLink();
    this.contentsNodeList = this.articleDom.querySelectorAll('div[id*="table"] > div');
    this.parseContents();
    return this.article;
  }

  /**
   * Reads `property` from `value`, falling back to `notFoundMsg`.
   *
   * The fallback is used when `value` is null/undefined/primitive-without-
   * the-property, or when the property itself is null/undefined. An empty
   * string property is returned as-is.
   *
   * @param {?Object} value - Usually a DOM node, possibly null.
   * @param {string} property - Name of the property to read.
   * @returns {*} The property value or `notFoundMsg`.
   */
  verify(value, property) {
    return value?.[property] ?? this.notFoundMsg;
  }

  /** Stores the text of the title container's last child node as the title. */
  parseTitle() {
    // Optional chaining: a page without a title container yields the
    // placeholder instead of a TypeError.
    const titleNode = this.articleDom.querySelector('div[id*="title"]')?.lastChild;
    this.article.title = this.verify(titleNode, 'textContent');
  }

  /** Stores the href of the first anchor whose class contains "title". */
  parseArticleLink() {
    const externalArticleUrl = this.articleDom.querySelector('a[class*="title"]');
    this.article.externalArticleUrl = this.verify(externalArticleUrl, 'href');
  }

  /** Stores the href of the anchor directly inside a div whose class contains "title". */
  parsePdfLink() {
    const pdfLink = this.articleDom.querySelector('div[class*="title"] > a');
    this.article.pdfLink = this.verify(pdfLink, 'href');
  }

  /**
   * Copies every label/value row of `contentsNodeList` onto the article.
   *
   * The label (first child) becomes a camelCase property name and the value
   * (second child) supplies the text. Two labels are special-cased:
   * "Total citations" is reduced to the first integer found in the value's
   * first child, and "Scholar articles" is ignored.
   */
  parseContents() {
    for (const node of this.contentsNodeList) {
      const labelElement = node.children[0];
      const valueElement = node.children[1];
      // Skip malformed rows rather than aborting the whole parse.
      if (!labelElement || !valueElement) {
        continue;
      }

      const key = this.formatKey(labelElement.textContent);
      if (/totalCitations/.test(key)) {
        const digits = valueElement.children[0]?.textContent.match(/\d+/);
        // Without a number the default count is kept instead of storing NaN.
        if (digits) {
          this.article.totalCitations = Number.parseInt(digits[0], 10);
        }
      } else if (!/scholar\s?articles/i.test(key)) {
        this.article[key] = valueElement.textContent;
      }
    }
  }

  /**
   * Converts a human-readable label into a camelCase property name,
   * e.g. "Publication date" -> "publicationDate".
   *
   * Surrounding whitespace is dropped and runs of whitespace count as a
   * single word separator.
   *
   * @param {string} key - Label text.
   * @returns {string} camelCase key.
   */
  formatKey(key) {
    return key
      .trim()
      .replace(/\s+(\w)/g, (_, firstLetter) => firstLetter.toUpperCase())
      .replace(/^\w/, (firstLetter) => firstLetter.toLowerCase());
  }
}