-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetMysql.js
135 lines (110 loc) · 3.61 KB
/
getMysql.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
const fs = require('fs');
const path = require('path');

const axios = require('axios');
const cheerio = require('cheerio');
// Crawl entry page: https://dev.mysql.com/doc/refman/5.7/en/sql-statements.html (Chapter 13, SQL Statements)
/**
 * Shared settings and crawl-wide state for the MySQL manual scraper.
 */
class GetMysqlConfig {
  // Base URL of the MySQL 5.7 reference manual (English edition).
  BaseUrl = 'https://dev.mysql.com/doc/refman/5.7/en/';

  // Entries accumulated for indexes.json.
  static indexes = [];

  // Records which chapters have already been written.
  static chaptersNumMap = new Map();

  // Returns a promise that settles after `milliseconds`; used to space out
  // requests so the server does not answer with HTTP 403.
  static sleep = (milliseconds) => new Promise((wake) => {
    setTimeout(wake, milliseconds);
  });
}
/**
 * Downloads a single page of the MySQL manual and exposes it as a cheerio
 * document.
 */
class GetMysql extends GetMysqlConfig {
  // Absolute URL of the page to download.
  url = '';
  // Raw response body of the last successful request.
  sourceHtml = '';
  // Result of `cheerio.load(sourceHtml)`; remains '' until a page is loaded.
  cheerioHtml = '';

  /**
   * @param {string} pageUrl - Page reference, normally a file name relative
   *   to `BaseUrl` (e.g. 'sql-statements.html').
   */
  constructor(pageUrl) {
    super();
    // Resolve against the base instead of concatenating strings, so hrefs
    // that are already absolute or root-relative still yield a valid URL.
    this.url = new URL(pageUrl, this.BaseUrl).href;
  }

  /**
   * Requests `this.url`, stores the response body and parses it.
   *
   * @returns {Promise<*>} The value produced by `cheerio.load` for the page.
   * @throws Rethrows whatever the request or the parse step threw, after
   *   logging it.
   */
  async getPageRequest() {
    try {
      const response = await axios(this.url);
      this.sourceHtml = response.data;
      this.getPageHtml();
      return this.cheerioHtml;
    } catch (err) {
      console.error(err);
      throw err;
    }
  }

  /** Parses `sourceHtml` with cheerio and stores the result in `cheerioHtml`. */
  getPageHtml() {
    this.cheerioHtml = cheerio.load(this.sourceHtml);
  }
}
/**
 * Processes one downloaded manual page: records its index entry, crawls every
 * page linked from its table of contents, then hands the page to `outData`.
 *
 * Child pages are fetched strictly one after another (with a pause before each
 * request). A child that fails is logged and skipped so the rest of the crawl
 * continues.
 *
 * @param {Function} cheerioHtml - Cheerio root function for the fetched page.
 * @returns {Promise<void>} Settles once this page and all of its children
 *   have been handled.
 */
const handleSinglePage = async (cheerioHtml) => {
  // Pause before each child request, in milliseconds.
  const requestDelayMs = 10000;

  const docsBody = cheerioHtml("#docs-body");
  const bodyHtml = docsBody.html();
  if (bodyHtml == null) {
    // html() yields null when the selection is empty: nothing to index here.
    console.warn("Skipping page without #docs-body");
    return;
  }
  // Parse the body fragment once and reuse it for every lookup below.
  const body$ = cheerio.load(bodyHtml);

  // The title is split on a no-break space (U+00A0) into the chapter number
  // and the chapter title.
  const titleText = body$(".titlepage .title").text();
  const [chaptersNum, chaptersTitle] = titleText.split(/\u{00A0}/u);

  // Avoid writing the same chapter twice.
  if (GetMysqlConfig.chaptersNumMap.has(chaptersNum)) {
    console.log("已经存在", chaptersNum, chaptersTitle);
    return;
  }
  GetMysqlConfig.chaptersNumMap.set(chaptersNum, chaptersTitle);

  // Build the file name: replace EVERY "/" (a single one left behind would
  // create an unintended sub-directory) and cap the length. The same rules
  // apply when falling back to the full title text.
  const fileStem = (chaptersTitle || titleText).replace(/\//g, " ").slice(0, 245);
  const singlePageData = {
    t: chaptersTitle, // chapter title
    d: titleText, // TODO: better description
    p: `doc/en/${fileStem}.html`,
  };

  // Table of contents: crawl each linked page in turn.
  if (body$(".toc .toc").length > 0) {
    for (const link of body$(".toc a").toArray()) {
      const pageUrl = link.attribs.href;
      if (!pageUrl) {
        continue; // anchor without a target
      }
      await GetMysql.sleep(requestDelayMs);
      try {
        const childHtml = await new GetMysql(pageUrl).getPageRequest();
        await handleSinglePage(childHtml);
      } catch (err) {
        // One broken page must not abort the whole crawl.
        console.error(`Failed to process ${pageUrl}:`, err);
      }
    }
  }

  // Only pages that contain an article section carry body HTML.
  const hasSection = cheerioHtml("#docs-body>.section").length > 0;
  const outHtml = hasSection ? bodyHtml : "";

  outData(singlePageData, outHtml);
};
// Escapes the characters that are significant inside HTML text content.
const escapeHtmlText = (text) =>
  String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

/**
 * Wraps a page body in a minimal standalone HTML document.
 *
 * @param {string} outHtml - Body markup, inserted verbatim (it is already HTML).
 * @param {{t?: string, d?: string}} singlePageData - Index entry of the page;
 *   `t` is used as the document title, falling back to `d` when `t` is missing.
 * @returns {string} The complete HTML document.
 */
const insterHtml = (outHtml, singlePageData) => {
  // The title is plain text, so it has to be escaped before it is embedded.
  const pageTitle = escapeHtmlText(singlePageData.t ?? singlePageData.d ?? "");
  return [
    "<!DOCTYPE html>",
    '<html lang="zh-Hant-CN">',
    "<head>",
    '<meta charset="UTF-8">',
    `<title>${pageTitle}</title>`,
    '<link href="../docs.css" rel="stylesheet">',
    "</head>",
    "<body>",
    `${outHtml}`,
    "</body>",
    "</html>",
  ].join("\n");
};
/**
 * Appends a page to the in-memory index, rewrites `./mysql/indexes.json`, and
 * writes the page itself to `./mysql/<singlePageData.p>`.
 *
 * The writes are synchronous on purpose: the index file is rewritten on every
 * call, and overlapping asynchronous writes to the same file are unsafe.
 *
 * @param {{t?: string, d: string, p: string}} singlePageData - Index entry;
 *   `p` is the page path relative to `./mysql/`.
 * @param {string} outHtml - Body markup of the page.
 * @throws {Error} Any file-system error raised while creating directories or
 *   writing the files.
 */
const outData = (singlePageData, outHtml) => {
  GetMysqlConfig.indexes.push(singlePageData);

  const indexFile = `./mysql/indexes.json`;
  const pageFile = `./mysql/${singlePageData.p}`;

  // Make sure the target directories exist; a fresh checkout has none of them.
  fs.mkdirSync(path.dirname(indexFile), { recursive: true });
  fs.mkdirSync(path.dirname(pageFile), { recursive: true });

  fs.writeFileSync(indexFile, JSON.stringify(GetMysqlConfig.indexes));
  console.log(`indexes.json - ok!`);

  fs.writeFileSync(pageFile, insterHtml(outHtml, singlePageData));
  console.log(`${singlePageData.p} - ok!`);
};
// Entry point: start the crawl at the SQL statements chapter.
const getMysql = new GetMysql('sql-statements.html');
getMysql
  .getPageRequest()
  .then(handleSinglePage)
  .catch((err) => {
    // Without a handler a failure here would be an unhandled rejection.
    console.error('Crawl failed:', err);
    process.exitCode = 1;
  });