Skip to content

Commit

Permalink
fix(route): 更新联合早报时间获取方法 (#12789)
Browse files Browse the repository at this point in the history
* Update util.js

* Update util.js

* Update util.js

* Update util.js

* Update util.js

* refactor: use cache.tryGet

* Update util.js

* Update util.js

* Update util.js

* fix: zaobao content order

fix #10309

---------

Co-authored-by: TonyRL <[email protected]>
  • Loading branch information
dzx-dzx and TonyRL authored Jul 14, 2023
1 parent 5ba92d6 commit 5e63059
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 31 deletions.
69 changes: 38 additions & 31 deletions lib/v2/zaobao/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const date = require('@/utils/date');
const timezone = require('@/utils/timezone');
const { art } = require('@/utils/render');
const path = require('path');
const { base32 } = require('rfc4648');

const baseUrl = 'https://www.zaobao.com';
const got_ins = got.extend({
Expand Down Expand Up @@ -35,22 +36,14 @@ const parseList = async (ctx, sectionUrl) => {
data = $('.clearfix').find('.list-block');
}

const title = $('#breadcrumbs > a')
.toArray()
.reduce((acc, cV, cI) => {
if (cI > 0) {
return acc + '-' + $(cV).text();
}
return '';
}, '');
const title = $('meta[property="og:title"]').attr('content');

const resultList = await Promise.all(
data.toArray().map(async (item) => {
data.toArray().map((item) => {
const $item = $(item);
// addBack: for HK version
let link = $item.find('a').addBack('a')[0].attribs.href;

let resultItem = {};

if (link[0] !== '/') {
// https://www.zaobao.com/interactive-graphics
const title = $item.find('a').text();
Expand All @@ -71,31 +64,29 @@ const parseList = async (ctx, sectionUrl) => {
pubDate = pubDate === dateString ? undefined : timezone(pubDate, +8);
}

resultItem = {
return {
title,
description: title,
pubDate,
link,
};
return Promise.resolve(resultItem);
} else {
link = baseUrl + link;
}

const value = await ctx.cache.get(link);
link = baseUrl + link;

if (value) {
resultItem = JSON.parse(value);
} else {
return ctx.cache.tryGet(link, async () => {
const article = await got_ins.get(link);
const $1 = cheerio.load(article.data);

let time = $1('.date-published', '.content').slice(0, 1).text().trim();
if (!time) {
// for HK version
time = $1('.author-info').find('.mgt10').slice(0, 1).text().trim();
}
time = time.replace('年', '-').replace('月', '-').replace('日', '').replace('发布 /', '');
const time = (() => {
if ($1("head script[type='application/json']").text() !== '') {
// SG
return new Date(Number(JSON.parse($1("head script[type='application/json']").text())?.articleDetails?.created) * 1000);
} else {
// HK
return new Date(JSON.parse($1("head script[type='application/ld+json']").eq(1).text())?.datePublished);
}
})();

$1('.overlay-microtransaction').remove();
$1('#video-freemium-player').remove();
Expand All @@ -104,6 +95,7 @@ const parseList = async (ctx, sectionUrl) => {
let articleBodyNode = $1('.article-content-rawhtml');
if (articleBodyNode.length === 0) {
// for HK version
orderContent($1('.article-body'));
articleBodyNode = $1('.article-body');
}

Expand Down Expand Up @@ -152,27 +144,42 @@ const parseList = async (ctx, sectionUrl) => {
});
}

resultItem = {
return {
// <- for SG version -> for HK version
title: $1('h1', '.content').text().trim() || $1('h1.article-title').text(),
description: art(path.join(__dirname, 'templates/zaobao.art'), {
articleBody,
imageDataArray,
}),
pubDate: timezone(time, +8), // zaobao seems always UTC+8
pubDate: time,
link,
};
ctx.cache.set(link, JSON.stringify(resultItem));
}

return Promise.resolve(resultItem);
});
})
);
return {
title,
resultList,
};
};

/**
 * Re-orders the direct children of `parent` in place.
 *
 * Each child carries a sort key in its `s` data value: the first characters
 * are skipped and the remainder is a base32 string that decodes to the
 * child's numeric position. Children are sorted ascending by that position,
 * then each one gets its new index written to its `s` attribute and is
 * re-appended to `parent` so the tree ends up in sorted order.
 *
 * @param {object} parent - Selection whose children are re-ordered; it must
 *     provide `children()`, `find()` and `append()` (presumably a cheerio
 *     selection - confirm against callers).
 * @returns {void}
 */
const orderContent = (parent) => {
    // 'GM======' is base32 for the string '3': the number of leading characters
    // to skip in each key. Decoded once here instead of on every comparison.
    const prefixLength = Number(Buffer.from(base32.parse('GM======')).toString());

    // Decode each child's position at most once. The lookup stays lazy (driven by
    // the comparator) so that a list with fewer than two children never has its
    // keys read at all.
    const positions = new Map();
    const positionOf = (node) => {
        if (!positions.has(node)) {
            const encoded = parent.find(node).data('s').substring(prefixLength);
            positions.set(node, Number(Buffer.from(base32.parse(encoded)).toString()));
        }
        return positions.get(node);
    };

    parent
        .children()
        .toArray()
        .sort((left, right) => positionOf(left) - positionOf(right))
        .forEach((node, i) => {
            parent.find(node).attr('s', i);
            // Appending an existing child moves it to the end, so appending in
            // sorted order rebuilds the child list in that order.
            parent.append(node);
        });
};

// Functions exposed to other modules: parseList and orderContent.
module.exports = {
parseList,
orderContent,
};
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
"rand-user-agent": "1.0.109",
"re2": "1.19.1",
"require-all": "3.0.0",
"rfc4648": "1.5.2",
"rss-parser": "3.13.0",
"showdown": "2.1.0",
"simplecc-wasm": "0.1.5",
Expand Down
7 changes: 7 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5e63059

Please sign in to comment.