diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index ad0964965f..82a0ff9e37 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -5,6 +5,7 @@
### Enhancements
- **Print Telegram user info of bot**: Print the bot's Telegram user info when the bot is started. This is to help bot managers to find the bot's username and user ID when deploying the bot.
+- **Minor refactor**: Some internal functions have been refactored to improve performance and maintainability.
### Bug fixes
diff --git a/docs/CHANGELOG.zh.md b/docs/CHANGELOG.zh.md
index c67d165afc..3566ef8547 100644
--- a/docs/CHANGELOG.zh.md
+++ b/docs/CHANGELOG.zh.md
@@ -5,6 +5,7 @@
### 增强
- **打印 bot 的 Telegram 用户信息**: 在 bot 启动时打印 bot 的 Telegram 用户信息。这是为了帮助 bot 管理员在部署 bot 时找到 bot 的用户名和用户 ID。
+- **次要的重构**: 重构了一些内部函数以提高性能和可维护性。
### Bug 修复
diff --git a/src/parsing/emojify.json b/src/parsing/emojify.json
deleted file mode 100644
index 13341a29e2..0000000000
--- a/src/parsing/emojify.json
+++ /dev/null
@@ -1,103 +0,0 @@
-{
- "微笑": "🙂",
- "可爱": "😊",
- "太开心": "😆",
- "鼓掌": "👏",
- "嘻嘻": "😁",
- "哈哈": "😄",
- "笑cry": "😂",
- "挤眼": "😜",
- "馋嘴": "😋",
- "黑线": "😑",
- "汗": "😓",
- "哼": "😠",
- "怒": "😡",
- "可怜": "🥺",
- "失望": "😞",
- "悲伤": "😢",
- "泪": "😭",
- "害羞": "😳",
- "爱你": "🥰",
- "亲亲": "😚",
- "色": "😍",
- "阴险": "😏",
- "偷笑": "🤭",
- "酷": "😎",
- "并不简单": "🧐",
- "思考": "🤔",
- "晕": "😵",
- "骷髅": "💀",
- "嘘": "🤫",
- "闭嘴": "🤐",
- "傻眼": "😮",
- "吃惊": "😲",
- "吐": "🤮",
- "感冒": "😷",
- "生病": "🤒",
- "拜拜": "👋",
- "鄙视": "🖕",
- "白眼": "🙄",
- "抓狂": "😖",
- "怒骂": "🤬",
- "钱": "🤑",
- "哈欠": "🥱",
- "困": "😴",
- "睡": "😪",
- "吃瓜": "🍉",
- "酸": "🍋",
- "喵喵": "🐱",
- "抱抱": "🤗",
- "摊手": "🤷",
- "跪了": "🧎",
- "鲜花": "🌹",
- "给你小心心": "💝",
- "心": "❤",
- "伤心": "💔",
- "握手": "🤝",
- "赞": "👍",
- "good": "👍",
- "弱": "👎",
- "NO": "✋",
- "耶": "✌",
- "拳头": "✊",
- "ok": "👌",
- "加油": "💪",
- "haha": "🤟",
- "熊猫": "🐼",
- "兔子": "🐰",
- "猪头": "🐷",
- "太阳": "🌞",
- "月亮": "🌙",
- "浮云": "☁",
- "下雨": "🌧",
- "微风": "🍃",
- "围观": "👨👧👦",
- "飞机": "✈",
- "照相机": "📷",
- "话筒": "🎙",
- "蜡烛": "🕯",
- "音乐": "🎵",
- "可乐": "🥤",
- "干杯": "🍻",
- "蛋糕": "🎂",
- "礼物": "🎁",
- "钟": "⏰",
- "肥皂": "🧼",
- "绿丝带": "🎗",
- "围脖": "🧣",
- "圣诞老人": "🎅",
- "文明遛狗": "🐕",
- "最右": " →_→ ",
- "五仁月饼": "🥮",
- "弗莱见钱眼开": "🤑",
- "棒棒糖": "🍭",
- "炸鸡腿": "🍗",
- "点亮平安灯": "🏮",
- "点亮橙色": "🖐",
- "看涨": "📈",
- "看跌": "📉",
- "星星": "⭐",
- "空星": "★",
- "全家福": "👪",
- "圆月": "🌕"
-}
diff --git a/src/parsing/utils.py b/src/parsing/utils.py
index 12667d43c6..28ef443fab 100644
--- a/src/parsing/utils.py
+++ b/src/parsing/utils.py
@@ -2,7 +2,6 @@
from typing import Optional, Sequence, Union, Final, Iterable
import re
-import json
import string
from contextlib import suppress
from bs4.element import Tag
@@ -11,9 +10,9 @@
from telethon.tl.types import TypeMessageEntity
from functools import partial
from urllib.parse import urljoin
-from os import path
from itertools import chain
+from .weibo_emojify_map import EMOJIFY_MAP
from .. import log
from ..aio_helper import run_async
from ..compat import parsing_utils_html_validator_minify, INT64_T_MAX
@@ -83,10 +82,6 @@
sorted(set(SPACES + INVALID_CHARACTERS + string.punctuation + string.whitespace))
)
-# load emoji dict
-with open(path.join(path.dirname(__file__), 'emojify.json'), 'r', encoding='utf-8') as emojify_json:
- EMOJI_DICT = json.load(emojify_json)
-
replaceInvalidCharacter = partial(re.compile(rf'[{INVALID_CHARACTERS}]').sub, ' ') # use initially
replaceSpecialSpace = partial(re.compile(rf'[{SPACES[1:]}]').sub, ' ') # use carefully
stripBr = partial(re.compile(r'\s*<br>\s*').sub, '<br>')
@@ -121,9 +116,9 @@ def resolve_relative_link(base: Optional[str], url: Optional[str]) -> str:
def emojify(xml):
xml = emojize(xml, language='alias', variant='emoji_type')
- for emoticon, emoji in EMOJI_DICT.items():
+ for emoticon_phrase, emoji in EMOJIFY_MAP.items():
# emojify weibo emoticons, get all here: https://api.weibo.com/2/emotions.json?source=1362404091
- xml = xml.replace(f'[{emoticon}]', emoji)
+ xml = xml.replace(emoticon_phrase, emoji)
return xml
diff --git a/src/parsing/weibo_emojify_map.py b/src/parsing/weibo_emojify_map.py
new file mode 100644
index 0000000000..c55e71d98d
--- /dev/null
+++ b/src/parsing/weibo_emojify_map.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+from typing import Final
+
+EMOJIFY_MAP: Final[dict] = {
+ "[微笑]": "🙂",
+ "[可爱]": "😊",
+ "[太开心]": "😆",
+ "[鼓掌]": "👏",
+ "[嘻嘻]": "😁",
+ "[哈哈]": "😄",
+ "[笑cry]": "😂",
+ "[挤眼]": "😜",
+ "[馋嘴]": "😋",
+ "[黑线]": "😑",
+ "[汗]": "😓",
+ "[哼]": "😠",
+ "[怒]": "😡",
+ "[可怜]": "🥺",
+ "[失望]": "😞",
+ "[悲伤]": "😢",
+ "[泪]": "😭",
+ "[害羞]": "😳",
+ "[爱你]": "🥰",
+ "[亲亲]": "😚",
+ "[色]": "😍",
+ "[阴险]": "😏",
+ "[偷笑]": "🤭",
+ "[酷]": "😎",
+ "[并不简单]": "🧐",
+ "[思考]": "🤔",
+ "[晕]": "😵",
+ "[骷髅]": "💀",
+ "[嘘]": "🤫",
+ "[闭嘴]": "🤐",
+ "[傻眼]": "😮",
+ "[吃惊]": "😲",
+ "[吐]": "🤮",
+ "[感冒]": "😷",
+ "[生病]": "🤒",
+ "[拜拜]": "👋",
+ "[鄙视]": "🖕",
+ "[白眼]": "🙄",
+ "[抓狂]": "😖",
+ "[怒骂]": "🤬",
+ "[钱]": "🤑",
+ "[哈欠]": "🥱",
+ "[困]": "😴",
+ "[睡]": "😪",
+ "[吃瓜]": "🍉",
+ "[酸]": "🍋",
+ "[喵喵]": "🐱",
+ "[抱抱]": "🤗",
+ "[摊手]": "🤷",
+ "[跪了]": "🧎",
+ "[鲜花]": "🌹",
+ "[给你小心心]": "💝",
+ "[心]": "❤",
+ "[伤心]": "💔",
+ "[握手]": "🤝",
+ "[赞]": "👍",
+ "[good]": "👍",
+ "[弱]": "👎",
+ "[NO]": "✋",
+ "[耶]": "✌",
+ "[拳头]": "✊",
+ "[ok]": "👌",
+ "[加油]": "💪",
+ "[haha]": "🤟",
+ "[熊猫]": "🐼",
+ "[兔子]": "🐰",
+ "[猪头]": "🐷",
+ "[太阳]": "🌞",
+ "[月亮]": "🌙",
+ "[浮云]": "☁",
+ "[下雨]": "🌧",
+ "[微风]": "🍃",
+ "[围观]": "👨👧👦",
+ "[飞机]": "✈",
+ "[照相机]": "📷",
+ "[话筒]": "🎙",
+ "[蜡烛]": "🕯",
+ "[音乐]": "🎵",
+ "[可乐]": "🥤",
+ "[干杯]": "🍻",
+ "[蛋糕]": "🎂",
+ "[礼物]": "🎁",
+ "[钟]": "⏰",
+ "[肥皂]": "🧼",
+ "[绿丝带]": "🎗",
+ "[围脖]": "🧣",
+ "[圣诞老人]": "🎅",
+ "[文明遛狗]": "🐕",
+ "[最右]": " →_→ ",
+ "[五仁月饼]": "🥮",
+ "[弗莱见钱眼开]": "🤑",
+ "[棒棒糖]": "🍭",
+ "[炸鸡腿]": "🍗",
+ "[点亮平安灯]": "🏮",
+ "[点亮橙色]": "🖐",
+ "[看涨]": "📈",
+ "[看跌]": "📉",
+ "[星星]": "⭐",
+ "[空星]": "★",
+ "[全家福]": "👪",
+ "[圆月]": "🌕"
+}