-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathwiz2md.py
More file actions
95 lines (81 loc) · 4.24 KB
/
wiz2md.py
File metadata and controls
95 lines (81 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""批量转换老版为知笔记.md.ziw文件为标准markdown文件
为知笔记只支持逐个笔记导出为markdown文件,此脚本可以从老版为知笔记数据文件夹自动搜索markdown方式记录的笔记文件,并批量导出为标准markdown文件。
"""
import pathlib
import zipfile
import shutil
from lxml import etree
def get_markdown_files(data_path):
"""获取.md.ziw文件列表"""
md_files = list(data_path.glob('**/*.md*.ziw'))
print(f'共发现{len(md_files)}个Markdown文件。')
return md_files
def ziw2md(md_file, export_md_path, tmp_path, abs_img_path=False):
"""将.md.ziw文件转为标准md文件,导出图片和附件文件到本地目录"""
ziw_zip = zipfile.ZipFile(md_file)
ziw_zip.extractall(tmp_path)
ziw_zip.close()
print(f"正在转换《{md_file.stem}》……")
export_md_file = export_md_path.joinpath(md_file.parent.stem, md_file.stem.replace('.md', '')+'.md')
export_attachment_path = export_md_file.parent / export_md_file.stem # 图片、附件保存目录
with open(tmp_path / 'index.html', encoding='utf-16') as f1:
content = f1.read()
content = content.replace('</div>', '\n')
content = content.replace('<br>', '\n')
content = content.replace('<br/>', '\n')
'''
pattern1 = re.compile(r'<!doctype.*?</head>', re.DOTALL | re.IGNORECASE | re.MULTILINE)
content = pattern1.sub('', content)
pattern2 = re.compile(r'.*WizHtmlContentBegin-->', re.DOTALL | re.IGNORECASE | re.MULTILINE)
content = pattern2.sub('', content)
content = re.sub(r'<body.*?>', '', content)
content = content.replace('<', '<')
content = content.replace('>', '>')
content = content.replace(' ', ' ')
content = content.replace('<div>', '')
content = content.replace('</div>', '\n')
content = content.replace('<br/>', '\n')
content = content.replace('<br>', '\n')
content = content.replace('</body></html>', '')
# content = html2text.html2text(content)
content = content.replace(r'\---', '---').strip()
content = re.sub(r'<ed_tag name="markdownimage" .*?</ed_tag>', '', content).strip() # 替换包含图片链接文件的文末内容
'''
tree = etree.HTML(content)
content = tree.xpath('//body')[0].xpath('string(.)')
# 将图片文件链接改为相应目录
if abs_img_path:
content = content.replace('index_files', str(export_attachment_path))
else:
content = content.replace('index_files', export_attachment_path.stem)
# 分目录输出markdown文件
if not (export_md_path / md_file.parent.stem).exists():
(export_md_path / md_file.parent.stem).mkdir()
with open(export_md_file, 'w', encoding='utf-8') as f2:
f2.write(content)
print(f'已导出:{export_md_file}。')
# 将index_files目录下图片文件复制到以markdown文件标题命名的目录
if (tmp_path / 'index_files').exists():
# shutil.copytree((tmp_path / 'index_files'), export_attachment_path, dirs_exist_ok=True)
(tmp_path / 'index_files').rename(export_attachment_path)
# 将附件目录下文件复制到以markdown文件标题命名的目录
attachment_path = md_file.parent.joinpath(md_file.stem, '.md_Attachments')
if attachment_path.exists():
if not export_attachment_path.exists():
export_attachment_path.mkdir()
for attachment in attachment_path.glob('*.*'):
shutil.copy2(attachment, export_attachment_path)
# shutil.rmtree(tmp_path)
if __name__ == "__main__":
wizdata_path = pathlib.Path(r'C:\QMDownload\Backup\Wiz Knowledge\Data\[email protected]')
export_md_path = pathlib.Path(r'C:\QMDownload\Backup\Wiz Knowledge\exported_md')
tmp_path = export_md_path / 'temp'
keyword = '困境与解法'
md_files = get_markdown_files(wizdata_path)
for md_file in md_files:
# 导出全部markdown文件
if not keyword:
ziw2md(md_file, export_md_path, tmp_path, abs_img_path=False)
# 只导出文件名包含指定关键字的markdown文件
elif keyword in md_file.stem:
ziw2md(md_file, export_md_path, tmp_path, abs_img_path=False)