103 lines
2.7 KiB
Python
103 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
批量将 hk-bank 目录及子目录下的 *.md 文件转换为 PDF
|
|
- 含 PlantUML 图表的文件使用 lua-filter 在线渲染
|
|
- 含 emoji 的文件先替换为纯文本符号
|
|
"""
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import shutil
|
|
|
|
# Root directory that is walked for Markdown files.
BASE_DIR = "/Users/jiahong/Documents/Job/docs/Certificates/hk-bank"

# Lua filter handed to pandoc on every invocation (see PANDOC_BASE).
LUA_FILTER = "/Users/jiahong/Documents/Job/docs/.vscode/plantuml-filter.lua"

# Argument prefix shared by every pandoc call; the input path and
# "-o <pdf>" are appended per file by the caller.
PANDOC_BASE = [
    "pandoc",
    "--pdf-engine=xelatex",
    # Template variables: fonts and page margin.
    "-V",
    "mainfont=STSong",
    "-V",
    "monofont=STFangsong",
    "-V",
    "geometry:margin=2.5cm",
    "-V",
    "CJKmainfont=STSong",
    # Lua filter applied to the document.
    "--lua-filter",
    LUA_FILTER,
]
|
|
|
|
# Emoji that get a readable plain-text stand-in instead of being dropped.
EMOJI_MAP = {
    "✅": "[OK]",
    "❌": "[X]",
    "⬜": "[ ]",
    "⬛": "[#]",
    "📄": "[doc]",
    "⚠️": "[!]",
    "🔍": "[search]",
    "📜": "[scroll]",
    "📝": "[note]",
}

# U+FE0F (variation selector-16) requests emoji presentation; the same emoji
# may appear in text with or without it.
_VS16 = "\ufe0f"

# Supplementary-plane blocks that hold emoji and pictographs
# (U+1F000..U+1FAFF) plus the tag characters used inside flag sequences
# (U+E0020..U+E007F). Deliberately NOT the whole astral range: CJK
# Extension B and later ideographs (U+20000 and up) live there too and
# must survive.
_ASTRAL_EMOJI_RE = re.compile(r'[\U0001F000-\U0001FAFF\U000E0020-\U000E007F]')


def replace_emoji(text):
    """Return *text* with emoji replaced by plain-text symbols or removed.

    Emoji listed in ``EMOJI_MAP`` are substituted with their mapped text,
    whether or not they carry a U+FE0F variation selector. Any remaining
    character in the supplementary-plane emoji blocks is deleted. Other
    supplementary-plane characters, such as rare CJK ideographs, are kept.

    Args:
        text: The Markdown source as a string.

    Returns:
        The cleaned string; identical to *text* when nothing matched.
    """
    for emoji, replacement in EMOJI_MAP.items():
        base = emoji.replace(_VS16, "")
        if not base:
            # Degenerate key made only of selectors: match it literally.
            text = text.replace(emoji, replacement)
            continue
        # Replace the selector-qualified form first so no stray U+FE0F is
        # left behind, then the bare form.
        text = text.replace(base + _VS16, replacement).replace(base, replacement)

    # Drop any remaining (unmapped) astral-plane emoji.
    return _ASTRAL_EMOJI_RE.sub('', text)
|
|
|
|
def has_plantuml(content):
    """Return True if *content* contains a PlantUML marker.

    A marker is either the ``@startuml`` directive or the opening of a
    fenced code block tagged ``plantuml``.
    """
    markers = ('@startuml', '```plantuml')
    return any(marker in content for marker in markers)
|
|
|
|
def convert_md_to_pdf(md_path):
    """Convert one Markdown file to a PDF written next to it.

    The file is read as UTF-8 and passed through ``replace_emoji``. If that
    changed the text, the cleaned text is written to a temporary
    ``<md_path>.tmp.md`` and pandoc is run on that copy; otherwise pandoc
    is run on the original file. The temporary file is always removed,
    even when pandoc cannot be started.

    Args:
        md_path: Path to the Markdown file.

    Returns:
        True if pandoc exited with status 0 and the PDF exists, else False.
        Progress and (truncated) error output are printed to stdout.
    """
    with open(md_path, 'r', encoding='utf-8') as f:
        content = f.read()

    pdf_path = os.path.splitext(md_path)[0] + ".pdf"
    rel_md = os.path.relpath(md_path, BASE_DIR)

    # Only go through a temporary copy when emoji cleaning changed the text.
    cleaned = replace_emoji(content)
    need_clean = cleaned != content
    src = md_path + ".tmp.md" if need_clean else md_path

    print(f" 转换: {rel_md}")

    succeeded = False
    error_text = ""
    try:
        if need_clean:
            with open(src, 'w', encoding='utf-8') as f:
                f.write(cleaned)

        cmd = PANDOC_BASE + [src, "-o", pdf_path]
        result = subprocess.run(cmd, capture_output=True, text=True)
        error_text = result.stderr
        # Require a zero exit status as well: a PDF left over from an
        # earlier run must not make a failed conversion look successful.
        succeeded = result.returncode == 0 and os.path.exists(pdf_path)
    except OSError as exc:
        # e.g. pandoc is not installed, or the temp file cannot be written;
        # report it as a failure of this file instead of aborting the batch.
        error_text = str(exc)
    finally:
        # The temp file ends in ".md"; leaving it behind would make the
        # next directory walk treat it as a document to convert.
        if need_clean and os.path.exists(src):
            os.remove(src)

    if succeeded:
        print(f" ✓ 生成: {os.path.relpath(pdf_path, BASE_DIR)}")
        return True

    print(f" ✗ 失败: {rel_md}")
    if error_text:
        print(f" 错误: {error_text[:300]}")
    return False
|
|
|
|
def main():
    """Convert every ``.md`` file found under ``BASE_DIR`` and print a summary.

    Directories whose name starts with a dot are not descended into. Files
    are processed in the order ``os.walk`` yields directories, sorted by
    name within each directory.
    """
    markdown_paths = []
    for current_dir, subdirs, filenames in os.walk(BASE_DIR):
        # Prune hidden directories in place so os.walk skips them.
        subdirs[:] = [name for name in subdirs if not name.startswith('.')]
        markdown_paths.extend(
            os.path.join(current_dir, name)
            for name in sorted(filenames)
            if name.endswith('.md')
        )

    print(f"共发现 {len(markdown_paths)} 个 Markdown 文件\n")

    outcomes = [convert_md_to_pdf(path) for path in markdown_paths]
    ok = sum(1 for outcome in outcomes if outcome)
    fail = len(outcomes) - ok

    print(f"\n完成:{ok} 个成功,{fail} 个失败")
|
|
|
|
# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|