This commit is contained in:
jiahong
2026-04-25 10:12:52 +08:00
parent c59b4c225d
commit 6f97f46e69
44 changed files with 309 additions and 44 deletions
+102
View File
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""
批量将 hk-bank 目录及子目录下的 *.md 文件转换为 PDF
- 含 PlantUML 图表的文件使用 lua-filter 在线渲染
- 含 emoji 的文件先替换为纯文本符号
"""
import os
import re
import subprocess
import shutil
# Root directory that main() walks recursively looking for *.md files.
BASE_DIR = "/Users/jiahong/Documents/Job/docs/Certificates/hk-bank"

# Lua filter passed to every pandoc invocation through --lua-filter.
LUA_FILTER = "/Users/jiahong/Documents/Job/docs/.vscode/plantuml-filter.lua"

# Argument prefix shared by all pandoc calls; the input file and the
# "-o <pdf>" pair are appended per document.
PANDOC_BASE = (
    ["pandoc", "--pdf-engine=xelatex"]
    + ["-V", "mainfont=STSong"]
    + ["-V", "monofont=STFangsong"]
    + ["-V", "geometry:margin=2.5cm"]
    + ["-V", "CJKmainfont=STSong"]
    + ["--lua-filter", LUA_FILTER]
)
# Literal substitutions applied to Markdown text: each key found in the text
# is replaced by the ASCII tag it maps to.
#
# NOTE(review): the first four keys below are empty strings. A dict literal
# keeps only the last value for a repeated key, so they collapse into a single
# "" -> "[#]" entry. Presumably these were distinct symbols (check mark, cross,
# empty box, ...) that got lost somewhere - confirm and restore them.
EMOJI_MAP = {
    "": "[OK]",
    "": "[X]",
    "": "[ ]",
    "": "[#]",
    "📄": "[doc]",
    "⚠️": "[!]",
    "🔍": "[search]",
    "📜": "[scroll]",
    "📝": "[note]",
}
def replace_emoji(text):
    """Return *text* with emoji replaced by plain-text tags or removed.

    Every non-empty key of ``EMOJI_MAP`` found in *text* is replaced by its
    mapped tag. Afterwards any remaining character outside the Basic
    Multilingual Plane (U+10000 to U+10FFFF) is deleted.

    Args:
        text: Markdown source as a string.

    Returns:
        The cleaned string; identical to *text* when nothing matched.
    """
    for emoji, replacement in EMOJI_MAP.items():
        # str.replace("", x) inserts x between every character of the text,
        # so a blank key would corrupt the whole document - skip it.
        if not emoji:
            continue
        text = text.replace(emoji, replacement)
    # Catch-all for emoji that have no explicit mapping. Note that this range
    # also covers supplementary-plane CJK ideographs (U+20000 and above),
    # which are therefore removed as well.
    text = re.sub(r'[\U00010000-\U0010ffff]', '', text)
    return text
def has_plantuml(content):
    """Report whether *content* contains a PlantUML diagram marker.

    A marker is either the ``@startuml`` directive or the opening of a
    fenced code block tagged ``plantuml``.

    Args:
        content: Markdown source as a string.

    Returns:
        True if at least one marker occurs in *content*, otherwise False.
    """
    markers = ('@startuml', '```plantuml')
    return any(marker in content for marker in markers)
def convert_md_to_pdf(md_path):
    """Convert one Markdown file to a PDF placed next to it, using pandoc.

    The file is read as UTF-8 and passed through ``replace_emoji``. If that
    changed anything, the cleaned text is written to a temporary
    ``<md_path>.tmp.md`` file beside the source and pandoc is run on the
    copy; otherwise pandoc reads the original file directly. Progress and
    failure messages are printed to stdout.

    Args:
        md_path: Path of the Markdown file to convert.

    Returns:
        True when pandoc exited with status 0 and the PDF exists afterwards;
        False otherwise (including when pandoc could not be started).
    """
    with open(md_path, 'r', encoding='utf-8') as f:
        content = f.read()

    pdf_path = os.path.splitext(md_path)[0] + ".pdf"
    rel_md = os.path.relpath(md_path, BASE_DIR)

    # Only go through a temporary copy when emoji cleaning altered the text.
    cleaned = replace_emoji(content)
    tmp_md = md_path + ".tmp.md" if cleaned != content else None

    pandoc_ok = False
    error_text = ""
    try:
        if tmp_md is not None:
            with open(tmp_md, 'w', encoding='utf-8') as f:
                f.write(cleaned)
        src = md_path if tmp_md is None else tmp_md
        cmd = PANDOC_BASE + [src, "-o", pdf_path]
        print(f" 转换: {rel_md}")
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except OSError as exc:
            # pandoc missing or not executable: report it for this file
            # instead of aborting the whole batch.
            error_text = str(exc)
        else:
            pandoc_ok = result.returncode == 0
            error_text = result.stderr
    finally:
        # Always drop the temporary copy; a leftover "*.tmp.md" would be
        # picked up as an input file by the next directory scan.
        if tmp_md is not None and os.path.exists(tmp_md):
            os.remove(tmp_md)

    # Checking the exit status as well as the file guards against a stale PDF
    # from an earlier run being mistaken for a fresh, successful conversion.
    if pandoc_ok and os.path.exists(pdf_path):
        print(f" ✓ 生成: {os.path.relpath(pdf_path, BASE_DIR)}")
        return True

    print(f" ✗ 失败: {rel_md}")
    if error_text:
        print(f" 错误: {error_text[:300]}")
    return False
def main():
    """Convert every ``*.md`` file under ``BASE_DIR`` and print a summary.

    Walks ``BASE_DIR`` recursively without descending into directories whose
    name starts with a dot, collects the Markdown files of each directory in
    sorted name order, runs ``convert_md_to_pdf`` on each and finally prints
    how many conversions succeeded and how many failed.
    """
    markdown_paths = []
    for folder, subfolders, filenames in os.walk(BASE_DIR):
        # Prune hidden directories in place so os.walk does not enter them.
        subfolders[:] = [name for name in subfolders if not name.startswith('.')]
        markdown_paths.extend(
            os.path.join(folder, name)
            for name in sorted(filenames)
            if name.endswith('.md')
        )

    print(f"共发现 {len(markdown_paths)} 个 Markdown 文件\n")

    outcomes = [convert_md_to_pdf(path) for path in markdown_paths]
    ok = sum(1 for converted in outcomes if converted)
    fail = len(outcomes) - ok

    print(f"\n完成:{ok} 个成功,{fail} 个失败")
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()