up

2026-04-25 10:12:52 +08:00
parent c59b4c225d
commit 6f97f46e69
44 changed files with 309 additions and 44 deletions
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""
+批量将 hk-bank 目录及子目录下的 *.md 文件转换为 PDF
+- 含 PlantUML 图表的文件使用 lua-filter 在线渲染
+- 含 emoji 的文件先替换为纯文本符号
+"""
+import os
+import re
+import subprocess
+import shutil
+
+# Root directory that is scanned recursively for Markdown files.
+# Defaults to the original hard-coded location; set MD2PDF_BASE_DIR to run
+# the script against a different tree without editing the file.
+BASE_DIR = os.environ.get(
+    "MD2PDF_BASE_DIR",
+    "/Users/jiahong/Documents/Job/docs/Certificates/hk-bank",
+)
+# Lua filter handed to pandoc on every conversion (see PANDOC_BASE).
+# Set MD2PDF_LUA_FILTER to point at a different filter file.
+LUA_FILTER = os.environ.get(
+    "MD2PDF_LUA_FILTER",
+    "/Users/jiahong/Documents/Job/docs/.vscode/plantuml-filter.lua",
+)
+# Fixed pandoc arguments shared by every conversion; the input file and
+# "-o <pdf>" are appended per file by the caller.
+PANDOC_BASE = [
+    "pandoc",
+    "--pdf-engine=xelatex",
+    "-V", "mainfont=STSong",
+    "-V", "monofont=STFangsong",
+    "-V", "geometry:margin=2.5cm",
+    "-V", "CJKmainfont=STSong",
+    "--lua-filter", LUA_FILTER,
+]
+
+# Emoji that are rewritten to a plain-text marker before conversion.
+# Entries are applied in insertion order, so the variation-selector form of
+# the warning sign must stay ahead of its bare form.
+EMOJI_MAP = {
+    "✅": "[OK]",
+    "❌": "[X]",
+    "⬜": "[ ]",
+    "⬛": "[#]",
+    "📄": "[doc]",
+    "⚠️": "[!]",
+    # Bare U+26A0 without an emoji variation selector; it is in the BMP, so
+    # it needs its own entry to get the same marker.
+    "\u26a0": "[!]",
+    "🔍": "[search]",
+    "📜": "[scroll]",
+    "📝": "[note]",
+}
+
+def replace_emoji(text):
+    """Return *text* with emoji rewritten to plain text or removed.
+
+    Every key of EMOJI_MAP is first replaced by its plain-text marker. Any
+    remaining character outside the Basic Multilingual Plane is then
+    deleted, except for planes 2 and 3 (U+20000-U+3FFFF), which hold CJK
+    ideograph extensions and must survive so that rare Chinese characters
+    are not silently dropped from the document.
+
+    Args:
+        text: Markdown source as a string.
+
+    Returns:
+        The cleaned string; identical to the input when nothing matched.
+    """
+    for emoji, replacement in EMOJI_MAP.items():
+        text = text.replace(emoji, replacement)
+    # Generic emoji removal: strip planes 1 and 4-16 but keep the CJK
+    # ideographic planes. A variation selector (U+FE0F) or zero-width joiner
+    # (U+200D) directly after a stripped character is removed with it so no
+    # invisible residue of the emoji sequence is left behind.
+    text = re.sub(
+        r'[\U00010000-\U0001ffff\U00040000-\U0010ffff][\ufe0f\u200d]*',
+        '',
+        text,
+    )
+    return text
+
+def has_plantuml(content):
+    """Report whether *content* contains a PlantUML marker.
+
+    Returns True when the text contains ``@startuml`` or the opening of a
+    fenced code block tagged ``plantuml``; otherwise False.
+    """
+    markers = ('@startuml', '```plantuml')
+    return any(marker in content for marker in markers)
+
+def convert_md_to_pdf(md_path):
+    """Convert one Markdown file to a PDF placed next to it.
+
+    The file is read as UTF-8 and passed through replace_emoji(). When that
+    changes the text, the cleaned copy is written to ``<md_path>.tmp.md``
+    and pandoc is run on that copy; otherwise pandoc reads the original
+    file. The scratch copy is always removed afterwards, even if launching
+    pandoc fails.
+
+    Args:
+        md_path: Path of the Markdown file to convert.
+
+    Returns:
+        True when pandoc exited with status 0 and the PDF exists, False
+        otherwise. Progress and error messages are printed to stdout.
+
+    Raises:
+        OSError, UnicodeDecodeError: if the source file cannot be read.
+    """
+    with open(md_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    pdf_path = os.path.splitext(md_path)[0] + ".pdf"
+    rel_md = os.path.relpath(md_path, BASE_DIR)
+
+    # Emoji handling: only materialise a scratch copy when the text changed.
+    cleaned = replace_emoji(content)
+    tmp_md = md_path + ".tmp.md" if cleaned != content else None
+    src = tmp_md if tmp_md is not None else md_path
+
+    cmd = PANDOC_BASE + [src, "-o", pdf_path]
+
+    print(f"  转换: {rel_md}")
+    succeeded = False
+    error_text = ""
+    try:
+        if tmp_md is not None:
+            with open(tmp_md, 'w', encoding='utf-8') as f:
+                f.write(cleaned)
+        # Decode explicitly as UTF-8 and tolerate stray bytes so an
+        # unexpected locale cannot turn a log message into a crash.
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+        )
+    except OSError as exc:
+        # e.g. pandoc is not installed or the scratch file is not writable;
+        # report it as a failed conversion so the batch can continue.
+        error_text = str(exc)
+    else:
+        # A PDF left over from an earlier run must not mask a failed
+        # conversion, so the exit status is checked as well as the file.
+        succeeded = result.returncode == 0 and os.path.exists(pdf_path)
+        error_text = result.stderr
+    finally:
+        if tmp_md is not None and os.path.exists(tmp_md):
+            os.remove(tmp_md)
+
+    if succeeded:
+        print(f"  ✓ 生成: {os.path.relpath(pdf_path, BASE_DIR)}")
+        return True
+
+    print(f"  ✗ 失败: {rel_md}")
+    if error_text:
+        print(f"    错误: {error_text[:300]}")
+    return False
+
+def main():
+    """Convert every Markdown file under BASE_DIR to PDF and print a summary.
+
+    Walks BASE_DIR recursively, skipping hidden directories, collects the
+    ``*.md`` files, converts each with convert_md_to_pdf(), and prints the
+    number of successes and failures.
+    """
+    md_files = []
+    for root, dirs, files in os.walk(BASE_DIR):
+        # Prune hidden directories in place so os.walk does not descend into
+        # them; sorting makes the traversal order deterministic.
+        dirs[:] = sorted(d for d in dirs if not d.startswith('.'))
+        md_files.extend(
+            os.path.join(root, name)
+            for name in sorted(files)
+            # "*.tmp.md" is the scratch copy written during conversion; one
+            # left behind by an interrupted run must not be treated as input.
+            if name.endswith('.md') and not name.endswith('.tmp.md')
+        )
+
+    print(f"共发现 {len(md_files)} 个 Markdown 文件\n")
+    ok = fail = 0
+    for md in md_files:
+        if convert_md_to_pdf(md):
+            ok += 1
+        else:
+            fail += 1
+
+    print(f"\n完成：{ok} 个成功，{fail} 个失败")
+
+# Run the batch conversion only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()