#!/usr/bin/env python3
"""
Batch-convert every *.md file under the hk-bank directory (recursively) to PDF.

- pandoc is always invoked with the PlantUML lua-filter listed in PANDOC_BASE.
- Files containing emoji are converted from a temporary copy in which the
  emoji have been replaced with plain-text markers.
"""

import os
import re
import subprocess
import shutil

BASE_DIR = "/Users/jiahong/Documents/Job/docs/Certificates/hk-bank"
LUA_FILTER = "/Users/jiahong/Documents/Job/docs/.vscode/plantuml-filter.lua"

# Arguments shared by every pandoc invocation; the source path and the
# "-o <pdf>" pair are appended per file.
PANDOC_BASE = [
    "pandoc",
    "--pdf-engine=xelatex",
    "-V", "mainfont=STSong",
    "-V", "monofont=STFangsong",
    "-V", "geometry:margin=2.5cm",
    "-V", "CJKmainfont=STSong",
    "--lua-filter", LUA_FILTER,
]

# Emoji that carry meaning in the documents, mapped to ASCII stand-ins.
EMOJI_MAP = {
    "✅": "[OK]",
    "❌": "[X]",
    "⬜": "[ ]",
    "⬛": "[#]",
    "📄": "[doc]",
    "\u26a0\ufe0f": "[!]",  # warning sign followed by the emoji variation selector
    "\u26a0": "[!]",        # bare warning sign (no variation selector)
    "🔍": "[search]",
    "📜": "[scroll]",
    "📝": "[note]",
}

# Characters dropped after the explicit replacements above:
#   U+FE0F              emoji variation selector left behind by a replaced emoji
#   U+10000-U+1FFFF     plane 1, which holds the emoji blocks
#   U+40000-U+10FFFF    remaining supplementary planes
# Planes 2-3 (U+20000-U+3FFFF) are deliberately kept: they contain the CJK
# Unified Ideographs extensions, i.e. real Chinese text.
_STRIP_RE = re.compile(r'[\ufe0f\U00010000-\U0001ffff\U00040000-\U0010ffff]')


def replace_emoji(text):
    """Return *text* with emoji replaced by plain-text markers or removed.

    Emoji listed in EMOJI_MAP become their ASCII marker; any other character
    matched by _STRIP_RE is deleted. Chinese/English text, punctuation and
    BMP symbols are left untouched.
    """
    for emoji, replacement in EMOJI_MAP.items():
        text = text.replace(emoji, replacement)
    return _STRIP_RE.sub('', text)


def has_plantuml(content):
    """Return True if *content* contains a PlantUML diagram marker."""
    return '@startuml' in content or '```plantuml' in content


def convert_md_to_pdf(md_path):
    """Convert one Markdown file to a PDF with the same base name.

    If the file contains emoji, a cleaned copy is written next to it as
    ``<md_path>.tmp.md``, used as the pandoc input, and removed afterwards.

    Args:
        md_path: Path of the Markdown file to convert.

    Returns:
        True if pandoc exited with status 0 and the PDF exists, else False.
    """
    with open(md_path, 'r', encoding='utf-8') as f:
        content = f.read()

    pdf_path = os.path.splitext(md_path)[0] + ".pdf"

    cleaned = replace_emoji(content)
    need_clean = cleaned != content
    src = md_path + ".tmp.md" if need_clean else md_path

    print(f" 转换: {os.path.relpath(md_path, BASE_DIR)}")

    try:
        if need_clean:
            with open(src, 'w', encoding='utf-8') as f:
                f.write(cleaned)
        result = subprocess.run(
            PANDOC_BASE + [src, "-o", pdf_path],
            capture_output=True,
            text=True,
            errors="replace",  # never fail on undecodable tool output
        )
        stderr = result.stderr
        # A PDF left over from an earlier run must not mask a failed
        # conversion, so the exit status is checked as well.
        succeeded = result.returncode == 0 and os.path.exists(pdf_path)
    except OSError as exc:
        # E.g. pandoc is not installed: report it as a failure for this
        # file instead of aborting the whole batch.
        stderr = str(exc)
        succeeded = False
    finally:
        # Always remove the temporary copy, even when pandoc could not run.
        if need_clean and os.path.exists(src):
            os.remove(src)

    if succeeded:
        print(f" ✓ 生成: {os.path.relpath(pdf_path, BASE_DIR)}")
        return True

    print(f" ✗ 失败: {os.path.relpath(md_path, BASE_DIR)}")
    if stderr:
        print(f" 错误: {stderr[:300]}")
    return False


def main():
    """Find all *.md files under BASE_DIR, convert each, and print a summary."""
    md_files = []
    for root, dirs, files in os.walk(BASE_DIR):
        # Prune hidden directories in place so os.walk does not descend into
        # them; sorting makes the traversal order deterministic.
        dirs[:] = sorted(d for d in dirs if not d.startswith('.'))
        md_files.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.endswith('.md')
        )

    print(f"共发现 {len(md_files)} 个 Markdown 文件\n")

    ok, fail = 0, 0
    for md in md_files:
        if convert_md_to_pdf(md):
            ok += 1
        else:
            fail += 1

    print(f"\n完成:{ok} 个成功,{fail} 个失败")


if __name__ == "__main__":
    main()