up
This commit is contained in:
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python3
|
||||
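"""
Batch-convert the *.md files under the hk-bank directory and its
subdirectories to PDF.

- Files containing PlantUML diagrams are rendered online through a Lua filter.
- Files containing emoji have them replaced with plain-text symbols first.
"""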
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import shutil
|
||||
|
||||
# Root directory that is walked recursively for Markdown sources.
BASE_DIR = "/Users/jiahong/Documents/Job/docs/Certificates/hk-bank"

# Lua filter handed to pandoc through --lua-filter on every conversion.
LUA_FILTER = "/Users/jiahong/Documents/Job/docs/.vscode/plantuml-filter.lua"

# Pandoc variables; each one is forwarded as its own "-V name=value" pair.
_PANDOC_VARIABLES = (
    "mainfont=STSong",
    "monofont=STFangsong",
    "geometry:margin=2.5cm",
    "CJKmainfont=STSong",
)

# Shared pandoc command prefix (executable, PDF engine, variables, filter).
PANDOC_BASE = [
    "pandoc",
    "--pdf-engine=xelatex",
    *(arg for variable in _PANDOC_VARIABLES for arg in ("-V", variable)),
    "--lua-filter",
    LUA_FILTER,
]
|
||||
|
||||
# Plain-text replacements for known emoji. Entries are applied in insertion
# order, so the two-code-point warning sign must stay ahead of its bare form.
EMOJI_MAP = {
    "✅": "[OK]",
    "❌": "[X]",
    "⬜": "[ ]",
    "⬛": "[#]",
    "📄": "[doc]",
    # WARNING SIGN followed by the emoji variation selector (U+26A0 U+FE0F).
    "\u26a0\ufe0f": "[!]",
    # The same sign without the variation selector.
    "\u26a0": "[!]",
    "🔍": "[search]",
    "📜": "[scroll]",
    "📝": "[note]",
}

# Supplementary-plane code points that get stripped. Planes 2 and 3
# (U+20000-U+3FFFF) are left out of the class because they hold the CJK
# ideograph extensions, which are ordinary Chinese text rather than emoji.
_NON_CJK_SUPPLEMENTARY_RE = re.compile(
    r'[\U00010000-\U0001ffff\U00040000-\U0010ffff]'
)


def replace_emoji(text):
    """Return *text* with emoji replaced by plain-text symbols or removed.

    Every key of ``EMOJI_MAP`` is replaced by its mapped string. After that,
    any remaining code point above the Basic Multilingual Plane is deleted,
    except those in planes 2-3 so that rare CJK ideographs survive.

    Args:
        text: The Markdown source as a string.

    Returns:
        The cleaned string; identical to *text* when nothing matched.
    """
    for emoji, replacement in EMOJI_MAP.items():
        text = text.replace(emoji, replacement)
    # Generic emoji removal (keeps Chinese/English text, punctuation, symbols).
    return _NON_CJK_SUPPLEMENTARY_RE.sub('', text)
|
||||
|
||||
def has_plantuml(content):
    """Return True when *content* contains a PlantUML marker.

    Two markers are recognised: the ``@startuml`` directive and a fenced
    code block opened with three backticks followed by ``plantuml``.
    """
    markers = ('@startuml', '```plantuml')
    return any(marker in content for marker in markers)
|
||||
|
||||
def convert_md_to_pdf(md_path):
    """Convert one Markdown file to a PDF written next to it.

    The file is read as UTF-8 and passed through ``replace_emoji``. When that
    changes the text, the cleaned text is written to a temporary
    ``<md_path>.tmp.md`` file which pandoc reads instead of the original. The
    temporary file is always removed again, even if starting pandoc raises.

    Args:
        md_path: Path of the Markdown source file.

    Returns:
        True if pandoc exited with status 0 and the PDF exists, else False.

    Raises:
        OSError: If the source cannot be read, the temporary file cannot be
            written, or the pandoc executable cannot be started.
        UnicodeDecodeError: If the source file is not valid UTF-8.
    """
    with open(md_path, 'r', encoding='utf-8') as f:
        content = f.read()

    pdf_path = os.path.splitext(md_path)[0] + ".pdf"

    # Emoji handling: only materialise a temporary copy when something changed.
    cleaned = replace_emoji(content)
    tmp_md = md_path + ".tmp.md" if cleaned != content else None
    src = md_path if tmp_md is None else tmp_md

    try:
        if tmp_md is not None:
            with open(tmp_md, 'w', encoding='utf-8') as f:
                f.write(cleaned)

        cmd = PANDOC_BASE + [src, "-o", pdf_path]

        print(f" 转换: {os.path.relpath(md_path, BASE_DIR)}")
        result = subprocess.run(cmd, capture_output=True, text=True)
    finally:
        # Clean up on every exit path so a failed launch leaves no stray
        # "*.tmp.md" file behind.
        if tmp_md is not None:
            try:
                os.remove(tmp_md)
            except FileNotFoundError:
                pass

    # Check the exit status as well as the file: a PDF left over from an
    # earlier run must not make a failed conversion look successful.
    if result.returncode == 0 and os.path.exists(pdf_path):
        print(f" ✓ 生成: {os.path.relpath(pdf_path, BASE_DIR)}")
        return True

    print(f" ✗ 失败: {os.path.relpath(md_path, BASE_DIR)}")
    if result.stderr:
        # Only the first 300 characters of pandoc's stderr are shown.
        print(f" 错误: {result.stderr[:300]}")
    return False
|
||||
|
||||
def main():
    """Convert every Markdown file under ``BASE_DIR`` and print a summary.

    Walks ``BASE_DIR`` recursively, skipping directories whose name starts
    with a dot, collects the ``*.md`` files in sorted order, calls
    ``convert_md_to_pdf`` on each one and finally prints how many conversions
    succeeded and how many failed.
    """
    md_files = []
    for root, dirs, files in os.walk(BASE_DIR):
        # Skip hidden directories. Assigning to the slice prunes the walk in
        # place; sorting makes the traversal order deterministic, matching
        # the sorted file order below.
        dirs[:] = sorted(d for d in dirs if not d.startswith('.'))
        for name in sorted(files):
            if not name.endswith('.md'):
                continue
            # "<name>.md.tmp.md" is the scratch file convert_md_to_pdf writes;
            # one left behind by an interrupted run is not a real source.
            if name.endswith('.md.tmp.md'):
                continue
            md_files.append(os.path.join(root, name))

    print(f"共发现 {len(md_files)} 个 Markdown 文件\n")

    ok, fail = 0, 0
    for md in md_files:
        if convert_md_to_pdf(md):
            ok += 1
        else:
            fail += 1

    print(f"\n完成:{ok} 个成功,{fail} 个失败")
|
||||
|
||||
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user