188 lines
5.3 KiB
Python
188 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
批量将 Markdown 转为 PDF —— 通过 Google Chrome Headless 浏览器渲染
|
|
1. Markdown → HTML(含代码高亮、表格样式)
|
|
2. HTML → 写入临时文件 → Chrome --headless --print-to-pdf
|
|
"""
|
|
import glob
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import markdown
|
|
from markdown.extensions.tables import TableExtension
|
|
from markdown.extensions.fenced_code import FencedCodeExtension
|
|
from markdown.extensions.codehilite import CodeHiliteExtension
|
|
from markdown.extensions.toc import TocExtension
|
|
|
|
# ─── Configuration ───
# Directory containing this script; every glob below is resolved against it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Glob patterns for the numbered input directories: "01-*" through "13-*".
DIRS = [f"{i:02d}-*" for i in range(1, 14)]
# File-name globs selecting the Markdown documents to convert.
PATTERNS = ["*-鉴别材料.md", "*-源程序.md"]
# Chrome executable. The CHROME_PATH environment variable overrides the
# macOS default so the script can run where Chrome lives elsewhere; an
# unset or empty variable falls back to the default.
CHROME = (
    os.environ.get("CHROME_PATH")
    or "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
)
|
|
|
|
CSS = """
|
|
@page {
|
|
size: A4;
|
|
margin: 20mm 18mm 20mm 18mm;
|
|
}
|
|
body {
|
|
font-family: "PingFang SC", "Microsoft YaHei", "STSong", "Noto Sans CJK SC", sans-serif;
|
|
font-size: 11pt;
|
|
line-height: 1.7;
|
|
color: #222;
|
|
max-width: 100%;
|
|
padding: 0 10px;
|
|
margin: 0;
|
|
}
|
|
h1 { font-size: 20pt; border-bottom: 2px solid #333; padding-bottom: 6px; margin-top: 24pt; }
|
|
h2 { font-size: 16pt; border-bottom: 1px solid #999; padding-bottom: 4px; margin-top: 20pt; }
|
|
h3 { font-size: 13pt; margin-top: 16pt; }
|
|
h4 { font-size: 12pt; margin-top: 12pt; }
|
|
table {
|
|
border-collapse: collapse;
|
|
width: 100%;
|
|
margin: 12px 0;
|
|
font-size: 10pt;
|
|
page-break-inside: auto;
|
|
}
|
|
th, td {
|
|
border: 1px solid #bbb;
|
|
padding: 6px 10px;
|
|
text-align: left;
|
|
}
|
|
th { background-color: #f0f0f0; font-weight: bold; }
|
|
tr:nth-child(even) { background-color: #fafafa; }
|
|
code {
|
|
font-family: "SF Mono", "Menlo", "Consolas", "Monaco", monospace;
|
|
font-size: 9.5pt;
|
|
background: #f5f5f5;
|
|
padding: 1px 4px;
|
|
border-radius: 3px;
|
|
}
|
|
pre {
|
|
background: #f6f8fa;
|
|
border: 1px solid #e1e4e8;
|
|
border-radius: 6px;
|
|
padding: 12px 16px;
|
|
overflow-x: auto;
|
|
font-size: 8.5pt;
|
|
line-height: 1.5;
|
|
page-break-inside: auto;
|
|
white-space: pre-wrap;
|
|
word-wrap: break-word;
|
|
}
|
|
pre code {
|
|
background: transparent;
|
|
padding: 0;
|
|
font-size: inherit;
|
|
}
|
|
blockquote {
|
|
border-left: 4px solid #dfe2e5;
|
|
padding: 4px 16px;
|
|
margin: 12px 0;
|
|
color: #555;
|
|
background: #f9f9f9;
|
|
}
|
|
img { max-width: 100%; }
|
|
hr { border: none; border-top: 1px solid #ddd; margin: 20px 0; }
|
|
.codehilite { background: #f6f8fa; border-radius: 6px; padding: 12px 16px; }
|
|
"""
|
|
|
|
def md_to_html(md_path):
    """Render a Markdown file as a complete, standalone HTML page.

    The Markdown is converted with the tables, fenced-code, codehilite and
    toc extensions, then wrapped in an HTML5 document that embeds the
    module-level ``CSS`` stylesheet and uses the file's base name (without
    extension) as the page title.

    Args:
        md_path: Path of the Markdown file to read (UTF-8, BOM tolerated).

    Returns:
        The full HTML document as a string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Function-scope import so the file's import block needs no change.
    from html import escape

    # 'utf-8-sig' reads plain UTF-8 unchanged but strips a leading BOM,
    # which would otherwise stop a first-line heading from being recognised.
    with open(md_path, 'r', encoding='utf-8-sig') as f:
        md_text = f.read()

    extensions = [
        TableExtension(),
        FencedCodeExtension(),
        CodeHiliteExtension(css_class='codehilite', guess_lang=False),
        TocExtension(permalink=False),
    ]
    html_body = markdown.markdown(md_text, extensions=extensions)

    # The title comes from a file name, so escape it: characters such as
    # '&' or '<' must not be interpreted as markup inside <title>.
    title = escape(os.path.splitext(os.path.basename(md_path))[0])
    return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<title>{title}</title>
<style>{CSS}</style>
</head>
<body>
{html_body}
</body>
</html>"""
|
|
|
|
|
|
def html_to_pdf_chrome(html_content, pdf_path):
    """Convert an HTML document to PDF with headless Google Chrome.

    The HTML is written to a temporary ``.html`` file, Chrome (the
    module-level ``CHROME`` executable) is run with ``--print-to-pdf``,
    and the temporary file is removed afterwards.

    Args:
        html_content: Complete HTML document text.
        pdf_path: Destination path of the PDF.

    Returns:
        True only if this run produced a non-empty PDF at ``pdf_path``;
        False if Chrome could not be started, timed out, or left no new
        output. A PDF left over from an earlier run does not count.
    """
    # Snapshot any pre-existing output so a stale file from a previous run
    # cannot be mistaken for the result of this one.
    try:
        previous_mtime = os.stat(pdf_path).st_mtime_ns
    except OSError:
        previous_mtime = None

    with tempfile.NamedTemporaryFile(suffix='.html', mode='w', encoding='utf-8', delete=False) as f:
        f.write(html_content)
        html_path = f.name

    try:
        cmd = [
            CHROME,
            '--headless',
            '--disable-gpu',
            '--no-sandbox',
            '--disable-software-rasterizer',
            '--run-all-compositor-stages-before-draw',
            f'--print-to-pdf={pdf_path}',
            '--no-pdf-header-footer',
            html_path,
        ]
        # errors='replace' keeps undecodable bytes in Chrome's output from
        # raising while the captured streams are decoded.
        proc = subprocess.run(
            cmd, capture_output=True, text=True, errors='replace', timeout=60
        )

        try:
            result = os.stat(pdf_path)
        except OSError:
            result = None
        fresh = (
            result is not None
            and result.st_size > 0
            and result.st_mtime_ns != previous_mtime
        )
        if not fresh:
            # Surface Chrome's own diagnostics instead of discarding them.
            stderr_lines = (proc.stderr or '').strip().splitlines()
            detail = stderr_lines[-1] if stderr_lines else ''
            print(f"  Chrome 退出码 {proc.returncode}: {detail}")
        return fresh
    except (OSError, subprocess.SubprocessError) as e:
        # Missing/non-executable Chrome binary, or the 60 s timeout expired.
        print(f"  Chrome 错误: {e}")
        return False
    finally:
        os.unlink(html_path)
|
|
|
|
|
|
def _collect_md_files():
    """Return all Markdown files matching DIRS x PATTERNS under BASE_DIR."""
    md_files = []
    for dir_pattern in DIRS:
        for pattern in PATTERNS:
            found = sorted(glob.glob(os.path.join(BASE_DIR, dir_pattern, pattern)))
            md_files.extend(found)
    return md_files


def main():
    """Convert every matching Markdown file to a PDF next to its source.

    Each file is rendered with ``md_to_html`` and printed with
    ``html_to_pdf_chrome``; a failure on one file is reported and counted
    without stopping the batch.

    Returns:
        0 if every file was converted; 1 if no input file was found or at
        least one conversion failed. Suitable as a process exit status.
    """
    md_files = _collect_md_files()

    if not md_files:
        print("❌ 未找到目标 md 文件")
        return 1

    total = len(md_files)
    print(f"找到 {total} 个 Markdown 文件待转换")
    print("使用 Chrome Headless 浏览器渲染\n")

    success = 0
    failed = 0
    for i, md_path in enumerate(md_files, 1):
        rel = os.path.relpath(md_path, BASE_DIR)
        pdf_name = os.path.splitext(os.path.basename(md_path))[0] + '.pdf'
        pdf_path = os.path.join(os.path.dirname(md_path), pdf_name)

        print(f"[{i:2d}/{total}] {rel}")
        # Batch boundary: any error on one file is reported and the run
        # continues with the next file.
        try:
            html = md_to_html(md_path)
            if html_to_pdf_chrome(html, pdf_path):
                size_kb = os.path.getsize(pdf_path) / 1024
                print(f"  ✅ → {pdf_name} ({size_kb:.0f} KB)")
                success += 1
            else:
                print("  ❌ PDF 生成失败")
                failed += 1
        except Exception as e:
            print(f"  ❌ 错误: {e}")
            failed += 1

    print(f"\n{'='*50}")
    print(f"转换完成: {success} 成功, {failed} 失败, 共 {total} 个文件")
    return 1 if failed else 0


if __name__ == '__main__':
    # Propagate the batch result so shells and CI can detect failures.
    raise SystemExit(main())
|