软件著作权

2026-04-08 08:19:24 +08:00
parent 6da6c9e632
commit 696dc36ca7
27 changed files with 187 additions and 0 deletions
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+"""
+批量将 Markdown 转为 PDF —— 通过 Google Chrome Headless 浏览器渲染
+1. Markdown → HTML（含代码高亮、表格样式）
+2. HTML → 写入临时文件 → Chrome --headless --print-to-pdf
+"""
+import glob
+import os
+import subprocess
+import tempfile
+import markdown
+from markdown.extensions.tables import TableExtension
+from markdown.extensions.fenced_code import FencedCodeExtension
+from markdown.extensions.codehilite import CodeHiliteExtension
+from markdown.extensions.toc import TocExtension
+
+# ─── Configuration ───
+# Directory that contains this script; used as the root for the glob patterns.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Directory glob patterns "01-*" through "13-*" (two-digit, zero-padded prefix).
+DIRS = [f"{i:02d}-*" for i in range(1, 14)]
+# File-name glob patterns selecting the Markdown files to convert.
+PATTERNS = ["*-鉴别材料.md", "*-源程序.md"]
+# Absolute path of the Google Chrome executable (macOS application bundle path).
+CHROME = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+
+# Stylesheet embedded verbatim into every generated HTML page: A4 page box with
+# margins, a CJK-capable font stack, and rules for headings, tables, inline and
+# block code, blockquotes, images, horizontal rules and the codehilite wrapper.
+CSS = """
+@page {
+    size: A4;
+    margin: 20mm 18mm 20mm 18mm;
+}
+body {
+    font-family: "PingFang SC", "Microsoft YaHei", "STSong", "Noto Sans CJK SC", sans-serif;
+    font-size: 11pt;
+    line-height: 1.7;
+    color: #222;
+    max-width: 100%;
+    padding: 0 10px;
+    margin: 0;
+}
+h1 { font-size: 20pt; border-bottom: 2px solid #333; padding-bottom: 6px; margin-top: 24pt; }
+h2 { font-size: 16pt; border-bottom: 1px solid #999; padding-bottom: 4px; margin-top: 20pt; }
+h3 { font-size: 13pt; margin-top: 16pt; }
+h4 { font-size: 12pt; margin-top: 12pt; }
+table {
+    border-collapse: collapse;
+    width: 100%;
+    margin: 12px 0;
+    font-size: 10pt;
+    page-break-inside: auto;
+}
+th, td {
+    border: 1px solid #bbb;
+    padding: 6px 10px;
+    text-align: left;
+}
+th { background-color: #f0f0f0; font-weight: bold; }
+tr:nth-child(even) { background-color: #fafafa; }
+code {
+    font-family: "SF Mono", "Menlo", "Consolas", "Monaco", monospace;
+    font-size: 9.5pt;
+    background: #f5f5f5;
+    padding: 1px 4px;
+    border-radius: 3px;
+}
+pre {
+    background: #f6f8fa;
+    border: 1px solid #e1e4e8;
+    border-radius: 6px;
+    padding: 12px 16px;
+    overflow-x: auto;
+    font-size: 8.5pt;
+    line-height: 1.5;
+    page-break-inside: auto;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+pre code {
+    background: transparent;
+    padding: 0;
+    font-size: inherit;
+}
+blockquote {
+    border-left: 4px solid #dfe2e5;
+    padding: 4px 16px;
+    margin: 12px 0;
+    color: #555;
+    background: #f9f9f9;
+}
+img { max-width: 100%; }
+hr { border: none; border-top: 1px solid #ddd; margin: 20px 0; }
+.codehilite { background: #f6f8fa; border-radius: 6px; padding: 12px 16px; }
+"""
+
+def md_to_html(md_path):
+    """将 Markdown 文件转换为完整的 HTML 页面"""
+    with open(md_path, 'r', encoding='utf-8') as f:
+        md_text = f.read()
+
+    extensions = [
+        TableExtension(),
+        FencedCodeExtension(),
+        CodeHiliteExtension(css_class='codehilite', guess_lang=False),
+        TocExtension(permalink=False),
+    ]
+    html_body = markdown.markdown(md_text, extensions=extensions)
+
+    title = os.path.splitext(os.path.basename(md_path))[0]
+    html = f"""<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="utf-8">
+<title>{title}</title>
+<style>{CSS}</style>
+</head>
+<body>
+{html_body}
+</body>
+</html>"""
+    return html
+
+
+def html_to_pdf_chrome(html_content, pdf_path):
+    """使用 Google Chrome headless 将 HTML 转为 PDF"""
+    with tempfile.NamedTemporaryFile(suffix='.html', mode='w', encoding='utf-8', delete=False) as f:
+        f.write(html_content)
+        html_path = f.name
+
+    try:
+        cmd = [
+            CHROME,
+            '--headless',
+            '--disable-gpu',
+            '--no-sandbox',
+            '--disable-software-rasterizer',
+            '--run-all-compositor-stages-before-draw',
+            f'--print-to-pdf={pdf_path}',
+            '--no-pdf-header-footer',
+            html_path,
+        ]
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+        return os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0
+    except Exception as e:
+        print(f"  Chrome 错误: {e}")
+        return False
+    finally:
+        os.unlink(html_path)
+
+
+def main():
+    # 收集所有目标文件
+    md_files = []
+    for dir_pattern in DIRS:
+        for pattern in PATTERNS:
+            found = sorted(glob.glob(os.path.join(BASE_DIR, dir_pattern, pattern)))
+            md_files.extend(found)
+
+    if not md_files:
+        print("❌ 未找到目标 md 文件")
+        return
+
+    print(f"找到 {len(md_files)} 个 Markdown 文件待转换")
+    print(f"使用 Chrome Headless 浏览器渲染\n")
+
+    success = 0
+    failed = 0
+    for i, md_path in enumerate(md_files, 1):
+        rel = os.path.relpath(md_path, BASE_DIR)
+        pdf_name = os.path.splitext(os.path.basename(md_path))[0] + '.pdf'
+        pdf_path = os.path.join(os.path.dirname(md_path), pdf_name)
+
+        print(f"[{i:2d}/{len(md_files)}] {rel}")
+        try:
+            html = md_to_html(md_path)
+            if html_to_pdf_chrome(html, pdf_path):
+                size_kb = os.path.getsize(pdf_path) / 1024
+                print(f"       ✅ → {pdf_name} ({size_kb:.0f} KB)")
+                success += 1
+            else:
+                print(f"       ❌ PDF 生成失败")
+                failed += 1
+        except Exception as e:
+            print(f"       ❌ 错误: {e}")
+            failed += 1
+
+    print(f"\n{'='*50}")
+    print(f"转换完成: {success} 成功, {failed} 失败, 共 {len(md_files)} 个文件")
+
+
+# Run the batch conversion only when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()